Signed-off-by: Qu,Pengfei <pengfei...@intel.com>
---
 src/gen9_mfc_hevc.c | 317 +++++++++++++++++++++++++++-------------------------
 1 file changed, 164 insertions(+), 153 deletions(-)

diff --git a/src/gen9_mfc_hevc.c b/src/gen9_mfc_hevc.c
index 927e36b..94eb38a 100644
--- a/src/gen9_mfc_hevc.c
+++ b/src/gen9_mfc_hevc.c
@@ -1146,20 +1146,18 @@ gen9_hcpe_hevc_fill_indirect_cu_intra(VADriverContextP 
ctx,
                                       int qp, unsigned int *msg,
                                       int ctb_x, int ctb_y,
                                       int mb_x, int mb_y,
-                                      int ctb_width_in_mb, int width_in_ctb, 
int num_cu_record, int slice_type)
+                                      int ctb_width_in_mb, int width_in_ctb, 
int num_cu_record, int slice_type,int cu_index,int index)
 {
     /* here cu == mb, so we use mb address as the cu address */
     /* to fill the indirect cu by the vme out */
-    static int mb_addr_raster_to_zigzag_64[4][4] = { {0, 1, 4, 5}, {2, 3, 6, 
7}, {8, 9, 12, 13}, {10, 11, 14, 15} };
-    static int mb_addr_raster_to_zigzag_32[2][2] = { {0, 1}, {2, 3 } };
     static int intra_mode_8x8_avc2hevc[9] = {26, 10, 1, 34, 18, 24, 13, 28, 8};
     static int intra_mode_16x16_avc2hevc[4] = {26, 10, 1, 34};
     struct gen9_hcpe_context *mfc_context = encoder_context->mfc_context;
     unsigned char * cu_record_ptr = NULL;
     unsigned int * cu_msg = NULL;
     int ctb_address = (ctb_y * width_in_ctb + ctb_x) * num_cu_record;
-    int mb_address_in_ctb = ((ctb_width_in_mb == 4) ? 
mb_addr_raster_to_zigzag_64[mb_x][mb_y] : ((ctb_width_in_mb == 2) ? 
mb_addr_raster_to_zigzag_32[mb_x][mb_y] : 0));
-    int cu_address = (ctb_address + mb_address_in_ctb) * 16 * 4;
+    int mb_address_in_ctb = 0;
+    int cu_address = (ctb_address + mb_address_in_ctb + cu_index) * 16 * 4;
     int zero = 0;
     int is_inter = 0;
     int intraMbMode = 0;
@@ -1167,6 +1165,9 @@ gen9_hcpe_hevc_fill_indirect_cu_intra(VADriverContextP 
ctx,
     int intraMode[4];
     int inerpred_idc = 0;
     int intra_chroma_mode = 5;
+    int cu_size = 1;
+    int tu_size = 0x55;
+    int tu_count = 4;
 
     if (!is_inter) inerpred_idc = 0xff;
 
@@ -1176,29 +1177,37 @@ gen9_hcpe_hevc_fill_indirect_cu_intra(VADriverContextP 
ctx,
     if (intraMbMode == AVC_INTRA_16X16) {
         cu_part_mode = 0; //2Nx2N
         intra_chroma_mode = 5;
+        cu_size = 1;
+        tu_size = 0x55;
+        tu_count = 4;
         intraMode[0] = intra_mode_16x16_avc2hevc[msg[1] & 0xf];
         intraMode[1] = intra_mode_16x16_avc2hevc[msg[1] & 0xf];
         intraMode[2] = intra_mode_16x16_avc2hevc[msg[1] & 0xf];
         intraMode[3] = intra_mode_16x16_avc2hevc[msg[1] & 0xf];
     } else if (intraMbMode == AVC_INTRA_8X8) {
-        cu_part_mode = 3; //NxN
-        intra_chroma_mode = 0;
-        intraMode[0] = intra_mode_8x8_avc2hevc[msg[1] & 0xf];
-        intraMode[1] = intra_mode_8x8_avc2hevc[(msg[1] >> 4) & 0xf];
-        intraMode[2] = intra_mode_8x8_avc2hevc[(msg[1] >> 8) & 0xf];
-        intraMode[3] = intra_mode_8x8_avc2hevc[(msg[1] >> 12) & 0xf];
+        cu_part_mode = 0; //2Nx2N
+        intra_chroma_mode = 5;
+        cu_size = 0;
+        tu_size = 0;
+        tu_count = 4;
+        intraMode[0] = intra_mode_8x8_avc2hevc[msg[1] >> (index << 2) & 0xf];
+        intraMode[1] = intra_mode_8x8_avc2hevc[msg[1] >> (index << 2) & 0xf];
+        intraMode[2] = intra_mode_8x8_avc2hevc[msg[1] >> (index << 2) & 0xf];
+        intraMode[3] = intra_mode_8x8_avc2hevc[msg[1] >> (index << 2) & 0xf];
 
     } else { // for 4x4 to use 8x8 replace
         cu_part_mode = 3; //NxN
         intra_chroma_mode = 0;
-        intraMode[0] = intra_mode_8x8_avc2hevc[0];
-        intraMode[1] = intra_mode_8x8_avc2hevc[0];
-        intraMode[2] = intra_mode_8x8_avc2hevc[0];
-        intraMode[3] = intra_mode_8x8_avc2hevc[0];
+        cu_size = 0;
+        tu_size = 0;
+        tu_count = 4;
+        intraMode[0] = intra_mode_8x8_avc2hevc[msg[1] >> ((index << 4) + 0) & 
0xf];
+        intraMode[1] = intra_mode_8x8_avc2hevc[msg[1] >> ((index << 4) + 4) & 
0xf];
+        intraMode[2] = intra_mode_8x8_avc2hevc[msg[1] >> ((index << 4) + 8) & 
0xf];
+        intraMode[3] = intra_mode_8x8_avc2hevc[msg[1] >> ((index << 4) + 12) & 
0xf];
 
     }
 
-    dri_bo_map(mfc_context->hcp_indirect_cu_object.bo , 1);
     cu_record_ptr = (unsigned char 
*)mfc_context->hcp_indirect_cu_object.bo->virtual;
     /* get the mb info from the vme out */
     cu_msg = (unsigned int *)(cu_record_ptr + cu_address);
@@ -1212,7 +1221,7 @@ gen9_hcpe_hevc_fill_indirect_cu_intra(VADriverContextP 
ctx,
                  cu_part_mode << 4 |    /* cu_part_mode */
                  zero << 3 |    /* cu_transquant_bypass_flag */
                  is_inter << 2 |    /* cu_pred_mode :intra 1,inter 1*/
-                 1          /* cu_size */
+                 cu_size          /* cu_size */
                 );
     cu_msg[1] = (zero << 30 |   /* reserved  */
                  intraMode[3] << 24 |   /* intra_mode */
@@ -1260,8 +1269,8 @@ gen9_hcpe_hevc_fill_indirect_cu_intra(VADriverContextP 
ctx,
                   zero          /* ref_idx_l0[0] */
                  );
 
-    cu_msg[11] = 0x55; /* tu_size 00000000 00000000 00000000 10101010  or 0x0*/
-    cu_msg[12] = (3 << 28 | /* tu count - 1 */
+    cu_msg[11] = tu_size; /* tu_size 00000000 00000000 00000000 10101010  or 
0x0*/
+    cu_msg[12] = ((tu_count - 1) << 28 | /* tu count - 1 */
                   zero << 16 |  /* reserved  */
                   zero          /* tu_xform_Yskip[15:0] */
                  );
@@ -1270,9 +1279,6 @@ gen9_hcpe_hevc_fill_indirect_cu_intra(VADriverContextP 
ctx,
                  );
     cu_msg[14] = zero ;
     cu_msg[15] = zero ;
-
-    dri_bo_unmap(mfc_context->hcp_indirect_cu_object.bo);
-
 }
 
 /* here 1 MB = 1CU = 16x16 */
@@ -1283,24 +1289,24 @@ gen9_hcpe_hevc_fill_indirect_cu_inter(VADriverContextP 
ctx,
                                       int qp, unsigned int *msg,
                                       int ctb_x, int ctb_y,
                                       int mb_x, int mb_y,
-                                      int ctb_width_in_mb, int width_in_ctb, 
int num_cu_record, int slice_type)
+                                      int ctb_width_in_mb, int width_in_ctb, 
int num_cu_record, int slice_type, int cu_index,int index)
 {
     /* here cu == mb, so we use mb address as the cu address */
     /* to fill the indirect cu by the vme out */
-    static int mb_addr_raster_to_zigzag_64[4][4] = { {0, 1, 4, 5}, {2, 3, 6, 
7}, {8, 9, 12, 13}, {10, 11, 14, 15} };
-    static int mb_addr_raster_to_zigzag_32[2][2] = { {0, 1}, {2, 3 } };
-
     struct gen9_hcpe_context *mfc_context = encoder_context->mfc_context;
     struct gen6_vme_context *vme_context = encoder_context->vme_context;
     unsigned char * cu_record_ptr = NULL;
     unsigned int * cu_msg = NULL;
     int ctb_address = (ctb_y * width_in_ctb + ctb_x) * num_cu_record;
-    int mb_address_in_ctb = ((ctb_width_in_mb == 4) ? 
mb_addr_raster_to_zigzag_64[mb_x][mb_y] : ((ctb_width_in_mb == 2) ? 
mb_addr_raster_to_zigzag_32[mb_x][mb_y] : 0));
-    int cu_address = (ctb_address + mb_address_in_ctb) * 16 * 4;
+    int mb_address_in_ctb = 0;
+    int cu_address = (ctb_address + mb_address_in_ctb + cu_index) * 16 * 4;
     int zero = 0;
     int cu_part_mode = 0;
     int submb_pre_mode = 0;
     int is_inter = 1;
+    int cu_size = 1;
+    int tu_size = 0x55;
+    int tu_count = 4;
 
     unsigned int *mv_ptr;
     {
@@ -1314,7 +1320,6 @@ gen9_hcpe_hevc_fill_indirect_cu_inter(VADriverContextP 
ctx,
         /* 0/2/4/6/8... ??? l0, 1/3/5/7...: l1 ; now it only support 
16x16,16x8,8x16,8x8*/
 
         if ((msg[0] & AVC_INTER_MODE_MASK) == AVC_INTER_16X16) {
-            // MV[0] and MV[2] are replicated
             mv_ptr[4] = mv_ptr[0];
             mv_ptr[5] = mv_ptr[1];
             mv_ptr[2] = mv_ptr[0];
@@ -1322,8 +1327,10 @@ gen9_hcpe_hevc_fill_indirect_cu_inter(VADriverContextP 
ctx,
             mv_ptr[6] = mv_ptr[0];
             mv_ptr[7] = mv_ptr[1];
             cu_part_mode = 0;
+            cu_size = 1;
+            tu_size = 0x55;
+            tu_count = 4;
         } else if ((msg[0] & AVC_INTER_MODE_MASK) == AVC_INTER_8X16) {
-            // MV[0] and MV[2] are replicated
             mv_ptr[4] = mv_ptr[0];
             mv_ptr[5] = mv_ptr[1];
             mv_ptr[2] = mv_ptr[8];
@@ -1331,8 +1338,10 @@ gen9_hcpe_hevc_fill_indirect_cu_inter(VADriverContextP 
ctx,
             mv_ptr[6] = mv_ptr[8];
             mv_ptr[7] = mv_ptr[9];
             cu_part_mode = 1;
+            cu_size = 1;
+            tu_size = 0x55;
+            tu_count = 4;
         } else if ((msg[0] & AVC_INTER_MODE_MASK) == AVC_INTER_16X8) {
-            // MV[0] and MV[1] are replicated
             mv_ptr[2] = mv_ptr[0];
             mv_ptr[3] = mv_ptr[1];
             mv_ptr[4] = mv_ptr[16];
@@ -1340,30 +1349,39 @@ gen9_hcpe_hevc_fill_indirect_cu_inter(VADriverContextP 
ctx,
             mv_ptr[6] = mv_ptr[24];
             mv_ptr[7] = mv_ptr[25];
             cu_part_mode = 2;
-        } else if (((msg[0] & AVC_INTER_MODE_MASK) == AVC_INTER_8X8) &&
-                   !(msg[1] & SUBMB_SHAPE_MASK)) {
-            // Don't touch MV[0] or MV[1]
-            mv_ptr[2] = mv_ptr[8];
-            mv_ptr[3] = mv_ptr[9];
-            mv_ptr[4] = mv_ptr[16];
-            mv_ptr[5] = mv_ptr[17];
-            mv_ptr[6] = mv_ptr[24];
-            mv_ptr[7] = mv_ptr[25];
-            cu_part_mode = 3;
-        } else {
-            // Don't touch MV[0] or MV[1]
-            // default use 8x8
-            mv_ptr[2] = mv_ptr[8];
-            mv_ptr[3] = mv_ptr[9];
-            mv_ptr[4] = mv_ptr[16];
-            mv_ptr[5] = mv_ptr[17];
-            mv_ptr[6] = mv_ptr[24];
-            mv_ptr[7] = mv_ptr[25];
-            cu_part_mode = 3;
+            cu_size = 1;
+            tu_size = 0x55;
+            tu_count = 4;
+        }else if((msg[0] & AVC_INTER_MODE_MASK) == AVC_INTER_8X8) {
+            mv_ptr[0] = mv_ptr[index * 8 + 0 ];
+            mv_ptr[1] = mv_ptr[index * 8 + 1 ];
+            mv_ptr[2] = mv_ptr[index * 8 + 0 ];
+            mv_ptr[3] = mv_ptr[index * 8 + 1 ];
+            mv_ptr[4] = mv_ptr[index * 8 + 0 ];
+            mv_ptr[5] = mv_ptr[index * 8 + 1 ];
+            mv_ptr[6] = mv_ptr[index * 8 + 0 ];
+            mv_ptr[7] = mv_ptr[index * 8 + 1 ];
+            cu_part_mode = 0;
+            cu_size = 0;
+            tu_size = 0x0;
+            tu_count = 4;
+
+        }else
+        {
+            mv_ptr[4] = mv_ptr[0];
+            mv_ptr[5] = mv_ptr[1];
+            mv_ptr[2] = mv_ptr[0];
+            mv_ptr[3] = mv_ptr[1];
+            mv_ptr[6] = mv_ptr[0];
+            mv_ptr[7] = mv_ptr[1];
+            cu_part_mode = 0;
+            cu_size = 1;
+            tu_size = 0x55;
+            tu_count = 4;
+
         }
     }
 
-    dri_bo_map(mfc_context->hcp_indirect_cu_object.bo , 1);
     cu_record_ptr = (unsigned char 
*)mfc_context->hcp_indirect_cu_object.bo->virtual;
     /* get the mb info from the vme out */
     cu_msg = (unsigned int *)(cu_record_ptr + cu_address);
@@ -1377,7 +1395,7 @@ gen9_hcpe_hevc_fill_indirect_cu_inter(VADriverContextP 
ctx,
                  cu_part_mode << 4 |    /* cu_part_mode */
                  zero << 3 |    /* cu_transquant_bypass_flag */
                  is_inter << 2 |    /* cu_pred_mode :intra 1,inter 1*/
-                 1          /* cu_size */
+                 cu_size          /* cu_size */
                 );
     cu_msg[1] = (zero << 30 |   /* reserved  */
                  zero << 24 |   /* intra_mode */
@@ -1425,8 +1443,8 @@ gen9_hcpe_hevc_fill_indirect_cu_inter(VADriverContextP 
ctx,
                   ((vme_context->ref_index_in_mb[0] >> 0) & 0xf)            /* 
ref_idx_l0[0] */
                  );
 
-    cu_msg[11] = 0x55; /* tu_size 00000000 00000000 00000000 10101010  or 0x0*/
-    cu_msg[12] = (3 << 28 | /* tu count - 1 */
+    cu_msg[11] = tu_size; /* tu_size 00000000 00000000 00000000 10101010  or 
0x0*/
+    cu_msg[12] = ((tu_count - 1) << 28 | /* tu count - 1 */
                   zero << 16 |  /* reserved  */
                   zero          /* tu_xform_Yskip[15:0] */
                  );
@@ -1435,105 +1453,12 @@ gen9_hcpe_hevc_fill_indirect_cu_inter(VADriverContextP 
ctx,
                  );
     cu_msg[14] = zero ;
     cu_msg[15] = zero ;
-
-    dri_bo_unmap(mfc_context->hcp_indirect_cu_object.bo);
-
-}
-
-static void
-gen9_hcpe_hevc_vmeout_to_indirect_cu_buffer(VADriverContextP ctx,
-        struct encode_state *encode_state,
-        struct intel_encoder_context *encoder_context,
-        int slice_index)
-{
-    /* to do */
-    /* to fill the indirect cu by the vme out */
-    struct gen9_hcpe_context *mfc_context = encoder_context->mfc_context;
-    struct gen6_vme_context *vme_context = encoder_context->vme_context;
-    VAEncSequenceParameterBufferHEVC *pSequenceParameter = 
(VAEncSequenceParameterBufferHEVC *)encode_state->seq_param_ext->buffer;
-    VAEncPictureParameterBufferHEVC *pPicParameter = 
(VAEncPictureParameterBufferHEVC *)encode_state->pic_param_ext->buffer;
-    VAEncSliceParameterBufferHEVC *pSliceParameter = 
(VAEncSliceParameterBufferHEVC 
*)encode_state->slice_params_ext[slice_index]->buffer;
-    unsigned int *msg = NULL;
-    unsigned char *msg_ptr = NULL;
-    int qp = pPicParameter->pic_init_qp + pSliceParameter->slice_qp_delta;
-    unsigned int rate_control_mode = encoder_context->rate_control_mode;
-
-    int slice_type = pSliceParameter->slice_type;
-    int is_intra = slice_type == HEVC_SLICE_I;
-
-    int log2_cu_size = 
pSequenceParameter->log2_min_luma_coding_block_size_minus3 + 3;
-    int log2_ctb_size = 
pSequenceParameter->log2_diff_max_min_luma_coding_block_size + log2_cu_size;
-    int ctb_size = 1 << log2_ctb_size;
-    int ctb_width_in_mb = (ctb_size + 15) / 16;
-    int num_mb_in_ctb = ctb_width_in_mb * ctb_width_in_mb;
-
-    int width_in_ctb = (pSequenceParameter->pic_width_in_luma_samples + 
ctb_size - 1) / ctb_size;
-
-    int width_in_mbs = (mfc_context->surface_state.width + 15) / 16;
-
-    int num_cu_record = 64;
-
-    if (log2_ctb_size == 5) num_cu_record = 16;
-    else if (log2_ctb_size == 4) num_cu_record = 4;
-    else if (log2_ctb_size == 6) num_cu_record = 64;
-
-    int i_ctb;
-    int ctb_x, ctb_y;
-
-    int macroblock_address = 0;
-
-    if (rate_control_mode == VA_RC_CBR) {
-        qp = mfc_context->bit_rate_control_context[slice_type].QpPrimeY;
-        pSliceParameter->slice_qp_delta = qp - pPicParameter->pic_init_qp;
-    }
-
-    dri_bo_map(vme_context->vme_output.bo , 1);
-    msg_ptr = (unsigned char *)vme_context->vme_output.bo->virtual;
-
-    for (i_ctb = pSliceParameter->slice_segment_address; i_ctb < 
pSliceParameter->slice_segment_address + pSliceParameter->num_ctu_in_slice; 
i_ctb++) {
-        ctb_x = i_ctb % width_in_ctb;
-        ctb_y = i_ctb / width_in_ctb;
-
-        int mb_x, mb_y;
-        int mb_addr = 0;
-        macroblock_address = (i_ctb - ctb_x) * num_mb_in_ctb + ctb_x * 
ctb_width_in_mb;
-        for (mb_y = 0; mb_y < ctb_width_in_mb; mb_y++) {
-            mb_addr = macroblock_address + mb_y * width_in_mbs ;
-            for (mb_x = 0; mb_x < ctb_width_in_mb; mb_x++) {
-                mb_addr++;
-
-                /* get the mb info from the vme out */
-                msg = (unsigned int *)(msg_ptr + mb_addr * 
vme_context->vme_output.size_block);
-
-                /*fill to indirect cu */
-                /*to do */
-                if (is_intra) {
-                    /* fill intra cu */
-                    gen9_hcpe_hevc_fill_indirect_cu_intra(ctx, encode_state, 
encoder_context, qp, msg, ctb_x, ctb_y, mb_x, mb_y, ctb_width_in_mb, 
width_in_ctb, num_cu_record, slice_type);
-                } else {
-                    int inter_rdo, intra_rdo;
-                    inter_rdo = msg[AVC_INTER_RDO_OFFSET] & AVC_RDO_MASK;
-                    intra_rdo = msg[AVC_INTRA_RDO_OFFSET] & AVC_RDO_MASK;
-                    if (intra_rdo < inter_rdo) {
-                        /* fill intra cu */
-                        gen9_hcpe_hevc_fill_indirect_cu_intra(ctx, 
encode_state, encoder_context, qp, msg, ctb_x, ctb_y, mb_x, mb_y, 
ctb_width_in_mb, width_in_ctb, num_cu_record, slice_type);
-                    } else {
-                        msg += AVC_INTER_MSG_OFFSET;
-                        /* fill inter cu */
-                        gen9_hcpe_hevc_fill_indirect_cu_inter(ctx, 
encode_state, encoder_context, qp, msg, ctb_x, ctb_y, mb_x, mb_y, 
ctb_width_in_mb, width_in_ctb, num_cu_record, slice_type);
-                    }
-                }
-
-            }
-        }
-    }
-
-    dri_bo_unmap(vme_context->vme_output.bo);
 }
 
 #define HEVC_SPLIT_CU_FLAG_64_64 
((0x1<<20)|(0xf<<16)|(0x0<<12)|(0x0<<8)|(0x0<<4)|(0x0))
 #define HEVC_SPLIT_CU_FLAG_32_32 
((0x1<<20)|(0x0<<16)|(0x0<<12)|(0x0<<8)|(0x0<<4)|(0x0))
 #define HEVC_SPLIT_CU_FLAG_16_16 
((0x0<<20)|(0x0<<16)|(0x0<<12)|(0x0<<8)|(0x0<<4)|(0x0))
+#define HEVC_SPLIT_CU_FLAG_8_8   
((0x1<<20)|(0x0<<16)|(0x0<<12)|(0x0<<8)|(0x0<<4)|(0x0))
 
 
 void
@@ -1646,6 +1571,7 @@ gen9_hcpe_hevc_pipeline_slice_programing(VADriverContextP 
ctx,
         struct intel_batchbuffer *slice_batch)
 {
     struct gen9_hcpe_context *mfc_context = encoder_context->mfc_context;
+    struct gen6_vme_context *vme_context = encoder_context->vme_context;
     VAEncSequenceParameterBufferHEVC *pSequenceParameter = 
(VAEncSequenceParameterBufferHEVC *)encode_state->seq_param_ext->buffer;
     VAEncPictureParameterBufferHEVC *pPicParameter = 
(VAEncPictureParameterBufferHEVC *)encode_state->pic_param_ext->buffer;
     VAEncSliceParameterBufferHEVC *pSliceParameter = 
(VAEncSliceParameterBufferHEVC 
*)encode_state->slice_params_ext[slice_index]->buffer;
@@ -1666,7 +1592,24 @@ 
gen9_hcpe_hevc_pipeline_slice_programing(VADriverContextP ctx,
     int num_mb_in_ctb = ctb_width_in_mb * ctb_width_in_mb;
     int i_ctb, ctb_x, ctb_y;
     unsigned int split_coding_unit_flag = 0;
+    int width_in_mbs = (mfc_context->surface_state.width + 15) / 16;
+    int row_pad_flag = (pSequenceParameter->pic_height_in_luma_samples % 
ctb_size)> 0 ? 1:0;
+
+    int is_intra = (slice_type == HEVC_SLICE_I);
+    unsigned int *msg = NULL;
+    unsigned char *msg_ptr = NULL;
+    int macroblock_address = 0;
+    int num_cu_record = 64;
+    int cu_count = 1;
+    int tmp_mb_mode = 0;
+    int mb_x = 0, mb_y = 0;
+    int mb_addr = 0;
+    int cu_index = 0;
+    int inter_rdo, intra_rdo;
 
+    if (log2_ctb_size == 5) num_cu_record = 16;
+    else if (log2_ctb_size == 4) num_cu_record = 4;
+    else if (log2_ctb_size == 6) num_cu_record = 64;
     if (rate_control_mode == VA_RC_CBR) {
         qp = mfc_context->bit_rate_control_context[slice_type].QpPrimeY;
         pSliceParameter->slice_qp_delta = qp - pPicParameter->pic_init_qp;
@@ -1706,16 +1649,85 @@ 
gen9_hcpe_hevc_pipeline_slice_programing(VADriverContextP ctx,
 
     split_coding_unit_flag = (ctb_width_in_mb == 4) ? HEVC_SPLIT_CU_FLAG_64_64 
: ((ctb_width_in_mb == 2) ? HEVC_SPLIT_CU_FLAG_32_32 : 
HEVC_SPLIT_CU_FLAG_16_16);
 
-    for (i_ctb = pSliceParameter->slice_segment_address;
-         i_ctb < pSliceParameter->slice_segment_address + 
pSliceParameter->num_ctu_in_slice; i_ctb++) {
+    dri_bo_map(vme_context->vme_output.bo , 1);
+    msg_ptr = (unsigned char *)vme_context->vme_output.bo->virtual;
+    dri_bo_map(mfc_context->hcp_indirect_cu_object.bo , 1);
+
+    for (i_ctb = pSliceParameter->slice_segment_address;i_ctb < 
pSliceParameter->slice_segment_address + pSliceParameter->num_ctu_in_slice; 
i_ctb++) {
         int last_ctb = (i_ctb == (pSliceParameter->slice_segment_address + 
pSliceParameter->num_ctu_in_slice - 1));
+        int ctb_height_in_mb = ctb_width_in_mb;
         ctb_x = i_ctb % width_in_ctb;
         ctb_y = i_ctb / width_in_ctb;
+        if(ctb_y == (height_in_ctb - 1) && row_pad_flag)  ctb_height_in_mb = 1;
+
+        mb_x = 0;
+        mb_y = 0;
+        macroblock_address = (i_ctb - ctb_x) * num_mb_in_ctb + ctb_x * 
ctb_width_in_mb;
+        split_coding_unit_flag = ((ctb_width_in_mb == 2) ? 
HEVC_SPLIT_CU_FLAG_32_32 : HEVC_SPLIT_CU_FLAG_16_16);
+        cu_count = 1;
+        cu_index = 0;
+        mb_addr = 0;
+        msg = NULL;
+        for (mb_y = 0; mb_y < ctb_height_in_mb; mb_y++) 
+        {
+            mb_addr = macroblock_address + mb_y * width_in_mbs ;
+            for (mb_x = 0; mb_x < ctb_width_in_mb; mb_x++) 
+            {
+                split_coding_unit_flag = ((ctb_width_in_mb == 2) ? 
HEVC_SPLIT_CU_FLAG_32_32 : HEVC_SPLIT_CU_FLAG_16_16);
+                /* get the mb info from the vme out */
+                msg = (unsigned int *)(msg_ptr + mb_addr * 
vme_context->vme_output.size_block);
 
-        gen9_hcpe_hevc_pak_object(ctx, ctb_x, ctb_y, last_ctb, 
encoder_context, num_mb_in_ctb, split_coding_unit_flag, slice_batch);
+                inter_rdo = msg[AVC_INTER_RDO_OFFSET] & AVC_RDO_MASK;
+                intra_rdo = msg[AVC_INTRA_RDO_OFFSET] & AVC_RDO_MASK;
+                /*fill to indirect cu */
+                /*to do */
+                if (is_intra || intra_rdo < inter_rdo) {
+                    /* fill intra cu */
+                    tmp_mb_mode = (msg[0] & AVC_INTRA_MODE_MASK) >> 4;
+                    if (tmp_mb_mode == AVC_INTRA_16X16) {
+                        gen9_hcpe_hevc_fill_indirect_cu_intra(ctx, 
encode_state, encoder_context, qp, msg, ctb_x, ctb_y, mb_x, mb_y, 
ctb_width_in_mb, width_in_ctb, num_cu_record, slice_type,cu_index++,0);
+                    } else { // for 4x4 to use 8x8 replace
+                        gen9_hcpe_hevc_fill_indirect_cu_intra(ctx, 
encode_state, encoder_context, qp, msg, ctb_x, ctb_y, mb_x, mb_y, 
ctb_width_in_mb, width_in_ctb, num_cu_record, slice_type,cu_index++,0);
+                        gen9_hcpe_hevc_fill_indirect_cu_intra(ctx, 
encode_state, encoder_context, qp, msg, ctb_x, ctb_y, mb_x, mb_y, 
ctb_width_in_mb, width_in_ctb, num_cu_record, slice_type,cu_index++,1);
+                        gen9_hcpe_hevc_fill_indirect_cu_intra(ctx, 
encode_state, encoder_context, qp, msg, ctb_x, ctb_y, mb_x, mb_y, 
ctb_width_in_mb, width_in_ctb, num_cu_record, slice_type,cu_index++,2);
+                        gen9_hcpe_hevc_fill_indirect_cu_intra(ctx, 
encode_state, encoder_context, qp, msg, ctb_x, ctb_y, mb_x, mb_y, 
ctb_width_in_mb, width_in_ctb, num_cu_record, slice_type,cu_index++,3);
+                        if(ctb_width_in_mb == 2)
+                            split_coding_unit_flag |= 0x1 << (mb_x + mb_y * 
ctb_width_in_mb + 16);
+                        else if(ctb_width_in_mb == 1)
+                            split_coding_unit_flag |= 0x1 << 20;
+                    }
+                } else {
+                    msg += AVC_INTER_MSG_OFFSET;
+                    /* fill inter cu */
+                    tmp_mb_mode = msg[0] & AVC_INTER_MODE_MASK;
+                    if (tmp_mb_mode == AVC_INTER_8X8){
+                        gen9_hcpe_hevc_fill_indirect_cu_inter(ctx, 
encode_state, encoder_context, qp, msg, ctb_x, ctb_y, mb_x, mb_y, 
ctb_width_in_mb, width_in_ctb, num_cu_record, slice_type,cu_index++,0);
+                        gen9_hcpe_hevc_fill_indirect_cu_inter(ctx, 
encode_state, encoder_context, qp, msg, ctb_x, ctb_y, mb_x, mb_y, 
ctb_width_in_mb, width_in_ctb, num_cu_record, slice_type,cu_index++,1);
+                        gen9_hcpe_hevc_fill_indirect_cu_inter(ctx, 
encode_state, encoder_context, qp, msg, ctb_x, ctb_y, mb_x, mb_y, 
ctb_width_in_mb, width_in_ctb, num_cu_record, slice_type,cu_index++,2);
+                        gen9_hcpe_hevc_fill_indirect_cu_inter(ctx, 
encode_state, encoder_context, qp, msg, ctb_x, ctb_y, mb_x, mb_y, 
ctb_width_in_mb, width_in_ctb, num_cu_record, slice_type,cu_index++,3);
+                        if(ctb_width_in_mb == 2)
+                            split_coding_unit_flag |= 0x1 << (mb_x + mb_y * 
ctb_width_in_mb + 16);
+                        else if(ctb_width_in_mb == 1)
+                            split_coding_unit_flag |= 0x1 << 20;
+
+                    }else if(tmp_mb_mode == AVC_INTER_16X16 ||
+                        tmp_mb_mode == AVC_INTER_8X16 ||
+                        tmp_mb_mode == AVC_INTER_16X8) {
+                        gen9_hcpe_hevc_fill_indirect_cu_inter(ctx, 
encode_state, encoder_context, qp, msg, ctb_x, ctb_y, mb_x, mb_y, 
ctb_width_in_mb, width_in_ctb, num_cu_record, slice_type,cu_index++,0);
+                    }
+                }
+                mb_addr++;
+            }
+        }
 
+        cu_count = cu_index;
+        // PAK object fill accordingly.
+        gen9_hcpe_hevc_pak_object(ctx, ctb_x, ctb_y, last_ctb, 
encoder_context, cu_count, split_coding_unit_flag, slice_batch);
     }
 
+    dri_bo_unmap(mfc_context->hcp_indirect_cu_object.bo);
+    dri_bo_unmap(vme_context->vme_output.bo);
+
     if (last_slice) {
         mfc_context->insert_object(ctx, encoder_context,
                                    tail_data, 2, 8,
@@ -1741,7 +1753,6 @@ gen9_hcpe_hevc_software_batchbuffer(VADriverContextP ctx,
     batch_bo = batch->buffer;
 
     for (i = 0; i < encode_state->num_slice_params_ext; i++) {
-        gen9_hcpe_hevc_vmeout_to_indirect_cu_buffer(ctx, encode_state, 
encoder_context, i);
         gen9_hcpe_hevc_pipeline_slice_programing(ctx, encode_state, 
encoder_context, i, batch);
     }
 
-- 
1.9.1

_______________________________________________
Libva mailing list
Libva@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/libva

Reply via email to