Re: [Libva] [PATCH intel-driver 5/5] decoder: h264: optimize support for grayscale surfaces.

2014-05-11 Thread Zhao Yakui
On Sat, 2014-05-10 at 00:03 -0600, Gwenole Beauchesne wrote:
 Optimize support for grayscale surfaces in two aspects: (i) space
 by only allocating the luma component ; (ii) speed by avoiding
 initialization of the (now inexistent) chroma planes.
 
 Keep backward compatibility with older codec layers that only
 supported YUV 4:2:0 and not grayscale formats properly.

Some changes don't match the hardware spec.

Another issue is that you mix too many things in one patch. It will be
better that you can do one thing in one patch.
For example: 
The fix for Y800 surface.
The definition of h264 chroma formats supported by the driver.

Thanks.
   Yakui
 
 Signed-off-by: Gwenole Beauchesne gwenole.beauche...@intel.com
 ---
  src/gen6_mfd.c   | 21 +-
  src/gen75_mfd.c  | 19 -
  src/gen7_mfd.c   | 19 -
  src/gen8_mfd.c   | 19 -
  src/i965_decoder_utils.c | 71 
 ++--
  src/i965_decoder_utils.h |  8 ++
  src/i965_drv_video.c | 43 +
  src/i965_drv_video.h | 12 
  8 files changed, 151 insertions(+), 61 deletions(-)
 
 diff --git a/src/gen6_mfd.c b/src/gen6_mfd.c
 index 22d8a51..6ec2278 100755
 --- a/src/gen6_mfd.c
 +++ b/src/gen6_mfd.c
 @@ -130,7 +130,11 @@ gen6_mfd_surface_state(VADriverContextP ctx,
  {
  struct intel_batchbuffer *batch = gen6_mfd_context-base.batch;
  struct object_surface *obj_surface = decode_state-render_object;
 -
 +unsigned int surface_format;
 +
 +surface_format = obj_surface-fourcc == VA_FOURCC_Y800 ?
 +MFX_SURFACE_MONOCHROME : MFX_SURFACE_PLANAR_420_8;

This doesn't match the hardware spec.
(It only supports the PLANAR_420_8 in MFX_SURFACE_STATE).

 +
  BEGIN_BCS_BATCH(batch, 6);
  OUT_BCS_BATCH(batch, MFX_SURFACE_STATE | (6 - 2));
  OUT_BCS_BATCH(batch, 0);
 @@ -138,7 +142,7 @@ gen6_mfd_surface_state(VADriverContextP ctx,
((obj_surface-orig_height - 1)  19) |
((obj_surface-orig_width - 1)  6));
  OUT_BCS_BATCH(batch,
 -  (MFX_SURFACE_PLANAR_420_8  28) | /* 420 planar YUV 
 surface */
 +  (surface_format  28) | /* 420 planar YUV surface */
(1  27) | /* must be 1 for interleave U/V, hardware 
 requirement */
(0  22) | /* surface object control state, FIXME??? */
((obj_surface-width - 1)  3) | /* pitch */
 @@ -842,18 +846,7 @@ gen6_mfd_avc_decode_init(VADriverContextP ctx,
  obj_surface-flags |= SURFACE_REFERENCED;
  else
  obj_surface-flags = ~SURFACE_REFERENCED;
 -i965_check_alloc_surface_bo(ctx, obj_surface, 1, VA_FOURCC_NV12, 
 SUBSAMPLE_YUV420);
 -
 -/* initial uv component for YUV400 case */
 -if (pic_param-seq_fields.bits.chroma_format_idc == 0) {
 - unsigned int uv_offset = obj_surface-width * obj_surface-height; 
 - unsigned int uv_size   = obj_surface-width * obj_surface-height / 
 2; 
 -
 - drm_intel_gem_bo_map_gtt(obj_surface-bo);
 - memset(obj_surface-bo-virtual + uv_offset, 0x80, uv_size);
 - drm_intel_gem_bo_unmap_gtt(obj_surface-bo);
 -}
 -
 +avc_ensure_surface_bo(ctx, decode_state, obj_surface, pic_param);
  gen6_mfd_init_avc_surface(ctx, pic_param, obj_surface);
  
  dri_bo_unreference(gen6_mfd_context-post_deblocking_output.bo);
 diff --git a/src/gen75_mfd.c b/src/gen75_mfd.c
 index cb85996..d2dbb69 100644
 --- a/src/gen75_mfd.c
 +++ b/src/gen75_mfd.c
 @@ -137,12 +137,16 @@ gen75_mfd_surface_state(VADriverContextP ctx,
  struct object_surface *obj_surface = decode_state-render_object;
  unsigned int y_cb_offset;
  unsigned int y_cr_offset;
 +unsigned int surface_format;
  
  assert(obj_surface);
  
  y_cb_offset = obj_surface-y_cb_offset;
  y_cr_offset = obj_surface-y_cr_offset;
  
 +surface_format = obj_surface-fourcc == VA_FOURCC_Y800 ?
 +MFX_SURFACE_MONOCHROME : MFX_SURFACE_PLANAR_420_8;
 +
  BEGIN_BCS_BATCH(batch, 6);
  OUT_BCS_BATCH(batch, MFX_SURFACE_STATE | (6 - 2));
  OUT_BCS_BATCH(batch, 0);
 @@ -150,7 +154,7 @@ gen75_mfd_surface_state(VADriverContextP ctx,
((obj_surface-orig_height - 1)  18) |
((obj_surface-orig_width - 1)  4));
  OUT_BCS_BATCH(batch,
 -  (MFX_SURFACE_PLANAR_420_8  28) | /* 420 planar YUV 
 surface */
 +  (surface_format  28) | /* 420 planar YUV surface */
((standard_select != MFX_FORMAT_JPEG)  27) | /* 
 interleave chroma, set to 0 for JPEG */
(0  22) | /* surface object control state, ignored */
((obj_surface-width - 1)  3) | /* pitch */
 @@ -1086,18 +1090,7 @@ gen75_mfd_avc_decode_init(VADriverContextP ctx,
  obj_surface-flags |= SURFACE_REFERENCED;
  else
  obj_surface-flags = ~SURFACE_REFERENCED;
 -

Re: [Libva] [PATCH intel-driver 5/5] decoder: h264: optimize support for grayscale surfaces.

2014-05-11 Thread Gwenole Beauchesne
Hi,

2014-05-12 3:21 GMT+02:00 Zhao Yakui yakui.z...@intel.com:
 On Sat, 2014-05-10 at 00:03 -0600, Gwenole Beauchesne wrote:
 Optimize support for grayscale surfaces in two aspects: (i) space
 by only allocating the luma component ; (ii) speed by avoiding
 initialization of the (now inexistent) chroma planes.

 Keep backward compatibility with older codec layers that only
 supported YUV 4:2:0 and not grayscale formats properly.

 Some changes don't match the hardware spec.

 Another issue is that you mix too many things in one patch. It will be
 better that you can do one thing in one patch.

This matches the PRM, please read the last section of the chapter. :)
There are also additional notes for it in the inline description too.

 For example:
 The fix for Y800 surface.
 The definition of h264 chroma formats supported by the driver.

 Thanks.
Yakui

 Signed-off-by: Gwenole Beauchesne gwenole.beauche...@intel.com
 ---
  src/gen6_mfd.c   | 21 +-
  src/gen75_mfd.c  | 19 -
  src/gen7_mfd.c   | 19 -
  src/gen8_mfd.c   | 19 -
  src/i965_decoder_utils.c | 71 
 ++--
  src/i965_decoder_utils.h |  8 ++
  src/i965_drv_video.c | 43 +
  src/i965_drv_video.h | 12 
  8 files changed, 151 insertions(+), 61 deletions(-)

 diff --git a/src/gen6_mfd.c b/src/gen6_mfd.c
 index 22d8a51..6ec2278 100755
 --- a/src/gen6_mfd.c
 +++ b/src/gen6_mfd.c
 @@ -130,7 +130,11 @@ gen6_mfd_surface_state(VADriverContextP ctx,
  {
  struct intel_batchbuffer *batch = gen6_mfd_context-base.batch;
  struct object_surface *obj_surface = decode_state-render_object;
 -
 +unsigned int surface_format;
 +
 +surface_format = obj_surface-fourcc == VA_FOURCC_Y800 ?
 +MFX_SURFACE_MONOCHROME : MFX_SURFACE_PLANAR_420_8;

 This doesn't match the hardware spec.
 (It only supports the PLANAR_420_8 in MFX_SURFACE_STATE).

 +
  BEGIN_BCS_BATCH(batch, 6);
  OUT_BCS_BATCH(batch, MFX_SURFACE_STATE | (6 - 2));
  OUT_BCS_BATCH(batch, 0);
 @@ -138,7 +142,7 @@ gen6_mfd_surface_state(VADriverContextP ctx,
((obj_surface-orig_height - 1)  19) |
((obj_surface-orig_width - 1)  6));
  OUT_BCS_BATCH(batch,
 -  (MFX_SURFACE_PLANAR_420_8  28) | /* 420 planar YUV 
 surface */
 +  (surface_format  28) | /* 420 planar YUV surface */
(1  27) | /* must be 1 for interleave U/V, hardware 
 requirement */
(0  22) | /* surface object control state, FIXME??? */
((obj_surface-width - 1)  3) | /* pitch */
 @@ -842,18 +846,7 @@ gen6_mfd_avc_decode_init(VADriverContextP ctx,
  obj_surface-flags |= SURFACE_REFERENCED;
  else
  obj_surface-flags = ~SURFACE_REFERENCED;
 -i965_check_alloc_surface_bo(ctx, obj_surface, 1, VA_FOURCC_NV12, 
 SUBSAMPLE_YUV420);
 -
 -/* initial uv component for YUV400 case */
 -if (pic_param-seq_fields.bits.chroma_format_idc == 0) {
 - unsigned int uv_offset = obj_surface-width * obj_surface-height;
 - unsigned int uv_size   = obj_surface-width * obj_surface-height 
 / 2;
 -
 - drm_intel_gem_bo_map_gtt(obj_surface-bo);
 - memset(obj_surface-bo-virtual + uv_offset, 0x80, uv_size);
 - drm_intel_gem_bo_unmap_gtt(obj_surface-bo);
 -}
 -
 +avc_ensure_surface_bo(ctx, decode_state, obj_surface, pic_param);
  gen6_mfd_init_avc_surface(ctx, pic_param, obj_surface);

  dri_bo_unreference(gen6_mfd_context-post_deblocking_output.bo);
 diff --git a/src/gen75_mfd.c b/src/gen75_mfd.c
 index cb85996..d2dbb69 100644
 --- a/src/gen75_mfd.c
 +++ b/src/gen75_mfd.c
 @@ -137,12 +137,16 @@ gen75_mfd_surface_state(VADriverContextP ctx,
  struct object_surface *obj_surface = decode_state-render_object;
  unsigned int y_cb_offset;
  unsigned int y_cr_offset;
 +unsigned int surface_format;

  assert(obj_surface);

  y_cb_offset = obj_surface-y_cb_offset;
  y_cr_offset = obj_surface-y_cr_offset;

 +surface_format = obj_surface-fourcc == VA_FOURCC_Y800 ?
 +MFX_SURFACE_MONOCHROME : MFX_SURFACE_PLANAR_420_8;
 +
  BEGIN_BCS_BATCH(batch, 6);
  OUT_BCS_BATCH(batch, MFX_SURFACE_STATE | (6 - 2));
  OUT_BCS_BATCH(batch, 0);
 @@ -150,7 +154,7 @@ gen75_mfd_surface_state(VADriverContextP ctx,
((obj_surface-orig_height - 1)  18) |
((obj_surface-orig_width - 1)  4));
  OUT_BCS_BATCH(batch,
 -  (MFX_SURFACE_PLANAR_420_8  28) | /* 420 planar YUV 
 surface */
 +  (surface_format  28) | /* 420 planar YUV surface */
((standard_select != MFX_FORMAT_JPEG)  27) | /* 
 interleave chroma, set to 0 for JPEG */
(0  22) | /* surface object control state, ignored */
((obj_surface-width - 1)  3) | /* 

Re: [Libva] [PATCH intel-driver 5/5] decoder: h264: optimize support for grayscale surfaces.

2014-05-11 Thread Zhao, Yakui
On Sun, 2014-05-11 at 22:26 -0600, Gwenole Beauchesne wrote:
 Hi,
 
 2014-05-12 3:21 GMT+02:00 Zhao Yakui yakui.z...@intel.com:
  On Sat, 2014-05-10 at 00:03 -0600, Gwenole Beauchesne wrote:
  Optimize support for grayscale surfaces in two aspects: (i) space
  by only allocating the luma component ; (ii) speed by avoiding
  initialization of the (now inexistent) chroma planes.
 
  Keep backward compatibility with older codec layers that only
  supported YUV 4:2:0 and not grayscale formats properly.
 
  Some changes don't match the hardware spec.
 
  Another issue is that you mix too many things in one patch. It will be
  better that you can do one thing in one patch.
 
 This matches the PRM, please read the last section of the chapter. :)
 There are also additional notes for it in the inline description too.

Really?
   The following is from the PRM on Ivybridge.
   For video codec, it should set to 4 always.

Thanks.
Yakui
 
  For example:
  The fix for Y800 surface.
  The definition of h264 chroma formats supported by the driver.
 
  Thanks.
 Yakui
 
  Signed-off-by: Gwenole Beauchesne gwenole.beauche...@intel.com
  ---
   src/gen6_mfd.c   | 21 +-
   src/gen75_mfd.c  | 19 -
   src/gen7_mfd.c   | 19 -
   src/gen8_mfd.c   | 19 -
   src/i965_decoder_utils.c | 71 
  ++--
   src/i965_decoder_utils.h |  8 ++
   src/i965_drv_video.c | 43 +
   src/i965_drv_video.h | 12 
   8 files changed, 151 insertions(+), 61 deletions(-)
 
  diff --git a/src/gen6_mfd.c b/src/gen6_mfd.c
  index 22d8a51..6ec2278 100755
  --- a/src/gen6_mfd.c
  +++ b/src/gen6_mfd.c
  @@ -130,7 +130,11 @@ gen6_mfd_surface_state(VADriverContextP ctx,
   {
   struct intel_batchbuffer *batch = gen6_mfd_context-base.batch;
   struct object_surface *obj_surface = decode_state-render_object;
  -
  +unsigned int surface_format;
  +
  +surface_format = obj_surface-fourcc == VA_FOURCC_Y800 ?
  +MFX_SURFACE_MONOCHROME : MFX_SURFACE_PLANAR_420_8;
 
  This doesn't match the hardware spec.
  (It only supports the PLANAR_420_8 in MFX_SURFACE_STATE).
 
  +
   BEGIN_BCS_BATCH(batch, 6);
   OUT_BCS_BATCH(batch, MFX_SURFACE_STATE | (6 - 2));
   OUT_BCS_BATCH(batch, 0);
  @@ -138,7 +142,7 @@ gen6_mfd_surface_state(VADriverContextP ctx,
 ((obj_surface-orig_height - 1)  19) |
 ((obj_surface-orig_width - 1)  6));
   OUT_BCS_BATCH(batch,
  -  (MFX_SURFACE_PLANAR_420_8  28) | /* 420 planar YUV 
  surface */
  +  (surface_format  28) | /* 420 planar YUV surface */
 (1  27) | /* must be 1 for interleave U/V, hardware 
  requirement */
 (0  22) | /* surface object control state, FIXME??? */
 ((obj_surface-width - 1)  3) | /* pitch */
  @@ -842,18 +846,7 @@ gen6_mfd_avc_decode_init(VADriverContextP ctx,
   obj_surface-flags |= SURFACE_REFERENCED;
   else
   obj_surface-flags = ~SURFACE_REFERENCED;
  -i965_check_alloc_surface_bo(ctx, obj_surface, 1, VA_FOURCC_NV12, 
  SUBSAMPLE_YUV420);
  -
  -/* initial uv component for YUV400 case */
  -if (pic_param-seq_fields.bits.chroma_format_idc == 0) {
  - unsigned int uv_offset = obj_surface-width * 
  obj_surface-height;
  - unsigned int uv_size   = obj_surface-width * 
  obj_surface-height / 2;
  -
  - drm_intel_gem_bo_map_gtt(obj_surface-bo);
  - memset(obj_surface-bo-virtual + uv_offset, 0x80, uv_size);
  - drm_intel_gem_bo_unmap_gtt(obj_surface-bo);
  -}
  -
  +avc_ensure_surface_bo(ctx, decode_state, obj_surface, pic_param);
   gen6_mfd_init_avc_surface(ctx, pic_param, obj_surface);
 
   dri_bo_unreference(gen6_mfd_context-post_deblocking_output.bo);
  diff --git a/src/gen75_mfd.c b/src/gen75_mfd.c
  index cb85996..d2dbb69 100644
  --- a/src/gen75_mfd.c
  +++ b/src/gen75_mfd.c
  @@ -137,12 +137,16 @@ gen75_mfd_surface_state(VADriverContextP ctx,
   struct object_surface *obj_surface = decode_state-render_object;
   unsigned int y_cb_offset;
   unsigned int y_cr_offset;
  +unsigned int surface_format;
 
   assert(obj_surface);
 
   y_cb_offset = obj_surface-y_cb_offset;
   y_cr_offset = obj_surface-y_cr_offset;
 
  +surface_format = obj_surface-fourcc == VA_FOURCC_Y800 ?
  +MFX_SURFACE_MONOCHROME : MFX_SURFACE_PLANAR_420_8;
  +
   BEGIN_BCS_BATCH(batch, 6);
   OUT_BCS_BATCH(batch, MFX_SURFACE_STATE | (6 - 2));
   OUT_BCS_BATCH(batch, 0);
  @@ -150,7 +154,7 @@ gen75_mfd_surface_state(VADriverContextP ctx,
 ((obj_surface-orig_height - 1)  18) |
 ((obj_surface-orig_width - 1)  4));
   OUT_BCS_BATCH(batch,
  -  (MFX_SURFACE_PLANAR_420_8  28) | /* 420 planar YUV 
  surface */
  +

Re: [Libva] [PATCH intel-driver 5/5] decoder: h264: optimize support for grayscale surfaces.

2014-05-11 Thread Gwenole Beauchesne
2014-05-12 6:52 GMT+02:00 Zhao, Yakui yakui.z...@intel.com:
 On Sun, 2014-05-11 at 22:26 -0600, Gwenole Beauchesne wrote:
 Hi,

 2014-05-12 3:21 GMT+02:00 Zhao Yakui yakui.z...@intel.com:
  On Sat, 2014-05-10 at 00:03 -0600, Gwenole Beauchesne wrote:
  Optimize support for grayscale surfaces in two aspects: (i) space
  by only allocating the luma component ; (ii) speed by avoiding
  initialization of the (now inexistent) chroma planes.
 
  Keep backward compatibility with older codec layers that only
  supported YUV 4:2:0 and not grayscale formats properly.
 
  Some changes don't match the hardware spec.
 
  Another issue is that you mix too many things in one patch. It will be
  better that you can do one thing in one patch.

 This matches the PRM, please read the last section of the chapter. :)
 There are also additional notes for it in the inline description too.

 Really?
The following is from the PRM on Ivybridge.
For video codec, it should set to 4 always.

This looks to be the quote for the JPEG case, that required all 3
components, with the same and full stride of the luma component. I am
pretty sure we can improve this part if you think it's not clear
enough. The prevailing information for grayscale support is section
7.1.

Regards,
Gwenole.

  ---
   src/gen6_mfd.c   | 21 +-
   src/gen75_mfd.c  | 19 -
   src/gen7_mfd.c   | 19 -
   src/gen8_mfd.c   | 19 -
   src/i965_decoder_utils.c | 71 
  ++--
   src/i965_decoder_utils.h |  8 ++
   src/i965_drv_video.c | 43 +
   src/i965_drv_video.h | 12 
   8 files changed, 151 insertions(+), 61 deletions(-)
 
  diff --git a/src/gen6_mfd.c b/src/gen6_mfd.c
  index 22d8a51..6ec2278 100755
  --- a/src/gen6_mfd.c
  +++ b/src/gen6_mfd.c
  @@ -130,7 +130,11 @@ gen6_mfd_surface_state(VADriverContextP ctx,
   {
   struct intel_batchbuffer *batch = gen6_mfd_context-base.batch;
   struct object_surface *obj_surface = decode_state-render_object;
  -
  +unsigned int surface_format;
  +
  +surface_format = obj_surface-fourcc == VA_FOURCC_Y800 ?
  +MFX_SURFACE_MONOCHROME : MFX_SURFACE_PLANAR_420_8;
 
  This doesn't match the hardware spec.
  (It only supports the PLANAR_420_8 in MFX_SURFACE_STATE).
 
  +
   BEGIN_BCS_BATCH(batch, 6);
   OUT_BCS_BATCH(batch, MFX_SURFACE_STATE | (6 - 2));
   OUT_BCS_BATCH(batch, 0);
  @@ -138,7 +142,7 @@ gen6_mfd_surface_state(VADriverContextP ctx,
 ((obj_surface-orig_height - 1)  19) |
 ((obj_surface-orig_width - 1)  6));
   OUT_BCS_BATCH(batch,
  -  (MFX_SURFACE_PLANAR_420_8  28) | /* 420 planar YUV 
  surface */
  +  (surface_format  28) | /* 420 planar YUV surface */
 (1  27) | /* must be 1 for interleave U/V, hardware 
  requirement */
 (0  22) | /* surface object control state, FIXME??? 
  */
 ((obj_surface-width - 1)  3) | /* pitch */
  @@ -842,18 +846,7 @@ gen6_mfd_avc_decode_init(VADriverContextP ctx,
   obj_surface-flags |= SURFACE_REFERENCED;
   else
   obj_surface-flags = ~SURFACE_REFERENCED;
  -i965_check_alloc_surface_bo(ctx, obj_surface, 1, VA_FOURCC_NV12, 
  SUBSAMPLE_YUV420);
  -
  -/* initial uv component for YUV400 case */
  -if (pic_param-seq_fields.bits.chroma_format_idc == 0) {
  - unsigned int uv_offset = obj_surface-width * 
  obj_surface-height;
  - unsigned int uv_size   = obj_surface-width * 
  obj_surface-height / 2;
  -
  - drm_intel_gem_bo_map_gtt(obj_surface-bo);
  - memset(obj_surface-bo-virtual + uv_offset, 0x80, uv_size);
  - drm_intel_gem_bo_unmap_gtt(obj_surface-bo);
  -}
  -
  +avc_ensure_surface_bo(ctx, decode_state, obj_surface, pic_param);
   gen6_mfd_init_avc_surface(ctx, pic_param, obj_surface);
 
   dri_bo_unreference(gen6_mfd_context-post_deblocking_output.bo);
  diff --git a/src/gen75_mfd.c b/src/gen75_mfd.c
  index cb85996..d2dbb69 100644
  --- a/src/gen75_mfd.c
  +++ b/src/gen75_mfd.c
  @@ -137,12 +137,16 @@ gen75_mfd_surface_state(VADriverContextP ctx,
   struct object_surface *obj_surface = decode_state-render_object;
   unsigned int y_cb_offset;
   unsigned int y_cr_offset;
  +unsigned int surface_format;
 
   assert(obj_surface);
 
   y_cb_offset = obj_surface-y_cb_offset;
   y_cr_offset = obj_surface-y_cr_offset;
 
  +surface_format = obj_surface-fourcc == VA_FOURCC_Y800 ?
  +MFX_SURFACE_MONOCHROME : MFX_SURFACE_PLANAR_420_8;
  +
   BEGIN_BCS_BATCH(batch, 6);
   OUT_BCS_BATCH(batch, MFX_SURFACE_STATE | (6 - 2));
   OUT_BCS_BATCH(batch, 0);
  @@ -150,7 +154,7 @@ gen75_mfd_surface_state(VADriverContextP ctx,
 ((obj_surface-orig_height - 1)  18) |