Re: [Libva] [PATCH v2 intel-driver 8/8] decoder: h264: optimize support for grayscale surfaces.

2014-05-15 Thread Gwenole Beauchesne
2014-05-15 10:25 GMT+02:00 Xiang, Haihao :
> On Wed, 2014-05-14 at 15:13 +0200, Gwenole Beauchesne wrote:
>> Optimize support for grayscale surfaces in two aspects: (i) space
>> by only allocating the luminance component ; (ii) speed by avoiding
>> initialization of the (now inexistent) chrominance planes.
>>
>> Keep backward compatibility with older codec layers that only
>> supported YUV 4:2:0 and not grayscale formats properly.
>>
>> Signed-off-by: Gwenole Beauchesne 
>> ---
>>  src/gen6_mfd.c   |8 ++--
>>  src/gen75_mfd.c  |6 +-
>>  src/gen7_mfd.c   |6 +-
>>  src/gen8_mfd.c   |6 +-
>>  src/i965_decoder_utils.c |   23 +++
>>  src/i965_drv_video.c |   22 ++
>>  src/i965_drv_video.h |9 +
>>  7 files changed, 71 insertions(+), 9 deletions(-)
>>
>> diff --git a/src/gen6_mfd.c b/src/gen6_mfd.c
>> index 2092f69..f925d98 100755
>> --- a/src/gen6_mfd.c
>> +++ b/src/gen6_mfd.c
>> @@ -130,7 +130,11 @@ gen6_mfd_surface_state(VADriverContextP ctx,
>>  {
>>  struct intel_batchbuffer *batch = gen6_mfd_context->base.batch;
>>  struct object_surface *obj_surface = decode_state->render_object;
>> -
>> +unsigned int surface_format;
>> +
>> +surface_format = obj_surface->fourcc == VA_FOURCC_Y800 ?
>> +MFX_SURFACE_MONOCHROME : MFX_SURFACE_PLANAR_420_8;
>> +
>>  BEGIN_BCS_BATCH(batch, 6);
>>  OUT_BCS_BATCH(batch, MFX_SURFACE_STATE | (6 - 2));
>>  OUT_BCS_BATCH(batch, 0);
>> @@ -138,7 +142,7 @@ gen6_mfd_surface_state(VADriverContextP ctx,
>>((obj_surface->orig_height - 1) << 19) |
>>((obj_surface->orig_width - 1) << 6));
>>  OUT_BCS_BATCH(batch,
>> -  (MFX_SURFACE_PLANAR_420_8 << 28) | /* 420 planar YUV 
>> surface */
>> +  (surface_format << 28) | /* 420 planar YUV surface */
>>(1 << 27) | /* must be 1 for interleave U/V, hardware 
>> requirement */
>>(0 << 22) | /* surface object control state, FIXME??? */
>>((obj_surface->width - 1) << 3) | /* pitch */
>> diff --git a/src/gen75_mfd.c b/src/gen75_mfd.c
>> index 5b023cf..895b194 100644
>> --- a/src/gen75_mfd.c
>> +++ b/src/gen75_mfd.c
>> @@ -137,12 +137,16 @@ gen75_mfd_surface_state(VADriverContextP ctx,
>>  struct object_surface *obj_surface = decode_state->render_object;
>>  unsigned int y_cb_offset;
>>  unsigned int y_cr_offset;
>> +unsigned int surface_format;
>>
>>  assert(obj_surface);
>>
>>  y_cb_offset = obj_surface->y_cb_offset;
>>  y_cr_offset = obj_surface->y_cr_offset;
>>
>> +surface_format = obj_surface->fourcc == VA_FOURCC_Y800 ?
>> +MFX_SURFACE_MONOCHROME : MFX_SURFACE_PLANAR_420_8;
>> +
>>  BEGIN_BCS_BATCH(batch, 6);
>>  OUT_BCS_BATCH(batch, MFX_SURFACE_STATE | (6 - 2));
>>  OUT_BCS_BATCH(batch, 0);
>> @@ -150,7 +154,7 @@ gen75_mfd_surface_state(VADriverContextP ctx,
>>((obj_surface->orig_height - 1) << 18) |
>>((obj_surface->orig_width - 1) << 4));
>>  OUT_BCS_BATCH(batch,
>> -  (MFX_SURFACE_PLANAR_420_8 << 28) | /* 420 planar YUV 
>> surface */
>> +  (surface_format << 28) | /* 420 planar YUV surface */
>>((standard_select != MFX_FORMAT_JPEG) << 27) | /* 
>> interleave chroma, set to 0 for JPEG */
>>(0 << 22) | /* surface object control state, ignored */
>>((obj_surface->width - 1) << 3) | /* pitch */
>> diff --git a/src/gen7_mfd.c b/src/gen7_mfd.c
>> index 70b1cec..2e0d653 100755
>> --- a/src/gen7_mfd.c
>> +++ b/src/gen7_mfd.c
>> @@ -135,12 +135,16 @@ gen7_mfd_surface_state(VADriverContextP ctx,
>>  struct object_surface *obj_surface = decode_state->render_object;
>>  unsigned int y_cb_offset;
>>  unsigned int y_cr_offset;
>> +unsigned int surface_format;
>>
>>  assert(obj_surface);
>>
>>  y_cb_offset = obj_surface->y_cb_offset;
>>  y_cr_offset = obj_surface->y_cr_offset;
>>
>> +surface_format = obj_surface->fourcc == VA_FOURCC_Y800 ?
>> +MFX_SURFACE_MONOCHROME : MFX_SURFACE_PLANAR_420_8;
>> +
>>  BEGIN_BCS_BATCH(batch, 6);
>>  OUT_BCS_BATCH(batch, MFX_SURFACE_STATE | (6 - 2));
>>  OUT_BCS_BATCH(batch, 0);
>> @@ -148,7 +152,7 @@ gen7_mfd_surface_state(VADriverContextP ctx,
>>((obj_surface->orig_height - 1) << 18) |
>>((obj_surface->orig_width - 1) << 4));
>>  OUT_BCS_BATCH(batch,
>> -  (MFX_SURFACE_PLANAR_420_8 << 28) | /* 420 planar YUV 
>> surface */
>> +  (surface_format << 28) | /* 420 planar YUV surface */
>>((standard_select != MFX_FORMAT_JPEG) << 27) | /* 
>> interleave chroma, set to 0 for JPEG */
>>(0 << 22) | /* surface object control state, ignored */
>>((obj_surface->width - 1) << 3) 

Re: [Libva] [PATCH v2 intel-driver 8/8] decoder: h264: optimize support for grayscale surfaces.

2014-05-15 Thread Xiang, Haihao
On Wed, 2014-05-14 at 15:13 +0200, Gwenole Beauchesne wrote: 
> Optimize support for grayscale surfaces in two aspects: (i) space
> by only allocating the luminance component ; (ii) speed by avoiding
> initialization of the (now inexistent) chrominance planes.
> 
> Keep backward compatibility with older codec layers that only
> supported YUV 4:2:0 and not grayscale formats properly.
> 
> Signed-off-by: Gwenole Beauchesne 
> ---
>  src/gen6_mfd.c   |8 ++--
>  src/gen75_mfd.c  |6 +-
>  src/gen7_mfd.c   |6 +-
>  src/gen8_mfd.c   |6 +-
>  src/i965_decoder_utils.c |   23 +++
>  src/i965_drv_video.c |   22 ++
>  src/i965_drv_video.h |9 +
>  7 files changed, 71 insertions(+), 9 deletions(-)
> 
> diff --git a/src/gen6_mfd.c b/src/gen6_mfd.c
> index 2092f69..f925d98 100755
> --- a/src/gen6_mfd.c
> +++ b/src/gen6_mfd.c
> @@ -130,7 +130,11 @@ gen6_mfd_surface_state(VADriverContextP ctx,
>  {
>  struct intel_batchbuffer *batch = gen6_mfd_context->base.batch;
>  struct object_surface *obj_surface = decode_state->render_object;
> -
> +unsigned int surface_format;
> +
> +surface_format = obj_surface->fourcc == VA_FOURCC_Y800 ?
> +MFX_SURFACE_MONOCHROME : MFX_SURFACE_PLANAR_420_8;
> +
>  BEGIN_BCS_BATCH(batch, 6);
>  OUT_BCS_BATCH(batch, MFX_SURFACE_STATE | (6 - 2));
>  OUT_BCS_BATCH(batch, 0);
> @@ -138,7 +142,7 @@ gen6_mfd_surface_state(VADriverContextP ctx,
>((obj_surface->orig_height - 1) << 19) |
>((obj_surface->orig_width - 1) << 6));
>  OUT_BCS_BATCH(batch,
> -  (MFX_SURFACE_PLANAR_420_8 << 28) | /* 420 planar YUV 
> surface */
> +  (surface_format << 28) | /* 420 planar YUV surface */
>(1 << 27) | /* must be 1 for interleave U/V, hardware 
> requirement */
>(0 << 22) | /* surface object control state, FIXME??? */
>((obj_surface->width - 1) << 3) | /* pitch */
> diff --git a/src/gen75_mfd.c b/src/gen75_mfd.c
> index 5b023cf..895b194 100644
> --- a/src/gen75_mfd.c
> +++ b/src/gen75_mfd.c
> @@ -137,12 +137,16 @@ gen75_mfd_surface_state(VADriverContextP ctx,
>  struct object_surface *obj_surface = decode_state->render_object;
>  unsigned int y_cb_offset;
>  unsigned int y_cr_offset;
> +unsigned int surface_format;
>  
>  assert(obj_surface);
>  
>  y_cb_offset = obj_surface->y_cb_offset;
>  y_cr_offset = obj_surface->y_cr_offset;
>  
> +surface_format = obj_surface->fourcc == VA_FOURCC_Y800 ?
> +MFX_SURFACE_MONOCHROME : MFX_SURFACE_PLANAR_420_8;
> +
>  BEGIN_BCS_BATCH(batch, 6);
>  OUT_BCS_BATCH(batch, MFX_SURFACE_STATE | (6 - 2));
>  OUT_BCS_BATCH(batch, 0);
> @@ -150,7 +154,7 @@ gen75_mfd_surface_state(VADriverContextP ctx,
>((obj_surface->orig_height - 1) << 18) |
>((obj_surface->orig_width - 1) << 4));
>  OUT_BCS_BATCH(batch,
> -  (MFX_SURFACE_PLANAR_420_8 << 28) | /* 420 planar YUV 
> surface */
> +  (surface_format << 28) | /* 420 planar YUV surface */
>((standard_select != MFX_FORMAT_JPEG) << 27) | /* 
> interleave chroma, set to 0 for JPEG */
>(0 << 22) | /* surface object control state, ignored */
>((obj_surface->width - 1) << 3) | /* pitch */
> diff --git a/src/gen7_mfd.c b/src/gen7_mfd.c
> index 70b1cec..2e0d653 100755
> --- a/src/gen7_mfd.c
> +++ b/src/gen7_mfd.c
> @@ -135,12 +135,16 @@ gen7_mfd_surface_state(VADriverContextP ctx,
>  struct object_surface *obj_surface = decode_state->render_object;
>  unsigned int y_cb_offset;
>  unsigned int y_cr_offset;
> +unsigned int surface_format;
>  
>  assert(obj_surface);
>  
>  y_cb_offset = obj_surface->y_cb_offset;
>  y_cr_offset = obj_surface->y_cr_offset;
>  
> +surface_format = obj_surface->fourcc == VA_FOURCC_Y800 ?
> +MFX_SURFACE_MONOCHROME : MFX_SURFACE_PLANAR_420_8;
> +
>  BEGIN_BCS_BATCH(batch, 6);
>  OUT_BCS_BATCH(batch, MFX_SURFACE_STATE | (6 - 2));
>  OUT_BCS_BATCH(batch, 0);
> @@ -148,7 +152,7 @@ gen7_mfd_surface_state(VADriverContextP ctx,
>((obj_surface->orig_height - 1) << 18) |
>((obj_surface->orig_width - 1) << 4));
>  OUT_BCS_BATCH(batch,
> -  (MFX_SURFACE_PLANAR_420_8 << 28) | /* 420 planar YUV 
> surface */
> +  (surface_format << 28) | /* 420 planar YUV surface */
>((standard_select != MFX_FORMAT_JPEG) << 27) | /* 
> interleave chroma, set to 0 for JPEG */
>(0 << 22) | /* surface object control state, ignored */
>((obj_surface->width - 1) << 3) | /* pitch */
> diff --git a/src/gen8_mfd.c b/src/gen8_mfd.c
> index e3e71fb..10495d8 100644
> --- a/src/gen8_mfd.c
> +++ b/src/gen8_mf

Re: [Libva] [PATCH v2 intel-driver 8/8] decoder: h264: optimize support for grayscale surfaces.

2014-05-15 Thread Gwenole Beauchesne
Hi,

2014-05-15 9:10 GMT+02:00 Xiang, Haihao :
> On Thu, 2014-05-15 at 14:26 +0800, Zhao, Yakui wrote:
>> On Thu, 2014-05-15 at 00:05 -0600, Gwenole Beauchesne wrote:
>> > Hi,
>> >
>> > 2014-05-15 7:24 GMT+02:00 Zhao, Yakui :
>> > > On Wed, 2014-05-14 at 22:28 -0600, Gwenole Beauchesne wrote:
>> > >> Hi,
>> > >>
>> > >> 2014-05-15 3:34 GMT+02:00 Zhao, Yakui :
>> > >> > On Wed, 2014-05-14 at 07:13 -0600, Gwenole Beauchesne wrote:
>> > >> >> Optimize support for grayscale surfaces in two aspects: (i) space
>> > >> >> by only allocating the luminance component ; (ii) speed by avoiding
>> > >> >> initialization of the (now inexistent) chrominance planes.
>> > >> >>
>> > >> >> Keep backward compatibility with older codec layers that only
>> > >> >> supported YUV 4:2:0 and not grayscale formats properly.
>> > >> >
>> > >> > As a whole, I am OK to this version patch except two concerns.
>> > >> >
>> > >> >>
>> > >> >> Signed-off-by: Gwenole Beauchesne 
>> > >> >> ---
>> > >> >>  src/gen6_mfd.c   |8 ++--
>> > >> >>  src/gen75_mfd.c  |6 +-
>> > >> >>  src/gen7_mfd.c   |6 +-
>> > >> >>  src/gen8_mfd.c   |6 +-
>> > >> >>  src/i965_decoder_utils.c |   23 +++
>> > >> >>  src/i965_drv_video.c |   22 ++
>> > >> >>  src/i965_drv_video.h |9 +
>> > >> >>  7 files changed, 71 insertions(+), 9 deletions(-)
>> > >> >>
>> > >> >> diff --git a/src/gen6_mfd.c b/src/gen6_mfd.c
>> > >> >> index 2092f69..f925d98 100755
>> > >> >> --- a/src/gen6_mfd.c
>> > >> >> +++ b/src/gen6_mfd.c
>> > >> >> @@ -130,7 +130,11 @@ gen6_mfd_surface_state(VADriverContextP ctx,
>> > >> >>  {
>> > >> >>  struct intel_batchbuffer *batch = gen6_mfd_context->base.batch;
>> > >> >>  struct object_surface *obj_surface = 
>> > >> >> decode_state->render_object;
>> > >> >> -
>> > >> >> +unsigned int surface_format;
>> > >> >> +
>> > >> >> +surface_format = obj_surface->fourcc == VA_FOURCC_Y800 ?
>> > >> >> +MFX_SURFACE_MONOCHROME : MFX_SURFACE_PLANAR_420_8;
>> > >> >> +
>> > >> >
>> > >> > Can it work if it still set the PLANAR_420_8 format for the Y800
>> > >> > surface?
>> > >>
>> > >> No, MONO is the only specified and supported format to tell the MFX
>> > >> engine to disregard the chroma components. It does not seem to care of
>> > >> the supplied chroma_format_idc field to AVC_IMG_STATE.
>> > >
>> > > The following is what I got from the spec.
>> > >   >For video codec, it should  set to 4 always
>> >
>> > You are implying that the specific programming notes/section in the
>> > PRM for grayscale support are irrelevant, which is incorrect. So,
>> > please submit a change request to get that removed if you think it is
>> > useless.
>> >
>> > Reality is some of the existing state descriptions are a cumulative
>> > changes from the oldiest generations and the generated doc was
>> > probably missing appropriate ifs. The prevailing info is the one that
>> > has a specific section for it. Consider that as an amendment.
>> >
>> > Besides, it should also be possible to decode color streams into
>> > grayscale for other use cases. I dont' have a personal need for it,
>> > though the doc suggests them.
>>
>> Maybe your understanding is also correct. But I can't find it in the
>> spec.
>> It will be better if you can point out where I can get it from the spec.
>>
>> >
>> > > In such case the the chroma_format_idc field will be configured in
>> > > AVC_IMAGE_STATE to assure that the chroma components of grayscale are
>> > > not touched in decoding.
>> >
>> > That's the theory. In practice, it seems Haihao had to explicitly
>> > initialize the chroma components to 0x80. There might be a reason.
>> > Avoiding the memset() is what I want to reduce init latencies, and it
>> > is possible to address that in one shot with the supplied patches. If
>> > you see other ways to achieve that, you are welcome to provide patches
>> > for that.
>>
>> The initialization of chroma components to 0x80 is only for the display
>> purpose. If it is not for the display purpose, the initialization of
>> chroma compnent can be skipped.
>
> The implementation of vaPutSurface() doesn't use the chroma components
> for grayscale surfaces, it is just a workaround for the error in some
> codec layer.

Please be more specific. Keeping a pile of workarounds, and inefficient
ones at that, is not a viable long-term solution.

Thanks,
Gwenole.

>
>
>>
>> >
>> > >> >>  BEGIN_BCS_BATCH(batch, 6);
>> > >> >>  OUT_BCS_BATCH(batch, MFX_SURFACE_STATE | (6 - 2));
>> > >> >>  OUT_BCS_BATCH(batch, 0);
>> > >> >> @@ -138,7 +142,7 @@ gen6_mfd_surface_state(VADriverContextP ctx,
>> > >> >>((obj_surface->orig_height - 1) << 19) |
>> > >> >>((obj_surface->orig_width - 1) << 6));
>> > >> >>  OUT_BCS_BATCH(batch,
>> > >> >> -  (MFX_SURFACE_PLANAR_420_8 << 28) | /* 420 planar 
>> > >> >> YUV surface */
>> > >> >>

Re: [Libva] [PATCH v2 intel-driver 8/8] decoder: h264: optimize support for grayscale surfaces.

2014-05-15 Thread Xiang, Haihao
On Thu, 2014-05-15 at 14:26 +0800, Zhao, Yakui wrote: 
> On Thu, 2014-05-15 at 00:05 -0600, Gwenole Beauchesne wrote:
> > Hi,
> > 
> > 2014-05-15 7:24 GMT+02:00 Zhao, Yakui :
> > > On Wed, 2014-05-14 at 22:28 -0600, Gwenole Beauchesne wrote:
> > >> Hi,
> > >>
> > >> 2014-05-15 3:34 GMT+02:00 Zhao, Yakui :
> > >> > On Wed, 2014-05-14 at 07:13 -0600, Gwenole Beauchesne wrote:
> > >> >> Optimize support for grayscale surfaces in two aspects: (i) space
> > >> >> by only allocating the luminance component ; (ii) speed by avoiding
> > >> >> initialization of the (now inexistent) chrominance planes.
> > >> >>
> > >> >> Keep backward compatibility with older codec layers that only
> > >> >> supported YUV 4:2:0 and not grayscale formats properly.
> > >> >
> > >> > As a whole, I am OK to this version patch except two concerns.
> > >> >
> > >> >>
> > >> >> Signed-off-by: Gwenole Beauchesne 
> > >> >> ---
> > >> >>  src/gen6_mfd.c   |8 ++--
> > >> >>  src/gen75_mfd.c  |6 +-
> > >> >>  src/gen7_mfd.c   |6 +-
> > >> >>  src/gen8_mfd.c   |6 +-
> > >> >>  src/i965_decoder_utils.c |   23 +++
> > >> >>  src/i965_drv_video.c |   22 ++
> > >> >>  src/i965_drv_video.h |9 +
> > >> >>  7 files changed, 71 insertions(+), 9 deletions(-)
> > >> >>
> > >> >> diff --git a/src/gen6_mfd.c b/src/gen6_mfd.c
> > >> >> index 2092f69..f925d98 100755
> > >> >> --- a/src/gen6_mfd.c
> > >> >> +++ b/src/gen6_mfd.c
> > >> >> @@ -130,7 +130,11 @@ gen6_mfd_surface_state(VADriverContextP ctx,
> > >> >>  {
> > >> >>  struct intel_batchbuffer *batch = gen6_mfd_context->base.batch;
> > >> >>  struct object_surface *obj_surface = decode_state->render_object;
> > >> >> -
> > >> >> +unsigned int surface_format;
> > >> >> +
> > >> >> +surface_format = obj_surface->fourcc == VA_FOURCC_Y800 ?
> > >> >> +MFX_SURFACE_MONOCHROME : MFX_SURFACE_PLANAR_420_8;
> > >> >> +
> > >> >
> > >> > Can it work if it still set the PLANAR_420_8 format for the Y800
> > >> > surface?
> > >>
> > >> No, MONO is the only specified and supported format to tell the MFX
> > >> engine to disregard the chroma components. It does not seem to care of
> > >> the supplied chroma_format_idc field to AVC_IMG_STATE.
> > >
> > > The following is what I got from the spec.
> > >   >For video codec, it should  set to 4 always
> > 
> > You are implying that the specific programming notes/section in the
> > PRM for grayscale support are irrelevant, which is incorrect. So,
> > please submit a change request to get that removed if you think it is
> > useless.
> > 
> > Reality is some of the existing state descriptions are a cumulative
> > changes from the oldiest generations and the generated doc was
> > probably missing appropriate ifs. The prevailing info is the one that
> > has a specific section for it. Consider that as an amendment.
> > 
> > Besides, it should also be possible to decode color streams into
> > grayscale for other use cases. I dont' have a personal need for it,
> > though the doc suggests them.
> 
> Maybe your understanding is also correct. But I can't find it in the
> spec.
> It will be better if you can point out where I can get it from the spec.
> 
> > 
> > > In such case the the chroma_format_idc field will be configured in
> > > AVC_IMAGE_STATE to assure that the chroma components of grayscale are
> > > not touched in decoding.
> > 
> > That's the theory. In practice, it seems Haihao had to explicitly
> > initialize the chroma components to 0x80. There might be a reason.
> > Avoiding the memset() is what I want to reduce init latencies, and it
> > is possible to address that in one shot with the supplied patches. If
> > you see other ways to achieve that, you are welcome to provide patches
> > for that.
> 
> The initialization of chroma components to 0x80 is only for the display
> purpose. If it is not for the display purpose, the initialization of
> chroma compnent can be skipped. 

The implementation of vaPutSurface() doesn't use the chroma components
for grayscale surfaces; the 0x80 initialization is just a workaround for
an error in some codec layer.


> 
> > 
> > >> >>  BEGIN_BCS_BATCH(batch, 6);
> > >> >>  OUT_BCS_BATCH(batch, MFX_SURFACE_STATE | (6 - 2));
> > >> >>  OUT_BCS_BATCH(batch, 0);
> > >> >> @@ -138,7 +142,7 @@ gen6_mfd_surface_state(VADriverContextP ctx,
> > >> >>((obj_surface->orig_height - 1) << 19) |
> > >> >>((obj_surface->orig_width - 1) << 6));
> > >> >>  OUT_BCS_BATCH(batch,
> > >> >> -  (MFX_SURFACE_PLANAR_420_8 << 28) | /* 420 planar 
> > >> >> YUV surface */
> > >> >> +  (surface_format << 28) | /* 420 planar YUV surface 
> > >> >> */
> > >> >>(1 << 27) | /* must be 1 for interleave U/V, 
> > >> >> hardware requirement */
> > >> >>(0 << 22) | /* surface object control state, 
> > >> >> FIXME??? */

Re: [Libva] [PATCH v2 intel-driver 8/8] decoder: h264: optimize support for grayscale surfaces.

2014-05-14 Thread Zhao, Yakui
On Thu, 2014-05-15 at 00:05 -0600, Gwenole Beauchesne wrote:
> Hi,
> 
> 2014-05-15 7:24 GMT+02:00 Zhao, Yakui :
> > On Wed, 2014-05-14 at 22:28 -0600, Gwenole Beauchesne wrote:
> >> Hi,
> >>
> >> 2014-05-15 3:34 GMT+02:00 Zhao, Yakui :
> >> > On Wed, 2014-05-14 at 07:13 -0600, Gwenole Beauchesne wrote:
> >> >> Optimize support for grayscale surfaces in two aspects: (i) space
> >> >> by only allocating the luminance component ; (ii) speed by avoiding
> >> >> initialization of the (now inexistent) chrominance planes.
> >> >>
> >> >> Keep backward compatibility with older codec layers that only
> >> >> supported YUV 4:2:0 and not grayscale formats properly.
> >> >
> >> > As a whole, I am OK to this version patch except two concerns.
> >> >
> >> >>
> >> >> Signed-off-by: Gwenole Beauchesne 
> >> >> ---
> >> >>  src/gen6_mfd.c   |8 ++--
> >> >>  src/gen75_mfd.c  |6 +-
> >> >>  src/gen7_mfd.c   |6 +-
> >> >>  src/gen8_mfd.c   |6 +-
> >> >>  src/i965_decoder_utils.c |   23 +++
> >> >>  src/i965_drv_video.c |   22 ++
> >> >>  src/i965_drv_video.h |9 +
> >> >>  7 files changed, 71 insertions(+), 9 deletions(-)
> >> >>
> >> >> diff --git a/src/gen6_mfd.c b/src/gen6_mfd.c
> >> >> index 2092f69..f925d98 100755
> >> >> --- a/src/gen6_mfd.c
> >> >> +++ b/src/gen6_mfd.c
> >> >> @@ -130,7 +130,11 @@ gen6_mfd_surface_state(VADriverContextP ctx,
> >> >>  {
> >> >>  struct intel_batchbuffer *batch = gen6_mfd_context->base.batch;
> >> >>  struct object_surface *obj_surface = decode_state->render_object;
> >> >> -
> >> >> +unsigned int surface_format;
> >> >> +
> >> >> +surface_format = obj_surface->fourcc == VA_FOURCC_Y800 ?
> >> >> +MFX_SURFACE_MONOCHROME : MFX_SURFACE_PLANAR_420_8;
> >> >> +
> >> >
> >> > Can it work if it still set the PLANAR_420_8 format for the Y800
> >> > surface?
> >>
> >> No, MONO is the only specified and supported format to tell the MFX
> >> engine to disregard the chroma components. It does not seem to care of
> >> the supplied chroma_format_idc field to AVC_IMG_STATE.
> >
> > The following is what I got from the spec.
> >   >For video codec, it should  set to 4 always
> 
> You are implying that the specific programming notes/section in the
> PRM for grayscale support are irrelevant, which is incorrect. So,
> please submit a change request to get that removed if you think it is
> useless.
> 
> Reality is some of the existing state descriptions are a cumulative
> changes from the oldiest generations and the generated doc was
> probably missing appropriate ifs. The prevailing info is the one that
> has a specific section for it. Consider that as an amendment.
> 
> Besides, it should also be possible to decode color streams into
> grayscale for other use cases. I dont' have a personal need for it,
> though the doc suggests them.

Maybe your understanding is also correct, but I can't find it in the
spec.
It would be better if you could point out where in the spec I can find it.

> 
> > In such case the the chroma_format_idc field will be configured in
> > AVC_IMAGE_STATE to assure that the chroma components of grayscale are
> > not touched in decoding.
> 
> That's the theory. In practice, it seems Haihao had to explicitly
> initialize the chroma components to 0x80. There might be a reason.
> Avoiding the memset() is what I want to reduce init latencies, and it
> is possible to address that in one shot with the supplied patches. If
> you see other ways to achieve that, you are welcome to provide patches
> for that.

The initialization of chroma components to 0x80 is only for display
purposes. If it is not for display purposes, the initialization of the
chroma component can be skipped.


> 
> >> >>  BEGIN_BCS_BATCH(batch, 6);
> >> >>  OUT_BCS_BATCH(batch, MFX_SURFACE_STATE | (6 - 2));
> >> >>  OUT_BCS_BATCH(batch, 0);
> >> >> @@ -138,7 +142,7 @@ gen6_mfd_surface_state(VADriverContextP ctx,
> >> >>((obj_surface->orig_height - 1) << 19) |
> >> >>((obj_surface->orig_width - 1) << 6));
> >> >>  OUT_BCS_BATCH(batch,
> >> >> -  (MFX_SURFACE_PLANAR_420_8 << 28) | /* 420 planar YUV 
> >> >> surface */
> >> >> +  (surface_format << 28) | /* 420 planar YUV surface */
> >> >>(1 << 27) | /* must be 1 for interleave U/V, 
> >> >> hardware requirement */
> >> >>(0 << 22) | /* surface object control state, 
> >> >> FIXME??? */
> >> >>((obj_surface->width - 1) << 3) | /* pitch */
> >> >> diff --git a/src/gen75_mfd.c b/src/gen75_mfd.c
> >> >> index 5b023cf..895b194 100644
> >> >> --- a/src/gen75_mfd.c
> >> >> +++ b/src/gen75_mfd.c
> >> >> @@ -137,12 +137,16 @@ gen75_mfd_surface_state(VADriverContextP ctx,
> >> >>  struct object_surface *obj_surface = decode_state->render_object;
> >> >>  unsigned int y_cb_offset;
> >> >>  

Re: [Libva] [PATCH v2 intel-driver 8/8] decoder: h264: optimize support for grayscale surfaces.

2014-05-14 Thread Gwenole Beauchesne
Hi,

2014-05-15 7:24 GMT+02:00 Zhao, Yakui :
> On Wed, 2014-05-14 at 22:28 -0600, Gwenole Beauchesne wrote:
>> Hi,
>>
>> 2014-05-15 3:34 GMT+02:00 Zhao, Yakui :
>> > On Wed, 2014-05-14 at 07:13 -0600, Gwenole Beauchesne wrote:
>> >> Optimize support for grayscale surfaces in two aspects: (i) space
>> >> by only allocating the luminance component ; (ii) speed by avoiding
>> >> initialization of the (now inexistent) chrominance planes.
>> >>
>> >> Keep backward compatibility with older codec layers that only
>> >> supported YUV 4:2:0 and not grayscale formats properly.
>> >
>> > As a whole, I am OK to this version patch except two concerns.
>> >
>> >>
>> >> Signed-off-by: Gwenole Beauchesne 
>> >> ---
>> >>  src/gen6_mfd.c   |8 ++--
>> >>  src/gen75_mfd.c  |6 +-
>> >>  src/gen7_mfd.c   |6 +-
>> >>  src/gen8_mfd.c   |6 +-
>> >>  src/i965_decoder_utils.c |   23 +++
>> >>  src/i965_drv_video.c |   22 ++
>> >>  src/i965_drv_video.h |9 +
>> >>  7 files changed, 71 insertions(+), 9 deletions(-)
>> >>
>> >> diff --git a/src/gen6_mfd.c b/src/gen6_mfd.c
>> >> index 2092f69..f925d98 100755
>> >> --- a/src/gen6_mfd.c
>> >> +++ b/src/gen6_mfd.c
>> >> @@ -130,7 +130,11 @@ gen6_mfd_surface_state(VADriverContextP ctx,
>> >>  {
>> >>  struct intel_batchbuffer *batch = gen6_mfd_context->base.batch;
>> >>  struct object_surface *obj_surface = decode_state->render_object;
>> >> -
>> >> +unsigned int surface_format;
>> >> +
>> >> +surface_format = obj_surface->fourcc == VA_FOURCC_Y800 ?
>> >> +MFX_SURFACE_MONOCHROME : MFX_SURFACE_PLANAR_420_8;
>> >> +
>> >
>> > Can it work if it still set the PLANAR_420_8 format for the Y800
>> > surface?
>>
>> No, MONO is the only specified and supported format to tell the MFX
>> engine to disregard the chroma components. It does not seem to care of
>> the supplied chroma_format_idc field to AVC_IMG_STATE.
>
> The following is what I got from the spec.
>   >For video codec, it should  set to 4 always

You are implying that the specific programming notes/section in the
PRM for grayscale support are irrelevant, which is incorrect. So,
please submit a change request to get that removed if you think it is
useless.

The reality is that some of the existing state descriptions are cumulative
changes from the oldest generations, and the generated doc was
probably missing the appropriate ifs. The prevailing info is the one that
has a specific section for it. Consider that as an amendment.

Besides, it should also be possible to decode color streams into
grayscale for other use cases. I don't have a personal need for that,
though the doc suggests such use cases.

> In such case the the chroma_format_idc field will be configured in
> AVC_IMAGE_STATE to assure that the chroma components of grayscale are
> not touched in decoding.

That's the theory. In practice, it seems Haihao had to explicitly
initialize the chroma components to 0x80. There might be a reason.
Avoiding the memset() is what I want, in order to reduce init latencies,
and it is possible to address that in one shot with the supplied patches.
If you see other ways to achieve that, you are welcome to provide patches
for that.

>> >>  BEGIN_BCS_BATCH(batch, 6);
>> >>  OUT_BCS_BATCH(batch, MFX_SURFACE_STATE | (6 - 2));
>> >>  OUT_BCS_BATCH(batch, 0);
>> >> @@ -138,7 +142,7 @@ gen6_mfd_surface_state(VADriverContextP ctx,
>> >>((obj_surface->orig_height - 1) << 19) |
>> >>((obj_surface->orig_width - 1) << 6));
>> >>  OUT_BCS_BATCH(batch,
>> >> -  (MFX_SURFACE_PLANAR_420_8 << 28) | /* 420 planar YUV 
>> >> surface */
>> >> +  (surface_format << 28) | /* 420 planar YUV surface */
>> >>(1 << 27) | /* must be 1 for interleave U/V, hardware 
>> >> requirement */
>> >>(0 << 22) | /* surface object control state, FIXME??? 
>> >> */
>> >>((obj_surface->width - 1) << 3) | /* pitch */
>> >> diff --git a/src/gen75_mfd.c b/src/gen75_mfd.c
>> >> index 5b023cf..895b194 100644
>> >> --- a/src/gen75_mfd.c
>> >> +++ b/src/gen75_mfd.c
>> >> @@ -137,12 +137,16 @@ gen75_mfd_surface_state(VADriverContextP ctx,
>> >>  struct object_surface *obj_surface = decode_state->render_object;
>> >>  unsigned int y_cb_offset;
>> >>  unsigned int y_cr_offset;
>> >> +unsigned int surface_format;
>> >>
>> >>  assert(obj_surface);
>> >>
>> >>  y_cb_offset = obj_surface->y_cb_offset;
>> >>  y_cr_offset = obj_surface->y_cr_offset;
>> >>
>> >> +surface_format = obj_surface->fourcc == VA_FOURCC_Y800 ?
>> >> +MFX_SURFACE_MONOCHROME : MFX_SURFACE_PLANAR_420_8;
>> >> +
>> >>  BEGIN_BCS_BATCH(batch, 6);
>> >>  OUT_BCS_BATCH(batch, MFX_SURFACE_STATE | (6 - 2));
>> >>  OUT_BCS_BATCH(batch, 0);
>> >> @@ -150,7 +154,7 @@ gen75_mfd_surface_state(VADriverContextP ctx,
>> >>   

Re: [Libva] [PATCH v2 intel-driver 8/8] decoder: h264: optimize support for grayscale surfaces.

2014-05-14 Thread Zhao, Yakui
On Wed, 2014-05-14 at 22:28 -0600, Gwenole Beauchesne wrote:
> Hi,
> 
> 2014-05-15 3:34 GMT+02:00 Zhao, Yakui :
> > On Wed, 2014-05-14 at 07:13 -0600, Gwenole Beauchesne wrote:
> >> Optimize support for grayscale surfaces in two aspects: (i) space
> >> by only allocating the luminance component ; (ii) speed by avoiding
> >> initialization of the (now inexistent) chrominance planes.
> >>
> >> Keep backward compatibility with older codec layers that only
> >> supported YUV 4:2:0 and not grayscale formats properly.
> >
> > As a whole, I am OK to this version patch except two concerns.
> >
> >>
> >> Signed-off-by: Gwenole Beauchesne 
> >> ---
> >>  src/gen6_mfd.c   |8 ++--
> >>  src/gen75_mfd.c  |6 +-
> >>  src/gen7_mfd.c   |6 +-
> >>  src/gen8_mfd.c   |6 +-
> >>  src/i965_decoder_utils.c |   23 +++
> >>  src/i965_drv_video.c |   22 ++
> >>  src/i965_drv_video.h |9 +
> >>  7 files changed, 71 insertions(+), 9 deletions(-)
> >>
> >> diff --git a/src/gen6_mfd.c b/src/gen6_mfd.c
> >> index 2092f69..f925d98 100755
> >> --- a/src/gen6_mfd.c
> >> +++ b/src/gen6_mfd.c
> >> @@ -130,7 +130,11 @@ gen6_mfd_surface_state(VADriverContextP ctx,
> >>  {
> >>  struct intel_batchbuffer *batch = gen6_mfd_context->base.batch;
> >>  struct object_surface *obj_surface = decode_state->render_object;
> >> -
> >> +unsigned int surface_format;
> >> +
> >> +surface_format = obj_surface->fourcc == VA_FOURCC_Y800 ?
> >> +MFX_SURFACE_MONOCHROME : MFX_SURFACE_PLANAR_420_8;
> >> +
> >
> > Can it work if it still set the PLANAR_420_8 format for the Y800
> > surface?
> 
> No, MONO is the only specified and supported format to tell the MFX
> engine to disregard the chroma components. It does not seem to care of
> the supplied chroma_format_idc field to AVC_IMG_STATE.

The following is what I got from the spec.
  >For video codec, it should  set to 4 always

In such a case, the chroma_format_idc field will be configured in
AVC_IMAGE_STATE to ensure that the chroma components of grayscale are
not touched in decoding. So the 420_8 format is still appropriate for
the grayscale surface.

> 
> >>  BEGIN_BCS_BATCH(batch, 6);
> >>  OUT_BCS_BATCH(batch, MFX_SURFACE_STATE | (6 - 2));
> >>  OUT_BCS_BATCH(batch, 0);
> >> @@ -138,7 +142,7 @@ gen6_mfd_surface_state(VADriverContextP ctx,
> >>((obj_surface->orig_height - 1) << 19) |
> >>((obj_surface->orig_width - 1) << 6));
> >>  OUT_BCS_BATCH(batch,
> >> -  (MFX_SURFACE_PLANAR_420_8 << 28) | /* 420 planar YUV 
> >> surface */
> >> +  (surface_format << 28) | /* 420 planar YUV surface */
> >>(1 << 27) | /* must be 1 for interleave U/V, hardware 
> >> requirement */
> >>(0 << 22) | /* surface object control state, FIXME??? */
> >>((obj_surface->width - 1) << 3) | /* pitch */
> >> diff --git a/src/gen75_mfd.c b/src/gen75_mfd.c
> >> index 5b023cf..895b194 100644
> >> --- a/src/gen75_mfd.c
> >> +++ b/src/gen75_mfd.c
> >> @@ -137,12 +137,16 @@ gen75_mfd_surface_state(VADriverContextP ctx,
> >>  struct object_surface *obj_surface = decode_state->render_object;
> >>  unsigned int y_cb_offset;
> >>  unsigned int y_cr_offset;
> >> +unsigned int surface_format;
> >>
> >>  assert(obj_surface);
> >>
> >>  y_cb_offset = obj_surface->y_cb_offset;
> >>  y_cr_offset = obj_surface->y_cr_offset;
> >>
> >> +surface_format = obj_surface->fourcc == VA_FOURCC_Y800 ?
> >> +MFX_SURFACE_MONOCHROME : MFX_SURFACE_PLANAR_420_8;
> >> +
> >>  BEGIN_BCS_BATCH(batch, 6);
> >>  OUT_BCS_BATCH(batch, MFX_SURFACE_STATE | (6 - 2));
> >>  OUT_BCS_BATCH(batch, 0);
> >> @@ -150,7 +154,7 @@ gen75_mfd_surface_state(VADriverContextP ctx,
> >>((obj_surface->orig_height - 1) << 18) |
> >>((obj_surface->orig_width - 1) << 4));
> >>  OUT_BCS_BATCH(batch,
> >> -  (MFX_SURFACE_PLANAR_420_8 << 28) | /* 420 planar YUV 
> >> surface */
> >> +  (surface_format << 28) | /* 420 planar YUV surface */
> >>((standard_select != MFX_FORMAT_JPEG) << 27) | /* 
> >> interleave chroma, set to 0 for JPEG */
> >>(0 << 22) | /* surface object control state, ignored */
> >>((obj_surface->width - 1) << 3) | /* pitch */
> >> diff --git a/src/gen7_mfd.c b/src/gen7_mfd.c
> >> index 70b1cec..2e0d653 100755
> >> --- a/src/gen7_mfd.c
> >> +++ b/src/gen7_mfd.c
> >> @@ -135,12 +135,16 @@ gen7_mfd_surface_state(VADriverContextP ctx,
> >>  struct object_surface *obj_surface = decode_state->render_object;
> >>  unsigned int y_cb_offset;
> >>  unsigned int y_cr_offset;
> >> +unsigned int surface_format;
> >>
> >>  assert(obj_surface);
> >>
> >>  y_cb_offset = obj_surface->y_cb_offset;
> >

Re: [Libva] [PATCH v2 intel-driver 8/8] decoder: h264: optimize support for grayscale surfaces.

2014-05-14 Thread Gwenole Beauchesne
Hi,

2014-05-15 3:34 GMT+02:00 Zhao, Yakui :
> On Wed, 2014-05-14 at 07:13 -0600, Gwenole Beauchesne wrote:
>> Optimize support for grayscale surfaces in two aspects: (i) space
>> by only allocating the luminance component ; (ii) speed by avoiding
>> initialization of the (now inexistent) chrominance planes.
>>
>> Keep backward compatibility with older codec layers that only
>> supported YUV 4:2:0 and not grayscale formats properly.
>
> On the whole, I am OK with this version of the patch, except for two concerns.
>
>>
>> Signed-off-by: Gwenole Beauchesne 
>> ---
>>  src/gen6_mfd.c   |8 ++--
>>  src/gen75_mfd.c  |6 +-
>>  src/gen7_mfd.c   |6 +-
>>  src/gen8_mfd.c   |6 +-
>>  src/i965_decoder_utils.c |   23 +++
>>  src/i965_drv_video.c |   22 ++
>>  src/i965_drv_video.h |9 +
>>  7 files changed, 71 insertions(+), 9 deletions(-)
>>
>> diff --git a/src/gen6_mfd.c b/src/gen6_mfd.c
>> index 2092f69..f925d98 100755
>> --- a/src/gen6_mfd.c
>> +++ b/src/gen6_mfd.c
>> @@ -130,7 +130,11 @@ gen6_mfd_surface_state(VADriverContextP ctx,
>>  {
>>  struct intel_batchbuffer *batch = gen6_mfd_context->base.batch;
>>  struct object_surface *obj_surface = decode_state->render_object;
>> -
>> +unsigned int surface_format;
>> +
>> +surface_format = obj_surface->fourcc == VA_FOURCC_Y800 ?
>> +MFX_SURFACE_MONOCHROME : MFX_SURFACE_PLANAR_420_8;
>> +
>
> Can it work if it still sets the PLANAR_420_8 format for the Y800
> surface?

No, MONO is the only specified and supported format to tell the MFX
engine to disregard the chroma components. It does not seem to care
about the chroma_format_idc field supplied to AVC_IMG_STATE.

>>  BEGIN_BCS_BATCH(batch, 6);
>>  OUT_BCS_BATCH(batch, MFX_SURFACE_STATE | (6 - 2));
>>  OUT_BCS_BATCH(batch, 0);
>> @@ -138,7 +142,7 @@ gen6_mfd_surface_state(VADriverContextP ctx,
>>((obj_surface->orig_height - 1) << 19) |
>>((obj_surface->orig_width - 1) << 6));
>>  OUT_BCS_BATCH(batch,
>> -  (MFX_SURFACE_PLANAR_420_8 << 28) | /* 420 planar YUV 
>> surface */
>> +  (surface_format << 28) | /* 420 planar YUV surface */
>>(1 << 27) | /* must be 1 for interleave U/V, hardware 
>> requirement */
>>(0 << 22) | /* surface object control state, FIXME??? */
>>((obj_surface->width - 1) << 3) | /* pitch */
>> diff --git a/src/gen75_mfd.c b/src/gen75_mfd.c
>> index 5b023cf..895b194 100644
>> --- a/src/gen75_mfd.c
>> +++ b/src/gen75_mfd.c
>> @@ -137,12 +137,16 @@ gen75_mfd_surface_state(VADriverContextP ctx,
>>  struct object_surface *obj_surface = decode_state->render_object;
>>  unsigned int y_cb_offset;
>>  unsigned int y_cr_offset;
>> +unsigned int surface_format;
>>
>>  assert(obj_surface);
>>
>>  y_cb_offset = obj_surface->y_cb_offset;
>>  y_cr_offset = obj_surface->y_cr_offset;
>>
>> +surface_format = obj_surface->fourcc == VA_FOURCC_Y800 ?
>> +MFX_SURFACE_MONOCHROME : MFX_SURFACE_PLANAR_420_8;
>> +
>>  BEGIN_BCS_BATCH(batch, 6);
>>  OUT_BCS_BATCH(batch, MFX_SURFACE_STATE | (6 - 2));
>>  OUT_BCS_BATCH(batch, 0);
>> @@ -150,7 +154,7 @@ gen75_mfd_surface_state(VADriverContextP ctx,
>>((obj_surface->orig_height - 1) << 18) |
>>((obj_surface->orig_width - 1) << 4));
>>  OUT_BCS_BATCH(batch,
>> -  (MFX_SURFACE_PLANAR_420_8 << 28) | /* 420 planar YUV 
>> surface */
>> +  (surface_format << 28) | /* 420 planar YUV surface */
>>((standard_select != MFX_FORMAT_JPEG) << 27) | /* 
>> interleave chroma, set to 0 for JPEG */
>>(0 << 22) | /* surface object control state, ignored */
>>((obj_surface->width - 1) << 3) | /* pitch */
>> diff --git a/src/gen7_mfd.c b/src/gen7_mfd.c
>> index 70b1cec..2e0d653 100755
>> --- a/src/gen7_mfd.c
>> +++ b/src/gen7_mfd.c
>> @@ -135,12 +135,16 @@ gen7_mfd_surface_state(VADriverContextP ctx,
>>  struct object_surface *obj_surface = decode_state->render_object;
>>  unsigned int y_cb_offset;
>>  unsigned int y_cr_offset;
>> +unsigned int surface_format;
>>
>>  assert(obj_surface);
>>
>>  y_cb_offset = obj_surface->y_cb_offset;
>>  y_cr_offset = obj_surface->y_cr_offset;
>>
>> +surface_format = obj_surface->fourcc == VA_FOURCC_Y800 ?
>> +MFX_SURFACE_MONOCHROME : MFX_SURFACE_PLANAR_420_8;
>> +
>>  BEGIN_BCS_BATCH(batch, 6);
>>  OUT_BCS_BATCH(batch, MFX_SURFACE_STATE | (6 - 2));
>>  OUT_BCS_BATCH(batch, 0);
>> @@ -148,7 +152,7 @@ gen7_mfd_surface_state(VADriverContextP ctx,
>>((obj_surface->orig_height - 1) << 18) |
>>((obj_surface->orig_width - 1) << 4));
>>  OUT_BCS_BATCH(batch,
>> -  (MFX_SURFACE_PLANAR_420_8 << 28) | /* 420 pla

Re: [Libva] [PATCH v2 intel-driver 8/8] decoder: h264: optimize support for grayscale surfaces.

2014-05-14 Thread Zhao, Yakui
On Wed, 2014-05-14 at 07:13 -0600, Gwenole Beauchesne wrote:
> Optimize support for grayscale surfaces in two aspects: (i) space
> by only allocating the luminance component ; (ii) speed by avoiding
> initialization of the (now inexistent) chrominance planes.
> 
> Keep backward compatibility with older codec layers that only
> supported YUV 4:2:0 and not grayscale formats properly.

On the whole, I am OK with this version of the patch, except for two concerns.

> 
> Signed-off-by: Gwenole Beauchesne 
> ---
>  src/gen6_mfd.c   |8 ++--
>  src/gen75_mfd.c  |6 +-
>  src/gen7_mfd.c   |6 +-
>  src/gen8_mfd.c   |6 +-
>  src/i965_decoder_utils.c |   23 +++
>  src/i965_drv_video.c |   22 ++
>  src/i965_drv_video.h |9 +
>  7 files changed, 71 insertions(+), 9 deletions(-)
> 
> diff --git a/src/gen6_mfd.c b/src/gen6_mfd.c
> index 2092f69..f925d98 100755
> --- a/src/gen6_mfd.c
> +++ b/src/gen6_mfd.c
> @@ -130,7 +130,11 @@ gen6_mfd_surface_state(VADriverContextP ctx,
>  {
>  struct intel_batchbuffer *batch = gen6_mfd_context->base.batch;
>  struct object_surface *obj_surface = decode_state->render_object;
> -
> +unsigned int surface_format;
> +
> +surface_format = obj_surface->fourcc == VA_FOURCC_Y800 ?
> +MFX_SURFACE_MONOCHROME : MFX_SURFACE_PLANAR_420_8;
> +

Can it work if it still sets the PLANAR_420_8 format for the Y800
surface?
If yes, I suggest that this not be changed, as the original code follows
the spec.

>  BEGIN_BCS_BATCH(batch, 6);
>  OUT_BCS_BATCH(batch, MFX_SURFACE_STATE | (6 - 2));
>  OUT_BCS_BATCH(batch, 0);
> @@ -138,7 +142,7 @@ gen6_mfd_surface_state(VADriverContextP ctx,
>((obj_surface->orig_height - 1) << 19) |
>((obj_surface->orig_width - 1) << 6));
>  OUT_BCS_BATCH(batch,
> -  (MFX_SURFACE_PLANAR_420_8 << 28) | /* 420 planar YUV 
> surface */
> +  (surface_format << 28) | /* 420 planar YUV surface */
>(1 << 27) | /* must be 1 for interleave U/V, hardware 
> requirement */
>(0 << 22) | /* surface object control state, FIXME??? */
>((obj_surface->width - 1) << 3) | /* pitch */
> diff --git a/src/gen75_mfd.c b/src/gen75_mfd.c
> index 5b023cf..895b194 100644
> --- a/src/gen75_mfd.c
> +++ b/src/gen75_mfd.c
> @@ -137,12 +137,16 @@ gen75_mfd_surface_state(VADriverContextP ctx,
>  struct object_surface *obj_surface = decode_state->render_object;
>  unsigned int y_cb_offset;
>  unsigned int y_cr_offset;
> +unsigned int surface_format;
>  
>  assert(obj_surface);
>  
>  y_cb_offset = obj_surface->y_cb_offset;
>  y_cr_offset = obj_surface->y_cr_offset;
>  
> +surface_format = obj_surface->fourcc == VA_FOURCC_Y800 ?
> +MFX_SURFACE_MONOCHROME : MFX_SURFACE_PLANAR_420_8;
> +
>  BEGIN_BCS_BATCH(batch, 6);
>  OUT_BCS_BATCH(batch, MFX_SURFACE_STATE | (6 - 2));
>  OUT_BCS_BATCH(batch, 0);
> @@ -150,7 +154,7 @@ gen75_mfd_surface_state(VADriverContextP ctx,
>((obj_surface->orig_height - 1) << 18) |
>((obj_surface->orig_width - 1) << 4));
>  OUT_BCS_BATCH(batch,
> -  (MFX_SURFACE_PLANAR_420_8 << 28) | /* 420 planar YUV 
> surface */
> +  (surface_format << 28) | /* 420 planar YUV surface */
>((standard_select != MFX_FORMAT_JPEG) << 27) | /* 
> interleave chroma, set to 0 for JPEG */
>(0 << 22) | /* surface object control state, ignored */
>((obj_surface->width - 1) << 3) | /* pitch */
> diff --git a/src/gen7_mfd.c b/src/gen7_mfd.c
> index 70b1cec..2e0d653 100755
> --- a/src/gen7_mfd.c
> +++ b/src/gen7_mfd.c
> @@ -135,12 +135,16 @@ gen7_mfd_surface_state(VADriverContextP ctx,
>  struct object_surface *obj_surface = decode_state->render_object;
>  unsigned int y_cb_offset;
>  unsigned int y_cr_offset;
> +unsigned int surface_format;
>  
>  assert(obj_surface);
>  
>  y_cb_offset = obj_surface->y_cb_offset;
>  y_cr_offset = obj_surface->y_cr_offset;
>  
> +surface_format = obj_surface->fourcc == VA_FOURCC_Y800 ?
> +MFX_SURFACE_MONOCHROME : MFX_SURFACE_PLANAR_420_8;
> +
>  BEGIN_BCS_BATCH(batch, 6);
>  OUT_BCS_BATCH(batch, MFX_SURFACE_STATE | (6 - 2));
>  OUT_BCS_BATCH(batch, 0);
> @@ -148,7 +152,7 @@ gen7_mfd_surface_state(VADriverContextP ctx,
>((obj_surface->orig_height - 1) << 18) |
>((obj_surface->orig_width - 1) << 4));
>  OUT_BCS_BATCH(batch,
> -  (MFX_SURFACE_PLANAR_420_8 << 28) | /* 420 planar YUV 
> surface */
> +  (surface_format << 28) | /* 420 planar YUV surface */
>((standard_select != MFX_FORMAT_JPEG) << 27) | /* 
> interleave chroma, set to 0 for JPEG */
>(0 << 22) | /* surfac

[Libva] [PATCH v2 intel-driver 8/8] decoder: h264: optimize support for grayscale surfaces.

2014-05-14 Thread Gwenole Beauchesne
Optimize support for grayscale surfaces in two aspects: (i) space
by only allocating the luminance component ; (ii) speed by avoiding
initialization of the (now inexistent) chrominance planes.

Keep backward compatibility with older codec layers that only
supported YUV 4:2:0 and not grayscale formats properly.

Signed-off-by: Gwenole Beauchesne 
---
 src/gen6_mfd.c   |8 ++--
 src/gen75_mfd.c  |6 +-
 src/gen7_mfd.c   |6 +-
 src/gen8_mfd.c   |6 +-
 src/i965_decoder_utils.c |   23 +++
 src/i965_drv_video.c |   22 ++
 src/i965_drv_video.h |9 +
 7 files changed, 71 insertions(+), 9 deletions(-)

diff --git a/src/gen6_mfd.c b/src/gen6_mfd.c
index 2092f69..f925d98 100755
--- a/src/gen6_mfd.c
+++ b/src/gen6_mfd.c
@@ -130,7 +130,11 @@ gen6_mfd_surface_state(VADriverContextP ctx,
 {
 struct intel_batchbuffer *batch = gen6_mfd_context->base.batch;
 struct object_surface *obj_surface = decode_state->render_object;
-
+unsigned int surface_format;
+
+surface_format = obj_surface->fourcc == VA_FOURCC_Y800 ?
+MFX_SURFACE_MONOCHROME : MFX_SURFACE_PLANAR_420_8;
+
 BEGIN_BCS_BATCH(batch, 6);
 OUT_BCS_BATCH(batch, MFX_SURFACE_STATE | (6 - 2));
 OUT_BCS_BATCH(batch, 0);
@@ -138,7 +142,7 @@ gen6_mfd_surface_state(VADriverContextP ctx,
   ((obj_surface->orig_height - 1) << 19) |
   ((obj_surface->orig_width - 1) << 6));
 OUT_BCS_BATCH(batch,
-  (MFX_SURFACE_PLANAR_420_8 << 28) | /* 420 planar YUV surface 
*/
+  (surface_format << 28) | /* 420 planar YUV surface */
   (1 << 27) | /* must be 1 for interleave U/V, hardware 
requirement */
   (0 << 22) | /* surface object control state, FIXME??? */
   ((obj_surface->width - 1) << 3) | /* pitch */
diff --git a/src/gen75_mfd.c b/src/gen75_mfd.c
index 5b023cf..895b194 100644
--- a/src/gen75_mfd.c
+++ b/src/gen75_mfd.c
@@ -137,12 +137,16 @@ gen75_mfd_surface_state(VADriverContextP ctx,
 struct object_surface *obj_surface = decode_state->render_object;
 unsigned int y_cb_offset;
 unsigned int y_cr_offset;
+unsigned int surface_format;
 
 assert(obj_surface);
 
 y_cb_offset = obj_surface->y_cb_offset;
 y_cr_offset = obj_surface->y_cr_offset;
 
+surface_format = obj_surface->fourcc == VA_FOURCC_Y800 ?
+MFX_SURFACE_MONOCHROME : MFX_SURFACE_PLANAR_420_8;
+
 BEGIN_BCS_BATCH(batch, 6);
 OUT_BCS_BATCH(batch, MFX_SURFACE_STATE | (6 - 2));
 OUT_BCS_BATCH(batch, 0);
@@ -150,7 +154,7 @@ gen75_mfd_surface_state(VADriverContextP ctx,
   ((obj_surface->orig_height - 1) << 18) |
   ((obj_surface->orig_width - 1) << 4));
 OUT_BCS_BATCH(batch,
-  (MFX_SURFACE_PLANAR_420_8 << 28) | /* 420 planar YUV surface 
*/
+  (surface_format << 28) | /* 420 planar YUV surface */
   ((standard_select != MFX_FORMAT_JPEG) << 27) | /* interleave 
chroma, set to 0 for JPEG */
   (0 << 22) | /* surface object control state, ignored */
   ((obj_surface->width - 1) << 3) | /* pitch */
diff --git a/src/gen7_mfd.c b/src/gen7_mfd.c
index 70b1cec..2e0d653 100755
--- a/src/gen7_mfd.c
+++ b/src/gen7_mfd.c
@@ -135,12 +135,16 @@ gen7_mfd_surface_state(VADriverContextP ctx,
 struct object_surface *obj_surface = decode_state->render_object;
 unsigned int y_cb_offset;
 unsigned int y_cr_offset;
+unsigned int surface_format;
 
 assert(obj_surface);
 
 y_cb_offset = obj_surface->y_cb_offset;
 y_cr_offset = obj_surface->y_cr_offset;
 
+surface_format = obj_surface->fourcc == VA_FOURCC_Y800 ?
+MFX_SURFACE_MONOCHROME : MFX_SURFACE_PLANAR_420_8;
+
 BEGIN_BCS_BATCH(batch, 6);
 OUT_BCS_BATCH(batch, MFX_SURFACE_STATE | (6 - 2));
 OUT_BCS_BATCH(batch, 0);
@@ -148,7 +152,7 @@ gen7_mfd_surface_state(VADriverContextP ctx,
   ((obj_surface->orig_height - 1) << 18) |
   ((obj_surface->orig_width - 1) << 4));
 OUT_BCS_BATCH(batch,
-  (MFX_SURFACE_PLANAR_420_8 << 28) | /* 420 planar YUV surface 
*/
+  (surface_format << 28) | /* 420 planar YUV surface */
   ((standard_select != MFX_FORMAT_JPEG) << 27) | /* interleave 
chroma, set to 0 for JPEG */
   (0 << 22) | /* surface object control state, ignored */
   ((obj_surface->width - 1) << 3) | /* pitch */
diff --git a/src/gen8_mfd.c b/src/gen8_mfd.c
index e3e71fb..10495d8 100644
--- a/src/gen8_mfd.c
+++ b/src/gen8_mfd.c
@@ -145,12 +145,16 @@ gen8_mfd_surface_state(VADriverContextP ctx,
 struct object_surface *obj_surface = decode_state->render_object;
 unsigned int y_cb_offset;
 unsigned int y_cr_offset;
+unsigned int surface_format;
 
 assert(obj_surface);
 
 y_cb_offset = obj