Hi,

2014-05-15 7:24 GMT+02:00 Zhao, Yakui <yakui.z...@intel.com>:
> On Wed, 2014-05-14 at 22:28 -0600, Gwenole Beauchesne wrote:
>> Hi,
>>
>> 2014-05-15 3:34 GMT+02:00 Zhao, Yakui <yakui.z...@intel.com>:
>> > On Wed, 2014-05-14 at 07:13 -0600, Gwenole Beauchesne wrote:
>> >> Optimize support for grayscale surfaces in two aspects: (i) space
>> >> by only allocating the luminance component ; (ii) speed by avoiding
>> >> initialization of the (now inexistent) chrominance planes.
>> >>
>> >> Keep backward compatibility with older codec layers that only
>> >> supported YUV 4:2:0 and not grayscale formats properly.
>> >
>> > As a whole, I am OK to this version patch except two concerns.
>> >
>> >>
>> >> Signed-off-by: Gwenole Beauchesne <gwenole.beauche...@intel.com>
>> >> ---
>> >>  src/gen6_mfd.c           |    8 ++++++--
>> >>  src/gen75_mfd.c          |    6 +++++-
>> >>  src/gen7_mfd.c           |    6 +++++-
>> >>  src/gen8_mfd.c           |    6 +++++-
>> >>  src/i965_decoder_utils.c |   23 +++++++++++++++++++----
>> >>  src/i965_drv_video.c     |   22 ++++++++++++++++++++++
>> >>  src/i965_drv_video.h     |    9 +++++++++
>> >>  7 files changed, 71 insertions(+), 9 deletions(-)
>> >>
>> >> diff --git a/src/gen6_mfd.c b/src/gen6_mfd.c
>> >> index 2092f69..f925d98 100755
>> >> --- a/src/gen6_mfd.c
>> >> +++ b/src/gen6_mfd.c
>> >> @@ -130,7 +130,11 @@ gen6_mfd_surface_state(VADriverContextP ctx,
>> >>  {
>> >>      struct intel_batchbuffer *batch = gen6_mfd_context->base.batch;
>> >>      struct object_surface *obj_surface = decode_state->render_object;
>> >> -
>> >> +    unsigned int surface_format;
>> >> +
>> >> +    surface_format = obj_surface->fourcc == VA_FOURCC_Y800 ?
>> >> +        MFX_SURFACE_MONOCHROME : MFX_SURFACE_PLANAR_420_8;
>> >> +
>> >
>> > Can it work if it still set the PLANAR_420_8 format for the Y800
>> > surface?
>>
>> No, MONO is the only specified and supported format to tell the MFX
>> engine to disregard the chroma components. It does not seem to care of
>> the supplied chroma_format_idc field to AVC_IMG_STATE.
>
> The following is what I got from the spec.
>   >For video codec, it should  set to 4 always

You are implying that the specific programming notes/section in the
PRM for grayscale support are irrelevant, which is incorrect. So,
please submit a change request to get that removed if you think it is
useless.

Reality is some of the existing state descriptions are a cumulative
changes from the oldiest generations and the generated doc was
probably missing appropriate ifs. The prevailing info is the one that
has a specific section for it. Consider that as an amendment.

Besides, it should also be possible to decode color streams into
grayscale for other use cases. I dont' have a personal need for it,
though the doc suggests them.

> In such case the the chroma_format_idc field will be configured in
> AVC_IMAGE_STATE to assure that the chroma components of grayscale are
> not touched in decoding.

That's the theory. In practice, it seems Haihao had to explicitly
initialize the chroma components to 0x80. There might be a reason.
Avoiding the memset() is what I want to reduce init latencies, and it
is possible to address that in one shot with the supplied patches. If
you see other ways to achieve that, you are welcome to provide patches
for that.

>> >>      BEGIN_BCS_BATCH(batch, 6);
>> >>      OUT_BCS_BATCH(batch, MFX_SURFACE_STATE | (6 - 2));
>> >>      OUT_BCS_BATCH(batch, 0);
>> >> @@ -138,7 +142,7 @@ gen6_mfd_surface_state(VADriverContextP ctx,
>> >>                    ((obj_surface->orig_height - 1) << 19) |
>> >>                    ((obj_surface->orig_width - 1) << 6));
>> >>      OUT_BCS_BATCH(batch,
>> >> -                  (MFX_SURFACE_PLANAR_420_8 << 28) | /* 420 planar YUV 
>> >> surface */
>> >> +                  (surface_format << 28) | /* 420 planar YUV surface */
>> >>                    (1 << 27) | /* must be 1 for interleave U/V, hardware 
>> >> requirement */
>> >>                    (0 << 22) | /* surface object control state, FIXME??? 
>> >> */
>> >>                    ((obj_surface->width - 1) << 3) | /* pitch */
>> >> diff --git a/src/gen75_mfd.c b/src/gen75_mfd.c
>> >> index 5b023cf..895b194 100644
>> >> --- a/src/gen75_mfd.c
>> >> +++ b/src/gen75_mfd.c
>> >> @@ -137,12 +137,16 @@ gen75_mfd_surface_state(VADriverContextP ctx,
>> >>      struct object_surface *obj_surface = decode_state->render_object;
>> >>      unsigned int y_cb_offset;
>> >>      unsigned int y_cr_offset;
>> >> +    unsigned int surface_format;
>> >>
>> >>      assert(obj_surface);
>> >>
>> >>      y_cb_offset = obj_surface->y_cb_offset;
>> >>      y_cr_offset = obj_surface->y_cr_offset;
>> >>
>> >> +    surface_format = obj_surface->fourcc == VA_FOURCC_Y800 ?
>> >> +        MFX_SURFACE_MONOCHROME : MFX_SURFACE_PLANAR_420_8;
>> >> +
>> >>      BEGIN_BCS_BATCH(batch, 6);
>> >>      OUT_BCS_BATCH(batch, MFX_SURFACE_STATE | (6 - 2));
>> >>      OUT_BCS_BATCH(batch, 0);
>> >> @@ -150,7 +154,7 @@ gen75_mfd_surface_state(VADriverContextP ctx,
>> >>                    ((obj_surface->orig_height - 1) << 18) |
>> >>                    ((obj_surface->orig_width - 1) << 4));
>> >>      OUT_BCS_BATCH(batch,
>> >> -                  (MFX_SURFACE_PLANAR_420_8 << 28) | /* 420 planar YUV 
>> >> surface */
>> >> +                  (surface_format << 28) | /* 420 planar YUV surface */
>> >>                    ((standard_select != MFX_FORMAT_JPEG) << 27) | /* 
>> >> interleave chroma, set to 0 for JPEG */
>> >>                    (0 << 22) | /* surface object control state, ignored */
>> >>                    ((obj_surface->width - 1) << 3) | /* pitch */
>> >> diff --git a/src/gen7_mfd.c b/src/gen7_mfd.c
>> >> index 70b1cec..2e0d653 100755
>> >> --- a/src/gen7_mfd.c
>> >> +++ b/src/gen7_mfd.c
>> >> @@ -135,12 +135,16 @@ gen7_mfd_surface_state(VADriverContextP ctx,
>> >>      struct object_surface *obj_surface = decode_state->render_object;
>> >>      unsigned int y_cb_offset;
>> >>      unsigned int y_cr_offset;
>> >> +    unsigned int surface_format;
>> >>
>> >>      assert(obj_surface);
>> >>
>> >>      y_cb_offset = obj_surface->y_cb_offset;
>> >>      y_cr_offset = obj_surface->y_cr_offset;
>> >>
>> >> +    surface_format = obj_surface->fourcc == VA_FOURCC_Y800 ?
>> >> +        MFX_SURFACE_MONOCHROME : MFX_SURFACE_PLANAR_420_8;
>> >> +
>> >>      BEGIN_BCS_BATCH(batch, 6);
>> >>      OUT_BCS_BATCH(batch, MFX_SURFACE_STATE | (6 - 2));
>> >>      OUT_BCS_BATCH(batch, 0);
>> >> @@ -148,7 +152,7 @@ gen7_mfd_surface_state(VADriverContextP ctx,
>> >>                    ((obj_surface->orig_height - 1) << 18) |
>> >>                    ((obj_surface->orig_width - 1) << 4));
>> >>      OUT_BCS_BATCH(batch,
>> >> -                  (MFX_SURFACE_PLANAR_420_8 << 28) | /* 420 planar YUV 
>> >> surface */
>> >> +                  (surface_format << 28) | /* 420 planar YUV surface */
>> >>                    ((standard_select != MFX_FORMAT_JPEG) << 27) | /* 
>> >> interleave chroma, set to 0 for JPEG */
>> >>                    (0 << 22) | /* surface object control state, ignored */
>> >>                    ((obj_surface->width - 1) << 3) | /* pitch */
>> >> diff --git a/src/gen8_mfd.c b/src/gen8_mfd.c
>> >> index e3e71fb..10495d8 100644
>> >> --- a/src/gen8_mfd.c
>> >> +++ b/src/gen8_mfd.c
>> >> @@ -145,12 +145,16 @@ gen8_mfd_surface_state(VADriverContextP ctx,
>> >>      struct object_surface *obj_surface = decode_state->render_object;
>> >>      unsigned int y_cb_offset;
>> >>      unsigned int y_cr_offset;
>> >> +    unsigned int surface_format;
>> >>
>> >>      assert(obj_surface);
>> >>
>> >>      y_cb_offset = obj_surface->y_cb_offset;
>> >>      y_cr_offset = obj_surface->y_cr_offset;
>> >>
>> >> +    surface_format = obj_surface->fourcc == VA_FOURCC_Y800 ?
>> >> +        MFX_SURFACE_MONOCHROME : MFX_SURFACE_PLANAR_420_8;
>> >> +
>> >>      BEGIN_BCS_BATCH(batch, 6);
>> >>      OUT_BCS_BATCH(batch, MFX_SURFACE_STATE | (6 - 2));
>> >>      OUT_BCS_BATCH(batch, 0);
>> >> @@ -158,7 +162,7 @@ gen8_mfd_surface_state(VADriverContextP ctx,
>> >>                    ((obj_surface->orig_height - 1) << 18) |
>> >>                    ((obj_surface->orig_width - 1) << 4));
>> >>      OUT_BCS_BATCH(batch,
>> >> -                  (MFX_SURFACE_PLANAR_420_8 << 28) | /* 420 planar YUV 
>> >> surface */
>> >> +                  (surface_format << 28) | /* 420 planar YUV surface */
>> >>                    ((standard_select != MFX_FORMAT_JPEG) << 27) | /* 
>> >> interleave chroma, set to 0 for JPEG */
>> >>                    (0 << 22) | /* surface object control state, ignored */
>> >>                    ((obj_surface->width - 1) << 3) | /* pitch */
>> >> diff --git a/src/i965_decoder_utils.c b/src/i965_decoder_utils.c
>> >> index 6cec08b..ae01d13 100644
>> >> --- a/src/i965_decoder_utils.c
>> >> +++ b/src/i965_decoder_utils.c
>> >> @@ -185,25 +185,40 @@ avc_ensure_surface_bo(
>> >>  )
>> >>  {
>> >>      VAStatus va_status;
>> >> -    uint32_t hw_fourcc, fourcc, subsample;
>> >> +    uint32_t hw_fourcc, fourcc, subsample, chroma_format;
>> >>
>> >>      /* Validate chroma format */
>> >>      switch (pic_param->seq_fields.bits.chroma_format_idc) {
>> >>      case 0: // Grayscale
>> >>          fourcc = VA_FOURCC_Y800;
>> >>          subsample = SUBSAMPLE_YUV400;
>> >> +        chroma_format = VA_RT_FORMAT_YUV400;
>> >>          break;
>> >>      case 1: // YUV 4:2:0
>> >>          fourcc = VA_FOURCC_NV12;
>> >>          subsample = SUBSAMPLE_YUV420;
>> >> +        chroma_format = VA_RT_FORMAT_YUV420;
>> >>          break;
>> >>      default:
>> >>          return VA_STATUS_ERROR_UNSUPPORTED_RT_FORMAT;
>> >>      }
>> >>
>> >
>> > When calling the vaCreateSurfaces to create the surface for the decoding
>> > purpose, the chroma_format will also be passed. In such case can we use
>> > the passed chroma_format for the following operation instead of the
>> > chroma_format derived in the driver?
>> > That is to say: if the YUV420 is passed, we still allocate t>
> Thanks.
>     Yakui
>>
he NV12
>> > surface. If the YUV400 is passed, we can allocate the Y800 surface for
>> > the decoding.
>> >
>> > How do you think?
>>
>> We used to disregard the user specified format, and defer allocation
>> until it is actually needed so that to cope with possible misguided
>> parameters. If we want to trust user provided formats now, we'd need
>> to review and provide additional patches to cover robustness needs in
>> other areas not limited to H.264 decoding. This is beyond the scope of
>> this patch series.
>
> Understand your concern. (Maybe it is redundant/complex to use the
> format in this scenario).
>
> Can we simplify it as the following and avoid the derivation in driver?
>    >If the Y800 fourcc is already passed to create the surfaces when
> calling vaCreateSurfaces, the allocation is not deferred and it still
> uses the Y800 for the following operation.
>    >If the allocation of the surface is deferred, it will use the NV12
> surface for the followeing operation.

If the actual codec parameters tell otherwise, you would have to
re-allocate the surface anyway. So, at the end of the day, this is not
really a simplification since you'd have to keep the same code, and
additionally try to allocate buffer storage buffer hand. No added
value, IMHO.

There are indeed desired changes in the VA surface allocation process
too, but again, this is beyond the scope of this patch series, which
fully aligns with the existing semantics.

Regards,
Gwenole.

>> >> -    /* XXX: always allocate NV12 (YUV 4:2:0) surfaces for now */
>> >> -    hw_fourcc = VA_FOURCC_NV12;
>> >> -    subsample = SUBSAMPLE_YUV420;
>> >> +    /* Determine the HW surface format, bound to VA config needs */
>> >> +    if ((decode_state->base.chroma_formats & chroma_format) == 
>> >> chroma_format)
>> >> +        hw_fourcc = fourcc;
>> >> +    else {
>> >> +        hw_fourcc = 0;
>> >> +        switch (fourcc) {
>> >> +        case VA_FOURCC_Y800: // Implement with an NV12 surface
>> >> +            if (decode_state->base.chroma_formats & VA_RT_FORMAT_YUV420) 
>> >> {
>> >> +                hw_fourcc = VA_FOURCC_NV12;
>> >> +                subsample = SUBSAMPLE_YUV420;
>> >> +            }
>> >> +            break;
>> >> +        }
>> >> +    }
>> >> +    if (!hw_fourcc)
>> >> +        return VA_STATUS_ERROR_UNSUPPORTED_RT_FORMAT;
>> >>
>> >>      /* (Re-)allocate the underlying surface buffer store, if necessary */
>> >>      if (!obj_surface->bo || obj_surface->fourcc != hw_fourcc) {
>> >> diff --git a/src/i965_drv_video.c b/src/i965_drv_video.c
>> >> index 768469a..686f4e3 100755
>> >> --- a/src/i965_drv_video.c
>> >> +++ b/src/i965_drv_video.c
>> >> @@ -214,6 +214,10 @@ get_subpic_format(const VAImageFormat *va_format)
>> >>      return NULL;
>> >>  }
>> >>
>> >> +/* Extra set of chroma formats supported for H.264 decoding (beyond YUV 
>> >> 4:2:0) */
>> >> +#define EXTRA_H264_DEC_CHROMA_FORMATS \
>> >> +    (VA_RT_FORMAT_YUV400)
>> >> +
>> >>  /* Extra set of chroma formats supported for JPEG decoding (beyond YUV 
>> >> 4:2:0) */
>> >>  #define EXTRA_JPEG_DEC_CHROMA_FORMATS \
>> >>      (VA_RT_FORMAT_YUV411 | VA_RT_FORMAT_YUV422 | VA_RT_FORMAT_YUV444)
>> >> @@ -257,6 +261,8 @@ static struct hw_codec_info gen6_hw_codec_info = {
>> >>      .max_width = 2048,
>> >>      .max_height = 2048,
>> >>
>> >> +    .h264_dec_chroma_formats = EXTRA_H264_DEC_CHROMA_FORMATS,
>> >> +
>> >>      .has_mpeg2_decoding = 1,
>> >>      .has_h264_decoding = 1,
>> >>      .has_h264_encoding = 1,
>> >> @@ -282,6 +288,7 @@ static struct hw_codec_info gen7_hw_codec_info = {
>> >>      .max_width = 4096,
>> >>      .max_height = 4096,
>> >>
>> >> +    .h264_dec_chroma_formats = EXTRA_H264_DEC_CHROMA_FORMATS,
>> >>      .jpeg_dec_chroma_formats = EXTRA_JPEG_DEC_CHROMA_FORMATS,
>> >>
>> >>      .has_mpeg2_decoding = 1,
>> >> @@ -311,6 +318,7 @@ static struct hw_codec_info gen75_hw_codec_info = {
>> >>      .max_width = 4096,
>> >>      .max_height = 4096,
>> >>
>> >> +    .h264_dec_chroma_formats = EXTRA_H264_DEC_CHROMA_FORMATS,
>> >>      .jpeg_dec_chroma_formats = EXTRA_JPEG_DEC_CHROMA_FORMATS,
>> >>
>> >>      .has_mpeg2_decoding = 1,
>> >> @@ -344,6 +352,7 @@ static struct hw_codec_info gen8_hw_codec_info = {
>> >>      .max_width = 4096,
>> >>      .max_height = 4096,
>> >>
>> >> +    .h264_dec_chroma_formats = EXTRA_H264_DEC_CHROMA_FORMATS,
>> >>      .jpeg_dec_chroma_formats = EXTRA_JPEG_DEC_CHROMA_FORMATS,
>> >>
>> >>      .has_mpeg2_decoding = 1,
>> >> @@ -602,6 +611,13 @@ i965_get_default_chroma_formats(VADriverContextP 
>> >> ctx, VAProfile profile,
>> >>      uint32_t chroma_formats = VA_RT_FORMAT_YUV420;
>> >>
>> >>      switch (profile) {
>> >> +    case VAProfileH264ConstrainedBaseline:
>> >> +    case VAProfileH264Main:
>> >> +    case VAProfileH264High:
>> >> +        if (HAS_JPEG_DECODING(i965) && entrypoint == VAEntrypointVLD)
>> >> +            chroma_formats |= i965->codec_info->h264_dec_chroma_formats;
>> >> +        break;
>> >> +
>> >>      case VAProfileJPEGBaseline:
>> >>          if (HAS_JPEG_DECODING(i965) && entrypoint == VAEntrypointVLD)
>> >>              chroma_formats |= i965->codec_info->jpeg_dec_chroma_formats;
>> >> @@ -1686,6 +1702,7 @@ i965_CreateContext(VADriverContextP ctx,
>> >>      struct i965_render_state *render_state = &i965->render_state;
>> >>      struct object_config *obj_config = CONFIG(config_id);
>> >>      struct object_context *obj_context = NULL;
>> >> +    VAConfigAttrib *attrib;
>> >>      VAStatus vaStatus = VA_STATUS_SUCCESS;
>> >>      int contextID;
>> >>      int i;
>> >> @@ -1779,6 +1796,11 @@ i965_CreateContext(VADriverContextP ctx,
>> >>          }
>> >>      }
>> >>
>> >> +    attrib = i965_lookup_config_attribute(obj_config, 
>> >> VAConfigAttribRTFormat);
>> >> +    if (!attrib)
>> >> +        return VA_STATUS_ERROR_INVALID_CONFIG;
>> >> +    obj_context->codec_state.base.chroma_formats = attrib->value;
>> >> +
>> >>      /* Error recovery */
>> >>      if (VA_STATUS_SUCCESS != vaStatus) {
>> >>          i965_destroy_context(&i965->context_heap, (struct object_base 
>> >> *)obj_context);
>> >> diff --git a/src/i965_drv_video.h b/src/i965_drv_video.h
>> >> index e70852b..e14dcf8 100644
>> >> --- a/src/i965_drv_video.h
>> >> +++ b/src/i965_drv_video.h
>> >> @@ -101,8 +101,13 @@ struct object_config
>> >>
>> >>  #define NUM_SLICES     10
>> >>
>> >> +struct codec_state_base {
>> >> +    uint32_t chroma_formats;
>> >> +};
>> >> +
>> >>  struct decode_state
>> >>  {
>> >> +    struct codec_state_base base;
>> >>      struct buffer_store *pic_param;
>> >>      struct buffer_store **slice_params;
>> >>      struct buffer_store *iq_matrix;
>> >> @@ -122,6 +127,7 @@ struct decode_state
>> >>
>> >>  struct encode_state
>> >>  {
>> >> +    struct codec_state_base base;
>> >>      struct buffer_store *seq_param;
>> >>      struct buffer_store *pic_param;
>> >>      struct buffer_store *pic_control;
>> >> @@ -152,6 +158,7 @@ struct encode_state
>> >>
>> >>  struct proc_state
>> >>  {
>> >> +    struct codec_state_base base;
>> >>      struct buffer_store *pipeline_param;
>> >>
>> >>      VASurfaceID current_render_target;
>> >> @@ -163,6 +170,7 @@ struct proc_state
>> >>
>> >>  union codec_state
>> >>  {
>> >> +    struct codec_state_base base;
>> >>      struct decode_state decode;
>> >>      struct encode_state encode;
>> >>      struct proc_state proc;
>> >> @@ -289,6 +297,7 @@ struct hw_codec_info
>> >>      int max_width;
>> >>      int max_height;
>> >>
>> >> +    unsigned int h264_dec_chroma_formats;
>> >>      unsigned int jpeg_dec_chroma_formats;
>> >>
>> >>      unsigned int has_mpeg2_decoding:1;
>> >
>> >
>
>
_______________________________________________
Libva mailing list
Libva@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/libva

Reply via email to