On Wed, 2014-05-14 at 15:13 +0200, Gwenole Beauchesne wrote: > Optimize support for grayscale surfaces in two aspects: (i) space > by only allocating the luminance component ; (ii) speed by avoiding > initialization of the (now inexistent) chrominance planes. > > Keep backward compatibility with older codec layers that only > supported YUV 4:2:0 and not grayscale formats properly. > > Signed-off-by: Gwenole Beauchesne <gwenole.beauche...@intel.com> > --- > src/gen6_mfd.c | 8 ++++++-- > src/gen75_mfd.c | 6 +++++- > src/gen7_mfd.c | 6 +++++- > src/gen8_mfd.c | 6 +++++- > src/i965_decoder_utils.c | 23 +++++++++++++++++++---- > src/i965_drv_video.c | 22 ++++++++++++++++++++++ > src/i965_drv_video.h | 9 +++++++++ > 7 files changed, 71 insertions(+), 9 deletions(-) > > diff --git a/src/gen6_mfd.c b/src/gen6_mfd.c > index 2092f69..f925d98 100755 > --- a/src/gen6_mfd.c > +++ b/src/gen6_mfd.c > @@ -130,7 +130,11 @@ gen6_mfd_surface_state(VADriverContextP ctx, > { > struct intel_batchbuffer *batch = gen6_mfd_context->base.batch; > struct object_surface *obj_surface = decode_state->render_object; > - > + unsigned int surface_format; > + > + surface_format = obj_surface->fourcc == VA_FOURCC_Y800 ? > + MFX_SURFACE_MONOCHROME : MFX_SURFACE_PLANAR_420_8; > + > BEGIN_BCS_BATCH(batch, 6); > OUT_BCS_BATCH(batch, MFX_SURFACE_STATE | (6 - 2)); > OUT_BCS_BATCH(batch, 0); > @@ -138,7 +142,7 @@ gen6_mfd_surface_state(VADriverContextP ctx, > ((obj_surface->orig_height - 1) << 19) | > ((obj_surface->orig_width - 1) << 6)); > OUT_BCS_BATCH(batch, > - (MFX_SURFACE_PLANAR_420_8 << 28) | /* 420 planar YUV > surface */ > + (surface_format << 28) | /* 420 planar YUV surface */ > (1 << 27) | /* must be 1 for interleave U/V, hardware > requirement */ > (0 << 22) | /* surface object control state, FIXME??? 
*/ > ((obj_surface->width - 1) << 3) | /* pitch */ > diff --git a/src/gen75_mfd.c b/src/gen75_mfd.c > index 5b023cf..895b194 100644 > --- a/src/gen75_mfd.c > +++ b/src/gen75_mfd.c > @@ -137,12 +137,16 @@ gen75_mfd_surface_state(VADriverContextP ctx, > struct object_surface *obj_surface = decode_state->render_object; > unsigned int y_cb_offset; > unsigned int y_cr_offset; > + unsigned int surface_format; > > assert(obj_surface); > > y_cb_offset = obj_surface->y_cb_offset; > y_cr_offset = obj_surface->y_cr_offset; > > + surface_format = obj_surface->fourcc == VA_FOURCC_Y800 ? > + MFX_SURFACE_MONOCHROME : MFX_SURFACE_PLANAR_420_8; > + > BEGIN_BCS_BATCH(batch, 6); > OUT_BCS_BATCH(batch, MFX_SURFACE_STATE | (6 - 2)); > OUT_BCS_BATCH(batch, 0); > @@ -150,7 +154,7 @@ gen75_mfd_surface_state(VADriverContextP ctx, > ((obj_surface->orig_height - 1) << 18) | > ((obj_surface->orig_width - 1) << 4)); > OUT_BCS_BATCH(batch, > - (MFX_SURFACE_PLANAR_420_8 << 28) | /* 420 planar YUV > surface */ > + (surface_format << 28) | /* 420 planar YUV surface */ > ((standard_select != MFX_FORMAT_JPEG) << 27) | /* > interleave chroma, set to 0 for JPEG */ > (0 << 22) | /* surface object control state, ignored */ > ((obj_surface->width - 1) << 3) | /* pitch */ > diff --git a/src/gen7_mfd.c b/src/gen7_mfd.c > index 70b1cec..2e0d653 100755 > --- a/src/gen7_mfd.c > +++ b/src/gen7_mfd.c > @@ -135,12 +135,16 @@ gen7_mfd_surface_state(VADriverContextP ctx, > struct object_surface *obj_surface = decode_state->render_object; > unsigned int y_cb_offset; > unsigned int y_cr_offset; > + unsigned int surface_format; > > assert(obj_surface); > > y_cb_offset = obj_surface->y_cb_offset; > y_cr_offset = obj_surface->y_cr_offset; > > + surface_format = obj_surface->fourcc == VA_FOURCC_Y800 ? 
> + MFX_SURFACE_MONOCHROME : MFX_SURFACE_PLANAR_420_8; > + > BEGIN_BCS_BATCH(batch, 6); > OUT_BCS_BATCH(batch, MFX_SURFACE_STATE | (6 - 2)); > OUT_BCS_BATCH(batch, 0); > @@ -148,7 +152,7 @@ gen7_mfd_surface_state(VADriverContextP ctx, > ((obj_surface->orig_height - 1) << 18) | > ((obj_surface->orig_width - 1) << 4)); > OUT_BCS_BATCH(batch, > - (MFX_SURFACE_PLANAR_420_8 << 28) | /* 420 planar YUV > surface */ > + (surface_format << 28) | /* 420 planar YUV surface */ > ((standard_select != MFX_FORMAT_JPEG) << 27) | /* > interleave chroma, set to 0 for JPEG */ > (0 << 22) | /* surface object control state, ignored */ > ((obj_surface->width - 1) << 3) | /* pitch */ > diff --git a/src/gen8_mfd.c b/src/gen8_mfd.c > index e3e71fb..10495d8 100644 > --- a/src/gen8_mfd.c > +++ b/src/gen8_mfd.c > @@ -145,12 +145,16 @@ gen8_mfd_surface_state(VADriverContextP ctx, > struct object_surface *obj_surface = decode_state->render_object; > unsigned int y_cb_offset; > unsigned int y_cr_offset; > + unsigned int surface_format; > > assert(obj_surface); > > y_cb_offset = obj_surface->y_cb_offset; > y_cr_offset = obj_surface->y_cr_offset; > > + surface_format = obj_surface->fourcc == VA_FOURCC_Y800 ? 
> + MFX_SURFACE_MONOCHROME : MFX_SURFACE_PLANAR_420_8; > + > BEGIN_BCS_BATCH(batch, 6); > OUT_BCS_BATCH(batch, MFX_SURFACE_STATE | (6 - 2)); > OUT_BCS_BATCH(batch, 0); > @@ -158,7 +162,7 @@ gen8_mfd_surface_state(VADriverContextP ctx, > ((obj_surface->orig_height - 1) << 18) | > ((obj_surface->orig_width - 1) << 4)); > OUT_BCS_BATCH(batch, > - (MFX_SURFACE_PLANAR_420_8 << 28) | /* 420 planar YUV > surface */ > + (surface_format << 28) | /* 420 planar YUV surface */ > ((standard_select != MFX_FORMAT_JPEG) << 27) | /* > interleave chroma, set to 0 for JPEG */ > (0 << 22) | /* surface object control state, ignored */ > ((obj_surface->width - 1) << 3) | /* pitch */ > diff --git a/src/i965_decoder_utils.c b/src/i965_decoder_utils.c > index 6cec08b..ae01d13 100644 > --- a/src/i965_decoder_utils.c > +++ b/src/i965_decoder_utils.c > @@ -185,25 +185,40 @@ avc_ensure_surface_bo( > ) > { > VAStatus va_status; > - uint32_t hw_fourcc, fourcc, subsample; > + uint32_t hw_fourcc, fourcc, subsample, chroma_format; > > /* Validate chroma format */ > switch (pic_param->seq_fields.bits.chroma_format_idc) { > case 0: // Grayscale > fourcc = VA_FOURCC_Y800; > subsample = SUBSAMPLE_YUV400; > + chroma_format = VA_RT_FORMAT_YUV400; > break; > case 1: // YUV 4:2:0 > fourcc = VA_FOURCC_NV12; > subsample = SUBSAMPLE_YUV420; > + chroma_format = VA_RT_FORMAT_YUV420; > break; > default: > return VA_STATUS_ERROR_UNSUPPORTED_RT_FORMAT; > } > > - /* XXX: always allocate NV12 (YUV 4:2:0) surfaces for now */ > - hw_fourcc = VA_FOURCC_NV12; > - subsample = SUBSAMPLE_YUV420; > + /* Determine the HW surface format, bound to VA config needs */ > + if ((decode_state->base.chroma_formats & chroma_format) == chroma_format) > + hw_fourcc = fourcc; > + else { > + hw_fourcc = 0; > + switch (fourcc) { > + case VA_FOURCC_Y800: // Implement with an NV12 surface > + if (decode_state->base.chroma_formats & VA_RT_FORMAT_YUV420) { > + hw_fourcc = VA_FOURCC_NV12; > + subsample = SUBSAMPLE_YUV420; > + } > + break; > + 
} > + } > + if (!hw_fourcc) > + return VA_STATUS_ERROR_UNSUPPORTED_RT_FORMAT; > > /* (Re-)allocate the underlying surface buffer store, if necessary */ > if (!obj_surface->bo || obj_surface->fourcc != hw_fourcc) { > diff --git a/src/i965_drv_video.c b/src/i965_drv_video.c > index 768469a..686f4e3 100755 > --- a/src/i965_drv_video.c > +++ b/src/i965_drv_video.c > @@ -214,6 +214,10 @@ get_subpic_format(const VAImageFormat *va_format) > return NULL; > } > > +/* Extra set of chroma formats supported for H.264 decoding (beyond YUV > 4:2:0) */ > +#define EXTRA_H264_DEC_CHROMA_FORMATS \ > + (VA_RT_FORMAT_YUV400) > + > /* Extra set of chroma formats supported for JPEG decoding (beyond YUV > 4:2:0) */ > #define EXTRA_JPEG_DEC_CHROMA_FORMATS \ > (VA_RT_FORMAT_YUV411 | VA_RT_FORMAT_YUV422 | VA_RT_FORMAT_YUV444) > @@ -257,6 +261,8 @@ static struct hw_codec_info gen6_hw_codec_info = { > .max_width = 2048, > .max_height = 2048, > > + .h264_dec_chroma_formats = EXTRA_H264_DEC_CHROMA_FORMATS, > + > .has_mpeg2_decoding = 1, > .has_h264_decoding = 1, > .has_h264_encoding = 1, > @@ -282,6 +288,7 @@ static struct hw_codec_info gen7_hw_codec_info = { > .max_width = 4096, > .max_height = 4096, > > + .h264_dec_chroma_formats = EXTRA_H264_DEC_CHROMA_FORMATS, > .jpeg_dec_chroma_formats = EXTRA_JPEG_DEC_CHROMA_FORMATS, > > .has_mpeg2_decoding = 1, > @@ -311,6 +318,7 @@ static struct hw_codec_info gen75_hw_codec_info = { > .max_width = 4096, > .max_height = 4096, > > + .h264_dec_chroma_formats = EXTRA_H264_DEC_CHROMA_FORMATS, > .jpeg_dec_chroma_formats = EXTRA_JPEG_DEC_CHROMA_FORMATS, > > .has_mpeg2_decoding = 1, > @@ -344,6 +352,7 @@ static struct hw_codec_info gen8_hw_codec_info = { > .max_width = 4096, > .max_height = 4096, > > + .h264_dec_chroma_formats = EXTRA_H264_DEC_CHROMA_FORMATS, > .jpeg_dec_chroma_formats = EXTRA_JPEG_DEC_CHROMA_FORMATS, > > .has_mpeg2_decoding = 1, > @@ -602,6 +611,13 @@ i965_get_default_chroma_formats(VADriverContextP ctx, > VAProfile profile, > uint32_t 
chroma_formats = VA_RT_FORMAT_YUV420; > > switch (profile) { > + case VAProfileH264ConstrainedBaseline: > + case VAProfileH264Main: > + case VAProfileH264High: > + if (HAS_JPEG_DECODING(i965) && entrypoint == VAEntrypointVLD)
[Review comment] s/JPEG/H264 in the condition quoted just above: the new H.264 profile cases should test HAS_H264_DECODING(i965), not HAS_JPEG_DECODING(i965). > + chroma_formats |= i965->codec_info->h264_dec_chroma_formats; > + break; > + > case VAProfileJPEGBaseline: > if (HAS_JPEG_DECODING(i965) && entrypoint == VAEntrypointVLD) > chroma_formats |= i965->codec_info->jpeg_dec_chroma_formats; > @@ -1686,6 +1702,7 @@ i965_CreateContext(VADriverContextP ctx, > struct i965_render_state *render_state = &i965->render_state; > struct object_config *obj_config = CONFIG(config_id); > struct object_context *obj_context = NULL; > + VAConfigAttrib *attrib; > VAStatus vaStatus = VA_STATUS_SUCCESS; > int contextID; > int i; > @@ -1779,6 +1796,11 @@ i965_CreateContext(VADriverContextP ctx, > } > } > > + attrib = i965_lookup_config_attribute(obj_config, > VAConfigAttribRTFormat); > + if (!attrib) > + return VA_STATUS_ERROR_INVALID_CONFIG; > + obj_context->codec_state.base.chroma_formats = attrib->value; > + > /* Error recovery */ > if (VA_STATUS_SUCCESS != vaStatus) { > i965_destroy_context(&i965->context_heap, (struct object_base > *)obj_context); > diff --git a/src/i965_drv_video.h b/src/i965_drv_video.h > index e70852b..e14dcf8 100644 > --- a/src/i965_drv_video.h > +++ b/src/i965_drv_video.h > @@ -101,8 +101,13 @@ struct object_config > > #define NUM_SLICES 10 > > +struct codec_state_base { > + uint32_t chroma_formats; > +}; > + > struct decode_state > { > + struct codec_state_base base; > struct buffer_store *pic_param; > struct buffer_store **slice_params; > struct buffer_store *iq_matrix; > @@ -122,6 +127,7 @@ struct decode_state > > struct encode_state > { > + struct codec_state_base base; > struct buffer_store *seq_param; > struct buffer_store *pic_param; > struct buffer_store *pic_control; > @@ -152,6 +158,7 @@ struct encode_state > > struct proc_state > { > + struct codec_state_base base; > struct buffer_store *pipeline_param; > > VASurfaceID current_render_target; > @@ -163,6 +170,7 @@ struct proc_state > > union codec_state > { > + struct codec_state_base base; > struct decode_state decode; > struct encode_state 
encode; > struct proc_state proc; > @@ -289,6 +297,7 @@ struct hw_codec_info > int max_width; > int max_height; > > + unsigned int h264_dec_chroma_formats; > unsigned int jpeg_dec_chroma_formats; > > unsigned int has_mpeg2_decoding:1; _______________________________________________ Libva mailing list Libva@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/libva