2014-05-15 10:25 GMT+02:00 Xiang, Haihao <haihao.xi...@intel.com>: > On Wed, 2014-05-14 at 15:13 +0200, Gwenole Beauchesne wrote: >> Optimize support for grayscale surfaces in two aspects: (i) space >> by only allocating the luminance component ; (ii) speed by avoiding >> initialization of the (now inexistent) chrominance planes. >> >> Keep backward compatibility with older codec layers that only >> supported YUV 4:2:0 and not grayscale formats properly. >> >> Signed-off-by: Gwenole Beauchesne <gwenole.beauche...@intel.com> >> --- >> src/gen6_mfd.c | 8 ++++++-- >> src/gen75_mfd.c | 6 +++++- >> src/gen7_mfd.c | 6 +++++- >> src/gen8_mfd.c | 6 +++++- >> src/i965_decoder_utils.c | 23 +++++++++++++++++++---- >> src/i965_drv_video.c | 22 ++++++++++++++++++++++ >> src/i965_drv_video.h | 9 +++++++++ >> 7 files changed, 71 insertions(+), 9 deletions(-) >> >> diff --git a/src/gen6_mfd.c b/src/gen6_mfd.c >> index 2092f69..f925d98 100755 >> --- a/src/gen6_mfd.c >> +++ b/src/gen6_mfd.c >> @@ -130,7 +130,11 @@ gen6_mfd_surface_state(VADriverContextP ctx, >> { >> struct intel_batchbuffer *batch = gen6_mfd_context->base.batch; >> struct object_surface *obj_surface = decode_state->render_object; >> - >> + unsigned int surface_format; >> + >> + surface_format = obj_surface->fourcc == VA_FOURCC_Y800 ? >> + MFX_SURFACE_MONOCHROME : MFX_SURFACE_PLANAR_420_8; >> + >> BEGIN_BCS_BATCH(batch, 6); >> OUT_BCS_BATCH(batch, MFX_SURFACE_STATE | (6 - 2)); >> OUT_BCS_BATCH(batch, 0); >> @@ -138,7 +142,7 @@ gen6_mfd_surface_state(VADriverContextP ctx, >> ((obj_surface->orig_height - 1) << 19) | >> ((obj_surface->orig_width - 1) << 6)); >> OUT_BCS_BATCH(batch, >> - (MFX_SURFACE_PLANAR_420_8 << 28) | /* 420 planar YUV >> surface */ >> + (surface_format << 28) | /* 420 planar YUV surface */ >> (1 << 27) | /* must be 1 for interleave U/V, hardware >> requirement */ >> (0 << 22) | /* surface object control state, FIXME??? */ >> ((obj_surface->width - 1) << 3) | /* pitch */ >> diff --git a/src/gen75_mfd.c b/src/gen75_mfd.c >> index 5b023cf..895b194 100644 >> --- a/src/gen75_mfd.c >> +++ b/src/gen75_mfd.c >> @@ -137,12 +137,16 @@ gen75_mfd_surface_state(VADriverContextP ctx, >> struct object_surface *obj_surface = decode_state->render_object; >> unsigned int y_cb_offset; >> unsigned int y_cr_offset; >> + unsigned int surface_format; >> >> assert(obj_surface); >> >> y_cb_offset = obj_surface->y_cb_offset; >> y_cr_offset = obj_surface->y_cr_offset; >> >> + surface_format = obj_surface->fourcc == VA_FOURCC_Y800 ? >> + MFX_SURFACE_MONOCHROME : MFX_SURFACE_PLANAR_420_8; >> + >> BEGIN_BCS_BATCH(batch, 6); >> OUT_BCS_BATCH(batch, MFX_SURFACE_STATE | (6 - 2)); >> OUT_BCS_BATCH(batch, 0); >> @@ -150,7 +154,7 @@ gen75_mfd_surface_state(VADriverContextP ctx, >> ((obj_surface->orig_height - 1) << 18) | >> ((obj_surface->orig_width - 1) << 4)); >> OUT_BCS_BATCH(batch, >> - (MFX_SURFACE_PLANAR_420_8 << 28) | /* 420 planar YUV >> surface */ >> + (surface_format << 28) | /* 420 planar YUV surface */ >> ((standard_select != MFX_FORMAT_JPEG) << 27) | /* >> interleave chroma, set to 0 for JPEG */ >> (0 << 22) | /* surface object control state, ignored */ >> ((obj_surface->width - 1) << 3) | /* pitch */ >> diff --git a/src/gen7_mfd.c b/src/gen7_mfd.c >> index 70b1cec..2e0d653 100755 >> --- a/src/gen7_mfd.c >> +++ b/src/gen7_mfd.c >> @@ -135,12 +135,16 @@ gen7_mfd_surface_state(VADriverContextP ctx, >> struct object_surface *obj_surface = decode_state->render_object; >> unsigned int y_cb_offset; >> unsigned int y_cr_offset; >> + unsigned int surface_format; >> >> assert(obj_surface); >> >> y_cb_offset = obj_surface->y_cb_offset; >> y_cr_offset = obj_surface->y_cr_offset; >> >> + surface_format = obj_surface->fourcc == VA_FOURCC_Y800 ? >> + MFX_SURFACE_MONOCHROME : MFX_SURFACE_PLANAR_420_8; >> + >> BEGIN_BCS_BATCH(batch, 6); >> OUT_BCS_BATCH(batch, MFX_SURFACE_STATE | (6 - 2)); >> OUT_BCS_BATCH(batch, 0); >> @@ -148,7 +152,7 @@ gen7_mfd_surface_state(VADriverContextP ctx, >> ((obj_surface->orig_height - 1) << 18) | >> ((obj_surface->orig_width - 1) << 4)); >> OUT_BCS_BATCH(batch, >> - (MFX_SURFACE_PLANAR_420_8 << 28) | /* 420 planar YUV >> surface */ >> + (surface_format << 28) | /* 420 planar YUV surface */ >> ((standard_select != MFX_FORMAT_JPEG) << 27) | /* >> interleave chroma, set to 0 for JPEG */ >> (0 << 22) | /* surface object control state, ignored */ >> ((obj_surface->width - 1) << 3) | /* pitch */ >> diff --git a/src/gen8_mfd.c b/src/gen8_mfd.c >> index e3e71fb..10495d8 100644 >> --- a/src/gen8_mfd.c >> +++ b/src/gen8_mfd.c >> @@ -145,12 +145,16 @@ gen8_mfd_surface_state(VADriverContextP ctx, >> struct object_surface *obj_surface = decode_state->render_object; >> unsigned int y_cb_offset; >> unsigned int y_cr_offset; >> + unsigned int surface_format; >> >> assert(obj_surface); >> >> y_cb_offset = obj_surface->y_cb_offset; >> y_cr_offset = obj_surface->y_cr_offset; >> >> + surface_format = obj_surface->fourcc == VA_FOURCC_Y800 ? >> + MFX_SURFACE_MONOCHROME : MFX_SURFACE_PLANAR_420_8; >> + >> BEGIN_BCS_BATCH(batch, 6); >> OUT_BCS_BATCH(batch, MFX_SURFACE_STATE | (6 - 2)); >> OUT_BCS_BATCH(batch, 0); >> @@ -158,7 +162,7 @@ gen8_mfd_surface_state(VADriverContextP ctx, >> ((obj_surface->orig_height - 1) << 18) | >> ((obj_surface->orig_width - 1) << 4)); >> OUT_BCS_BATCH(batch, >> - (MFX_SURFACE_PLANAR_420_8 << 28) | /* 420 planar YUV >> surface */ >> + (surface_format << 28) | /* 420 planar YUV surface */ >> ((standard_select != MFX_FORMAT_JPEG) << 27) | /* >> interleave chroma, set to 0 for JPEG */ >> (0 << 22) | /* surface object control state, ignored */ >> ((obj_surface->width - 1) << 3) | /* pitch */ >> diff --git a/src/i965_decoder_utils.c b/src/i965_decoder_utils.c >> index 6cec08b..ae01d13 100644 >> --- a/src/i965_decoder_utils.c >> +++ b/src/i965_decoder_utils.c >> @@ -185,25 +185,40 @@ avc_ensure_surface_bo( >> ) >> { >> VAStatus va_status; >> - uint32_t hw_fourcc, fourcc, subsample; >> + uint32_t hw_fourcc, fourcc, subsample, chroma_format; >> >> /* Validate chroma format */ >> switch (pic_param->seq_fields.bits.chroma_format_idc) { >> case 0: // Grayscale >> fourcc = VA_FOURCC_Y800; >> subsample = SUBSAMPLE_YUV400; >> + chroma_format = VA_RT_FORMAT_YUV400; >> break; >> case 1: // YUV 4:2:0 >> fourcc = VA_FOURCC_NV12; >> subsample = SUBSAMPLE_YUV420; >> + chroma_format = VA_RT_FORMAT_YUV420; >> break; >> default: >> return VA_STATUS_ERROR_UNSUPPORTED_RT_FORMAT; >> } >> >> - /* XXX: always allocate NV12 (YUV 4:2:0) surfaces for now */ >> - hw_fourcc = VA_FOURCC_NV12; >> - subsample = SUBSAMPLE_YUV420; >> + /* Determine the HW surface format, bound to VA config needs */ >> + if ((decode_state->base.chroma_formats & chroma_format) == >> chroma_format) >> + hw_fourcc = fourcc; >> + else { >> + hw_fourcc = 0; >> + switch (fourcc) { >> + case VA_FOURCC_Y800: // Implement with an NV12 surface >> + if (decode_state->base.chroma_formats & VA_RT_FORMAT_YUV420) { >> + hw_fourcc = VA_FOURCC_NV12; >> + subsample = SUBSAMPLE_YUV420; >> + } >> + break; >> + } >> + } >> + if (!hw_fourcc) >> + return VA_STATUS_ERROR_UNSUPPORTED_RT_FORMAT; >> >> /* (Re-)allocate the underlying surface buffer store, if necessary */ >> if (!obj_surface->bo || obj_surface->fourcc != hw_fourcc) { >> diff --git a/src/i965_drv_video.c b/src/i965_drv_video.c >> index 768469a..686f4e3 100755 >> --- a/src/i965_drv_video.c >> +++ b/src/i965_drv_video.c >> @@ -214,6 +214,10 @@ get_subpic_format(const VAImageFormat *va_format) >> return NULL; >> } >> >> +/* Extra set of chroma formats supported for H.264 decoding (beyond YUV >> 4:2:0) */ >> +#define EXTRA_H264_DEC_CHROMA_FORMATS \ >> + (VA_RT_FORMAT_YUV400) >> + >> /* Extra set of chroma formats supported for JPEG decoding (beyond YUV >> 4:2:0) */ >> #define EXTRA_JPEG_DEC_CHROMA_FORMATS \ >> (VA_RT_FORMAT_YUV411 | VA_RT_FORMAT_YUV422 | VA_RT_FORMAT_YUV444) >> @@ -257,6 +261,8 @@ static struct hw_codec_info gen6_hw_codec_info = { >> .max_width = 2048, >> .max_height = 2048, >> >> + .h264_dec_chroma_formats = EXTRA_H264_DEC_CHROMA_FORMATS, >> + >> .has_mpeg2_decoding = 1, >> .has_h264_decoding = 1, >> .has_h264_encoding = 1, >> @@ -282,6 +288,7 @@ static struct hw_codec_info gen7_hw_codec_info = { >> .max_width = 4096, >> .max_height = 4096, >> >> + .h264_dec_chroma_formats = EXTRA_H264_DEC_CHROMA_FORMATS, >> .jpeg_dec_chroma_formats = EXTRA_JPEG_DEC_CHROMA_FORMATS, >> >> .has_mpeg2_decoding = 1, >> @@ -311,6 +318,7 @@ static struct hw_codec_info gen75_hw_codec_info = { >> .max_width = 4096, >> .max_height = 4096, >> >> + .h264_dec_chroma_formats = EXTRA_H264_DEC_CHROMA_FORMATS, >> .jpeg_dec_chroma_formats = EXTRA_JPEG_DEC_CHROMA_FORMATS, >> >> .has_mpeg2_decoding = 1, >> @@ -344,6 +352,7 @@ static struct hw_codec_info gen8_hw_codec_info = { >> .max_width = 4096, >> .max_height = 4096, >> >> + .h264_dec_chroma_formats = EXTRA_H264_DEC_CHROMA_FORMATS, >> .jpeg_dec_chroma_formats = EXTRA_JPEG_DEC_CHROMA_FORMATS, >> >> .has_mpeg2_decoding = 1, >> @@ -602,6 +611,13 @@ i965_get_default_chroma_formats(VADriverContextP ctx, >> VAProfile profile, >> uint32_t chroma_formats = VA_RT_FORMAT_YUV420; >> >> switch (profile) { >> + case VAProfileH264ConstrainedBaseline: >> + case VAProfileH264Main: >> + case VAProfileH264High: >> + if (HAS_JPEG_DECODING(i965) && entrypoint == VAEntrypointVLD) > > s/JPEG/H264
Thanks for spotting this stupid error. Locally fixed. Regards, Gwenole. > >> + chroma_formats |= i965->codec_info->h264_dec_chroma_formats; >> + break; >> + >> case VAProfileJPEGBaseline: >> if (HAS_JPEG_DECODING(i965) && entrypoint == VAEntrypointVLD) >> chroma_formats |= i965->codec_info->jpeg_dec_chroma_formats; >> @@ -1686,6 +1702,7 @@ i965_CreateContext(VADriverContextP ctx, >> struct i965_render_state *render_state = &i965->render_state; >> struct object_config *obj_config = CONFIG(config_id); >> struct object_context *obj_context = NULL; >> + VAConfigAttrib *attrib; >> VAStatus vaStatus = VA_STATUS_SUCCESS; >> int contextID; >> int i; >> @@ -1779,6 +1796,11 @@ i965_CreateContext(VADriverContextP ctx, >> } >> } >> >> + attrib = i965_lookup_config_attribute(obj_config, >> VAConfigAttribRTFormat); >> + if (!attrib) >> + return VA_STATUS_ERROR_INVALID_CONFIG; >> + obj_context->codec_state.base.chroma_formats = attrib->value; >> + >> /* Error recovery */ >> if (VA_STATUS_SUCCESS != vaStatus) { >> i965_destroy_context(&i965->context_heap, (struct object_base >> *)obj_context); >> diff --git a/src/i965_drv_video.h b/src/i965_drv_video.h >> index e70852b..e14dcf8 100644 >> --- a/src/i965_drv_video.h >> +++ b/src/i965_drv_video.h >> @@ -101,8 +101,13 @@ struct object_config >> >> #define NUM_SLICES 10 >> >> +struct codec_state_base { >> + uint32_t chroma_formats; >> +}; >> + >> struct decode_state >> { >> + struct codec_state_base base; >> struct buffer_store *pic_param; >> struct buffer_store **slice_params; >> struct buffer_store *iq_matrix; >> @@ -122,6 +127,7 @@ struct decode_state >> >> struct encode_state >> { >> + struct codec_state_base base; >> struct buffer_store *seq_param; >> struct buffer_store *pic_param; >> struct buffer_store *pic_control; >> @@ -152,6 +158,7 @@ struct encode_state >> >> struct proc_state >> { >> + struct codec_state_base base; >> struct buffer_store *pipeline_param; >> >> VASurfaceID current_render_target; >> @@ -163,6 +170,7 @@ struct proc_state >> >> union codec_state >> { >> + struct codec_state_base base; >> struct decode_state decode; >> struct encode_state encode; >> struct proc_state proc; >> @@ -289,6 +297,7 @@ struct hw_codec_info >> int max_width; >> int max_height; >> >> + unsigned int h264_dec_chroma_formats; >> unsigned int jpeg_dec_chroma_formats; >> >> unsigned int has_mpeg2_decoding:1; > > _______________________________________________ Libva mailing list Libva@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/libva