Re: [Libva] [PATCH intel-driver 5/5] decoder: h264: optimize support for grayscale surfaces.
On Sat, 2014-05-10 at 00:03 -0600, Gwenole Beauchesne wrote: Optimize support for grayscale surfaces in two aspects: (i) space by only allocating the luma component ; (ii) speed by avoiding initialization of the (now inexistent) chroma planes. Keep backward compatibility with older codec layers that only supported YUV 4:2:0 and not grayscale formats properly. Some changes don't match the hardware spec. Another issue is that you mix too many things in one patch. It will be better that you can do one thing in one patch. For example: The fix for Y800 surface. The definition of h264 chroma formats supported by the driver. Thanks. Yakui Signed-off-by: Gwenole Beauchesne gwenole.beauche...@intel.com --- src/gen6_mfd.c | 21 +- src/gen75_mfd.c | 19 - src/gen7_mfd.c | 19 - src/gen8_mfd.c | 19 - src/i965_decoder_utils.c | 71 ++-- src/i965_decoder_utils.h | 8 ++ src/i965_drv_video.c | 43 + src/i965_drv_video.h | 12 8 files changed, 151 insertions(+), 61 deletions(-) diff --git a/src/gen6_mfd.c b/src/gen6_mfd.c index 22d8a51..6ec2278 100755 --- a/src/gen6_mfd.c +++ b/src/gen6_mfd.c @@ -130,7 +130,11 @@ gen6_mfd_surface_state(VADriverContextP ctx, { struct intel_batchbuffer *batch = gen6_mfd_context-base.batch; struct object_surface *obj_surface = decode_state-render_object; - +unsigned int surface_format; + +surface_format = obj_surface-fourcc == VA_FOURCC_Y800 ? +MFX_SURFACE_MONOCHROME : MFX_SURFACE_PLANAR_420_8; This doesn't match the hardware spec. (It only supports the PLANAR_420_8 in MFX_SURFACE_STATE). + BEGIN_BCS_BATCH(batch, 6); OUT_BCS_BATCH(batch, MFX_SURFACE_STATE | (6 - 2)); OUT_BCS_BATCH(batch, 0); @@ -138,7 +142,7 @@ gen6_mfd_surface_state(VADriverContextP ctx, ((obj_surface-orig_height - 1) 19) | ((obj_surface-orig_width - 1) 6)); OUT_BCS_BATCH(batch, - (MFX_SURFACE_PLANAR_420_8 28) | /* 420 planar YUV surface */ + (surface_format 28) | /* 420 planar YUV surface */ (1 27) | /* must be 1 for interleave U/V, hardware requirement */ (0 22) | /* surface object control state, FIXME??? */ ((obj_surface-width - 1) 3) | /* pitch */ @@ -842,18 +846,7 @@ gen6_mfd_avc_decode_init(VADriverContextP ctx, obj_surface-flags |= SURFACE_REFERENCED; else obj_surface-flags = ~SURFACE_REFERENCED; -i965_check_alloc_surface_bo(ctx, obj_surface, 1, VA_FOURCC_NV12, SUBSAMPLE_YUV420); - -/* initial uv component for YUV400 case */ -if (pic_param-seq_fields.bits.chroma_format_idc == 0) { - unsigned int uv_offset = obj_surface-width * obj_surface-height; - unsigned int uv_size = obj_surface-width * obj_surface-height / 2; - - drm_intel_gem_bo_map_gtt(obj_surface-bo); - memset(obj_surface-bo-virtual + uv_offset, 0x80, uv_size); - drm_intel_gem_bo_unmap_gtt(obj_surface-bo); -} - +avc_ensure_surface_bo(ctx, decode_state, obj_surface, pic_param); gen6_mfd_init_avc_surface(ctx, pic_param, obj_surface); dri_bo_unreference(gen6_mfd_context-post_deblocking_output.bo); diff --git a/src/gen75_mfd.c b/src/gen75_mfd.c index cb85996..d2dbb69 100644 --- a/src/gen75_mfd.c +++ b/src/gen75_mfd.c @@ -137,12 +137,16 @@ gen75_mfd_surface_state(VADriverContextP ctx, struct object_surface *obj_surface = decode_state-render_object; unsigned int y_cb_offset; unsigned int y_cr_offset; +unsigned int surface_format; assert(obj_surface); y_cb_offset = obj_surface-y_cb_offset; y_cr_offset = obj_surface-y_cr_offset; +surface_format = obj_surface-fourcc == VA_FOURCC_Y800 ? +MFX_SURFACE_MONOCHROME : MFX_SURFACE_PLANAR_420_8; + BEGIN_BCS_BATCH(batch, 6); OUT_BCS_BATCH(batch, MFX_SURFACE_STATE | (6 - 2)); OUT_BCS_BATCH(batch, 0); @@ -150,7 +154,7 @@ gen75_mfd_surface_state(VADriverContextP ctx, ((obj_surface-orig_height - 1) 18) | ((obj_surface-orig_width - 1) 4)); OUT_BCS_BATCH(batch, - (MFX_SURFACE_PLANAR_420_8 28) | /* 420 planar YUV surface */ + (surface_format 28) | /* 420 planar YUV surface */ ((standard_select != MFX_FORMAT_JPEG) 27) | /* interleave chroma, set to 0 for JPEG */ (0 22) | /* surface object control state, ignored */ ((obj_surface-width - 1) 3) | /* pitch */ @@ -1086,18 +1090,7 @@ gen75_mfd_avc_decode_init(VADriverContextP ctx, obj_surface-flags |= SURFACE_REFERENCED; else obj_surface-flags = ~SURFACE_REFERENCED; -
Re: [Libva] [PATCH intel-driver 5/5] decoder: h264: optimize support for grayscale surfaces.
Hi, 2014-05-12 3:21 GMT+02:00 Zhao Yakui yakui.z...@intel.com: On Sat, 2014-05-10 at 00:03 -0600, Gwenole Beauchesne wrote: Optimize support for grayscale surfaces in two aspects: (i) space by only allocating the luma component ; (ii) speed by avoiding initialization of the (now inexistent) chroma planes. Keep backward compatibility with older codec layers that only supported YUV 4:2:0 and not grayscale formats properly. Some changes don't match the hardware spec. Another issue is that you mix too many things in one patch. It will be better that you can do one thing in one patch. This matches the PRM, please read the last section of the chapter. :) There are also additional notes for it in the inline description too. For example: The fix for Y800 surface. The definition of h264 chroma formats supported by the driver. Thanks. Yakui Signed-off-by: Gwenole Beauchesne gwenole.beauche...@intel.com --- src/gen6_mfd.c | 21 +- src/gen75_mfd.c | 19 - src/gen7_mfd.c | 19 - src/gen8_mfd.c | 19 - src/i965_decoder_utils.c | 71 ++-- src/i965_decoder_utils.h | 8 ++ src/i965_drv_video.c | 43 + src/i965_drv_video.h | 12 8 files changed, 151 insertions(+), 61 deletions(-) diff --git a/src/gen6_mfd.c b/src/gen6_mfd.c index 22d8a51..6ec2278 100755 --- a/src/gen6_mfd.c +++ b/src/gen6_mfd.c @@ -130,7 +130,11 @@ gen6_mfd_surface_state(VADriverContextP ctx, { struct intel_batchbuffer *batch = gen6_mfd_context-base.batch; struct object_surface *obj_surface = decode_state-render_object; - +unsigned int surface_format; + +surface_format = obj_surface-fourcc == VA_FOURCC_Y800 ? +MFX_SURFACE_MONOCHROME : MFX_SURFACE_PLANAR_420_8; This doesn't match the hardware spec. (It only supports the PLANAR_420_8 in MFX_SURFACE_STATE). + BEGIN_BCS_BATCH(batch, 6); OUT_BCS_BATCH(batch, MFX_SURFACE_STATE | (6 - 2)); OUT_BCS_BATCH(batch, 0); @@ -138,7 +142,7 @@ gen6_mfd_surface_state(VADriverContextP ctx, ((obj_surface-orig_height - 1) 19) | ((obj_surface-orig_width - 1) 6)); OUT_BCS_BATCH(batch, - (MFX_SURFACE_PLANAR_420_8 28) | /* 420 planar YUV surface */ + (surface_format 28) | /* 420 planar YUV surface */ (1 27) | /* must be 1 for interleave U/V, hardware requirement */ (0 22) | /* surface object control state, FIXME??? */ ((obj_surface-width - 1) 3) | /* pitch */ @@ -842,18 +846,7 @@ gen6_mfd_avc_decode_init(VADriverContextP ctx, obj_surface-flags |= SURFACE_REFERENCED; else obj_surface-flags = ~SURFACE_REFERENCED; -i965_check_alloc_surface_bo(ctx, obj_surface, 1, VA_FOURCC_NV12, SUBSAMPLE_YUV420); - -/* initial uv component for YUV400 case */ -if (pic_param-seq_fields.bits.chroma_format_idc == 0) { - unsigned int uv_offset = obj_surface-width * obj_surface-height; - unsigned int uv_size = obj_surface-width * obj_surface-height / 2; - - drm_intel_gem_bo_map_gtt(obj_surface-bo); - memset(obj_surface-bo-virtual + uv_offset, 0x80, uv_size); - drm_intel_gem_bo_unmap_gtt(obj_surface-bo); -} - +avc_ensure_surface_bo(ctx, decode_state, obj_surface, pic_param); gen6_mfd_init_avc_surface(ctx, pic_param, obj_surface); dri_bo_unreference(gen6_mfd_context-post_deblocking_output.bo); diff --git a/src/gen75_mfd.c b/src/gen75_mfd.c index cb85996..d2dbb69 100644 --- a/src/gen75_mfd.c +++ b/src/gen75_mfd.c @@ -137,12 +137,16 @@ gen75_mfd_surface_state(VADriverContextP ctx, struct object_surface *obj_surface = decode_state-render_object; unsigned int y_cb_offset; unsigned int y_cr_offset; +unsigned int surface_format; assert(obj_surface); y_cb_offset = obj_surface-y_cb_offset; y_cr_offset = obj_surface-y_cr_offset; +surface_format = obj_surface-fourcc == VA_FOURCC_Y800 ? +MFX_SURFACE_MONOCHROME : MFX_SURFACE_PLANAR_420_8; + BEGIN_BCS_BATCH(batch, 6); OUT_BCS_BATCH(batch, MFX_SURFACE_STATE | (6 - 2)); OUT_BCS_BATCH(batch, 0); @@ -150,7 +154,7 @@ gen75_mfd_surface_state(VADriverContextP ctx, ((obj_surface-orig_height - 1) 18) | ((obj_surface-orig_width - 1) 4)); OUT_BCS_BATCH(batch, - (MFX_SURFACE_PLANAR_420_8 28) | /* 420 planar YUV surface */ + (surface_format 28) | /* 420 planar YUV surface */ ((standard_select != MFX_FORMAT_JPEG) 27) | /* interleave chroma, set to 0 for JPEG */ (0 22) | /* surface object control state, ignored */ ((obj_surface-width - 1) 3) | /*
Re: [Libva] [PATCH intel-driver 5/5] decoder: h264: optimize support for grayscale surfaces.
On Sun, 2014-05-11 at 22:26 -0600, Gwenole Beauchesne wrote: Hi, 2014-05-12 3:21 GMT+02:00 Zhao Yakui yakui.z...@intel.com: On Sat, 2014-05-10 at 00:03 -0600, Gwenole Beauchesne wrote: Optimize support for grayscale surfaces in two aspects: (i) space by only allocating the luma component ; (ii) speed by avoiding initialization of the (now inexistent) chroma planes. Keep backward compatibility with older codec layers that only supported YUV 4:2:0 and not grayscale formats properly. Some changes don't match the hardware spec. Another issue is that you mix too many things in one patch. It will be better that you can do one thing in one patch. This matches the PRM, please read the last section of the chapter. :) There are also additional notes for it in the inline description too. Really? The following is from the PRM on Ivybridge. For video codec, it should set to 4 always. Thanks. Yakui For example: The fix for Y800 surface. The definition of h264 chroma formats supported by the driver. Thanks. Yakui Signed-off-by: Gwenole Beauchesne gwenole.beauche...@intel.com --- src/gen6_mfd.c | 21 +- src/gen75_mfd.c | 19 - src/gen7_mfd.c | 19 - src/gen8_mfd.c | 19 - src/i965_decoder_utils.c | 71 ++-- src/i965_decoder_utils.h | 8 ++ src/i965_drv_video.c | 43 + src/i965_drv_video.h | 12 8 files changed, 151 insertions(+), 61 deletions(-) diff --git a/src/gen6_mfd.c b/src/gen6_mfd.c index 22d8a51..6ec2278 100755 --- a/src/gen6_mfd.c +++ b/src/gen6_mfd.c @@ -130,7 +130,11 @@ gen6_mfd_surface_state(VADriverContextP ctx, { struct intel_batchbuffer *batch = gen6_mfd_context-base.batch; struct object_surface *obj_surface = decode_state-render_object; - +unsigned int surface_format; + +surface_format = obj_surface-fourcc == VA_FOURCC_Y800 ? +MFX_SURFACE_MONOCHROME : MFX_SURFACE_PLANAR_420_8; This doesn't match the hardware spec. (It only supports the PLANAR_420_8 in MFX_SURFACE_STATE). + BEGIN_BCS_BATCH(batch, 6); OUT_BCS_BATCH(batch, MFX_SURFACE_STATE | (6 - 2)); OUT_BCS_BATCH(batch, 0); @@ -138,7 +142,7 @@ gen6_mfd_surface_state(VADriverContextP ctx, ((obj_surface-orig_height - 1) 19) | ((obj_surface-orig_width - 1) 6)); OUT_BCS_BATCH(batch, - (MFX_SURFACE_PLANAR_420_8 28) | /* 420 planar YUV surface */ + (surface_format 28) | /* 420 planar YUV surface */ (1 27) | /* must be 1 for interleave U/V, hardware requirement */ (0 22) | /* surface object control state, FIXME??? */ ((obj_surface-width - 1) 3) | /* pitch */ @@ -842,18 +846,7 @@ gen6_mfd_avc_decode_init(VADriverContextP ctx, obj_surface-flags |= SURFACE_REFERENCED; else obj_surface-flags = ~SURFACE_REFERENCED; -i965_check_alloc_surface_bo(ctx, obj_surface, 1, VA_FOURCC_NV12, SUBSAMPLE_YUV420); - -/* initial uv component for YUV400 case */ -if (pic_param-seq_fields.bits.chroma_format_idc == 0) { - unsigned int uv_offset = obj_surface-width * obj_surface-height; - unsigned int uv_size = obj_surface-width * obj_surface-height / 2; - - drm_intel_gem_bo_map_gtt(obj_surface-bo); - memset(obj_surface-bo-virtual + uv_offset, 0x80, uv_size); - drm_intel_gem_bo_unmap_gtt(obj_surface-bo); -} - +avc_ensure_surface_bo(ctx, decode_state, obj_surface, pic_param); gen6_mfd_init_avc_surface(ctx, pic_param, obj_surface); dri_bo_unreference(gen6_mfd_context-post_deblocking_output.bo); diff --git a/src/gen75_mfd.c b/src/gen75_mfd.c index cb85996..d2dbb69 100644 --- a/src/gen75_mfd.c +++ b/src/gen75_mfd.c @@ -137,12 +137,16 @@ gen75_mfd_surface_state(VADriverContextP ctx, struct object_surface *obj_surface = decode_state-render_object; unsigned int y_cb_offset; unsigned int y_cr_offset; +unsigned int surface_format; assert(obj_surface); y_cb_offset = obj_surface-y_cb_offset; y_cr_offset = obj_surface-y_cr_offset; +surface_format = obj_surface-fourcc == VA_FOURCC_Y800 ? +MFX_SURFACE_MONOCHROME : MFX_SURFACE_PLANAR_420_8; + BEGIN_BCS_BATCH(batch, 6); OUT_BCS_BATCH(batch, MFX_SURFACE_STATE | (6 - 2)); OUT_BCS_BATCH(batch, 0); @@ -150,7 +154,7 @@ gen75_mfd_surface_state(VADriverContextP ctx, ((obj_surface-orig_height - 1) 18) | ((obj_surface-orig_width - 1) 4)); OUT_BCS_BATCH(batch, - (MFX_SURFACE_PLANAR_420_8 28) | /* 420 planar YUV surface */ +
Re: [Libva] [PATCH intel-driver 5/5] decoder: h264: optimize support for grayscale surfaces.
2014-05-12 6:52 GMT+02:00 Zhao, Yakui yakui.z...@intel.com: On Sun, 2014-05-11 at 22:26 -0600, Gwenole Beauchesne wrote: Hi, 2014-05-12 3:21 GMT+02:00 Zhao Yakui yakui.z...@intel.com: On Sat, 2014-05-10 at 00:03 -0600, Gwenole Beauchesne wrote: Optimize support for grayscale surfaces in two aspects: (i) space by only allocating the luma component ; (ii) speed by avoiding initialization of the (now inexistent) chroma planes. Keep backward compatibility with older codec layers that only supported YUV 4:2:0 and not grayscale formats properly. Some changes don't match the hardware spec. Another issue is that you mix too many things in one patch. It will be better that you can do one thing in one patch. This matches the PRM, please read the last section of the chapter. :) There are also additional notes for it in the inline description too. Really? The following is from the PRM on Ivybridge. For video codec, it should set to 4 always. This looks to be the quote for the JPEG case, that required all 3 components, with the same and full stride of the luma component. I am pretty sure we can improve this part if you think it's not clear enough. The prevailing information for grayscale support is section 7.1. Regards, Gwenole. --- src/gen6_mfd.c | 21 +- src/gen75_mfd.c | 19 - src/gen7_mfd.c | 19 - src/gen8_mfd.c | 19 - src/i965_decoder_utils.c | 71 ++-- src/i965_decoder_utils.h | 8 ++ src/i965_drv_video.c | 43 + src/i965_drv_video.h | 12 8 files changed, 151 insertions(+), 61 deletions(-) diff --git a/src/gen6_mfd.c b/src/gen6_mfd.c index 22d8a51..6ec2278 100755 --- a/src/gen6_mfd.c +++ b/src/gen6_mfd.c @@ -130,7 +130,11 @@ gen6_mfd_surface_state(VADriverContextP ctx, { struct intel_batchbuffer *batch = gen6_mfd_context-base.batch; struct object_surface *obj_surface = decode_state-render_object; - +unsigned int surface_format; + +surface_format = obj_surface-fourcc == VA_FOURCC_Y800 ? +MFX_SURFACE_MONOCHROME : MFX_SURFACE_PLANAR_420_8; This doesn't match the hardware spec. (It only supports the PLANAR_420_8 in MFX_SURFACE_STATE). + BEGIN_BCS_BATCH(batch, 6); OUT_BCS_BATCH(batch, MFX_SURFACE_STATE | (6 - 2)); OUT_BCS_BATCH(batch, 0); @@ -138,7 +142,7 @@ gen6_mfd_surface_state(VADriverContextP ctx, ((obj_surface-orig_height - 1) 19) | ((obj_surface-orig_width - 1) 6)); OUT_BCS_BATCH(batch, - (MFX_SURFACE_PLANAR_420_8 28) | /* 420 planar YUV surface */ + (surface_format 28) | /* 420 planar YUV surface */ (1 27) | /* must be 1 for interleave U/V, hardware requirement */ (0 22) | /* surface object control state, FIXME??? */ ((obj_surface-width - 1) 3) | /* pitch */ @@ -842,18 +846,7 @@ gen6_mfd_avc_decode_init(VADriverContextP ctx, obj_surface-flags |= SURFACE_REFERENCED; else obj_surface-flags = ~SURFACE_REFERENCED; -i965_check_alloc_surface_bo(ctx, obj_surface, 1, VA_FOURCC_NV12, SUBSAMPLE_YUV420); - -/* initial uv component for YUV400 case */ -if (pic_param-seq_fields.bits.chroma_format_idc == 0) { - unsigned int uv_offset = obj_surface-width * obj_surface-height; - unsigned int uv_size = obj_surface-width * obj_surface-height / 2; - - drm_intel_gem_bo_map_gtt(obj_surface-bo); - memset(obj_surface-bo-virtual + uv_offset, 0x80, uv_size); - drm_intel_gem_bo_unmap_gtt(obj_surface-bo); -} - +avc_ensure_surface_bo(ctx, decode_state, obj_surface, pic_param); gen6_mfd_init_avc_surface(ctx, pic_param, obj_surface); dri_bo_unreference(gen6_mfd_context-post_deblocking_output.bo); diff --git a/src/gen75_mfd.c b/src/gen75_mfd.c index cb85996..d2dbb69 100644 --- a/src/gen75_mfd.c +++ b/src/gen75_mfd.c @@ -137,12 +137,16 @@ gen75_mfd_surface_state(VADriverContextP ctx, struct object_surface *obj_surface = decode_state-render_object; unsigned int y_cb_offset; unsigned int y_cr_offset; +unsigned int surface_format; assert(obj_surface); y_cb_offset = obj_surface-y_cb_offset; y_cr_offset = obj_surface-y_cr_offset; +surface_format = obj_surface-fourcc == VA_FOURCC_Y800 ? +MFX_SURFACE_MONOCHROME : MFX_SURFACE_PLANAR_420_8; + BEGIN_BCS_BATCH(batch, 6); OUT_BCS_BATCH(batch, MFX_SURFACE_STATE | (6 - 2)); OUT_BCS_BATCH(batch, 0); @@ -150,7 +154,7 @@ gen75_mfd_surface_state(VADriverContextP ctx, ((obj_surface-orig_height - 1) 18) |