GCC's read access for single bits in a bitfield is reasonable (just a move + flag comparison), so let's save some cachelines by packing the write-once/read-many booleans together.
   text    data     bss     dec     hex filename
6490134  191992   26192 6708318  665c5e lib64/i965_dri.so (before)
6491766  191992   26192 6709950  6662be lib64/i965_dri.so (after)

Small inflation (+1632 bytes of text) due to the extra immediate masks, and it is entirely dubious whether it is worth it.

Signed-off-by: Chris Wilson <ch...@chris-wilson.co.uk> --- src/mesa/drivers/dri/i965/brw_context.h | 76 +++++++++++++++-------------- src/mesa/drivers/dri/i965/brw_device_info.h | 32 ++++++------ src/mesa/drivers/dri/i965/intel_screen.h | 18 +++---- 3 files changed, 65 insertions(+), 61 deletions(-) diff --git a/src/mesa/drivers/dri/i965/brw_context.h b/src/mesa/drivers/dri/i965/brw_context.h index cd43ac5..62e39be 100644 --- a/src/mesa/drivers/dri/i965/brw_context.h +++ b/src/mesa/drivers/dri/i965/brw_context.h @@ -1088,16 +1088,6 @@ struct brw_context GLuint stats_wm; - /** - * drirc options: - * @{ - */ - bool no_rast; - bool always_flush_batch; - bool always_flush_cache; - bool disable_throttling; - bool precompile; - driOptionCache optionCache; /** @} */ @@ -1105,36 +1095,33 @@ struct brw_context GLenum reduced_primitive; - /** - * Set if we're either a debug context or the INTEL_DEBUG=perf environment - * variable is set, this is the flag indicating to do expensive work that - * might lead to a perf_debug() call. - */ - bool perf_debug; - uint32_t max_gtt_map_object_size; int gen; int gt; - bool is_g4x; - bool is_baytrail; - bool is_haswell; - bool is_cherryview; - bool is_broxton; - - bool has_hiz; - bool has_separate_stencil; - bool must_use_separate_stencil; - bool has_llc; - bool has_swizzling; - bool has_surface_tile_offset; - bool has_compr4; - bool has_negative_rhw_bug; - bool has_pln; - bool no_simd8; - bool use_rep_send; - bool use_resource_streamer; + /* So long as we do not frequently write to these booleans, we can + * pack them into a bitfield with fair efficiency (GCC converting the + * access into a mov + flag test). 
+ */ + bool is_g4x : 1; + bool is_baytrail : 1; + bool is_haswell : 1; + bool is_cherryview : 1; + bool is_broxton : 1; + + bool has_hiz : 1; + bool has_separate_stencil : 1; + bool must_use_separate_stencil : 1; + bool has_llc : 1; + bool has_swizzling : 1; + bool has_surface_tile_offset : 1; + bool has_compr4 : 1; + bool has_negative_rhw_bug : 1; + bool has_pln : 1; + bool no_simd8 : 1; + bool use_rep_send : 1; + bool use_resource_streamer : 1; /** * Some versions of Gen hardware don't do centroid interpolation correctly @@ -1143,7 +1130,24 @@ struct brw_context * non-centroid interpolation for unlit pixels, at the expense of two extra * fragment shader instructions. */ - bool needs_unlit_centroid_workaround; + bool needs_unlit_centroid_workaround : 1; + + /** + * Set if we're either a debug context or the INTEL_DEBUG=perf environment + * variable is set, this is the flag indicating to do expensive work that + * might lead to a perf_debug() call. + */ + bool perf_debug : 1; + + /** + * drirc options: + * @{ + */ + bool no_rast : 1; + bool always_flush_batch : 1; + bool always_flush_cache : 1; + bool disable_throttling : 1; + bool precompile : 1; GLuint NewGLState; struct { diff --git a/src/mesa/drivers/dri/i965/brw_device_info.h b/src/mesa/drivers/dri/i965/brw_device_info.h index 2a73e93..b5502b8 100644 --- a/src/mesa/drivers/dri/i965/brw_device_info.h +++ b/src/mesa/drivers/dri/i965/brw_device_info.h @@ -30,29 +30,29 @@ struct brw_device_info int gen; /**< Generation number: 4, 5, 6, 7, ... 
 */ int gt; - bool is_g4x; - bool is_ivybridge; - bool is_baytrail; - bool is_haswell; - bool is_cherryview; - bool is_broxton; + bool is_g4x : 1; + bool is_ivybridge : 1; + bool is_baytrail : 1; + bool is_haswell : 1; + bool is_cherryview : 1; + bool is_broxton : 1; - bool has_hiz_and_separate_stencil; - bool must_use_separate_stencil; + bool has_hiz_and_separate_stencil : 1; + bool must_use_separate_stencil : 1; - bool has_llc; + bool has_llc : 1; - bool has_pln; - bool has_compr4; - bool has_surface_tile_offset; - bool supports_simd16_3src; - bool has_resource_streamer; + bool has_pln : 1; + bool has_compr4 : 1; + bool has_surface_tile_offset : 1; + bool supports_simd16_3src : 1; + bool has_resource_streamer : 1; /** * Quirks: * @{ */ - bool has_negative_rhw_bug; + bool has_negative_rhw_bug : 1; /** * Some versions of Gen hardware don't do centroid interpolation correctly @@ -61,7 +61,7 @@ struct brw_device_info * non-centroid interpolation for unlit pixels, at the expense of two extra * fragment shader instructions. */ - bool needs_unlit_centroid_workaround; + bool needs_unlit_centroid_workaround : 1; /** @} */ /** diff --git a/src/mesa/drivers/dri/i965/intel_screen.h b/src/mesa/drivers/dri/i965/intel_screen.h index fd5143e..0bae95e 100644 --- a/src/mesa/drivers/dri/i965/intel_screen.h +++ b/src/mesa/drivers/dri/i965/intel_screen.h @@ -46,14 +46,14 @@ struct intel_screen __DRIscreen *driScrnPriv; - bool no_hw; - - bool hw_must_use_separate_stencil; - - bool hw_has_swizzling; - - int hw_has_timestamp; - + /* So long as we do not frequently write to these booleans, we can + * pack them into a bitfield with fair efficiency (GCC converting the + * access into a mov + flag test). + */ + bool no_hw : 1; + bool hw_must_use_separate_stencil : 1; + bool hw_has_swizzling : 1; + unsigned hw_has_timestamp : 2; /** * Does the kernel support resource streamer? */ @@ -62,7 +62,7 @@ struct intel_screen /** * Does the kernel support context reset notifications? 
*/ - bool has_context_reset_notification; + bool has_context_reset_notification : 1; dri_bufmgr *bufmgr; -- 2.5.0 _______________________________________________ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev