Module: Mesa
Branch: master
Commit: 630154e77b778ccb594be9e572988b05b0fc28e1
URL:    http://cgit.freedesktop.org/mesa/mesa/commit/?id=630154e77b778ccb594be9e572988b05b0fc28e1

Author: Mathias Fröhlich <mathias.froehl...@web.de>
Date:   Tue Dec 11 18:45:43 2018 +0100

i965: Move down genX_upload_sbe in profiles.

Avoid looping over all VARYING_SLOT_MAX urb_setup array
entries from genX_upload_sbe. Prepare an array indirection
to the active entries of urb_setup already in the compile
step. On upload only walk the active arrays.

v2: Use uint8_t to store the attribute numbers.
v3: Change loop to build up the array indirection.
v4: Rebase.
v5: Style fix.

Reviewed-by: Matt Turner <matts...@gmail.com>
Reviewed-by: Ian Romanick <ian.d.roman...@intel.com>
Signed-off-by: Mathias Fröhlich <mathias.froehl...@web.de>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/308>

---

 src/intel/compiler/brw_compiler.h             |  8 ++++++++
 src/intel/compiler/brw_fs.cpp                 | 24 ++++++++++++++++++++++++
 src/intel/compiler/brw_fs.h                   |  1 +
 src/intel/compiler/brw_fs_visitor.cpp         |  1 +
 src/mesa/drivers/dri/i965/genX_state_upload.c |  6 +++---
 5 files changed, 37 insertions(+), 3 deletions(-)

diff --git a/src/intel/compiler/brw_compiler.h b/src/intel/compiler/brw_compiler.h
index 1b34ed2d6bb..517afe96788 100644
--- a/src/intel/compiler/brw_compiler.h
+++ b/src/intel/compiler/brw_compiler.h
@@ -788,6 +788,14 @@ struct brw_wm_prog_data {
     * For varying slots that are not used by the FS, the value is -1.
     */
    int urb_setup[VARYING_SLOT_MAX];
+
+   /**
+    * Cache structure into the urb_setup array above that contains the
+    * attribute numbers of active varyings out of urb_setup.
+    * The actual count is stored in urb_setup_attribs_count.
+    */
+   uint8_t urb_setup_attribs[VARYING_SLOT_MAX];
+   uint8_t urb_setup_attribs_count;
 };
 
 /** Returns the SIMD width corresponding to a given KSP index
diff --git a/src/intel/compiler/brw_fs.cpp b/src/intel/compiler/brw_fs.cpp
index f284a2b6644..b2a563fd94e 100644
--- a/src/intel/compiler/brw_fs.cpp
+++ b/src/intel/compiler/brw_fs.cpp
@@ -1641,6 +1641,26 @@ fs_visitor::assign_curb_setup()
    this->first_non_payload_grf = payload.num_regs + prog_data->curb_read_length;
 }
 
+/*
+ * Build up an array of indices into the urb_setup array that
+ * references the active entries of the urb_setup array.
+ * Used to accelerate walking the active entries of the urb_setup array
+ * on each upload.
+ */
+void
+brw_compute_urb_setup_index(struct brw_wm_prog_data *wm_prog_data)
+{
+   /* Make sure uint8_t is sufficient */
+   STATIC_ASSERT(VARYING_SLOT_MAX <= 0xff);
+   uint8_t index = 0;
+   for (uint8_t attr = 0; attr < VARYING_SLOT_MAX; attr++) {
+      if (wm_prog_data->urb_setup[attr] >= 0) {
+         wm_prog_data->urb_setup_attribs[index++] = attr;
+      }
+   }
+   wm_prog_data->urb_setup_attribs_count = index;
+}
+
 static void
 calculate_urb_setup(const struct gen_device_info *devinfo,
                     const struct brw_wm_prog_key *key,
@@ -1728,6 +1748,8 @@ calculate_urb_setup(const struct gen_device_info *devinfo,
    }
 
    prog_data->num_varying_inputs = urb_next;
+
+   brw_compute_urb_setup_index(prog_data);
 }
 
 void
@@ -8164,6 +8186,8 @@ gen9_ps_header_only_workaround(struct brw_wm_prog_data *wm_prog_data)
 
    wm_prog_data->urb_setup[VARYING_SLOT_LAYER] = 0;
    wm_prog_data->num_varying_inputs = 1;
+
+   brw_compute_urb_setup_index(wm_prog_data);
 }
 
 bool
diff --git a/src/intel/compiler/brw_fs.h b/src/intel/compiler/brw_fs.h
index e0717eebb0e..c09c4eb8759 100644
--- a/src/intel/compiler/brw_fs.h
+++ b/src/intel/compiler/brw_fs.h
@@ -654,5 +654,6 @@ enum brw_barycentric_mode brw_barycentric_mode(enum glsl_interp_mode mode,
 uint32_t brw_fb_write_msg_control(const fs_inst *inst,
                                   const struct brw_wm_prog_data *prog_data);
 
+void brw_compute_urb_setup_index(struct brw_wm_prog_data *wm_prog_data);
 
 #endif /* BRW_FS_H */
diff --git a/src/intel/compiler/brw_fs_visitor.cpp b/src/intel/compiler/brw_fs_visitor.cpp
index fdaabf43c48..e276227f0ac 100644
--- a/src/intel/compiler/brw_fs_visitor.cpp
+++ b/src/intel/compiler/brw_fs_visitor.cpp
@@ -122,6 +122,7 @@ fs_visitor::emit_dummy_fs()
    wm_prog_data->num_varying_inputs = devinfo->gen < 6 ? 1 : 0;
    memset(wm_prog_data->urb_setup, -1,
           sizeof(wm_prog_data->urb_setup[0]) * VARYING_SLOT_MAX);
+   brw_compute_urb_setup_index(wm_prog_data);
 
    /* We don't have any uniforms. */
    stage_prog_data->nr_params = 0;
diff --git a/src/mesa/drivers/dri/i965/genX_state_upload.c b/src/mesa/drivers/dri/i965/genX_state_upload.c
index 635d314ef2b..0bd124011f4 100644
--- a/src/mesa/drivers/dri/i965/genX_state_upload.c
+++ b/src/mesa/drivers/dri/i965/genX_state_upload.c
@@ -1099,11 +1099,11 @@ genX(calculate_attr_overrides)(const struct brw_context *brw,
     */
    bool drawing_points = brw_is_drawing_points(brw);
 
-   for (int attr = 0; attr < VARYING_SLOT_MAX; attr++) {
+   for (uint8_t idx = 0; idx < wm_prog_data->urb_setup_attribs_count; idx++) {
+      uint8_t attr = wm_prog_data->urb_setup_attribs[idx];
       int input_index = wm_prog_data->urb_setup[attr];
 
-      if (input_index < 0)
-         continue;
+      assert(0 <= input_index);
 
       /* _NEW_POINT */
       bool point_sprite = false;

_______________________________________________
mesa-commit mailing list
mesa-commit@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-commit