Module: Mesa Branch: master Commit: c3c251d98feab2906a1e42b1c5c5b7ec33dd5545 URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=c3c251d98feab2906a1e42b1c5c5b7ec33dd5545
Author: Iago Toral Quiroga <[email protected]> Date: Thu Mar 25 14:43:20 2021 +0100 broadcom/compiler: flag TMU reads with a read dependency on last TMU config We were using a write dependency to ensure ordering since LDTMU sequences are ordered, but by using a write dependency with TMU config we were also preserving ordering with TMU config writes that are not a sequence terminator, which is not required and reduces scheduling flexibility. Instead, use a write dependency to ensure strict ordering of TMU reads, but only a read dependency with TMU config. With this change we also need to update CS barriers to also have a write dependency with TMU reads to ensure that we don't move TMU reads around CS barriers. total instructions in shared programs: 13602500 -> 13597851 (-0.03%) instructions in affected programs: 2681428 -> 2676779 (-0.17%) helped: 6567 HURT: 4960 Instructions are helped. total max-temps in shared programs: 2317927 -> 2317914 (<.01%) max-temps in affected programs: 13861 -> 13848 (-0.09%) helped: 355 HURT: 300 Inconclusive result (value mean confidence interval includes 0). total sfu-stalls in shared programs: 32074 -> 32247 (0.54%) sfu-stalls in affected programs: 848 -> 1021 (20.40%) helped: 160 HURT: 327 Inconclusive result (%-change mean confidence interval includes 0). total inst-and-stalls in shared programs: 13634574 -> 13630098 (-0.03%) inst-and-stalls in affected programs: 2703041 -> 2698565 (-0.17%) helped: 6558 HURT: 5020 Inst-and-stalls are helped. 
Reviewed-by: Alejandro Piñeiro <[email protected]> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/9856> --- src/broadcom/compiler/qpu_schedule.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/src/broadcom/compiler/qpu_schedule.c b/src/broadcom/compiler/qpu_schedule.c index a6430ac2195..e6a07723618 100644 --- a/src/broadcom/compiler/qpu_schedule.c +++ b/src/broadcom/compiler/qpu_schedule.c @@ -79,6 +79,7 @@ struct schedule_state { struct schedule_node *last_vpm_read; struct schedule_node *last_tmu_write; struct schedule_node *last_tmu_config; + struct schedule_node *last_tmu_read; struct schedule_node *last_tlb; struct schedule_node *last_vpm; struct schedule_node *last_unif; @@ -248,6 +249,7 @@ process_waddr_deps(struct schedule_state *state, struct schedule_node *n, * barriers to affect ALU operations. */ add_write_dep(state, &state->last_tmu_write, n); + add_write_dep(state, &state->last_tmu_read, n); break; case V3D_QPU_WADDR_UNIFA: @@ -407,7 +409,9 @@ calculate_deps(struct schedule_state *state, struct schedule_node *n) if (v3d_qpu_waits_on_tmu(inst)) { /* TMU loads are coming from a FIFO, so ordering is important. */ - add_write_dep(state, &state->last_tmu_config, n); + add_write_dep(state, &state->last_tmu_read, n); + /* Keep TMU loads after their TMU lookup terminator */ + add_read_dep(state, state->last_tmu_config, n); } /* Allow wrtmuc to be reordered with other instructions in the _______________________________________________ mesa-commit mailing list [email protected] https://lists.freedesktop.org/mailman/listinfo/mesa-commit
