Re: [Mesa-dev] [PATCH 2/2] i965: Use is_scheduling_barrier instead of schedule_node::is_barrier.
On Wed, Oct 18, 2017 at 4:59 PM, Kenneth Graunke wrote:
> Commit a73116ecc60414ade89802150b tried to make add_barrier_deps()
> walk to the next barrier, and stop. To accomplish that, it added an
> is_barrier flag. Unfortunately, this only works half of the time.
>
> The issue is that add_barrier_deps() walks both backward (to the
> previous barrier), and forward (to the next barrier). It also sets
> is_barrier. Assuming that we're processing instructions in forward
> order, this means that is_barrier will be set for previous instructions,
> but not future ones. So we'll never see it, and walk further than we
> need to.

Dang. I should have realized that in my original patch.

With the typo Dylan pointed out fixed, both are:

Reviewed-by: Matt Turner
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH 2/2] i965: Use is_scheduling_barrier instead of schedule_node::is_barrier.
typo in subject "is_barier" -> "is_barrier"

Quoting Kenneth Graunke (2017-10-18 16:59:12)
> Commit a73116ecc60414ade89802150b tried to make add_barrier_deps()
> walk to the next barrier, and stop. To accomplish that, it added an
> is_barrier flag. Unfortunately, this only works half of the time.
>
> The issue is that add_barrier_deps() walks both backward (to the
> previous barrier), and forward (to the next barrier). It also sets
> is_barrier. Assuming that we're processing instructions in forward
> order, this means that is_barrier will be set for previous instructions,
> but not future ones. So we'll never see it, and walk further than we
> need to.
>
> dEQP-GLES31.functional.ssbo.layout.random.all_shared_buffer.23
> now compiles its shaders in 3.6 seconds instead of 3.3 minutes.
> ---
>  src/intel/compiler/brw_schedule_instructions.cpp | 32 ++++++++++----------------------
>  1 file changed, 10 insertions(+), 22 deletions(-)
>
> diff --git a/src/intel/compiler/brw_schedule_instructions.cpp b/src/intel/compiler/brw_schedule_instructions.cpp
> index b3f7e877c80..a1e825c661c 100644
> --- a/src/intel/compiler/brw_schedule_instructions.cpp
> +++ b/src/intel/compiler/brw_schedule_instructions.cpp
> @@ -94,8 +94,6 @@ public:
>      * successors is an exit node.
>      */
>     schedule_node *exit;
> -
> -   bool is_barrier;
>  };
>
>  /**
> @@ -800,7 +798,6 @@ schedule_node::schedule_node(backend_instruction *inst,
>     this->cand_generation = 0;
>     this->delay = 0;
>     this->exit = NULL;
> -   this->is_barrier = false;
>
>     /* We can't measure Gen6 timings directly but expect them to be much
>      * closer to Gen7 than Gen4.
> @@ -921,6 +918,14 @@ instruction_scheduler::add_dep(schedule_node *before, schedule_node *after)
>     add_dep(before, after, before->latency);
>  }
>
> +static bool
> +is_scheduling_barrier(const backend_instruction *inst)
> +{
> +   return inst->opcode == FS_OPCODE_PLACEHOLDER_HALT ||
> +          inst->is_control_flow() ||
> +          inst->has_side_effects();
> +}
> +
>  /**
>   * Sometimes we really want this node to execute after everything that
>   * was before it and before everything that followed it. This adds
> @@ -932,12 +937,10 @@ instruction_scheduler::add_barrier_deps(schedule_node *n)
>     schedule_node *prev = (schedule_node *)n->prev;
>     schedule_node *next = (schedule_node *)n->next;
>
> -   n->is_barrier = true;
> -
>     if (prev) {
>        while (!prev->is_head_sentinel()) {
>           add_dep(prev, n, 0);
> -         if (prev->is_barrier)
> +         if (is_scheduling_barrier(prev->inst))
>              break;
>           prev = (schedule_node *)prev->prev;
>        }
> @@ -946,7 +949,7 @@ instruction_scheduler::add_barrier_deps(schedule_node *n)
>     if (next) {
>        while (!next->is_tail_sentinel()) {
>           add_dep(n, next, 0);
> -         if (next->is_barrier)
> +         if (is_scheduling_barrier(next->inst))
>              break;
>           next = (schedule_node *)next->next;
>        }
> @@ -962,14 +965,6 @@ fs_instruction_scheduler::is_compressed(fs_inst *inst)
>     return inst->exec_size == 16;
>  }
>
> -static bool
> -is_scheduling_barrier(const fs_inst *inst)
> -{
> -   return inst->opcode == FS_OPCODE_PLACEHOLDER_HALT ||
> -          inst->is_control_flow() ||
> -          inst->has_side_effects();
> -}
> -
>  void
>  fs_instruction_scheduler::calculate_deps()
>  {
> @@ -1233,13 +1228,6 @@ fs_instruction_scheduler::calculate_deps()
>     }
>  }
>
> -static bool
> -is_scheduling_barrier(const vec4_instruction *inst)
> -{
> -   return inst->is_control_flow() ||
> -          inst->has_side_effects();
> -}
> -
>  void
>  vec4_instruction_scheduler::calculate_deps()
>  {
> --
> 2.14.2
>
> ___
> mesa-dev mailing list
> mesa-dev@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/mesa-dev

signature.asc
Description: signature
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH 2/2] i965: Use is_scheduling_barrier instead of schedule_node::is_barrier.
Commit a73116ecc60414ade89802150b tried to make add_barrier_deps()
walk to the next barrier, and stop. To accomplish that, it added an
is_barrier flag. Unfortunately, this only works half of the time.

The issue is that add_barrier_deps() walks both backward (to the
previous barrier), and forward (to the next barrier). It also sets
is_barrier. Assuming that we're processing instructions in forward
order, this means that is_barrier will be set for previous instructions,
but not future ones. So we'll never see it, and walk further than we
need to.

dEQP-GLES31.functional.ssbo.layout.random.all_shared_buffer.23
now compiles its shaders in 3.6 seconds instead of 3.3 minutes.
---
 src/intel/compiler/brw_schedule_instructions.cpp | 32 ++++++++++----------------------
 1 file changed, 10 insertions(+), 22 deletions(-)

diff --git a/src/intel/compiler/brw_schedule_instructions.cpp b/src/intel/compiler/brw_schedule_instructions.cpp
index b3f7e877c80..a1e825c661c 100644
--- a/src/intel/compiler/brw_schedule_instructions.cpp
+++ b/src/intel/compiler/brw_schedule_instructions.cpp
@@ -94,8 +94,6 @@ public:
     * successors is an exit node.
     */
    schedule_node *exit;
-
-   bool is_barrier;
 };
 
 /**
@@ -800,7 +798,6 @@ schedule_node::schedule_node(backend_instruction *inst,
    this->cand_generation = 0;
    this->delay = 0;
    this->exit = NULL;
-   this->is_barrier = false;
 
    /* We can't measure Gen6 timings directly but expect them to be much
     * closer to Gen7 than Gen4.
@@ -921,6 +918,14 @@ instruction_scheduler::add_dep(schedule_node *before, schedule_node *after)
    add_dep(before, after, before->latency);
 }
 
+static bool
+is_scheduling_barrier(const backend_instruction *inst)
+{
+   return inst->opcode == FS_OPCODE_PLACEHOLDER_HALT ||
+          inst->is_control_flow() ||
+          inst->has_side_effects();
+}
+
 /**
  * Sometimes we really want this node to execute after everything that
  * was before it and before everything that followed it. This adds
@@ -932,12 +937,10 @@ instruction_scheduler::add_barrier_deps(schedule_node *n)
    schedule_node *prev = (schedule_node *)n->prev;
    schedule_node *next = (schedule_node *)n->next;
 
-   n->is_barrier = true;
-
    if (prev) {
       while (!prev->is_head_sentinel()) {
          add_dep(prev, n, 0);
-         if (prev->is_barrier)
+         if (is_scheduling_barrier(prev->inst))
             break;
          prev = (schedule_node *)prev->prev;
       }
@@ -946,7 +949,7 @@ instruction_scheduler::add_barrier_deps(schedule_node *n)
    if (next) {
       while (!next->is_tail_sentinel()) {
          add_dep(n, next, 0);
-         if (next->is_barrier)
+         if (is_scheduling_barrier(next->inst))
             break;
          next = (schedule_node *)next->next;
       }
@@ -962,14 +965,6 @@ fs_instruction_scheduler::is_compressed(fs_inst *inst)
    return inst->exec_size == 16;
 }
 
-static bool
-is_scheduling_barrier(const fs_inst *inst)
-{
-   return inst->opcode == FS_OPCODE_PLACEHOLDER_HALT ||
-          inst->is_control_flow() ||
-          inst->has_side_effects();
-}
-
 void
 fs_instruction_scheduler::calculate_deps()
 {
@@ -1233,13 +1228,6 @@ fs_instruction_scheduler::calculate_deps()
    }
 }
 
-static bool
-is_scheduling_barrier(const vec4_instruction *inst)
-{
-   return inst->is_control_flow() ||
-          inst->has_side_effects();
-}
-
 void
 vec4_instruction_scheduler::calculate_deps()
 {
-- 
2.14.2
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev