On Tue, 2016-12-20 at 16:31 -0800, Jason Ekstrand wrote: > On Sun, Dec 18, 2016 at 9:47 PM, Timothy Arceri <timothy.arceri@colla > bora.com> wrote: > > V2: > > - tidy ups suggested by Connor. > > - tidy up cloning logic and handle copy propagation > > based of suggestion by Connor. > > - use nir_ssa_def_rewrite_uses to fix up lcssa phis > > suggested by Connor. > > - add support for complex loop unrolling (two terminators) > > - handle case were the ssa defs use outside the loop is already a > > phi > > - support unrolling loops with multiple terminators when trip count > > is know for each terminator > > > > V3: > > - set correct num_components when creating phi in complex unroll > > - rewrite update remap table based on Jasons suggestions. > > - remove unrequired extract_loop_body() helper as suggested by > > Jason. > > - simplify the lcssa phi fix up code for simple loops as per Jasons > > suggestions. > > - use mem context to keep track of hash table memory as suggested > > by Jason. > > - move is_{complex,simple}_loop helpers to the unroll code > > - require nir_metadata_block_index > > - partially rewrote complex unroll to be simpler and easier to > > follow. > > > > V4: > > - use rzalloc() when creating nir_phi_src but not setting pred > > right away > > fixes regression cause by ralloc() no longer zeroing memory. > > > > V5: > > - simplify calling of complex_unroll() > > - use new loop terminator fields to get the break/continue from > > blocks > > and simplify loop unrolling code > > - handle slightly less trivial loop terminators. if branches can > > now have instructions but can only contain a single block. > > - use nir print type IR snippets in unroll function descriptions > > - add better explanation and variable for why we need to clone > > additional times when the second terminator it the limiting > > terminator. > > - partially convert out of ssa before unrolling loops (suggested by > > Jason) > > --- > > src/compiler/Makefile.sources | 1 + > > src/compiler/nir/nir.h | 2 + > > src/compiler/nir/nir_opt_loop_unroll.c | 559 > > +++++++++++++++++++++++++++++++++ > > 3 files changed, 562 insertions(+) > > create mode 100644 src/compiler/nir/nir_opt_loop_unroll.c > > > > diff --git a/src/compiler/Makefile.sources > > b/src/compiler/Makefile.sources > > index e8f7b02..ae3e5f0 100644 > > --- a/src/compiler/Makefile.sources > > +++ b/src/compiler/Makefile.sources > > @@ -239,6 +239,7 @@ NIR_FILES = \ > > nir/nir_opt_dead_cf.c \ > > nir/nir_opt_gcm.c \ > > nir/nir_opt_global_to_local.c \ > > + nir/nir_opt_loop_unroll.c \ > > nir/nir_opt_peephole_select.c \ > > nir/nir_opt_remove_phis.c \ > > nir/nir_opt_undef.c \ > > diff --git a/src/compiler/nir/nir.h b/src/compiler/nir/nir.h > > index 75a91ea..51bc6b2 100644 > > --- a/src/compiler/nir/nir.h > > +++ b/src/compiler/nir/nir.h > > @@ -2552,6 +2552,8 @@ bool nir_opt_dead_cf(nir_shader *shader); > > > > bool nir_opt_gcm(nir_shader *shader, bool value_number); > > > > +bool nir_opt_loop_unroll(nir_shader *shader, nir_variable_mode > > indirect_mask); > > + > > bool nir_opt_peephole_select(nir_shader *shader, unsigned limit); > > > > bool nir_opt_remove_phis(nir_shader *shader); > > diff --git a/src/compiler/nir/nir_opt_loop_unroll.c > > b/src/compiler/nir/nir_opt_loop_unroll.c > > new file mode 100644 > > index 0000000..7eb44cb > > --- /dev/null > > +++ b/src/compiler/nir/nir_opt_loop_unroll.c > > @@ -0,0 +1,559 @@ > > +/* > > + * Copyright © 2016 Intel Corporation > > + * > > + * Permission is hereby granted, free of charge, to any person > > obtaining a > > + * copy of this software and associated documentation files (the > > "Software"), > > + * to deal in the Software without restriction, including without > > limitation > > + * the rights to use, copy, modify, merge, publish, distribute, > > sublicense, > > + * and/or sell copies of the Software, and to permit persons to > > whom the > > + * Software is furnished to do so, subject to the following > > conditions: > > + * > > + * The above copyright notice and this permission notice > > (including the next > > + * paragraph) shall be included in all copies or substantial > > portions of the > > + * Software. > > + * > > + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, > > EXPRESS OR > > + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF > > MERCHANTABILITY, > > + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO > > EVENT SHALL > > + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, > > DAMAGES OR OTHER > > + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, > > ARISING > > + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR > > OTHER > > + * DEALINGS IN THE SOFTWARE. > > + */ > > + > > +#include "nir.h" > > +#include "nir_builder.h" > > +#include "nir_control_flow.h" > > +#include "nir_loop_analyze.h" > > + > > +/* Convert all phis in the give block to regs, here we insert a > > mov in the > > + * pred block of the phi source to copy the src to the reg, then > > we rewrite > > + * all uses of the phi to the new reg. > > + */ > > +static void > > +convert_phis_to_regs(nir_builder *b, nir_block *block) > > +{ > > In patch 1/6 of the series I sent yesterday I added a slightly more > advanced version of this helper to nir_from_ssa as well as another > one I found useful in my pass. You can either pull that into your > series or I can split it into three patches: Pull yours into to_ssa, > improve it, add the second. I don't really care which so long as we > end up sharing the code. > > > + nir_foreach_instr_safe(instr, block) { > > + if (instr->type != nir_instr_type_phi) > > + break; > > + > > + nir_phi_instr *phi = nir_instr_as_phi(instr); > > + > > + nir_register *reg = nir_local_reg_create(b->impl); > > + reg->num_components = phi->dest.ssa.num_components; > > + reg->bit_size = phi->dest.ssa.bit_size; > > + > > + nir_foreach_phi_src(src, phi) { > > + nir_alu_instr *mov = nir_alu_instr_create(b->shader, > > nir_op_imov); > > + nir_src_copy(&mov->src[0].src, &src->src, mov); > > + mov->dest.dest = nir_dest_for_reg(reg); > > + mov->dest.write_mask = (1 << reg->num_components) - 1; > > + > > + nir_instr_insert(nir_after_block_before_jump(src->pred), > > + &mov->instr); > > + } > > + > > + nir_alu_instr *mov = nir_alu_instr_create(b->shader, > > nir_op_imov); > > + mov->src[0].src = nir_src_for_reg(reg); > > + mov->dest.write_mask = (1 << reg->num_components) - 1; > > + nir_ssa_dest_init(&mov->instr, &mov->dest.dest, reg- > > >num_components, > > + reg->bit_size, NULL); > > + nir_instr_insert(nir_after_instr(&phi->instr), &mov->instr); > > + > > + nir_ssa_def_rewrite_uses(&phi->dest.ssa, > > + nir_src_for_ssa(&mov- > > >dest.dest.ssa)); > > + nir_instr_remove(&phi->instr); > > + } > > +} > > + > > +/* Convert the phis from the loops first block and the block that > > follows the > > + * loop into regs. Partially converting out of SSA allows us to > > unroll the > > + * loop without having to keep track of and update phis along the > > way which > > + * gets tricky and doesn't add much value over conveting to regs. > > + */ > > +static void > > +convert_loop_phis_to_reg(nir_builder *b, nir_loop *loop, > > + nir_block *header_blk) > > +{ > > + convert_phis_to_regs(b, header_blk); > > + > > + nir_block *block_after_loop = > > + nir_cf_node_as_block(nir_cf_node_next(&loop->cf_node)); > > + > > + convert_phis_to_regs(b, block_after_loop); > > +} > > + > > +/** > > + * Unroll a loop where we know exactly how many iterations there > > are and there > > + * is only a single exit point. Note here we can unroll loops > > with multiple > > + * theoretical exits that only have a single terminating exit that > > we always > > + * know is the "real" exit. > > + * > > + * loop { > > + * ...instrs... > > + * } > > + * > > + * And the iteration count is 3, the output will be: > > + * > > + * ...instrs... ...instrs... ...instrs... > > + */ > > +static void > > +simple_unroll(nir_loop *loop, nir_builder *b) > > +{ > > + nir_loop_terminator *limiting_term = loop->info- > > >limiting_terminator; > > + assert(nir_is_trivial_loop_if(limiting_term->nif, > > + limiting_term->break_block)); > > + > > + nir_convert_loop_to_lcssa(loop); > > + > > + nir_block *header_blk = nir_loop_first_block(loop); > > + convert_loop_phis_to_reg(b, loop, header_blk); > > + > > + /* Skip over loop terminator and get the loop body. */ > > + list_for_each_entry(nir_loop_terminator, terminator, > > + &loop->info->loop_terminator_list, > > loop_terminator_link) { > > This hits 81 characters > > > + > > + /* Remove all but the limiting terminator as we know the > > other exit > > + * conditions can never be met. Note we need to extract any > > instructions > > + * in the continue from branch and insert then into the loop > > body before > > + * removing it. > > + */ > > + if (terminator->nif != limiting_term->nif) { > > + assert(nir_is_trivial_loop_if(terminator->nif, > > + terminator->break_block)); > > + > > + nir_cf_list continue_from_blk; > > + nir_cf_extract(&continue_from_blk, > > + nir_before_block(terminator- > > >continue_from_block), > > + nir_after_block(terminator- > > >continue_from_block)); > > + b->cursor = nir_after_cf_node(&terminator->nif->cf_node); > > + nir_cf_reinsert(&continue_from_blk, b->cursor); > > + > > + nir_cf_node_remove(&terminator->nif->cf_node); > > + } > > + } > > + > > + /* Pluck out the loop header */ > > + nir_cf_list lp_header; > > + nir_cf_extract(&lp_header, nir_before_block(header_blk), > > + nir_before_cf_node(&limiting_term->nif- > > >cf_node)); > > + > > + /* Add the continue from block of the limiting terminator to > > the loop body > > + */ > > + nir_cf_list continue_from_blk; > > + nir_cf_extract(&continue_from_blk, > > + nir_before_block(limiting_term- > > >continue_from_block), > > + nir_after_block(limiting_term- > > >continue_from_block)); > > + b->cursor = nir_after_cf_node(&limiting_term->nif->cf_node); > > + nir_cf_reinsert(&continue_from_blk, b->cursor); > > + > > + /* Pluck out the loop body */ > > + nir_cf_list loop_body; > > + nir_cf_extract(&loop_body, nir_after_cf_node(&limiting_term- > > >nif->cf_node), > > + nir_after_block(nir_loop_last_block(loop))); > > + > > + struct hash_table *remap_table = > > + _mesa_hash_table_create(NULL, _mesa_hash_pointer, > > + _mesa_key_pointer_equal); > > + > > + /* Clone the loop header */ > > + nir_cf_list cloned_header; > > + nir_cf_list_clone(&cloned_header, &lp_header, loop- > > >cf_node.parent, > > + remap_table); > > + > > + /* Insert cloned loop header before the loop */ > > + b->cursor = nir_before_cf_node(&loop->cf_node); > > + nir_cf_reinsert(&cloned_header, b->cursor); > > + > > + /* Temp list to store the cloned loop body as we unroll */ > > + nir_cf_list unrolled_lp_body; > > + > > + /* Clone loop header and append to the loop body */ > > + for (unsigned i = 0; i < loop->info->trip_count; i++) { > > + /* Clone loop body */ > > + nir_cf_list_clone(&unrolled_lp_body, &loop_body, loop- > > >cf_node.parent, > > + remap_table); > > + > > + /* Insert unrolled loop body before the loop */ > > + b->cursor = nir_before_cf_node(&loop->cf_node); > > + nir_cf_reinsert(&unrolled_lp_body, b->cursor); > > + > > + /* Clone loop header */ > > + nir_cf_list_clone(&cloned_header, &lp_header, loop- > > >cf_node.parent, > > + remap_table); > > + > > + /* Insert loop header after loop body */ > > + b->cursor = nir_before_cf_node(&loop->cf_node); > > Why are we using a builder? Why not just have a nir_cursor cursor > variable? We're not inserting any instructions. > > > + nir_cf_reinsert(&cloned_header, b->cursor); > > For that matter, this could just be "nir_cf_reinsert(&cloned_header, > nir_before_cf_node(&loop->cf_node))" > > > + } > > + > > + /* Remove the break from the loop terminator and add > > instructions from > > + * the break block after the unrolled loop. > > + */ > > + nir_instr *break_instr = nir_block_last_instr(limiting_term- > > >break_block); > > + nir_instr_remove(break_instr); > > + nir_cf_list break_blk; > > + nir_cf_extract(&break_blk, > > + nir_before_block(limiting_term->break_block), > > + nir_after_block(limiting_term->break_block)); > > + > > + /* Clone so things get properly remapped */ > > + nir_cf_list cloned_break_blk; > > + nir_cf_list_clone(&cloned_break_blk, &break_blk, loop- > > >cf_node.parent, > > + remap_table); > > + > > + b->cursor = nir_before_cf_node(&loop->cf_node); > > + nir_cf_reinsert(&cloned_break_blk, b->cursor); > > + > > + /* Remove the loop */ > > + nir_cf_node_remove(&loop->cf_node); > > + > > + /* Delete the original loop body, break block & header */ > > + nir_cf_delete(&lp_header); > > + nir_cf_delete(&loop_body); > > + nir_cf_delete(&break_blk); > > + > > + _mesa_hash_table_destroy(remap_table, NULL); > > + > > I *think* you can just convert to SSA once at the end but this is > probably safer.
Yeah I think we has discussed this before. Since we only unroll a single loop per pass I don't think it matters where we call this. > > > + nir_convert_to_ssa_impl(b->impl); > > +} > > + > > +static void > > +move_cf_list_into_loop_term(nir_cf_list *lst, nir_loop_terminator > > *term) > > +{ > > + /* Move the rest of the loop inside the continue-from-block */ > > + nir_cf_reinsert(lst, nir_after_block(term- > > >continue_from_block)); > > + > > + /* Remove the break */ > > + nir_instr_remove(nir_block_last_instr(term->break_block)); > > +} > > + > > +/** > > + * Unroll a loop with two exists when the trip count of one of the > > exits is > > + * unknown. If continue_from_then is true, the loop is repeated > > only when the > > + * "then" branch of the if is taken; otherwise it is repeated only > > + * when the "else" branch of the if is taken. > > + * > > + * For example, if the input is: > > + * > > + * loop { > > + * ...phis/condition... > > + * if condition { > > + * ...then instructions... > > + * } else { > > + * ...continue instructions... > > + * break > > + * } > > + * ...body... > > + * } > > + * > > + * And the iteration count is 3, and unlimit_term- > > >continue_from_then is true, > > + * then the output will be: > > + * > > + * ...condition... > > + * if condition { > > + * ...then instructions... > > + * ...body... > > + * if condition { > > + * ...then instructions... > > + * ...body... > > + * if condition { > > + * ...then instructions... > > + * ...body... > > + * } else { > > + * ...continue instructions... > > + * } > > + * } else { > > + * ...continue instructions... > > + * } > > + * } else { > > + * ...continue instructions... > > + * } > > + */ > > +static void > > +complex_unroll(nir_loop *loop, nir_builder *b, > > + nir_loop_terminator *unlimit_term, bool > > limiting_term_second) > > +{ > > + assert(nir_is_trivial_loop_if(unlimit_term->nif, > > + unlimit_term->break_block)); > > + > > + nir_loop_terminator *limiting_term = loop->info- > > >limiting_terminator; > > + assert(nir_is_trivial_loop_if(limiting_term->nif, > > + limiting_term->break_block)); > > + > > + nir_convert_loop_to_lcssa(loop); > > + > > + nir_block *header_blk = nir_loop_first_block(loop); > > + convert_loop_phis_to_reg(b, loop, header_blk); > > + > > + /* Pluck out the loop header */ > > + nir_cf_list lp_header; > > + nir_cf_extract(&lp_header, nir_before_block(header_blk), > > + nir_before_cf_node(nir_cf_node_next(&header_blk- > > >cf_node))); > > Why not just use nir_after_block(header_blk)? Agreed. > Much simpler. Also, do we really only want to grab the header block > and not everything up until the first terminator? Yeah I think you are right. > > > + > > + nir_cf_list limit_break_blk; > > + unsigned num_times_to_clone; > > + if (limiting_term_second) { > > + /* We need some special handling when its the second > > terminator causing > > + * us to exit the loop for example: > > + * > > + * for (int i = 0; i < uniform_lp_count; i++) { > > + * colour = vec4(0.0, 1.0, 0.0, 1.0); > > + * > > + * if (i == 1) { > > + * break; > > + * } > > + * ... any further code is unreachable after i == 1 ... > > + * } > > + */ > > + nir_cf_list after_lt; > > + nir_if *limit_if = limiting_term->nif; > > + nir_cf_extract(&after_lt, nir_after_cf_node(&limit_if- > > >cf_node), > > + nir_after_block(nir_loop_last_block(loop))); > > + move_cf_list_into_loop_term(&after_lt, limiting_term); > > + > > + /* Because the trip count is the number of times we pass > > over the extire > > + * loop before hitting a break when the second terminator is > > the > > + * limiting terminator we can actually execute code inside > > the loop when > > + * trip count == 0 e.g. the code above the break. So we > > need to bump > > + * the trip_count in order for the code below to clone > > anything. When > > + * trip count == 1 we execute the code above the loop twice > > and the code > > + * below it once so we need clone things twice and so on. > > This comment is way better than I remember it being. Thanks! > > > + */ > > + num_times_to_clone = loop->info->trip_count + 1; > > + } else { > > + /* Remove the break then extract instructions from the break > > block so we > > + * can insert them in the innermost else of the unrolled > > loop. > > + */ > > + nir_instr *break_instr = nir_block_last_instr(limiting_term- > > >break_block); > > + nir_instr_remove(break_instr); > > + nir_cf_extract(&limit_break_blk, > > + nir_before_block(limiting_term->break_block), > > + nir_after_block(limiting_term->break_block)); > > + > > + nir_cf_list continue_blk; > > + nir_cf_extract(&continue_blk, > > + nir_before_block(limiting_term- > > >continue_from_block), > > + nir_after_block(limiting_term- > > >continue_from_block)); > > These two extracts assume that the continue and break sides of the if > each contain a single block. Is this a valid assumption? Hmm is was until I made the analysis pass better in this lastest version. Will fix. > > > + > > + b->cursor = nir_after_cf_node(&limiting_term->nif->cf_node); > > + nir_cf_reinsert(&continue_blk, b->cursor); > > This is a nice simplification, but I'm not sure it's valid... If you > have real stuff in the continue I think it will end up getting > executed in the last iteration. I think that's what we want though, as here trip count == num_times_to_clone which means we iterate the number of times we did a full pass over the loop exiting from the continue. There is further special handing below to add the header/break branch instructions to the end of unrolling. > > > + > > + nir_cf_node_remove(&limiting_term->nif->cf_node); > > + > > + num_times_to_clone = loop->info->trip_count; > > + } > > + > > + /* In the terminator that we have no trip count for move > > everything after > > + * the terminator into the continue from branch. > > + */ > > + nir_cf_list loop_end; > > + nir_cf_extract(&loop_end, nir_after_cf_node(&unlimit_term->nif- > > >cf_node), > > + nir_after_block(nir_loop_last_block(loop))); > > + move_cf_list_into_loop_term(&loop_end, unlimit_term); > > + > > + /* Pluck out the loop body. */ > > + nir_cf_list loop_body; > > + nir_cf_extract(&loop_body, > > nir_before_block(nir_loop_first_block(loop)), > > + nir_after_block(nir_loop_last_block(loop))); > > + > > + struct hash_table *remap_table = > > + _mesa_hash_table_create(NULL, _mesa_hash_pointer, > > + _mesa_key_pointer_equal); > > + > > + /* Set the cursor to before the loop */ > > + b->cursor = nir_before_cf_node(&loop->cf_node); > > + > > + /* Temp lists to store the cloned loop as we unroll */ > > + nir_cf_list unrolled_lp_body; > > + nir_cf_list cloned_header; > > + > > + for (unsigned i = 0; i < num_times_to_clone; i++) { > > + /* Clone loop header */ > > + nir_cf_list_clone(&cloned_header, &lp_header, loop- > > >cf_node.parent, > > + remap_table); > > + > > + /* Insert cloned loop header */ > > + nir_cf_reinsert(&cloned_header, b->cursor); > > + > > + /* Clone loop body */ > > + nir_cf_list_clone(&unrolled_lp_body, &loop_body, loop- > > >cf_node.parent, > > + remap_table); > > + > > + nir_cf_node *last_node = > > + exec_node_data(nir_cf_node, > > + > > exec_list_get_tail(&unrolled_lp_body.list), node); > > + assert(last_node->type == nir_cf_node_block && > > + exec_list_is_empty(&nir_cf_node_as_block(last_node)- > > >instr_list)); > > + > > + /* Insert unrolled loop body */ > > + nir_cf_reinsert(&unrolled_lp_body, b->cursor); > > I don't believe your use of cursors here is valid. Once you've > inserted something at a cursor that cursor should be assumed to be > invalid. Instead, I think you probably want to keep track of the > current if (if any) and always insert after the last block in the if > or before the loop as needed. Yes, that's annoying, but I'm not > quite sure how what you have here even works and I wouldn't trust it > to with changes in nir_cursor. That's what I had but it worked without it. I'll add it back in. > > > + > > + nir_cf_node *if_node = nir_cf_node_prev(last_node); > > + assert(if_node->type == nir_cf_node_if); > > The cast helper does this for you now. > > > + nir_if *if_stmt = nir_cf_node_as_if(if_node); > > + > > + /* Set the cursor to the last if in the loop body we just > > unrolled ready > > + * for the next iteration. > > + */ > > + if (unlimit_term->continue_from_then) { > > + b->cursor = > > nir_after_block(nir_if_last_then_block(if_stmt)); > > + } else { > > + b->cursor = > > nir_after_block(nir_if_last_else_block(if_stmt)); > > + } > > + } > > + > > + if (!limiting_term_second) { > > + nir_cf_list_clone(&cloned_header, &lp_header, loop- > > >cf_node.parent, > > + remap_table); > > + > > + /* Insert cloned loop header */ > > + nir_cf_reinsert(&cloned_header, b->cursor); > > + > > + /* Clone so things get properly remapped, and insert break > > block from > > + * the limiting terminator. > > + */ > > + nir_cf_list cloned_break_blk; > > + nir_cf_list_clone(&cloned_break_blk, &limit_break_blk, > > + loop->cf_node.parent, remap_table); > > + > > + nir_cf_reinsert(&cloned_break_blk, b->cursor); > > + nir_cf_delete(&limit_break_blk); > > + } > > + > > + /* The loop has been unrolled so remove it. */ > > + nir_cf_node_remove(&loop->cf_node); > > + > > + /* Delete the original loop header and body */ > > + nir_cf_delete(&lp_header); > > + nir_cf_delete(&loop_body); > > + > > + _mesa_hash_table_destroy(remap_table, NULL); > > + > > + nir_convert_to_ssa_impl(b->impl); > > +} > > + > > +static bool > > +is_loop_small_enough_to_unroll(nir_shader *shader, nir_loop_info > > *li) > > +{ > > + unsigned max_iter = shader->options->max_unroll_iterations; > > + > > + if (li->trip_count > max_iter) > > + return false; > > + > > + if (li->force_unroll) > > + return true; > > + > > + bool loop_not_too_large = > > + li->num_instructions * li->trip_count <= max_iter * 25; > > + > > + return loop_not_too_large; > > +} > > + > > +static bool > > +process_loops(nir_cf_node *cf_node, nir_builder *b, bool > > *innermost_loop) > > +{ > > + bool progress = false; > > + nir_loop *loop; > > + > > + switch (cf_node->type) { > > + case nir_cf_node_block: > > + return progress; > > + case nir_cf_node_if: { > > + nir_if *if_stmt = nir_cf_node_as_if(cf_node); > > + foreach_list_typed_safe(nir_cf_node, nested_node, node, > > &if_stmt->then_list) > > + progress |= process_loops(nested_node, b, > > innermost_loop); > > + foreach_list_typed_safe(nir_cf_node, nested_node, node, > > &if_stmt->else_list) > > + progress |= process_loops(nested_node, b, > > innermost_loop); > > + return progress; > > + } > > + case nir_cf_node_loop: { > > + loop = nir_cf_node_as_loop(cf_node); > > + foreach_list_typed_safe(nir_cf_node, nested_node, node, > > &loop->body) > > + progress |= process_loops(nested_node, b, > > innermost_loop); > > + break; > > + } > > + default: > > + unreachable("unknown cf node type"); > > + } > > + > > + if (*innermost_loop) { > > + /* Don't attempt to unroll outer loops or a second inner > > loop in > > + * this pass wait until the next pass as we have altered the > > cf. > > + */ > > + *innermost_loop = false; > > + > > + if (loop->info->limiting_terminator == NULL) > > + return progress; > > + > > + if (!is_loop_small_enough_to_unroll(b->shader, loop->info)) > > + return progress; > > + > > + if (loop->info->is_trip_count_known) { > > + simple_unroll(loop, b); > > + progress = true; > > + } else { > > + /* Attempt to unroll loops with two terminators. */ > > + unsigned num_lt = list_length(&loop->info- > > >loop_terminator_list); > > + if (num_lt == 2) { > > + bool limiting_term_second = true; > > + nir_loop_terminator *terminator = > > + list_last_entry(&loop->info->loop_terminator_list, > > + nir_loop_terminator, > > loop_terminator_link); > > + > > + > > + if (terminator->nif == loop->info- > > >limiting_terminator->nif) { > > + limiting_term_second = false; > > + terminator = > > + list_first_entry(&loop->info- > > >loop_terminator_list, > > + nir_loop_terminator, > > loop_terminator_link); > > + } > > + > > + /* If the first terminator has a trip count of zero > > and is the > > + * limiting terminator just do a simple unroll as the > > second > > + * terminator can never be reached. > > + */ > > + if (loop->info->trip_count == 0 && > > !limiting_term_second) { > > + simple_unroll(loop, b); > > + } else { > > + complex_unroll(loop, b, terminator, > > limiting_term_second); > > + } > > + progress = true; > > + } > > + } > > + } > > + > > + return progress; > > +} > > + > > +static bool > > +nir_opt_loop_unroll_impl(nir_function_impl *impl, > > + nir_variable_mode indirect_mask) > > +{ > > + bool progress = false; > > + nir_metadata_require(impl, nir_metadata_loop_analysis, > > indirect_mask); > > + nir_metadata_require(impl, nir_metadata_block_index); > > + > > + nir_builder b; > > + nir_builder_init(&b, impl); > > + > > + foreach_list_typed_safe(nir_cf_node, node, node, &impl->body) { > > + bool innermost_loop = true; > > + progress |= process_loops(node, &b, &innermost_loop); > > + } > > + > > + return progress; > > +} > > + > > +bool > > +nir_opt_loop_unroll(nir_shader *shader, nir_variable_mode > > indirect_mask) > > +{ > > + bool progress = false; > > + > > + nir_foreach_function(function, shader) { > > + if (function->impl) { > > + progress |= nir_opt_loop_unroll_impl(function->impl, > > indirect_mask); > > + } > > + } > > + return false; > > +} > > -- > > 2.9.3 > > > > _______________________________________________ > > mesa-dev mailing list > > mesa-dev@lists.freedesktop.org > > https://lists.freedesktop.org/mailman/listinfo/mesa-dev > > > > _______________________________________________ > mesa-dev mailing list > mesa-dev@lists.freedesktop.org > https://lists.freedesktop.org/mailman/listinfo/mesa-dev _______________________________________________ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev