Hi Juzhe, general remark upfront: Please add function-level comments for all functions. This makes reading and reviewing much easier. I had to sweep back and forth quite a bit.
> + > +static int > +get_last_live_range (const vec<var_live_range> &live_ranges, tree var) > +{ > + unsigned int ix; > + var_live_range *live_range; > + FOR_EACH_VEC_ELT_REVERSE (live_ranges, ix, live_range) > + if (live_range->var == var) > + return ix; > + return -1; > +} >From reading the usage site of this function it looks like we could benefit from having the live ranges be a hash_map as well? That way we wouldn't need to scan through the list every time. Something like hash_map<tree, pair<int, int>>. It looks like we only consider the range end anyway. > + int index = get_last_live_range (live_ranges, var); That way we could avoid some worst-case behavior here for pathological inputs. > + if (index == -1) > + { > + var_live_range range = {var, 0, point}; > + live_ranges.safe_push (range); > + } Please add a comment that we assume the variable is live from the start of this block. > + else > + live_ranges[index].end = point; And here a comment that we will grow the live range for each use. > +static bool > +live_range_conflict_p (const var_live_range &live_range1, > + const var_live_range &live_range2) > +{ > + if (live_range1.start >= live_range2.end) > + return false; > + if (live_range1.end <= live_range2.start) > + return false; > + if (live_range2.start >= live_range1.end) > + return false; > + if (live_range2.end <= live_range1.start) > + return false; > + return true; > +} Rename to live_range_overlap_p and simplify to return a.end >= b.start || b.end >= a.start; > + > +static unsigned int > +max_number_of_live_regs (const basic_block bb, > + const vec<var_live_range> &live_ranges, > + machine_mode biggest_mode, int lmul) > +{ > + unsigned int max_nregs = 0; > + unsigned int i, j, k; > + unsigned int live_point = 0; > + for (i = 0; i < live_ranges.length (); i++) > + { > + auto_vec<var_live_range> conflict_live_ranges; > + var_live_range live_range = live_ranges[i]; > + conflict_live_ranges.safe_push (live_range); > + unsigned int min_point = live_range.start; > + unsigned int max_point = live_range.end; > + for (j = 0; j < live_ranges.length (); j++) > + { > + if (j == i) > + continue; > + if (live_range_conflict_p (live_range, live_ranges[j])) > + { > + conflict_live_ranges.safe_push (live_ranges[j]); > + min_point > + = std::min (min_point, (unsigned int) live_ranges[j].start); > + max_point > + = std::max (max_point, (unsigned int) live_ranges[j].end); > + } > + } > + for (j = min_point; j <= max_point; j++) > + { > + unsigned int nregs = 0; > + for (k = 0; k < conflict_live_ranges.length (); k++) > + { > + if (j >= (unsigned int) conflict_live_ranges[k].start > + && j <= (unsigned int) conflict_live_ranges[k].end) > + { > + machine_mode mode > + = TYPE_MODE (TREE_TYPE (conflict_live_ranges[k].var)); > + nregs += compute_nregs_for_mode (mode, biggest_mode, lmul); > + } > + } > + if (nregs > max_nregs) > + { > + max_nregs = nregs; > + live_point = j; > + } > + } > + } This looks pretty quadratic in the number of live ranges (or even cubic?). Can't it be done more efficiently using a sliding-window approach by sorting the live ranges according to their start point before? Also std::min/max -> MIN/MAX. > + > + /* Collect user explicit RVV type. */ > + hash_set<basic_block> all_preds = get_all_predecessors (bb); > + hash_set<basic_block> all_succs = get_all_successors (bb); As mentioned before, maybe dominator info could help here? > + for (i = 0; i < cfun->gimple_df->ssa_names->length (); i++) > + { > + tree t = ssa_name (i); > + if (!t) > + continue; > + machine_mode mode = TYPE_MODE (TREE_TYPE (t)); > + if (!lookup_vector_type_attribute (TREE_TYPE (t)) > + && !riscv_v_ext_vls_mode_p (mode)) > + continue; > + > + gimple *def = SSA_NAME_DEF_STMT (t); > + if (gimple_bb (def) && !all_preds.contains (gimple_bb (def))) > + continue; > + const ssa_use_operand_t *const head = &(SSA_NAME_IMM_USE_NODE (t)); > + const ssa_use_operand_t *ptr; > + > + for (ptr = head->next; ptr != head; ptr = ptr->next) > + { > + if (USE_STMT (ptr) && !is_gimple_debug (USE_STMT (ptr))) > + { > + if (all_succs.contains (gimple_bb (USE_STMT (ptr)))) > + { Reverse the conditions and continue, i.e. if (!USE_STMT || is_gimple_debug || !all_succs.contains). > + > +static int > +compute_lmul (class loop *loop) > +{ > + unsigned int current_lmul = loop_autovec_infos.get (loop)->current_lmul; > + return current_lmul; > +} return loop_autovec_infos.get (loop)->current_lmul; Besides, the function does not compute so maybe change its name? And assert that we have loop_autovec_infos for that loop. Or maybe inline it altogether. > +bool > +costs::prefer_new_lmul_p (const vector_costs *uncast_other) const > +{ I don't remember how often the cost hooks are actually called so maybe this is unnecessary: Does it make sense to cache the computation results here in case we are called more than once for the same loop? Regarding tests: I would like to have at least one test with global variables that overlap local ones. Regards Robin