; > > +/* Determine whether LOOP contains floating-point computation. */ > +bool > +loop_has_FP_comp(struct loop *loop) > +{ > + rtx set, dest;
This should probably be extended to detect other long-latency operations in the future. > + > + if (ix86_tune != PROCESSOR_COREI7_64 && > + ix86_tune != PROCESSOR_COREI7_32) > + return nunroll; Would it be better to generalize this and model the loop stream detector (LSD) and its size in the target model description? That is probably a separate patch. > + > + /* Look for instructions that store a constant into HImode (16-bit) > + memory. These require a length-changing prefix and on corei7 are > + prone to LCP stalls. These stalls can be avoided if the loop > + is streamed from the loop stream detector. */ > + body = get_loop_body (loop); > + for (i = 0; i < loop->num_nodes && !found; i++) > + { > + bb = body[i]; > + > + FOR_BB_INSNS (bb, insn) > + { > + rtx set_expr; > + set_expr = single_set (insn); > + if (set_expr != NULL_RTX > + && GET_MODE (SET_DEST (set_expr)) == HImode > + && CONST_INT_P (SET_SRC (set_expr)) > + && MEM_P (SET_DEST (set_expr))) > + { > + found = true; > + break; > + } > + } > + } > + free (body); This could probably be generalized to handle other long-latency front-end (FE) stalls; for now it only handles LCP stalls. > + > + if (!found) > + return nunroll; > + > + /* Don't reduce unroll factor in loops with floating point > + computation, which tend to benefit more heavily from > + larger unroll factors and are less likely to bottleneck > + at the decoder. */ > + has_FP = loop_has_FP_comp(loop); > + if (has_FP) > + return nunroll; > + > + if (dump_file) > + { > + fprintf (dump_file, > + ";; Loop contains HImode store of const (possible LCP > stalls),\n"); > + fprintf (dump_file, > + " reduce unroll factor to fit into Loop Stream Detector\n"); > + } > + > + /* On corei7 the loop stream detector can hold about 28 instructions, so > + don't allow unrolling to exceed that. */ > + newunroll = 28 / loop->av_ninsns; Is 28 the number of instructions or the number of uops? 
Thanks, David > + if (newunroll < nunroll) > + return newunroll; > + > + return nunroll; > +} > + > /* Initialize the GCC target structure. */ > #undef TARGET_RETURN_IN_MEMORY > #define TARGET_RETURN_IN_MEMORY ix86_return_in_memory > @@ -38685,6 +38755,9 @@ ix86_autovectorize_vector_sizes (void) > #define TARGET_INIT_LIBFUNCS darwin_rename_builtins > #endif > > +#undef TARGET_LOOP_UNROLL_ADJUST > +#define TARGET_LOOP_UNROLL_ADJUST ix86_loop_unroll_adjust > + > struct gcc_target targetm = TARGET_INITIALIZER; > ^L > #include "gt-i386.h" > > -- > Teresa Johnson | Software Engineer | tejohn...@google.com | 408-460-2413