On Thu, Apr 14, 2011 at 5:18 PM, Michael Matz <m...@suse.de> wrote: > Hello, > > as my patch for stack-arrays in fortran reveals we have a problem that VLA > objects basically prevent all inlining from happening. They're transformed > into alloca calls and those are considered to disable inlining. The > (correct) fear being that inlining a bare alloca call into a loop leads to > unbounded stack growth. > > But the situation is different for alloca calls emitted for dealing with > VLA objects. They always are wrapped with stack_save/stack_restore calls. > Inlining such regions always is okay, even into loops. The stack space > usage will be exactly the same at runtime. > > We have a flag for this already on the CALL_EXPR. But we don't retain it > over tuples, and hence we also don't look at it in > inline_forbidden_p_stmt. This patch fixes both. (The strange testing of > builtin-ness is because in CALL_EXPR the ALLOCA_FOR_VAR_P and > CALL_FROM_THUNK_P flags are overloaded) > > (This fixes the regression of fatigue with the stack-arrays patch) > > regstrapping on x86_64-linux in progress, okay for trunk?
Ok. Thanks, Richard. > > Ciao, > Michael. > > * gimple.h (enum gf_mask): Add GF_CALL_ALLOCA_FOR_VAR. > (gimple_call_set_alloca_for_var): New inline function. > (gimple_call_alloca_for_var_p): Ditto. > * gimple.c (gimple_build_call_from_tree): Remember ALLOCA_FOR_VAR_P > state. > * cfgexpand.c (expand_call_stmt): Restore ALLOCA_FOR_VAR_P state. > > * tree-inline.c (inline_forbidden_p_stmt): Don't reject alloca > calls if they were for VLA objects. > > Index: cfgexpand.c > =================================================================== > *** cfgexpand.c (revision 172431) > --- cfgexpand.c (working copy) > *************** expand_call_stmt (gimple stmt) > *** 1873,1879 **** > > CALL_EXPR_TAILCALL (exp) = gimple_call_tail_p (stmt); > CALL_EXPR_RETURN_SLOT_OPT (exp) = gimple_call_return_slot_opt_p (stmt); > ! CALL_FROM_THUNK_P (exp) = gimple_call_from_thunk_p (stmt); > CALL_CANNOT_INLINE_P (exp) = gimple_call_cannot_inline_p (stmt); > CALL_EXPR_VA_ARG_PACK (exp) = gimple_call_va_arg_pack_p (stmt); > SET_EXPR_LOCATION (exp, gimple_location (stmt)); > --- 1873,1884 ---- > > CALL_EXPR_TAILCALL (exp) = gimple_call_tail_p (stmt); > CALL_EXPR_RETURN_SLOT_OPT (exp) = gimple_call_return_slot_opt_p (stmt); > ! if (decl > ! && DECL_BUILT_IN_CLASS (decl) == BUILT_IN_NORMAL > ! && DECL_FUNCTION_CODE (decl) == BUILT_IN_ALLOCA) > ! ALLOCA_FOR_VAR_P (exp) = gimple_call_alloca_for_var_p (stmt); > ! else > ! 
CALL_FROM_THUNK_P (exp) = gimple_call_from_thunk_p (stmt); > CALL_CANNOT_INLINE_P (exp) = gimple_call_cannot_inline_p (stmt); > CALL_EXPR_VA_ARG_PACK (exp) = gimple_call_va_arg_pack_p (stmt); > SET_EXPR_LOCATION (exp, gimple_location (stmt)); > Index: tree-inline.c > =================================================================== > *** tree-inline.c (revision 172431) > --- tree-inline.c (working copy) > *************** inline_forbidden_p_stmt (gimple_stmt_ite > *** 2997,3004 **** > this may change program's memory overhead drastically when the > function using alloca is called in loop. In GCC present in > SPEC2000 inlining into schedule_block cause it to require 2GB of > ! RAM instead of 256MB. */ > if (gimple_alloca_call_p (stmt) > && !lookup_attribute ("always_inline", DECL_ATTRIBUTES (fn))) > { > inline_forbidden_reason > --- 2997,3007 ---- > this may change program's memory overhead drastically when the > function using alloca is called in loop. In GCC present in > SPEC2000 inlining into schedule_block cause it to require 2GB of > ! RAM instead of 256MB. Don't do so for alloca calls emitted for > ! VLA objects as those can't cause unbounded growth (they're always > ! wrapped inside stack_save/stack_restore regions. */ > if (gimple_alloca_call_p (stmt) > + && !gimple_call_alloca_for_var_p (stmt) > && !lookup_attribute ("always_inline", DECL_ATTRIBUTES (fn))) > { > inline_forbidden_reason > Index: gimple.c > =================================================================== > *** gimple.c (revision 172431) > --- gimple.c (working copy) > *************** gimple_build_call_from_tree (tree t) > *** 303,309 **** > gimple_call_set_tail (call, CALL_EXPR_TAILCALL (t)); > gimple_call_set_cannot_inline (call, CALL_CANNOT_INLINE_P (t)); > gimple_call_set_return_slot_opt (call, CALL_EXPR_RETURN_SLOT_OPT (t)); > ! 
gimple_call_set_from_thunk (call, CALL_FROM_THUNK_P (t)); > gimple_call_set_va_arg_pack (call, CALL_EXPR_VA_ARG_PACK (t)); > gimple_call_set_nothrow (call, TREE_NOTHROW (t)); > gimple_set_no_warning (call, TREE_NO_WARNING (t)); > --- 303,314 ---- > gimple_call_set_tail (call, CALL_EXPR_TAILCALL (t)); > gimple_call_set_cannot_inline (call, CALL_CANNOT_INLINE_P (t)); > gimple_call_set_return_slot_opt (call, CALL_EXPR_RETURN_SLOT_OPT (t)); > ! if (fndecl > ! && DECL_BUILT_IN_CLASS (fndecl) == BUILT_IN_NORMAL > ! && DECL_FUNCTION_CODE (fndecl) == BUILT_IN_ALLOCA) > ! gimple_call_set_alloca_for_var (call, ALLOCA_FOR_VAR_P (t)); > ! else > ! gimple_call_set_from_thunk (call, CALL_FROM_THUNK_P (t)); > gimple_call_set_va_arg_pack (call, CALL_EXPR_VA_ARG_PACK (t)); > gimple_call_set_nothrow (call, TREE_NOTHROW (t)); > gimple_set_no_warning (call, TREE_NO_WARNING (t)); > Index: gimple.h > =================================================================== > *** gimple.h (revision 172431) > --- gimple.h (working copy) > *************** enum gf_mask { > *** 102,107 **** > --- 102,108 ---- > GF_CALL_TAILCALL = 1 << 3, > GF_CALL_VA_ARG_PACK = 1 << 4, > GF_CALL_NOTHROW = 1 << 5, > + GF_CALL_ALLOCA_FOR_VAR = 1 << 6, > GF_OMP_PARALLEL_COMBINED = 1 << 0, > > /* True on an GIMPLE_OMP_RETURN statement if the return does not require > *************** gimple_call_nothrow_p (gimple s) > *** 2329,2334 **** > --- 2330,2358 ---- > return (gimple_call_flags (s) & ECF_NOTHROW) != 0; > } > > + /* If FOR_VAR is true, GIMPLE_CALL S is a call to builtin_alloca that > + is known to be emitted for VLA objects. Those are wrapped by > + stack_save/stack_restore calls and hence can't lead to unbounded > + stack growth even when they occur in loops. 
*/ > + > + static inline void > + gimple_call_set_alloca_for_var (gimple s, bool for_var) > + { > + GIMPLE_CHECK (s, GIMPLE_CALL); > + if (for_var) > + s->gsbase.subcode |= GF_CALL_ALLOCA_FOR_VAR; > + else > + s->gsbase.subcode &= ~GF_CALL_ALLOCA_FOR_VAR; > + } > + > + /* Return true of S is a call to builtin_alloca emitted for VLA objects. */ > + > + static inline bool > + gimple_call_alloca_for_var_p (gimple s) > + { > + GIMPLE_CHECK (s, GIMPLE_CALL); > + return (s->gsbase.subcode & GF_CALL_ALLOCA_FOR_VAR) != 0; > + } > > /* Copy all the GF_CALL_* flags from ORIG_CALL to DEST_CALL. */ > >