Hi, this bug report is about cris generating worse code since tree-ssa. The effect is also visible on x86-64. The symptom is that the work horse of adler32.c (from zlib) needs spills in the inner loop, while gcc 3 did not, and those spills go away with -fno-tree-reassoc.
The underlying reason for the spills is register pressure, which could either be rectified by the pressure aware scheduling (which cris doesn't have), or by simply not generating high pressure code to begin with. In this case it's TER which ultimately causes the register pressure to increase, and there are many plans in people minds how to fix this (make TER regpressure aware, do some regpressure scheduling on gimple, or even more ambitious things), but this patch doesn't tackle this. Instead it makes reassoc not generate the situation which causes TER to run wild. TER increasing register pressure is a long standing problem and so it has some heuristics to avoid that. One wobbly heuristic is that it doesn't TER expressions together that have the same base variable as their LHSs. But reassoc generates only anonymous SSA names for its temporary subexpressions, so that TER heuristic doesn't apply. In this testcase it's even the case that reassoc doesn't really change much code (one addition moves from the end to the beginning of the inner loop), so that whole rewriting is even pointless. In any case, let's use copy_ssa_name instead of make_ssa_name, when we have an obvious LHS; that leads to TER making the same decisions with and without -fno-tree-reassoc, leading to the same register pressure and no spills. On x86-64 the effect is: before patch: 48 bytes stackframe, 24 stack accesses (most of them in the loops), 576 bytes codesize; after patch: no stack frame, no stack accesses, 438 bytes codesize On cris: before patch: 64 bytes stack frame, 27 stack access in loops, size of .s 145 lines after patch: 20 bytes stack frame (as it uses callee saved regs, which is also complained about in the bug report), but no stack accesses in loops, size of .s: 125 lines I'm wondering about testcase: should I add an x86-64 specific that tests for no stack accesses, or would that be too constraining in the future? Regstrapped on x86-64-linux, no regressions. Okay for trunk? Ciao, Michael. PR tree-optimization/37916 * tree-ssa-reassoc.c (make_new_ssa_for_def): Use copy_ssa_name. (rewrite_expr_tree, linearize_expr, negate_value, repropagate_negates, attempt_builtin_copysign, reassociate_bb): Likewise. diff --git a/gcc/tree-ssa-reassoc.c b/gcc/tree-ssa-reassoc.c index 971d926e7895..339c3d4e447f 100644 --- a/gcc/tree-ssa-reassoc.c +++ b/gcc/tree-ssa-reassoc.c @@ -1182,7 +1182,7 @@ make_new_ssa_for_def (gimple *stmt, enum tree_code opcode, tree op) tree new_lhs, new_debug_lhs = NULL_TREE; tree lhs = gimple_get_lhs (stmt); - new_lhs = make_ssa_name (TREE_TYPE (lhs)); + new_lhs = copy_ssa_name (lhs); gimple_set_lhs (stmt, new_lhs); /* Also need to update GIMPLE_DEBUGs. */ @@ -4512,7 +4512,7 @@ rewrite_expr_tree (gimple *stmt, unsigned int opindex, { gimple *insert_point = find_insert_point (stmt, oe1->op, oe2->op); - lhs = make_ssa_name (TREE_TYPE (lhs)); + lhs = copy_ssa_name (lhs); stmt = gimple_build_assign (lhs, gimple_assign_rhs_code (stmt), oe1->op, oe2->op); @@ -4583,7 +4583,7 @@ rewrite_expr_tree (gimple *stmt, unsigned int opindex, unsigned int uid = gimple_uid (stmt); gimple *insert_point = find_insert_point (stmt, new_rhs1, oe->op); - lhs = make_ssa_name (TREE_TYPE (lhs)); + lhs = copy_ssa_name (lhs); stmt = gimple_build_assign (lhs, gimple_assign_rhs_code (stmt), new_rhs1, oe->op); gimple_set_uid (stmt, uid); @@ -4820,7 +4820,7 @@ linearize_expr (gimple *stmt) gsi = gsi_for_stmt (stmt); gimple_assign_set_rhs2 (stmt, gimple_assign_rhs1 (binrhs)); - binrhs = gimple_build_assign (make_ssa_name (TREE_TYPE (lhs)), + binrhs = gimple_build_assign (copy_ssa_name (lhs), gimple_assign_rhs_code (binrhs), gimple_assign_lhs (binlhs), gimple_assign_rhs2 (binrhs)); @@ -4909,7 +4909,7 @@ negate_value (tree tonegate, gimple_stmt_iterator *gsip) rhs2 = negate_value (rhs2, &gsi); gsi = gsi_for_stmt (negatedefstmt); - lhs = make_ssa_name (TREE_TYPE (lhs)); + lhs = copy_ssa_name (lhs); gimple_set_visited (negatedefstmt, true); g = gimple_build_assign (lhs, PLUS_EXPR, rhs1, rhs2); gimple_set_uid (g, gimple_uid (negatedefstmt)); @@ -5245,7 +5245,7 @@ repropagate_negates (void) tree b = gimple_assign_rhs2 (user); gimple_stmt_iterator gsi = gsi_for_stmt (feed); gimple_stmt_iterator gsi2 = gsi_for_stmt (user); - tree x = make_ssa_name (TREE_TYPE (gimple_assign_lhs (feed))); + tree x = copy_ssa_name (gimple_assign_lhs (feed)); gimple *g = gimple_build_assign (x, PLUS_EXPR, a, b); gsi_insert_before (&gsi2, g, GSI_SAME_STMT); gimple_assign_set_rhs_with_ops (&gsi2, NEGATE_EXPR, x); @@ -5737,7 +5737,7 @@ attempt_builtin_copysign (vec<operand_entry *> *ops) else new_call = gimple_build_call (gimple_call_fndecl (old_call), 2, mul, arg1); - tree lhs = make_ssa_name (type); + tree lhs = copy_ssa_name (oe->op); gimple_call_set_lhs (new_call, lhs); gimple_set_location (new_call, gimple_location (old_call)); @@ -5748,7 +5748,7 @@ attempt_builtin_copysign (vec<operand_entry *> *ops) /* Handle the CST1 < 0 case by negating the result. */ if (cst1_neg) { - tree negrhs = make_ssa_name (TREE_TYPE (lhs)); + tree negrhs = copy_ssa_name (lhs); gimple *negate_stmt = gimple_build_assign (negrhs, NEGATE_EXPR, lhs); insert_stmt_after (negate_stmt, new_call); @@ -6052,7 +6052,7 @@ reassociate_bb (basic_block bb) if (negate_result) { stmt = SSA_NAME_DEF_STMT (lhs); - tree tmp = make_ssa_name (TREE_TYPE (lhs)); + tree tmp = copy_ssa_name (lhs); gimple_set_lhs (stmt, tmp); if (lhs != new_lhs) tmp = new_lhs;