Re: [PATCH] widening_mul: Do cost check when propagating mult into plus/minus expressions

Andreas Krebbel Thu, 14 Jul 2011 06:10:03 -0700

On Wed, Jul 13, 2011 at 09:58:08AM -0700, Richard Henderson wrote:
> Why the force_operand?  You've got register inputs.  Either the target
> is going to support the operation or it isn't.


I agree that it doesn't seem to be necessary. I had only used force_operand
because ivopts (add_cost) does so, although I never saw a clear reason for
it.  So I've removed it now.

> Saving cost data dependent on speed, which is non-constant.
> You probably need to make this a two dimensional array.

Fixed.

Here is an updated version.

Bye,

-Andreas-

2011-07-14  Andreas Krebbel  <andreas.kreb...@de.ibm.com>

        * tree-ssa-math-opts.c (compute_costs): New function.
        (convert_mult_to_fma): Take costs into account when propagating
        multiplications into several additions.
        * config/s390/s390.c (z196_cost): Adjust costs for madbr and
        maebr.


Index: gcc/tree-ssa-math-opts.c
===================================================================
*** gcc/tree-ssa-math-opts.c.orig
--- gcc/tree-ssa-math-opts.c
*************** convert_plusminus_to_widen (gimple_stmt_
*** 2185,2190 ****
--- 2185,2236 ----
    return true;
  }
  
+ /* Return the nonzero cost of CODE in MODE; SPEED is passed on to rtx_cost.  */
+ 
+ static unsigned
+ compute_costs (enum machine_mode mode, enum rtx_code code, bool speed)
+ {
+   rtx insn;
+   unsigned cost;
+ 
+   switch (GET_RTX_LENGTH (code))
+     {
+     case 2:
+       insn = gen_rtx_fmt_ee (code, mode,
+                            gen_raw_REG (mode, LAST_VIRTUAL_REGISTER + 1),
+                            gen_raw_REG (mode, LAST_VIRTUAL_REGISTER + 2));
+       break;
+     case 3:
+       insn = gen_rtx_fmt_eee (code, mode,
+                             gen_raw_REG (mode, LAST_VIRTUAL_REGISTER + 1),
+                             gen_raw_REG (mode, LAST_VIRTUAL_REGISTER + 2),
+                             gen_raw_REG (mode, LAST_VIRTUAL_REGISTER + 3));
+       break;
+     default:
+       gcc_unreachable ();
+     }
+ 
+   if (dump_file && (dump_flags & TDF_DETAILS))
+     {
+       fprintf (dump_file, "Calculating costs of %s in %s mode.  RTX is:\n",
+              GET_RTX_NAME (code), GET_MODE_NAME (mode));
+       print_rtl (dump_file, insn);
+     }
+ 
+   cost = rtx_cost (insn, SET, speed);
+ 
+   /* A zero cost from the backend is most certainly a lie.  The caller
+      caches the result and treats zero as "not computed yet", so return
+      at least one.  */
+   cost = cost ? cost : 1;
+ 
+   if (dump_file && (dump_flags & TDF_DETAILS))
+     fprintf (dump_file, "\n%s in %s costs %u\n\n",
+              GET_RTX_NAME (code), GET_MODE_NAME (mode), cost);
+ 
+   return cost;
+ }
+ 
  /* Combine the multiplication at MUL_STMT with operands MULOP1 and MULOP2
     with uses in additions and subtractions to form fused multiply-add
     operations.  Returns true if successful and MUL_STMT should be removed.  */
*************** convert_mult_to_fma (gimple mul_stmt, tr
*** 2197,2202 ****
--- 2243,2254 ----
    gimple use_stmt, neguse_stmt, fma_stmt;
    use_operand_p use_p;
    imm_use_iterator imm_iter;
+   enum machine_mode mode;
+   int uses = 0;
+   bool speed = optimize_bb_for_speed_p (gimple_bb (mul_stmt));
+   static unsigned mul_cost[2][NUM_MACHINE_MODES];
+   static unsigned add_cost[2][NUM_MACHINE_MODES];
+   static unsigned fma_cost[2][NUM_MACHINE_MODES];
  
    if (FLOAT_TYPE_P (type)
        && flag_fp_contract_mode == FP_CONTRACT_OFF)
*************** convert_mult_to_fma (gimple mul_stmt, tr
*** 2213,2222 ****
    if (optab_handler (fma_optab, TYPE_MODE (type)) == CODE_FOR_nothing)
      return false;
  
    /* Make sure that the multiplication statement becomes dead after
!      the transformation, thus that all uses are transformed to FMAs.
!      This means we assume that an FMA operation has the same cost
!      as an addition.  */
    FOR_EACH_IMM_USE_FAST (use_p, imm_iter, mul_result)
      {
        enum tree_code use_code;
--- 2265,2281 ----
    if (optab_handler (fma_optab, TYPE_MODE (type)) == CODE_FOR_nothing)
      return false;
  
+   mode = TYPE_MODE (type);
+ 
+   if (!fma_cost[speed][mode])
+     {
+       fma_cost[speed][mode] = compute_costs (mode, FMA, speed);
+       add_cost[speed][mode] = compute_costs (mode, PLUS, speed);
+       mul_cost[speed][mode] = compute_costs (mode, MULT, speed);
+     }
+ 
    /* Make sure that the multiplication statement becomes dead after
!      the transformation, thus that all uses are transformed to FMAs.  */
    FOR_EACH_IMM_USE_FAST (use_p, imm_iter, mul_result)
      {
        enum tree_code use_code;
*************** convert_mult_to_fma (gimple mul_stmt, tr
*** 2292,2297 ****
--- 2351,2357 ----
        if (gimple_assign_rhs1 (use_stmt) == gimple_assign_rhs2 (use_stmt))
        return false;
  
+       uses++;
        /* While it is possible to validate whether or not the exact form
         that we've recognized is available in the backend, the assumption
         is that the transformation is never a loss.  For instance, suppose
*************** convert_mult_to_fma (gimple mul_stmt, tr
*** 2302,2307 ****
--- 2362,2374 ----
         independant and could be run in parallel.  */
      }
  
+   /* Calculate the costs of moving the multiplication into all the
+      minus/plus expressions.  */
+ 
+   if (uses * fma_cost[speed][mode] >
+       uses * add_cost[speed][mode] + mul_cost[speed][mode])
+     return false;
+ 
    FOR_EACH_IMM_USE_STMT (use_stmt, imm_iter, mul_result)
      {
        gimple_stmt_iterator gsi = gsi_for_stmt (use_stmt);
Index: gcc/config/s390/s390.c
===================================================================
*** gcc/config/s390/s390.c.orig
--- gcc/config/s390/s390.c
*************** struct processor_costs z196_cost =
*** 242,249 ****
    COSTS_N_INSNS (100),   /* SQXBR B+100 */
    COSTS_N_INSNS (42),    /* SQDBR B+42 */
    COSTS_N_INSNS (28),    /* SQEBR B+28 */
!   COSTS_N_INSNS (1),     /* MADBR B */
!   COSTS_N_INSNS (1),     /* MAEBR B */
    COSTS_N_INSNS (101),   /* DXBR B+101 */
    COSTS_N_INSNS (29),    /* DDBR */
    COSTS_N_INSNS (22),    /* DEBR */
--- 242,250 ----
    COSTS_N_INSNS (100),   /* SQXBR B+100 */
    COSTS_N_INSNS (42),    /* SQDBR B+42 */
    COSTS_N_INSNS (28),    /* SQEBR B+28 */
!   /* Cheaper than a mul+add but more expensive than a single mul/add.  */
!   COSTS_N_INSNS (1) + COSTS_N_INSNS (1) / 2, /* MADBR B */
!   COSTS_N_INSNS (1) + COSTS_N_INSNS (1) / 2, /* MAEBR B */
    COSTS_N_INSNS (101),   /* DXBR B+101 */
    COSTS_N_INSNS (29),    /* DDBR */
    COSTS_N_INSNS (22),    /* DEBR */

Re: [PATCH] widening_mul: Do cost check when propagating mult into plus/minus expressions

Reply via email to