Hi,

Gentle ping on this patch.

This version has removed the target hook and added a new optab for cfcmov.

Thanks,
Lingling

From: Kong, Lingling <lingling.k...@intel.com>
Sent: Tuesday, June 18, 2024 3:41 PM
To: gcc-patches@gcc.gnu.org
Cc: Alexander Monakov <amona...@ispras.ru>; Uros Bizjak <ubiz...@gmail.com>; 
lingling.ko...@gmail.com; Hongtao Liu <crazy...@gmail.com>; Jeff Law 
<jeffreya...@gmail.com>; Richard Biener <richard.guent...@gmail.com>
Subject: [PATCH v2 1/2] [APX CFCMOV] Support APX CFCMOV in if_convert pass


The APX CFCMOV feature implements conditional faulting, which means

that all memory faults are suppressed when the condition code

evaluates to false while loading or storing a memory operand.  With

it, a conditional move can load or store a memory operand that may

trap or fault.



Currently the middle-end does not generate a conditional move if it

knows that a load from A or B could trap or fault.  To enable CFCMOV,

we add a new optab, cfmovcc.



For a conditional memory store, the fault-suppressing conditional move

does not move any arithmetic calculations.  For a conditional memory

load, only one case is supported for now: one operand is a MEM that may

trap and the other is a non-trapping, non-MEM operand.



gcc/ChangeLog:



               * ifcvt.cc (noce_try_cmove_load_mem_notrap): New function.

               Allow converting a conditional load to cfcmov.

               (noce_try_cmove_store_mem_notrap): New function.  Convert to
               a conditional store.

               (noce_process_if_block): Use them when cfmovcc is available.

               * optabs.def (cfmovcc_optab): New optab.

---

gcc/ifcvt.cc   | 246 ++++++++++++++++++++++++++++++++++++++++++++++++-

gcc/optabs.def |   1 +

2 files changed, 246 insertions(+), 1 deletion(-)



diff --git a/gcc/ifcvt.cc b/gcc/ifcvt.cc

index 58ed42673e5..65c069b8cc6 100644

--- a/gcc/ifcvt.cc

+++ b/gcc/ifcvt.cc

@@ -783,6 +783,8 @@ static rtx noce_emit_cmove (struct noce_if_info *, rtx, 
enum rtx_code, rtx,

                                                 rtx, rtx, rtx, rtx = NULL, rtx 
= NULL);

static bool noce_try_cmove (struct noce_if_info *);

static bool noce_try_cmove_arith (struct noce_if_info *);

+static bool noce_try_cmove_load_mem_notrap (struct noce_if_info *);

+static bool noce_try_cmove_store_mem_notrap (struct noce_if_info *, rtx *, 
rtx);

static rtx noce_get_alt_condition (struct noce_if_info *, rtx, rtx_insn **);

static bool noce_try_minmax (struct noce_if_info *);

static bool noce_try_abs (struct noce_if_info *);

@@ -2401,6 +2403,233 @@ noce_try_cmove_arith (struct noce_if_info *if_info)

   return false;

}



+/* When target support suppress memory fault, try more complex cases involving

+   conditional_move's source or dest may trap or fault.  */

+

+static bool

+noce_try_cmove_load_mem_notrap (struct noce_if_info *if_info)

+{

+  rtx a = if_info->a;

+  rtx b = if_info->b;

+  rtx x = if_info->x;

+

+  if (MEM_P (x))

+    return false;

+  /* Just handle a conditional move from one trap MEM + other non_trap,

+     non mem cases.  */

+  if (!(MEM_P (a) ^ MEM_P (b)))

+      return false;

+  bool a_trap = may_trap_or_fault_p (a);

+  bool b_trap = may_trap_or_fault_p (b);

+

+  if (!(a_trap ^ b_trap))

+    return false;

+  if (a_trap && !MEM_P (a))

+    return false;

+  if (b_trap && !MEM_P (b))

+    return false;

+

+  rtx orig_b;

+  rtx_insn *insn_a, *insn_b;

+  bool a_simple = if_info->then_simple;

+  bool b_simple = if_info->else_simple;

+  basic_block then_bb = if_info->then_bb;

+  basic_block else_bb = if_info->else_bb;

+  rtx target;

+  enum rtx_code code;

+  rtx cond = if_info->cond;

+  rtx_insn *ifcvt_seq;

+

+  /* if (test) x = *a; else x = c - d;

+     => x = c - d;

+             if (test)

+               x = *a;

+  */

+

+  code = GET_CODE (cond);

+  insn_a = if_info->insn_a;

+  insn_b = if_info->insn_b;

+  machine_mode x_mode = GET_MODE (x);

+

+  /* Because we only handle one trap MEM + other non_trap, non mem cases,

+     just move one trap MEM always in then_bb.  */

+  if (noce_reversed_cond_code (if_info) != UNKNOWN)

+    {

+      bool reversep = false;

+      if (b_trap)

+             reversep = true;

+

+      if (reversep)

+             {

+               if (if_info->rev_cond)

+                 {

+                   cond = if_info->rev_cond;

+                   code = GET_CODE (cond);

+                 }

+               else

+                 code = reversed_comparison_code (cond, if_info->jump);

+               std::swap (a, b);

+               std::swap (insn_a, insn_b);

+               std::swap (a_simple, b_simple);

+               std::swap (then_bb, else_bb);

+             }

+    }

+

+  if (then_bb && else_bb

+      && (!bbs_ok_for_cmove_arith (then_bb, else_bb,  if_info->orig_x)

+               || !bbs_ok_for_cmove_arith (else_bb, then_bb,  
if_info->orig_x)))

+    return false;

+

+  start_sequence ();

+

+  /* If one of the blocks is empty then the corresponding B or A value

+     came from the test block.  The non-empty complex block that we will

+     emit might clobber the register used by B or A, so move it to a pseudo

+     first.  */

+

+  rtx tmp_b = NULL_RTX;

+

+  /* Don't move trap mem to a pseudo. */

+  if (!may_trap_or_fault_p (b) && (b_simple || !else_bb))

+    tmp_b = gen_reg_rtx (x_mode);

+

+  orig_b = b;

+

+  rtx emit_a = NULL_RTX;

+  rtx emit_b = NULL_RTX;

+  rtx_insn *tmp_insn = NULL;

+  bool modified_in_a = false;

+  bool modified_in_b = false;

+  /* If either operand is complex, load it into a register first.

+     The best way to do this is to copy the original insn.  In this

+     way we preserve any clobbers etc that the insn may have had.

+     This is of course not possible in the IS_MEM case.  */

+

+  if (! general_operand (b, GET_MODE (b)) || tmp_b)

+    {

+               if (insn_b)

+                 {

+                   b = tmp_b ? tmp_b : gen_reg_rtx (GET_MODE (b));

+                   rtx_insn *copy_of_b = as_a <rtx_insn *> (copy_rtx (insn_b));

+                   rtx set = single_set (copy_of_b);

+

+                   SET_DEST (set) = b;

+                   emit_b = PATTERN (copy_of_b);

+                 }

+               else

+                 {

+                   rtx tmp_reg = tmp_b ? tmp_b : gen_reg_rtx (GET_MODE (b));

+                   emit_b = gen_rtx_SET (tmp_reg, b);

+                   b = tmp_reg;

+                 }

+    }

+

+  if (tmp_b && then_bb)

+    {

+      FOR_BB_INSNS (then_bb, tmp_insn)

+             /* Don't check inside insn_a.  We will have changed it to emit_a

+                with a destination that doesn't conflict.  */

+             if (!(insn_a && tmp_insn == insn_a)

+                 && modified_in_p (orig_b, tmp_insn))

+               {

+                 modified_in_a = true;

+                 break;

+               }

+

+    }

+

+  modified_in_b = emit_b != NULL_RTX && modified_in_p (a, emit_b);

+  /* If insn to set up A clobbers any registers B depends on, try to

+     swap insn that sets up A with the one that sets up B.  If even

+     that doesn't help, punt.  */

+  if (modified_in_a && !modified_in_b)

+    {

+      if (!noce_emit_bb (emit_b, else_bb, b_simple))

+             goto end_seq_and_fail;

+

+      if (!noce_emit_bb (emit_a, then_bb, a_simple))

+             goto end_seq_and_fail;

+    }

+  else if (!modified_in_a)

+    {

+      if (!noce_emit_bb (emit_b, else_bb, b_simple))

+             goto end_seq_and_fail;

+

+      if (!noce_emit_bb (emit_a, then_bb, a_simple))

+             goto end_seq_and_fail;

+    }

+  else

+    goto end_seq_and_fail;

+

+  target = noce_emit_cmove (if_info, x, code, XEXP (cond, 0), XEXP (cond, 1),

+                                               a, b);

+

+  if (! target)

+    goto end_seq_and_fail;

+

+  if (target != x)

+    noce_emit_move_insn (x, target);

+

+  ifcvt_seq = end_ifcvt_sequence (if_info);

+  if (!ifcvt_seq || !targetm.noce_conversion_profitable_p (ifcvt_seq, if_info))

+    return false;

+

+  emit_insn_before_setloc (ifcvt_seq, if_info->jump,

+                                              INSN_LOCATION (if_info->insn_a));

+  if_info->transform_name = "noce_try_cmove_load_mem_notrap";

+  return true;

+

+ end_seq_and_fail:

+  end_sequence ();

+  return false;

+}

+

+static bool

+noce_try_cmove_store_mem_notrap (struct noce_if_info *if_info, rtx *x_ptr, rtx 
orig_x)

+{

+  rtx a = if_info->a;

+  rtx b = if_info->b;

+  rtx x = orig_x;

+  machine_mode x_mode = GET_MODE (x);

+

+  if (!MEM_P (x) || !rtx_equal_p (x, b))

+    return false;

+  if (!may_trap_or_fault_p (x))

+    return false;

+  if (!if_info->then_simple || !register_operand (a, x_mode))

+    return false;

+

+  rtx cond = if_info->cond;

+  enum rtx_code code = GET_CODE (cond);

+  rtx_insn *ifcvt_seq;

+

+  start_sequence ();

+

+  rtx target = noce_emit_cmove (if_info, x, code, XEXP (cond, 0), XEXP (cond, 
1),

+                                               a, b);

+

+  if (! target)

+    goto end_seq_and_fail;

+

+  if (target != x)

+    noce_emit_move_insn (x, target);

+

+  ifcvt_seq = end_ifcvt_sequence (if_info);

+  if (!ifcvt_seq || !targetm.noce_conversion_profitable_p (ifcvt_seq, if_info))

+    return false;

+

+  emit_insn_before_setloc (ifcvt_seq, if_info->jump,

+                                              INSN_LOCATION (if_info->insn_a));

+  if_info->transform_name = "noce_try_cmove_load_mem_notrap";

+  if_info->x = orig_x;

+  *x_ptr = orig_x;

+  return true;

+

+ end_seq_and_fail:

+  end_sequence ();

+  return false;

+}

+

/* For most cases, the simplified condition we found is the best

    choice, but this is not the case for the min/max/abs transforms.

    For these we wish to know that it is A or B in the condition.  */

@@ -4055,6 +4284,8 @@ noce_process_if_block (struct noce_if_info *if_info)

      the lifetime of hard registers on small register class machines.  */

   orig_x = x;

   if_info->orig_x = orig_x;

+  bool have_cfmovcc = (optab_handler (cfmovcc_optab, GET_MODE (orig_x))

+                                   != CODE_FOR_nothing);

   if (!REG_P (x)

       || (HARD_REGISTER_P (x)

                 && targetm.small_register_classes_for_mode_p (GET_MODE (x))))

@@ -4121,12 +4352,21 @@ noce_process_if_block (struct noce_if_info *if_info)

     }



   if (!set_b && MEM_P (orig_x))

+    {

+      /* Conditional_move_suppress_fault for condition mem store would not

+             move any arithmetic calculations.  */

+      if (have_cfmovcc

+               && HAVE_conditional_move

+               && noce_try_cmove_store_mem_notrap (if_info, &x, orig_x))

+             goto success;

+      else

     /* We want to avoid store speculation to avoid cases like

               if (pthread_mutex_trylock(mutex))

                  ++global_variable;

        Rather than go to much effort here, we rely on the SSA optimizers,

        which do a good enough job these days.  */

-    return false;

+       return false;

+    }



   if (noce_try_move (if_info))

     goto success;

@@ -4160,6 +4400,10 @@ noce_process_if_block (struct noce_if_info *if_info)

       if (HAVE_conditional_move

                 && noce_try_cmove_arith (if_info))

               goto success;

+      if (HAVE_conditional_move

+               && have_cfmovcc

+               && noce_try_cmove_load_mem_notrap (if_info))

+             goto success;

       if (noce_try_sign_mask (if_info))

               goto success;

     }

diff --git a/gcc/optabs.def b/gcc/optabs.def

index bc2611abdc2..49335ec3212 100644

--- a/gcc/optabs.def

+++ b/gcc/optabs.def

@@ -540,3 +540,4 @@ OPTAB_D (vec_shl_insert_optab, "vec_shl_insert_$a")

OPTAB_D (len_load_optab, "len_load_$a")

OPTAB_D (len_store_optab, "len_store_$a")

OPTAB_D (select_vl_optab, "select_vl$a")

+OPTAB_D (cfmovcc_optab, "cfmov$acc")

--

2.31.1


Reply via email to