RE: [PATCH v2 1/2] [APX CFCMOV] Support APX CFCMOV in if_convert pass
Hi, Gentle ping for this. This version has removed the target hook and added a new optab for cfcmov. Thanks, Lingling From: Kong, Lingling Sent: Tuesday, June 18, 2024 3:41 PM To: gcc-patches@gcc.gnu.org Cc: Alexander Monakov ; Uros Bizjak ; lingling.ko...@gmail.com; Hongtao Liu ; Jeff Law ; Richard Biener Subject: [PATCH v2 1/2] [APX CFCMOV] Support APX CFCMOV in if_convert pass The APX CFCMOV feature implements conditional faulting, which means that all memory faults are suppressed when the condition code evaluates to false and the instruction loads or stores a memory operand. With it, a conditional move can now load or store a memory operand that may trap or fault. In the middle-end, we currently do not support a conditional move if we know that a load from A or B could trap or fault. To enable CFCMOV, we added a new optab. The fault-suppressing conditional move for a conditional memory store does not move any arithmetic calculations. For a conditional memory load, we currently only support a conditional move where one operand is a MEM that may trap and the other operand neither traps nor is a MEM. gcc/ChangeLog: * ifcvt.cc (noce_try_cmove_load_mem_notrap): Allow conversion to cfcmov for conditional load. (noce_try_cmove_store_mem_notrap): Convert to conditional store. (noce_process_if_block): Ditto. * optabs.def (OPTAB_D): New optab. 
--- gcc/ifcvt.cc | 246 - gcc/optabs.def | 1 + 2 files changed, 246 insertions(+), 1 deletion(-) diff --git a/gcc/ifcvt.cc b/gcc/ifcvt.cc index 58ed42673e5..65c069b8cc6 100644 --- a/gcc/ifcvt.cc +++ b/gcc/ifcvt.cc @@ -783,6 +783,8 @@ static rtx noce_emit_cmove (struct noce_if_info *, rtx, enum rtx_code, rtx, rtx, rtx, rtx, rtx = NULL, rtx = NULL); static bool noce_try_cmove (struct noce_if_info *); static bool noce_try_cmove_arith (struct noce_if_info *); +static bool noce_try_cmove_load_mem_notrap (struct noce_if_info *); +static bool noce_try_cmove_store_mem_notrap (struct noce_if_info *, rtx *, rtx); static rtx noce_get_alt_condition (struct noce_if_info *, rtx, rtx_insn **); static bool noce_try_minmax (struct noce_if_info *); static bool noce_try_abs (struct noce_if_info *); @@ -2401,6 +2403,233 @@ noce_try_cmove_arith (struct noce_if_info *if_info) return false; } +/* When target support suppress memory fault, try more complex cases involving + conditional_move's source or dest may trap or fault. */ + +static bool +noce_try_cmove_load_mem_notrap (struct noce_if_info *if_info) +{ + rtx a = if_info->a; + rtx b = if_info->b; + rtx x = if_info->x; + + if (MEM_P (x)) +return false; + /* Just handle a conditional move from one trap MEM + other non_trap, + non mem cases. 
*/ + if (!(MEM_P (a) ^ MEM_P (b))) + return false; + bool a_trap = may_trap_or_fault_p (a); + bool b_trap = may_trap_or_fault_p (b); + + if (!(a_trap ^ b_trap)) +return false; + if (a_trap && !MEM_P (a)) +return false; + if (b_trap && !MEM_P (b)) +return false; + + rtx orig_b; + rtx_insn *insn_a, *insn_b; + bool a_simple = if_info->then_simple; + bool b_simple = if_info->else_simple; + basic_block then_bb = if_info->then_bb; + basic_block else_bb = if_info->else_bb; + rtx target; + enum rtx_code code; + rtx cond = if_info->cond; + rtx_insn *ifcvt_seq; + + /* if (test) x = *a; else x = c - d; + => x = c - d; + if (test) + x = *a; + */ + + code = GET_CODE (cond); + insn_a = if_info->insn_a; + insn_b = if_info->insn_b; + machine_mode x_mode = GET_MODE (x); + + /* Because we only handle one trap MEM + other non_trap, non mem cases, + just move one trap MEM always in then_bb. */ + if (noce_reversed_cond_code (if_info) != UNKNOWN) +{ + bool reversep = false; + if (b_trap) + reversep = true; + + if (reversep) + { + if (if_info->rev_cond) + { + cond = if_info->rev_cond; + code = GET_CODE (cond); + } + else + code = reversed_comparison_code (cond, if_info->jump); + std::swap (a, b); + std::swap (insn_a, insn_b); + std::swap (a_simple, b_simple); + std::swap (then_bb, else_bb); + } +} + + if (then_bb && else_bb + && (!bbs_ok_for_cmove_arith (then_bb, else_bb, if_info->orig_x) + || !bbs_ok_for_cmove_arith (else_bb, then_bb, if_info->orig_x))) +return false; + + start_sequence (); + + /* If one of the blocks is empty then the corresponding B or A value + came from the test block. The non-empty complex block that we will + emit might clobber the register used by B or A, so move it to a
[PATCH v2 1/2] [APX CFCMOV] Support APX CFCMOV in if_convert pass
The APX CFCMOV feature implements conditional faulting, which means that all memory faults are suppressed when the condition code evaluates to false and the instruction loads or stores a memory operand. With it, a conditional move can now load or store a memory operand that may trap or fault. In the middle-end, we currently do not support a conditional move if we know that a load from A or B could trap or fault. To enable CFCMOV, we added a new optab. The fault-suppressing conditional move for a conditional memory store does not move any arithmetic calculations. For a conditional memory load, we currently only support a conditional move where one operand is a MEM that may trap and the other operand neither traps nor is a MEM. gcc/ChangeLog: * ifcvt.cc (noce_try_cmove_load_mem_notrap): Allow conversion to cfcmov for conditional load. (noce_try_cmove_store_mem_notrap): Convert to conditional store. (noce_process_if_block): Ditto. * optabs.def (OPTAB_D): New optab. --- gcc/ifcvt.cc | 246 - gcc/optabs.def | 1 + 2 files changed, 246 insertions(+), 1 deletion(-) diff --git a/gcc/ifcvt.cc b/gcc/ifcvt.cc index 58ed42673e5..65c069b8cc6 100644 --- a/gcc/ifcvt.cc +++ b/gcc/ifcvt.cc @@ -783,6 +783,8 @@ static rtx noce_emit_cmove (struct noce_if_info *, rtx, enum rtx_code, rtx, rtx, rtx, rtx, rtx = NULL, rtx = NULL); static bool noce_try_cmove (struct noce_if_info *); static bool noce_try_cmove_arith (struct noce_if_info *); +static bool noce_try_cmove_load_mem_notrap (struct noce_if_info *); +static bool noce_try_cmove_store_mem_notrap (struct noce_if_info *, rtx *, rtx); static rtx noce_get_alt_condition (struct noce_if_info *, rtx, rtx_insn **); static bool noce_try_minmax (struct noce_if_info *); static bool noce_try_abs (struct noce_if_info *); @@ -2401,6 +2403,233 @@ noce_try_cmove_arith (struct noce_if_info *if_info) return false; } +/* When target support suppress memory fault, try more complex cases involving + conditional_move's source or dest may trap or fault. 
*/ + +static bool +noce_try_cmove_load_mem_notrap (struct noce_if_info *if_info) +{ + rtx a = if_info->a; + rtx b = if_info->b; + rtx x = if_info->x; + + if (MEM_P (x)) +return false; + /* Just handle a conditional move from one trap MEM + other non_trap, + non mem cases. */ + if (!(MEM_P (a) ^ MEM_P (b))) + return false; + bool a_trap = may_trap_or_fault_p (a); + bool b_trap = may_trap_or_fault_p (b); + + if (!(a_trap ^ b_trap)) +return false; + if (a_trap && !MEM_P (a)) +return false; + if (b_trap && !MEM_P (b)) +return false; + + rtx orig_b; + rtx_insn *insn_a, *insn_b; + bool a_simple = if_info->then_simple; + bool b_simple = if_info->else_simple; + basic_block then_bb = if_info->then_bb; + basic_block else_bb = if_info->else_bb; + rtx target; + enum rtx_code code; + rtx cond = if_info->cond; + rtx_insn *ifcvt_seq; + + /* if (test) x = *a; else x = c - d; + => x = c - d; + if (test) + x = *a; + */ + + code = GET_CODE (cond); + insn_a = if_info->insn_a; + insn_b = if_info->insn_b; + machine_mode x_mode = GET_MODE (x); + + /* Because we only handle one trap MEM + other non_trap, non mem cases, + just move one trap MEM always in then_bb. */ + if (noce_reversed_cond_code (if_info) != UNKNOWN) +{ + bool reversep = false; + if (b_trap) + reversep = true; + + if (reversep) + { + if (if_info->rev_cond) + { + cond = if_info->rev_cond; + code = GET_CODE (cond); + } + else + code = reversed_comparison_code (cond, if_info->jump); + std::swap (a, b); + std::swap (insn_a, insn_b); + std::swap (a_simple, b_simple); + std::swap (then_bb, else_bb); + } +} + + if (then_bb && else_bb + && (!bbs_ok_for_cmove_arith (then_bb, else_bb, if_info->orig_x) + || !bbs_ok_for_cmove_arith (else_bb, then_bb, if_info->orig_x))) +return false; + + start_sequence (); + + /* If one of the blocks is empty then the corresponding B or A value + came from the test block. The non-empty complex block that we will + emit might clobber the register used by B or A, so move it to a pseudo + first. 
*/ + + rtx tmp_b = NULL_RTX; + + /* Don't move trap mem to a pseudo. */ + if (!may_trap_or_fault_p (b) && (b_simple || !else_bb)) +tmp_b = gen_reg_rtx (x_mode); + + orig_b = b; + + rtx emit_a = NULL_RTX; + rtx emit_b = NULL_RTX; + rtx_insn *tmp_insn = NULL; + bool modified_in_a = false; + bool modified_in_b = false; + /* If either operand is complex, load it into a register first. + The best way to do this is to