[PATCH] Add TARGET_IFUNC_REF_LOCAL_OK

2021-06-19 Thread H.J. Lu via Gcc-patches
1. On some targets, like PowerPC, reference to ifunc function resolver
must be non-local so that compiler will properly emit PLT call.  Add
TARGET_IFUNC_REF_LOCAL_OK to allow binding indirect function resolver
locally for targets which don't require special PLT call sequence.
2. Add ix86_call_use_plt_p to call local ifunc function resolvers via
PLT.

gcc/

PR target/51469
PR target/83782
* target.def (ifunc_ref_local_ok): Add a target hook.
* varasm.c (default_binds_local_p_3): Force indirect function
resolver non-local only if targetm.ifunc_ref_local_ok returns
false.
* config/i386/i386-expand.c (ix86_expand_call): Call
ix86_call_use_plt_p to check if PLT should be used.
* config/i386/i386-protos.h (ix86_call_use_plt_p): New.
* config/i386/i386.c (output_pic_addr_const): Call
ix86_call_use_plt_p to check if "@PLT" is needed.
(ix86_call_use_plt_p): New.
(TARGET_IFUNC_REF_LOCAL_OK): New.
* doc/tm.texi.in: Add TARGET_IFUNC_REF_LOCAL_OK.
* doc/tm.texi: Regenerated.

gcc/testsuite/

PR target/51469
PR target/83782
* gcc.target/i386/pr83782-1.c: New test.
* gcc.target/i386/pr83782-2.c: Likewise.
---
 gcc/config/i386/i386-expand.c |  2 +-
 gcc/config/i386/i386-protos.h |  1 +
 gcc/config/i386/i386.c| 25 +-
 gcc/doc/tm.texi   |  5 +
 gcc/doc/tm.texi.in|  2 ++
 gcc/target.def|  8 +++
 gcc/testsuite/gcc.target/i386/pr83782-1.c | 26 +++
 gcc/testsuite/gcc.target/i386/pr83782-2.c | 26 +++
 gcc/varasm.c  |  3 ++-
 9 files changed, 95 insertions(+), 3 deletions(-)
 create mode 100644 gcc/testsuite/gcc.target/i386/pr83782-1.c
 create mode 100644 gcc/testsuite/gcc.target/i386/pr83782-2.c

diff --git a/gcc/config/i386/i386-expand.c b/gcc/config/i386/i386-expand.c
index 8f4e4e4d884..229a765cb40 100644
--- a/gcc/config/i386/i386-expand.c
+++ b/gcc/config/i386/i386-expand.c
@@ -8184,7 +8184,7 @@ ix86_expand_call (rtx retval, rtx fnaddr, rtx callarg1,
   rtx addr = XEXP (fnaddr, 0);
   if (flag_pic
  && GET_CODE (addr) == SYMBOL_REF
- && !SYMBOL_REF_LOCAL_P (addr))
+ && ix86_call_use_plt_p (addr))
{
  if (flag_plt
  && (SYMBOL_REF_DECL (addr) == NULL_TREE
diff --git a/gcc/config/i386/i386-protos.h b/gcc/config/i386/i386-protos.h
index e6ac9390777..2d1cd07a215 100644
--- a/gcc/config/i386/i386-protos.h
+++ b/gcc/config/i386/i386-protos.h
@@ -152,6 +152,7 @@ extern void ix86_expand_sse_movcc (rtx, rtx, rtx, rtx);
 extern void ix86_expand_sse_unpack (rtx, rtx, bool, bool);
 extern bool ix86_expand_int_addcc (rtx[]);
 extern rtx_insn *ix86_expand_call (rtx, rtx, rtx, rtx, rtx, bool);
+extern bool ix86_call_use_plt_p (rtx);
 extern void ix86_split_call_vzeroupper (rtx, rtx);
 extern void x86_initialize_trampoline (rtx, rtx, rtx);
 extern rtx ix86_zero_extend_to_Pmode (rtx);
diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c
index 7d0d4143bca..731a516b516 100644
--- a/gcc/config/i386/i386.c
+++ b/gcc/config/i386/i386.c
@@ -11971,7 +11971,7 @@ output_pic_addr_const (FILE *file, rtx x, int code)
  assemble_name (file, name);
}
   if (!TARGET_MACHO && !(TARGET_64BIT && TARGET_PECOFF)
- && code == 'P' && ! SYMBOL_REF_LOCAL_P (x))
+ && code == 'P' && ix86_call_use_plt_p (x))
fputs ("@PLT", file);
   break;
 
@@ -15678,6 +15678,26 @@ ix86_zero_extend_to_Pmode (rtx exp)
   return force_reg (Pmode, convert_to_mode (Pmode, exp, 1));
 }
 
+/* Return true if the function is called via PLT.   */
+
+bool
+ix86_call_use_plt_p (rtx call_op)
+{
+  if (SYMBOL_REF_LOCAL_P (call_op))
+{
+  if (SYMBOL_REF_DECL (call_op))
+   {
+ /* NB: All ifunc functions must be called via PLT.  */
+ cgraph_node *node
+   = cgraph_node::get (SYMBOL_REF_DECL (call_op));
+ if (node && node->ifunc_resolver)
+   return true;
+   }
+  return false;
+}
+  return true;
+}
+
 /* Return true if the function being called was marked with attribute
"noplt" or using -fno-plt and we are compiling for non-PIC.  We need
to handle the non-PIC case in the backend because there is no easy
@@ -24016,6 +24036,9 @@ ix86_run_selftests (void)
 #define TARGET_GET_MULTILIB_ABI_NAME \
   ix86_get_multilib_abi_name
 
+#undef TARGET_IFUNC_REF_LOCAL_OK
+#define TARGET_IFUNC_REF_LOCAL_OK hook_bool_void_true
+
 static bool ix86_libc_has_fast_function (int fcode ATTRIBUTE_UNUSED)
 {
 #ifdef OPTION_GLIBC
diff --git a/gcc/doc/tm.texi b/gcc/doc/tm.texi
index 2a41ae5fba1..d327af03c80 100644
--- a/gcc/doc/tm.texi
+++ b/gcc/doc/tm.texi
@@ -12346,6 +12346,11 @@ The support includes the assembler, linker and dynamic 
linker.
 The default value of this hook is 

[PATCH 7/7] Port most of the A CMP 0 ? A : -A to match

2021-06-19 Thread apinski--- via Gcc-patches
From: Andrew Pinski 

To improve phiopt and be able to remove abs_replacement, this ports
most of "A CMP 0 ? A : -A" from fold_cond_expr_with_comparison to
match.pd.  There are a few extra changes that are needed to remove
the "A CMP 0 ? A : -A" part from fold_cond_expr_with_comparison:
   * Need to handle (A - B) case
   * Need to handle UN* comparisons.

I will handle those in a different patch.

Note phi-opt-15.c test needed to be updated as we get ABSU now
instead of not getting ABS.  When ABSU was added phiopt was not
updated even to use ABSU instead of not creating ABS.

OK? Bootstrapped and tested on x86_64-linux-gnu with no regressions.

gcc/ChangeLog:

* match.pd (A CMP 0 ? A : -A): New patterns.
* tree-ssa-phiopt.c (abs_replacement): Delete function.
(tree_ssa_phiopt_worker): Don't call abs_replacement.
Update comment about abs_replacement.

gcc/testsuite/ChangeLog:

* gcc.dg/tree-ssa/phi-opt-15.c: Update test to expect
ABSU and still not expect ABS_EXPR.
---
 gcc/match.pd   |  60 +
 gcc/testsuite/gcc.dg/tree-ssa/phi-opt-15.c |   4 +-
 gcc/tree-ssa-phiopt.c  | 134 +
 3 files changed, 64 insertions(+), 134 deletions(-)

diff --git a/gcc/match.pd b/gcc/match.pd
index f38baf2..a5cfb4e5 100644
--- a/gcc/match.pd
+++ b/gcc/match.pd
@@ -3977,6 +3977,66 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
   (cnd (logical_inverted_value truth_valued_p@0) @1 @2)
   (cnd @0 @2 @1)))
 
+/* abs/negative simplifications moved from fold_cond_expr_with_comparison,
+   Need to handle (A - B) case as fold_cond_expr_with_comparison does.
+   Need to handle UN* comparisons.
+
+   None of these transformations work for modes with signed
+   zeros.  If A is +/-0, the first two transformations will
+   change the sign of the result (from +0 to -0, or vice
+   versa).  The last four will fix the sign of the result,
+   even though the original expressions could be positive or
+   negative, depending on the sign of A.
+
+   Note that all these transformations are correct if A is
+   NaN, since the two alternatives (A and -A) are also NaNs.  */
+
+(for cnd (cond vec_cond)
+ /* A == 0? A : -A    same as -A */
+ (for cmp (eq uneq)
+  (simplify
+   (cnd (cmp @0 zerop) @0 (negate@1 @0))
+(if (!HONOR_SIGNED_ZEROS (element_mode (type)))
+ @1))
+  (simplify
+   (cnd (cmp @0 zerop) zerop (negate@1 @0))
+(if (!HONOR_SIGNED_ZEROS (element_mode (type)))
+ @1))
+ )
+ /* A != 0? A : -A    same as A */
+ (for cmp (ne ltgt)
+  (simplify
+   (cnd (cmp @0 zerop) @0 (negate @0))
+(if (!HONOR_SIGNED_ZEROS (element_mode (type)))
+ @0))
+  (simplify
+   (cnd (cmp @0 zerop) @0 zerop)
+(if (!HONOR_SIGNED_ZEROS (element_mode (type)))
+ @0))
+ )
+ /* A >=/> 0? A : -A    same as abs (A) */
+ (for cmp (ge gt)
+  (simplify
+   (cnd (cmp @0 zerop) @0 (negate @0))
+(if (!HONOR_SIGNED_ZEROS (element_mode (type))
+&& !TYPE_UNSIGNED (type))
+ (abs @0
+ /* A <=/< 0? A : -A    same as -abs (A) */
+ (for cmp (le lt)
+  (simplify
+   (cnd (cmp @0 zerop) @0 (negate @0))
+(if (!HONOR_SIGNED_ZEROS (element_mode (type))
+&& !TYPE_UNSIGNED (type))
+ (if (ANY_INTEGRAL_TYPE_P (type)
+ && !TYPE_OVERFLOW_WRAPS (type))
+  (with {
+   tree utype = unsigned_type_for (type);
+   }
+   (convert (negate (absu:utype @0
+   (negate (abs @0)
+ )
+)
+
 /* -(type)!A -> (type)A - 1.  */
 (simplify
  (negate (convert?:s (logical_inverted_value:s @0)))
diff --git a/gcc/testsuite/gcc.dg/tree-ssa/phi-opt-15.c 
b/gcc/testsuite/gcc.dg/tree-ssa/phi-opt-15.c
index ac3018e..6aec689 100644
--- a/gcc/testsuite/gcc.dg/tree-ssa/phi-opt-15.c
+++ b/gcc/testsuite/gcc.dg/tree-ssa/phi-opt-15.c
@@ -9,4 +9,6 @@ foo (int i)
   return i;
 }
 
-/* { dg-final { scan-tree-dump-not "ABS" "optimized" } } */
+/* We should not have ABS_EXPR but ABSU_EXPR instead. */
+/* { dg-final { scan-tree-dump-not "ABS_EXPR" "optimized" } } */
+/* { dg-final { scan-tree-dump "ABSU" "optimized" } } */
diff --git a/gcc/tree-ssa-phiopt.c b/gcc/tree-ssa-phiopt.c
index 8b289be..ab5aef9 100644
--- a/gcc/tree-ssa-phiopt.c
+++ b/gcc/tree-ssa-phiopt.c
@@ -62,8 +62,6 @@ static int value_replacement (basic_block, basic_block,
  edge, edge, gphi *, tree, tree);
 static bool minmax_replacement (basic_block, basic_block,
edge, edge, gphi *, tree, tree);
-static bool abs_replacement (basic_block, basic_block,
-edge, edge, gphi *, tree, tree);
 static bool spaceship_replacement (basic_block, basic_block,
   edge, edge, gphi *, tree, tree);
 static bool cond_removal_in_popcount_clz_ctz_pattern (basic_block, basic_block,
@@ -350,8 +348,6 @@ tree_ssa_phiopt_worker (bool do_store_elim, bool 
do_hoist_loads, bool early_p)
  else if (match_simplify_replacement (bb, bb1, e1, e2, phi,
 

[PATCH 5/7] Allow match-and-simplified phiopt to run in early phiopt

2021-06-19 Thread apinski--- via Gcc-patches
From: Andrew Pinski 

To move a few things more to match-and-simplify from phiopt,
we need to allow match_simplify_replacement to run in early
phiopt.  To do this, we need to mark some match patterns
if they can be done in early phiopt or not.

OK? Bootstrapped and tested on x86_64-linux-gnu with no
regressions.

gcc/ChangeLog:

* generic-match-head.c (phiopt_earlymode): New function.
* gimple-match-head.c (phiopt_earlymode): New function.
* match.pd (A ? CST0 : CST1): Disable for early phiopt.
(x >= 0 ? ~y : y): Likewise.
(x >= 0 ? y : ~y): Likewise.
* tree-pass.h (PROP_gimple_lomp_dev): Increment bit by one.
(PROP_rtl_split_insns): Likewise.
(PROP_phioptearly): New define.
* tree-ssa-phiopt.c (tree_ssa_phiopt_worker): Set and unset
PROP_phioptearly on curr_properties if early.
---
 gcc/generic-match-head.c |  7 +
 gcc/gimple-match-head.c  |  7 +
 gcc/match.pd | 76 ++--
 gcc/tree-pass.h  |  5 ++--
 gcc/tree-ssa-phiopt.c|  8 +++--
 5 files changed, 63 insertions(+), 40 deletions(-)

diff --git a/gcc/generic-match-head.c b/gcc/generic-match-head.c
index f426208..90ebf84 100644
--- a/gcc/generic-match-head.c
+++ b/gcc/generic-match-head.c
@@ -91,6 +91,13 @@ optimize_vectors_before_lowering_p ()
   return true;
 }
 
+/* Return true if phiopt is in early mode. */
+static inline bool
+phiopt_earlymode ()
+{
+  return false;
+}
+
 /* Return true if successive divisions can be optimized.
Defer to GIMPLE opts.  */
 
diff --git a/gcc/gimple-match-head.c b/gcc/gimple-match-head.c
index 7112c11..1eafbb7 100644
--- a/gcc/gimple-match-head.c
+++ b/gcc/gimple-match-head.c
@@ -1159,6 +1159,13 @@ canonicalize_math_after_vectorization_p ()
   return !cfun || (cfun->curr_properties & PROP_gimple_lvec) != 0;
 }
 
+/* Return true if phiopt is in early mode. */
+static inline bool
+phiopt_earlymode ()
+{
+  return !cfun || (cfun->curr_properties & PROP_phioptearly) != 0;
+}
+
 /* Return true if we can still perform transformations that may introduce
vector operations that are not supported by the target. Vector lowering
normally handles those, but after that pass, it becomes unsafe.  */
diff --git a/gcc/match.pd b/gcc/match.pd
index 39fb57e..f38baf2 100644
--- a/gcc/match.pd
+++ b/gcc/match.pd
@@ -3728,39 +3728,40 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
 #if GIMPLE
 (simplify
  (cond @0 INTEGER_CST@1 INTEGER_CST@2)
- (switch
-  (if (integer_zerop (@2))
-   (switch
-/* a ? 1 : 0 -> a if 0 and 1 are integral types. */
-(if (integer_onep (@1))
- (convert (convert:boolean_type_node @0)))
-/* a ? -1 : 0 -> -a. */
-(if (INTEGRAL_TYPE_P (type) && integer_all_onesp (@1))
- (negate (convert (convert:boolean_type_node @0
-/* a ? powerof2cst : 0 -> a << (log2(powerof2cst)) */
-(if (INTEGRAL_TYPE_P (type) && integer_pow2p (@1))
- (with {
-   tree shift = build_int_cst (integer_type_node, tree_log2 (@1));
-  }
-  (lshift (convert (convert:boolean_type_node @0)) { shift; })
-  (if (integer_zerop (@1))
-   (with {
-  tree booltrue = constant_boolean_node (true, boolean_type_node);
-}
+ (if (!phiopt_earlymode ())
+  (switch
+   (if (integer_zerop (@2))
 (switch
- /* a ? 0 : 1 -> !a. */
- (if (integer_onep (@2))
-  (convert (bit_xor (convert:boolean_type_node @0) { booltrue; } )))
- /* a ? -1 : 0 -> -(!a). */
- (if (INTEGRAL_TYPE_P (type) && integer_all_onesp (@2))
-  (negate (convert (bit_xor (convert:boolean_type_node @0) { booltrue; } 

- /* a ? powerof2cst : 0 -> (!a) << (log2(powerof2cst)) */
- (if (INTEGRAL_TYPE_P (type) &&  integer_pow2p (@2))
+ /* a ? 1 : 0 -> a if 0 and 1 are integral types. */
+ (if (integer_onep (@1))
+  (convert (convert:boolean_type_node @0)))
+ /* a ? -1 : 0 -> -a. */
+ (if (INTEGRAL_TYPE_P (type) && integer_all_onesp (@1))
+  (negate (convert (convert:boolean_type_node @0
+ /* a ? powerof2cst : 0 -> a << (log2(powerof2cst)) */
+ (if (INTEGRAL_TYPE_P (type) && integer_pow2p (@1))
   (with {
-   tree shift = build_int_cst (integer_type_node, tree_log2 (@2));
+tree shift = build_int_cst (integer_type_node, tree_log2 (@1));
}
-   (lshift (convert (bit_xor (convert:boolean_type_node @0) { booltrue; } 
))
-{ shift; }
+   (lshift (convert (convert:boolean_type_node @0)) { shift; })
+   (if (integer_zerop (@1))
+(with {
+   tree booltrue = constant_boolean_node (true, boolean_type_node);
+ }
+ (switch
+  /* a ? 0 : 1 -> !a. */
+  (if (integer_onep (@2))
+   (convert (bit_xor (convert:boolean_type_node @0) { booltrue; } )))
+  /* a ? -1 : 0 -> -(!a). */
+  (if (INTEGRAL_TYPE_P (type) && integer_all_onesp (@2))
+   (negate (convert (bit_xor (convert:boolean_type_node @0) { booltrue; } 

+  /* a ? powerof2cst : 0 -> (!a) 

[PATCH 6/7] Lower for loops before lowering cond in genmatch

2021-06-19 Thread apinski--- via Gcc-patches
From: Andrew Pinski 

While converting some fold_cond_expr_with_comparison
to match, I found that I wanted to use "for cnd (cond vec_cond)"
but that was not causing the lowering of cond to happen.
What was happening was the lowering of the for loop
was happening after the lowering of the cond. So
swapping was the correct thing to do but it also
means we need to copy for_subst_vec in lower_cond.

OK?  Bootstrapped and tested on x86_64-linux-gnu with no regressions.

gcc/ChangeLog:

* genmatch.c (lower_cond): Copy for_subst_vec
for the simplify also.
(lower): Swap the order for lower_for and lower_cond.
---
 gcc/genmatch.c | 24 
 1 file changed, 12 insertions(+), 12 deletions(-)

diff --git a/gcc/genmatch.c b/gcc/genmatch.c
index 4d47672..3aee3dd 100644
--- a/gcc/genmatch.c
+++ b/gcc/genmatch.c
@@ -1306,6 +1306,7 @@ lower_cond (simplify *s, vec& simplifiers)
 {
   simplify *ns = new simplify (s->kind, s->id, matchers[i], s->result,
   s->for_vec, s->capture_ids);
+  ns->for_subst_vec.safe_splice (s->for_subst_vec);
   simplifiers.safe_push (ns);
 }
 }
@@ -1543,24 +1544,23 @@ static void
 lower (vec& simplifiers, bool gimple)
 {
   auto_vec out_simplifiers;
-  for (unsigned i = 0; i < simplifiers.length (); ++i)
-lower_opt (simplifiers[i], out_simplifiers);
+  for (auto s: simplifiers)
+lower_opt (s, out_simplifiers);
 
   simplifiers.truncate (0);
-  for (unsigned i = 0; i < out_simplifiers.length (); ++i)
-lower_commutative (out_simplifiers[i], simplifiers);
+  for (auto s: out_simplifiers)
+lower_commutative (s, simplifiers);
 
   out_simplifiers.truncate (0);
-  if (gimple)
-for (unsigned i = 0; i < simplifiers.length (); ++i)
-  lower_cond (simplifiers[i], out_simplifiers);
-  else
-out_simplifiers.safe_splice (simplifiers);
-
+  for (auto s: simplifiers)
+lower_for (s, out_simplifiers);
 
   simplifiers.truncate (0);
-  for (unsigned i = 0; i < out_simplifiers.length (); ++i)
-lower_for (out_simplifiers[i], simplifiers);
+  if (gimple)
+for (auto s: out_simplifiers)
+  lower_cond (s, simplifiers);
+  else
+simplifiers.safe_splice (out_simplifiers);
 }
 
 
-- 
1.8.3.1



[PATCH 4/7] Expand the comparison argument of fold_cond_expr_with_comparison

2021-06-19 Thread apinski--- via Gcc-patches
From: Andrew Pinski 

To make things slightly easier to convert fold_cond_expr_with_comparison
over to match.pd, expanding the arg0 argument into 3 different arguments
is done. Also this was simple because we don't use arg0 after grabbing
the code and the two operands.
Also since we do this, we don't need to fold the comparison to
get the inverse but just use invert_tree_comparison directly.

OK? Bootstrapped and tested on x86_64-linux-gnu with no regressions.

gcc/ChangeLog:

* fold-const.c (fold_cond_expr_with_comparison):
Expand arg0 into comp_code, arg00, and arg01.
(fold_ternary_loc): Use invert_tree_comparison
instead of fold_invert_truthvalue for the case
where we have A CMP B ? C : A.
---
 gcc/fold-const.c | 39 ++-
 1 file changed, 22 insertions(+), 17 deletions(-)

diff --git a/gcc/fold-const.c b/gcc/fold-const.c
index 95673d2..85e90f4 100644
--- a/gcc/fold-const.c
+++ b/gcc/fold-const.c
@@ -126,7 +126,8 @@ static tree range_binop (enum tree_code, tree, tree, int, 
tree, int);
 static tree range_predecessor (tree);
 static tree range_successor (tree);
 static tree fold_range_test (location_t, enum tree_code, tree, tree, tree);
-static tree fold_cond_expr_with_comparison (location_t, tree, tree, tree, 
tree);
+static tree fold_cond_expr_with_comparison (location_t, tree, enum tree_code,
+   tree, tree, tree, tree);
 static tree unextend (tree, int, int, tree);
 static tree extract_muldiv (tree, tree, enum tree_code, tree, bool *);
 static tree extract_muldiv_1 (tree, tree, enum tree_code, tree, bool *);
@@ -5735,20 +5736,19 @@ merge_ranges (int *pin_p, tree *plow, tree *phigh, int 
in0_p, tree low0,
 
 
 /* Subroutine of fold, looking inside expressions of the form
-   A op B ? A : C, where ARG0, ARG1 and ARG2 are the three operands
-   of the COND_EXPR.  This function is being used also to optimize
-   A op B ? C : A, by reversing the comparison first.
+   A op B ? A : C, where (ARG00, COMP_CODE, ARG01), ARG1 and ARG2
+   are the three operands of the COND_EXPR.  This function is
+   being used also to optimize A op B ? C : A, by reversing the
+   comparison first.
 
Return a folded expression whose code is not a COND_EXPR
anymore, or NULL_TREE if no folding opportunity is found.  */
 
 static tree
 fold_cond_expr_with_comparison (location_t loc, tree type,
-   tree arg0, tree arg1, tree arg2)
+   enum tree_code comp_code,
+   tree arg00, tree arg01, tree arg1, tree arg2)
 {
-  enum tree_code comp_code = TREE_CODE (arg0);
-  tree arg00 = TREE_OPERAND (arg0, 0);
-  tree arg01 = TREE_OPERAND (arg0, 1);
   tree arg1_type = TREE_TYPE (arg1);
   tree tem;
 
@@ -12822,7 +12822,10 @@ fold_ternary_loc (location_t loc, enum tree_code code, 
tree type,
  && operand_equal_for_comparison_p (TREE_OPERAND (arg0, 0), op1)
  && !HONOR_SIGNED_ZEROS (element_mode (op1)))
{
- tem = fold_cond_expr_with_comparison (loc, type, arg0, op1, op2);
+ tem = fold_cond_expr_with_comparison (loc, type, TREE_CODE (arg0),
+   TREE_OPERAND (arg0, 0),
+   TREE_OPERAND (arg0, 1),
+   op1, op2);
  if (tem)
return tem;
}
@@ -12831,14 +12834,16 @@ fold_ternary_loc (location_t loc, enum tree_code 
code, tree type,
  && operand_equal_for_comparison_p (TREE_OPERAND (arg0, 0), op2)
  && !HONOR_SIGNED_ZEROS (element_mode (op2)))
{
- location_t loc0 = expr_location_or (arg0, loc);
- tem = fold_invert_truthvalue (loc0, arg0);
- if (tem && COMPARISON_CLASS_P (tem))
-   {
- tem = fold_cond_expr_with_comparison (loc, type, tem, op2, op1);
- if (tem)
-   return tem;
-   }
+ enum tree_code comp_code = TREE_CODE (arg0);
+ tree arg00 = TREE_OPERAND (arg0, 0);
+ tree arg01 = TREE_OPERAND (arg0, 1);
+ comp_code = invert_tree_comparison (comp_code, HONOR_NANS (arg00));
+ tem = fold_cond_expr_with_comparison (loc, type, comp_code,
+   arg00,
+   arg01,
+   op2, op1);
+ if (tem)
+   return tem;
}
 
   /* If the second operand is simpler than the third, swap them
-- 
1.8.3.1



[PATCH 3/7] Try inverted comparison for match_simplify in phiopt

2021-06-19 Thread apinski--- via Gcc-patches
From: Andrew Pinski 

Since match and simplify does not have all of the inverted
comparison patterns, it makes sense to just have
phi-opt try to do the inversion and try match and simplify again.

OK? Bootstrapped and tested on x86_64-linux-gnu.

Thanks,
Andrew Pinski

gcc/ChangeLog:

* tree-ssa-phiopt.c (match_simplify_replacement):
If "A ? B : C" fails to simplify, try "(!A) ? C : B".
---
 gcc/tree-ssa-phiopt.c | 21 -
 1 file changed, 20 insertions(+), 1 deletion(-)

diff --git a/gcc/tree-ssa-phiopt.c b/gcc/tree-ssa-phiopt.c
index feb8ca8d0d1..3b3762a668b 100644
--- a/gcc/tree-ssa-phiopt.c
+++ b/gcc/tree-ssa-phiopt.c
@@ -879,7 +879,26 @@ match_simplify_replacement (basic_block cond_bb, 
basic_block middle_bb,
arg0, arg1,
, NULL);
   if (!result)
-return false;
+{
+  /* Try !A ? arg1 : arg0 instead.
+Not all match patterns support inverted comparisons.  */
+  enum tree_code comp_code = gimple_cond_code (stmt);
+  tree cmp0 = gimple_cond_lhs (stmt);
+  tree cmp1 = gimple_cond_rhs (stmt);
+  comp_code = invert_tree_comparison (comp_code, HONOR_NANS (cmp0));
+  if (comp_code != ERROR_MARK)
+   {
+ cond = build2_loc (gimple_location (stmt),
+comp_code, boolean_type_node,
+cmp0, cmp1);
+ result = gimple_simplify (COND_EXPR, type,
+   cond,
+   arg1, arg0,
+   , NULL);
+   }
+  if (!result)
+   return false;
+}
 
   gsi = gsi_last_bb (cond_bb);
   if (stmt_to_move)
-- 
2.27.0



[PATCH 2/7] Duplicate the range information of the phi onto the new ssa_name

2021-06-19 Thread apinski--- via Gcc-patches
From: Andrew Pinski 

Since match_simplify_replacement uses gimple_simplify, there is a new
ssa name created sometimes and then we go and replace the phi edge with
this new ssa name, the range information on the phi is lost.
I don't have a testcase right now where we lose the range information
though but it does show up when enhancing match.pd to handle
some min/max patterns and g++.dg/warn/Wstringop-overflow-1.C starts
to fail.

OK? Bootstrapped and tested on x86_64-linux-gnu with no regressions.

gcc/ChangeLog:

* tree-ssa-phiopt.c (match_simplify_replacement): Duplicate range
info if we're the only things setting the target PHI.
---
 gcc/tree-ssa-phiopt.c | 8 
 1 file changed, 8 insertions(+)

diff --git a/gcc/tree-ssa-phiopt.c b/gcc/tree-ssa-phiopt.c
index 24cbce9955a..feb8ca8d0d1 100644
--- a/gcc/tree-ssa-phiopt.c
+++ b/gcc/tree-ssa-phiopt.c
@@ -894,6 +894,14 @@ match_simplify_replacement (basic_block cond_bb, 
basic_block middle_bb,
   gsi_move_before (, );
   reset_flow_sensitive_info (gimple_assign_lhs (stmt_to_move));
 }
+  /* Duplicate range info if we're the only things setting the target PHI.  */
+  tree phi_result = PHI_RESULT (phi);
+  if (!gimple_seq_empty_p (seq)
+  && EDGE_COUNT (gimple_bb (phi)->preds) == 2
+  && !POINTER_TYPE_P (TREE_TYPE (phi_result))
+  && SSA_NAME_RANGE_INFO (phi_result))
+duplicate_ssa_name_range_info (result, SSA_NAME_RANGE_TYPE (phi_result),
+  SSA_NAME_RANGE_INFO (phi_result));
   if (seq)
 gsi_insert_seq_before (, seq, GSI_SAME_STMT);
 
-- 
2.27.0



[PATCH 1/7] Reset the range info on the moved instruction in PHIOPT

2021-06-19 Thread apinski--- via Gcc-patches
From: Andrew Pinski 

I had missed this when I wrote the patch which allowed the
gimple to be moved from inside the conditional as is.  It
was also missed in the review.  Anyways the range information
needs to be reset for the moved gimple as it was under a
conditional and the flow has changed to be unconditional.
I have not seen any testcase in the wild that produces wrong code
yet which is why there is no testcase but this is similar to what
the other code in phiopt does so after moving those to match, there
might be some.

OK? Bootstrapped and tested on x86_64-linux-gnu with no regressions.

gcc/ChangeLog:

* tree-ssa-phiopt.c (match_simplify_replacement): Reset
flow sensitive info on the moved ssa set.
---
 gcc/tree-ssa-phiopt.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/gcc/tree-ssa-phiopt.c b/gcc/tree-ssa-phiopt.c
index 02e26f974a5..24cbce9955a 100644
--- a/gcc/tree-ssa-phiopt.c
+++ b/gcc/tree-ssa-phiopt.c
@@ -836,7 +836,7 @@ match_simplify_replacement (basic_block cond_bb, 
basic_block middle_bb,
   if (!is_gimple_assign (stmt_to_move))
return false;
 
-  tree lhs = gimple_assign_lhs  (stmt_to_move);
+  tree lhs = gimple_assign_lhs (stmt_to_move);
   gimple *use_stmt;
   use_operand_p use_p;
 
@@ -892,6 +892,7 @@ match_simplify_replacement (basic_block cond_bb, 
basic_block middle_bb,
}
   gimple_stmt_iterator gsi1 = gsi_for_stmt (stmt_to_move);
   gsi_move_before (, );
+  reset_flow_sensitive_info (gimple_assign_lhs (stmt_to_move));
 }
   if (seq)
 gsi_insert_seq_before (, seq, GSI_SAME_STMT);
-- 
2.27.0



[PATCH] c++: REF_PARENTHESIZED_P wrapper inhibiting NRVO [PR67302]

2021-06-19 Thread Patrick Palka via Gcc-patches
Here, in C++14 or later, we remember the parentheses around 'a' in the
return statement by using a REF_PARENTHESIZED_P wrapper, which ends up
inhibiting NRVO because we don't look through this wrapper before
checking the conditions for NRVO.  This patch fixes this by calling
maybe_undo_parenthesized_ref sooner in check_return_expr.

Bootstrapped and regtested on x86_64-pc-linux-gnu, does this look OK for
trunk?

PR c++/67302

gcc/cp/ChangeLog:

* typeck.c (check_return_expr): Call maybe_undo_parenthesized_ref
sooner, before the NRVO handling.

gcc/testsuite/ChangeLog:

* g++.dg/opt/nrv21.C: New test.
---
 gcc/cp/typeck.c  |  9 -
 gcc/testsuite/g++.dg/opt/nrv21.C | 14 ++
 2 files changed, 18 insertions(+), 5 deletions(-)
 create mode 100644 gcc/testsuite/g++.dg/opt/nrv21.C

diff --git a/gcc/cp/typeck.c b/gcc/cp/typeck.c
index dbb2370510c..aa014c3812a 100644
--- a/gcc/cp/typeck.c
+++ b/gcc/cp/typeck.c
@@ -10306,7 +10306,10 @@ check_return_expr (tree retval, bool *no_warning)
 
  See finish_function and finalize_nrv for the rest of this optimization.  
*/
   if (retval)
-STRIP_ANY_LOCATION_WRAPPER (retval);
+{
+  retval = maybe_undo_parenthesized_ref (retval);
+  STRIP_ANY_LOCATION_WRAPPER (retval);
+}
 
   bool named_return_value_okay_p = can_do_nrvo_p (retval, functype);
   if (fn_returns_value_p && flag_elide_constructors)
@@ -10340,10 +10343,6 @@ check_return_expr (tree retval, bool *no_warning)
   if (VOID_TYPE_P (functype))
return error_mark_node;
 
-  /* If we had an id-expression obfuscated by force_paren_expr, we need
-to undo it so we can try to treat it as an rvalue below.  */
-  retval = maybe_undo_parenthesized_ref (retval);
-
   if (processing_template_decl)
retval = build_non_dependent_expr (retval);
 
diff --git a/gcc/testsuite/g++.dg/opt/nrv21.C b/gcc/testsuite/g++.dg/opt/nrv21.C
new file mode 100644
index 000..31bff79afc1
--- /dev/null
+++ b/gcc/testsuite/g++.dg/opt/nrv21.C
@@ -0,0 +1,14 @@
+// PR c++/67302
+// { dg-additional-options -fdump-tree-gimple }
+// { dg-final { scan-tree-dump-not " = a" "gimple" } }
+
+struct A
+{
+  int ar[42];
+  A();
+};
+
+A f() {
+  A a;
+  return (a);
+}
-- 
2.32.0.93.g670b81a890



Re: [EXTERNAL] Re: [PATCH] tree-optimization: Optimize division followed by multiply [PR95176]

2021-06-19 Thread Marc Glisse

On Fri, 18 Jun 2021, Richard Biener wrote:


Option 2: Add a new pattern to support scenarios that the existing nop_convert 
pattern bails out on.

Existing pattern:

(simplify
   (minus (nop_convert1? @0) (nop_convert2? (minus (nop_convert3? @@0) @1)))
   (view_convert @1))


I tried to check with a program when

T3 x;
T1 y;
(T2)x-(T2)((T1)x-y)

can be safely replaced with

(T2)y

From the output, it looks like this is safe when T1 is at least as large 
as T2. It is wrong when T1 is unsigned and smaller than T2. And when T1 is 
signed and smaller than T2, it is ok if T3 is the same type as T1 (signed 
then) or has strictly less precision (any sign), and not in other cases.


Note that this is when signed implies undefined overflow and unsigned 
implies wrapping, and I wouldn't put too much faith in this recently 
dusted program. And it doesn't say how to write the match.pd pattern with 
'?', "@@", disabling it if TYPE_OVERFLOW_SANITIZED, etc.


Mostly, I wanted to say that if we are going to go handle more than 
nop_convert for more than just 1 or 2 easy transformations, I think some 
kind of computer verification would be useful, it would save a lot of time 
and headaches.


(I just check by brute force all possible precisions (from 1 to 6) and 
signedness for T1, T2 and T3, all possible values for x and y, compute 
the before and after formulas, accepting if there is UB before, rejecting 
if there is UB after (and not before), and manually try to see a pattern 
in the list of types that work)


--
Marc Glisse


Re: [PATCH] Modula-2 into the GCC tree on master

2021-06-19 Thread Segher Boessenkool
On Sat, Jun 19, 2021 at 09:09:05AM -0500, Segher Boessenkool wrote:
> powerpc64-linux now is building, and is running the testsuite.  My
> powerpc64le-linux build used --enable-languages=all, but Ada fails to
> build, so I'll redo that without Ada.

For powerpc64le-linux I get

=== gm2 tests ===


Running target unix
FAIL: gm2/pim/fail/TestLong4.mod,  -g  
FAIL: gm2/pim/fail/TestLong4.mod,  -O  
FAIL: gm2/pim/fail/TestLong4.mod,  -O -g  
FAIL: gm2/pim/fail/TestLong4.mod,  -Os  
FAIL: gm2/pim/fail/TestLong4.mod,  -O3 -fomit-frame-pointer  
FAIL: gm2/pim/fail/TestLong4.mod,  -O3 -fomit-frame-pointer -finline-functions  
FAIL: gm2/pimlib/logitech/run/pass/realconv.mod execution,  -g 
FAIL: gm2/pimlib/logitech/run/pass/realconv.mod execution,  -O 
FAIL: gm2/pimlib/logitech/run/pass/realconv.mod execution,  -O -g 
FAIL: gm2/pimlib/logitech/run/pass/realconv.mod execution,  -Os 
FAIL: gm2/pimlib/logitech/run/pass/realconv.mod execution,  -O3 
-fomit-frame-pointer 
FAIL: gm2/pimlib/logitech/run/pass/realconv.mod execution,  -O3 
-fomit-frame-pointer -finline-functions 

=== gm2 Summary ===

# of expected passes11610
# of unexpected failures12

So that is excellent, only two failing tests :-)

For BE there is more:

A whole bunch of testcases fail to build (both 32-bit and 64-bit).  I
don't know yet.

The realconv.mod testcase fails at all optimisation levels (also -O0).

setarith*.mod and setrotate*.mod and setshift*.mod and simple*.mod fail
to build.  Also cardrange*.mod and intrange*.mod and multint*.mod and
realrange*.mod and subrange.mod and cardrange.mod and forcheck.mod.
And the extended-opaque tests.  And more :-)

: error: the file containing the definition module <80><98>M2RTS
<80><99> cannot be found
compiler exited with status 1
output is:
: error: the file containing the definition module <80><98>M2RTS
<80><99> cannot be found

(That is UTF-8 quotation marks, and I do not use an UTF-8 locale there
btw.  That is just a cosmetic problem of course.)

Does this have to do with gm2tools?


Segher


Re: [PATCH] Modula-2 into the GCC tree on master

2021-06-19 Thread Segher Boessenkool
Hi!

On Fri, Jun 18, 2021 at 10:00:40PM +0100, Gaius Mulley wrote:
> Segher Boessenkool  writes:
> > On Thu, Jun 17, 2021 at 11:26:41PM +0100, Gaius Mulley via Gcc-patches 
> > wrote:
> >> Debian Stretch using make -j 4, x86_64 GNU/Linux Debian Stretch built
> >> using make -j 24 and also under x86_64 GNU/Linux Debian Buster using
> >> make -j 4.
> >
> > I am building it on powerpc64-linux (-m32,-m64) and powerpc64le-linux
> > currently.  (All CentOS 7 fwiw).
> 
> excellent the more varieties the better - I'm eagerly awaiting a risc-v
> motherboard which might also be interesting

I needed a few fixes to get it to build, they are in my branch
().

The files gm2-libs/getopt.def and gm2-libs/GetOpt.def have filenames
that differ in case only; this is censored by the scripts that we run on
the Git server.  I renamed the former to cgetopt.def for now, but of
course more changes are needed for this to work at all.

> > It does not want to build gm2tools, haven't investigated that yet
> > either.

Not yet :-)

> > Will report results later.

powerpc64-linux now is building, and is running the testsuite.  My
powerpc64le-linux build used --enable-languages=all, but Ada fails to
build, so I'll redo that without Ada.

Gaius, could you look through the two patches I did to get the build to
work, see if those are correct or if something better needs to be done?


$(subdir) is an absolute path for me, so ../$(subdir) cannot work.


Maybe your texinfo is less picky than mine, I use an older one (5.1)?


Segher


Re: [Ping^2, Patch, Fortran] PR100337 Should be able to pass non-present optional arguments to CO_BROADCAST

2021-06-19 Thread Andre Vehreschild via Gcc-patches
PING!

On Fri, 4 Jun 2021 18:05:18 +0200
Andre Vehreschild  wrote:

> Ping!
>
> On Fri, 21 May 2021 15:33:11 +0200
> Andre Vehreschild  wrote:
>
> > Hi,
> >
> > the attached patch fixes an issue when calling CO_BROADCAST in
> > -fcoarray=single mode, where the optional but non-present (in the calling
> > scope) stat variable was assigned to before checking for it being not
> > present.
> >
> > Regtests fine on x86-64-linux/f33. Ok for trunk?
> >
> > Regards,
> > Andre
>
>


--
Andre Vehreschild * Email: vehre ad gmx dot de
gcc/fortran/ChangeLog:

	PR fortran/100337
	* trans-intrinsic.c (conv_co_collective): Check stat for null ptr
	before dereferencing.

gcc/testsuite/ChangeLog:

	PR fortran/100337
	* gfortran.dg/coarray_collectives_17.f90: New test.

diff --git a/gcc/fortran/trans-intrinsic.c b/gcc/fortran/trans-intrinsic.c
index 4d7451479d3..03a38090051 100644
--- a/gcc/fortran/trans-intrinsic.c
+++ b/gcc/fortran/trans-intrinsic.c
@@ -11232,8 +11232,28 @@ conv_co_collective (gfc_code *code)
   if (flag_coarray == GFC_FCOARRAY_SINGLE)
 {
   if (stat != NULL_TREE)
-	gfc_add_modify (&block, stat,
-			fold_convert (TREE_TYPE (stat), integer_zero_node));
+	{
+	  /* For optional stats, check the pointer is valid before zero'ing.  */
+	  if (gfc_expr_attr (stat_expr).optional)
+	{
+	  tree tmp;
+	  stmtblock_t ass_block;
+	  gfc_start_block (&ass_block);
+	  gfc_add_modify (&ass_block, stat,
+			  fold_convert (TREE_TYPE (stat),
+	integer_zero_node));
+	  tmp = fold_build2 (NE_EXPR, logical_type_node,
+ gfc_build_addr_expr (NULL_TREE, stat),
+ null_pointer_node);
+	  tmp = fold_build3 (COND_EXPR, void_type_node, tmp,
+ gfc_finish_block (&ass_block),
+ build_empty_stmt (input_location));
+	  gfc_add_expr_to_block (&block, tmp);
+	}
+	  else
+	gfc_add_modify (&block, stat,
+			fold_convert (TREE_TYPE (stat), integer_zero_node));
+	}
   return gfc_finish_block (&block);
 }

diff --git a/gcc/testsuite/gfortran.dg/coarray_collectives_17.f90 b/gcc/testsuite/gfortran.dg/coarray_collectives_17.f90
new file mode 100644
index 000..84a6645865e
--- /dev/null
+++ b/gcc/testsuite/gfortran.dg/coarray_collectives_17.f90
@@ -0,0 +1,42 @@
+! { dg-do run }
+! { dg-options "-fcoarray=single" }
+!
+! PR 100337
+! Test case inspired by code submitted by Brad Richardson
+
+program main
+implicit none
+
+integer, parameter :: MESSAGE = 42
+integer :: result
+
+call myco_broadcast(MESSAGE, result, 1)
+
+if (result /= MESSAGE) error stop 1
+contains
+subroutine myco_broadcast(m, r, source_image, stat, errmsg)
+integer, intent(in) :: m
+integer, intent(out) :: r
+integer, intent(in) :: source_image
+integer, intent(out), optional :: stat
+character(len=*), intent(inout), optional :: errmsg
+
+integer :: data_length
+
+data_length = 1
+
+call co_broadcast(data_length, source_image, stat, errmsg)
+
+if (present(stat)) then
+if (stat /= 0) return
+end if
+
+if (this_image() == source_image) then
+r = m
+end if
+
+call co_broadcast(r, source_image, stat, errmsg)
+end subroutine
+
+end program
+


Re: [Patch, fortran V3] PR fortran/100683 - Array initialization refuses valid

2021-06-19 Thread dhumieres.dominique--- via Gcc-patches

Hi José,

The logic is now much clearer.
OK for the new version. Thanks for the work.

Dominique


Re: [Patch, fortran v2] PR fortran/93308/93963/94327/94331/97046 problems raised by descriptor handling

2021-06-19 Thread dhumieres.dominique--- via Gcc-patches

Le 2021-06-06 19:58, dhumieres.domini...@free.fr a écrit :

Hi José,


Patch tested only on x86_64-pc-linux-gnu.


Also tested on darwin20. The patch is OK for me provided the updated
PR94331.c test file replaces the original one.
Since the PRs are about wrong code, I think the patch should be
backported to at least GCC11 (applied and regtested OK).

Thanks for the work,

Dominique


OK for the new version.

Dominique


Re: [PATCH] Modula-2 into the GCC tree on master

2021-06-19 Thread Matthias Klose
On 6/19/21 9:53 AM, Gaius Mulley wrote:
> Matthias Klose  writes:
> 
>> x86_64-linux-gnu-g++-10 is the compiler used for the bootstrap.  I haven't
>> checked if that is also seen for a normal bootstrap. Apparently it tries to
>> re-bootstrap the compiler.
>>
>> The build is configured with --with-build-config=bootstrap-lto-lean, built 
>> with
>> make profiledbootstrap-lean
> 
> many thanks for the patch - committed.  I've also fixed the make install
> bug (causing the re-bootstrap mentioned above),

the build now fails already in stage1 with

[...]
gm2.a m2/mc-boot-ch/Glibc.o m2/mc-boot-ch/Gmcrts.o libcommon.a
../libcpp/libcpp.a   ../libbacktrace/.libs/libbacktrace.a
../libiberty/libiberty.a ../libdecnumber/libdecnumber.a  -lm
/usr/bin/x86_64-linux-gnu-ld: cannot find libcommon.a: No such file or directory
collect2: error: ld returned 1 exit status

Matthias


Re: [PATCH] Modula-2 into the GCC tree on master

2021-06-19 Thread Gaius Mulley via Gcc-patches
Matthias Klose  writes:

> x86_64-linux-gnu-g++-10 is the compiler used for the bootstrap.  I haven't
> checked if that is also seen for a normal bootstrap. Apparently it tries to
> re-bootstrap the compiler.
>
> The build is configured with --with-build-config=bootstrap-lto-lean, built 
> with
> make profiledbootstrap-lean

many thanks for the patch - committed.  I've also fixed the make install
bug (causing the re-bootstrap mentioned above),




regards,
Gaius


Re: [PATCH] tree-optimization/101014 - Remove poor value computations.

2021-06-19 Thread Richard Biener via Gcc-patches
On June 18, 2021 11:46:08 PM GMT+02:00, Andrew MacLeod  
wrote:
>I am pleased to say that this patch kills the poor value computations
>in 
>the ranger's cache.
>
>It's been a bit of a thorn, and is mostly a hack that was applied early 
>on to enable getting some opportunities that were hard to get
>otherwise.
>
>The more consistent propagation we now do combined with other changes 
>means I can kill this wart on trunk. It even results in a 1% speedup.. 
>and should resolve some of the excessive compile time issues caused by 
>undesirable iteration, including 101014.. for good I hope :-).
>
>I tried turning off the poor_value computations on the GCC11 branch,
>and 
>we may want to consider doing it there too.  In my testsuite, we miss a
>
>total of 3 cases out of 4700 new ones identified by ranger.  For the 
>stability, I'd suggest we turn off poor_value computations there as 
>well.  This patch rips out all the code, but for GCC11 I'd just change 
>push_poor_value to always return false, thus never registering any 
>values. less churn that way. I'll run some tests and post that 
>separately if you think we should go ahead with it.
>
>Bootstraps on x86_64-pc-linux-gnu with no regressions.  pushed.

Nice. I think we should indeed consider mostly syncing the algorithmic changes 
with GCC 11 to make maintenance easier, at least up to 11.2. Now, please leave 
such changes some time to bake on trunk before backporting. 

Thanks, 
Richard. 

>Andrew
>
>The details:
>
>The cache is designed to propagate info without creating NEW info.  Ie,
>
>It cannot query new ranges, cannot set global ranges, etc... only the 
>higher level ranger can do that.  This is how we avoid cycles when 
>iterating..  Ranger says "This set this value to X here" , and the 
>cache's job is to propagate that info around the CFG as needed,
>applying 
>static GORI outgoing ranges along the way.   Only the ranger can
>request 
>/set NEW information.
>
>There were some cases where back edges were missing key bits of info 
>that hadn't been created yet.  The "poor value" approach was a stop-gap
>
>measure until things improve. When the cache is trying to propagate a 
>range, the GORI edge computations sometimes wants a value which is not 
>available.  Under some conditions it is allowed to register this as a 
>"poor value" and continue propagating.  When done, it looks at the poor
>
>value list, and asks the ranger to "go get a new value for this".  If  
>the ranger finds a better value, then this new value is propagated 
>around.  So it's a bit of a cheat from the original design. The ranger
>is 
>still the only new-info creator, but the request is sometimes started 
>from the cache. This is not desirable, and can lead to some 
>inconsistencies & inefficiencies.
>
>As one can imagine, this sometimes causes significant iteration, as in 
>this testcase.  Ie, the new ranger request from the cache triggers 
>another poor value request, etc etc.  Which is why it wasn't designed
>to 
>work that way.  Anyway, longer story shorter, I revisited the poor
>value 
>code, and discovered that with the new GORI and fold_using_range rework
>
>from the past month, dropping the poor value code results in *0* 
>difference in any of our test cases/suites.   Furthermore, the biggest
>
>thing that it really enabled was picking up range restrictions imposed 
>by unvisited statements that were being forced to VARYING.  The new 
>rework allows such statements to simply be folded using the new 
>global_range_query and even so that bit of info is now easily
>captured.  
>That's the enhancement from the second patch.



[PATCH] mips: check MSA support for vector modes [PR100760,PR100761,PR100762]

2021-06-19 Thread Xi Ruoyao via Gcc-patches
Check if the vector mode is really supported by MSA in certain cases,
instead of testing ISA_HAS_MSA.  Simply testing ISA_HAS_MSA can cause
ICE when MSA is enabled besides other MIPS SIMD extensions (notably,
Loongson MMI).

Bootstrapped and tested on mips64el-linux-gnu.  OK to commit?

gcc/

* config/mips/mips.c (mips_const_insns): Use MSA_SUPPORTED_MODE_P
instead of ISA_HAS_MSA.
(mips_expand_vec_unpack): Likewise.
(mips_expand_vector_init): Likewise.

gcc/testsuite/

* gcc.target/mips/pr100760.c: New test.
* gcc.target/mips/pr100761.c: New test.
* gcc.target/mips/pr100762.c: New test.
---
 gcc/config/mips/mips.c   |  6 +++---
 gcc/testsuite/gcc.target/mips/pr100760.c | 10 ++
 gcc/testsuite/gcc.target/mips/pr100761.c | 17 
 gcc/testsuite/gcc.target/mips/pr100762.c | 25 
 4 files changed, 55 insertions(+), 3 deletions(-)
 create mode 100644 gcc/testsuite/gcc.target/mips/pr100760.c
 create mode 100644 gcc/testsuite/gcc.target/mips/pr100761.c
 create mode 100644 gcc/testsuite/gcc.target/mips/pr100762.c

diff --git a/gcc/config/mips/mips.c b/gcc/config/mips/mips.c
index 1f1475cf400..00a8eef96aa 100644
--- a/gcc/config/mips/mips.c
+++ b/gcc/config/mips/mips.c
@@ -2879,7 +2879,7 @@ mips_const_insns (rtx x)
   return mips_build_integer (codes, INTVAL (x));
 
 case CONST_VECTOR:
-  if (ISA_HAS_MSA
+  if (MSA_SUPPORTED_MODE_P (GET_MODE (x))
  && mips_const_vector_same_int_p (x, GET_MODE (x), -512, 511))
return 1;
   /* Fall through.  */
@@ -21732,7 +21732,7 @@ mips_expand_vec_unpack (rtx operands[2], bool 
unsigned_p, bool high_p)
   rtx (*cmpFunc) (rtx, rtx, rtx);
   rtx tmp, dest, zero;
 
-  if (ISA_HAS_MSA)
+  if (MSA_SUPPORTED_MODE_P (imode))
 {
   switch (imode)
{
@@ -21994,7 +21994,7 @@ mips_expand_vector_init (rtx target, rtx vals)
all_same = false;
 }
 
-  if (ISA_HAS_MSA)
+  if (MSA_SUPPORTED_MODE_P (vmode))
 {
   if (all_same)
{
diff --git a/gcc/testsuite/gcc.target/mips/pr100760.c 
b/gcc/testsuite/gcc.target/mips/pr100760.c
new file mode 100644
index 000..d715b85e790
--- /dev/null
+++ b/gcc/testsuite/gcc.target/mips/pr100760.c
@@ -0,0 +1,10 @@
+/* PR target/100760
+   This was triggering an ICE with "maximum number of generated reload
+   insns per insn achieved (90)" when compiled with -mmsa -mloongson-mmi. */
+
+/* { dg-do compile } */
+/* { dg-options "-mmsa -mloongson-mmi" } */
+
+typedef __INT32_TYPE__ int32_t;
+typedef int32_t a __attribute__((__vector_size__(8)));
+void b() { a x = (a){1, 1}; }
diff --git a/gcc/testsuite/gcc.target/mips/pr100761.c 
b/gcc/testsuite/gcc.target/mips/pr100761.c
new file mode 100644
index 000..cc2598ee023
--- /dev/null
+++ b/gcc/testsuite/gcc.target/mips/pr100761.c
@@ -0,0 +1,17 @@
+/* PR target/100761
+   This was triggering an ICE in mips_expand_vec_unpack when compiled with
+   -mmsa -mloongson-mmi. */
+
+/* { dg-do compile } */
+/* { dg-options "-mmsa -mloongson-mmi" } */
+
+typedef __INT8_TYPE__ int8_t;
+typedef __INT16_TYPE__ int16_t;
+typedef int8_t i8x8 __attribute__((__vector_size__(8)));
+typedef int16_t i16x8 __attribute__((__vector_size__(16)));
+
+i8x8 a;
+
+void f() {
+  i16x8 b = __builtin_convertvector (a, i16x8);
+}
diff --git a/gcc/testsuite/gcc.target/mips/pr100762.c 
b/gcc/testsuite/gcc.target/mips/pr100762.c
new file mode 100644
index 000..89c1185317c
--- /dev/null
+++ b/gcc/testsuite/gcc.target/mips/pr100762.c
@@ -0,0 +1,25 @@
+/* PR target/100762
+   This was triggering an ICE in mips_expand_vector_init when compiled with
+   -mmsa -mloongson-mmi. */
+
+/* { dg-do compile } */
+/* { dg-options "-mmsa -mloongson-mmi" } */
+
+typedef __INT32_TYPE__ int32_t;
+typedef int32_t i32x2 __attribute__((__vector_size__(8)));
+
+i32x2 cmp(i32x2 a, i32x2 b) {
+  return a >= b;
+}
+
+i32x2 shift(i32x2 a, i32x2 b) {
+  return a >> b;
+}
+
+i32x2 mul(i32x2 a, i32x2 b) {
+  return a * b;
+}
+
+i32x2 div(i32x2 a, i32x2 b) {
+  return a / b;
+}
-- 
2.32.0