[pushed] c++: location of lambda object and conversion call

2021-11-14 Thread Jason Merrill via Gcc-patches
Two things that had poor location info: we weren't giving the TARGET_EXPR
for a lambda object any location, and the call to a conversion function was
getting whatever input_location happened to be.

Tested x86_64-pc-linux-gnu, applying to trunk.

gcc/cp/ChangeLog:

* call.c (perform_implicit_conversion_flags): Use the location of
the argument.
* lambda.c (build_lambda_object): Set location on the TARGET_EXPR.

gcc/testsuite/ChangeLog:

* g++.dg/cpp0x/lambda/lambda-switch.C: Adjust expected location.
---
 gcc/cp/call.c | 6 +-
 gcc/cp/lambda.c   | 7 +++
 gcc/testsuite/g++.dg/cpp0x/lambda/lambda-switch.C | 4 ++--
 3 files changed, 10 insertions(+), 7 deletions(-)

diff --git a/gcc/cp/call.c b/gcc/cp/call.c
index 01ac114a62c..4ee21c7bdbd 100644
--- a/gcc/cp/call.c
+++ b/gcc/cp/call.c
@@ -12549,7 +12549,11 @@ perform_implicit_conversion_flags (tree type, tree 
expr,
IMPLICIT_CONV_EXPR_BRACED_INIT (expr) = true;
 }
   else
-expr = convert_like (conv, expr, complain);
+{
+  /* Give a conversion call the same location as expr.  */
+  iloc_sentinel il (loc);
+  expr = convert_like (conv, expr, complain);
+}
 
   /* Free all the conversions we allocated.  */
   obstack_free (&conversion_obstack, p);
diff --git a/gcc/cp/lambda.c b/gcc/cp/lambda.c
index 2e9d38bbe83..f68c68ca16e 100644
--- a/gcc/cp/lambda.c
+++ b/gcc/cp/lambda.c
@@ -57,14 +57,13 @@ build_lambda_object (tree lambda_expr)
  - cp_parser_functional_cast  */
   vec *elts = NULL;
   tree node, expr, type;
-  location_t saved_loc;
 
   if (processing_template_decl || lambda_expr == error_mark_node)
 return lambda_expr;
 
   /* Make sure any error messages refer to the lambda-introducer.  */
-  saved_loc = input_location;
-  input_location = LAMBDA_EXPR_LOCATION (lambda_expr);
+  location_t loc = LAMBDA_EXPR_LOCATION (lambda_expr);
+  iloc_sentinel il (loc);
 
   for (node = LAMBDA_EXPR_CAPTURE_LIST (lambda_expr);
node;
@@ -117,10 +116,10 @@ build_lambda_object (tree lambda_expr)
   type = LAMBDA_EXPR_CLOSURE (lambda_expr);
   CLASSTYPE_NON_AGGREGATE (type) = 0;
   expr = finish_compound_literal (type, expr, tf_warning_or_error);
+  protected_set_expr_location (expr, loc);
   CLASSTYPE_NON_AGGREGATE (type) = 1;
 
  out:
-  input_location = saved_loc;
   return expr;
 }
 
diff --git a/gcc/testsuite/g++.dg/cpp0x/lambda/lambda-switch.C 
b/gcc/testsuite/g++.dg/cpp0x/lambda/lambda-switch.C
index d05c9760709..e417967a17e 100644
--- a/gcc/testsuite/g++.dg/cpp0x/lambda/lambda-switch.C
+++ b/gcc/testsuite/g++.dg/cpp0x/lambda/lambda-switch.C
@@ -16,11 +16,11 @@ main ()
  break;// { dg-error "break" }
}
  };
- l = []()
+ l = []()  // { dg-warning "statement will never be 
executed" }
{
case 3: // { dg-error "case" }
  break;// { dg-error "break" }
-   };  // { dg-warning "statement will never be executed" }
+   };
}
 }
 }

base-commit: 37326651b439bac55d96fb5a43f4daf25e401eda
-- 
2.27.0



[pushed] c++: check constexpr constructor body

2021-11-14 Thread Jason Merrill via Gcc-patches
The implicit constexpr patch revealed that our checks for constexpr
constructors that could possibly produce a constant value (which
otherwise are IFNDR) were failing to look at most of the function body.
Fixing that required some library tweaks.

Tested x86_64-pc-linux-gnu, applying to trunk.

gcc/cp/ChangeLog:

* constexpr.c (maybe_save_constexpr_fundef): Also check whether the
body of a constructor is potentially constant.

libstdc++-v3/ChangeLog:

* src/c++17/memory_resource.cc: Add missing constexpr.
* include/experimental/internet: Only mark copy constructor
as constexpr with __cpp_constexpr_dynamic_alloc.

gcc/testsuite/ChangeLog:

* g++.dg/cpp1y/constexpr-89285-2.C: Expect error.
* g++.dg/cpp1y/constexpr-89285.C: Adjust error.
---
 gcc/cp/constexpr.c| 27 ++-
 .../g++.dg/cpp1y/constexpr-89285-2.C  |  2 +-
 gcc/testsuite/g++.dg/cpp1y/constexpr-89285.C  |  2 +-
 libstdc++-v3/src/c++17/memory_resource.cc |  2 +-
 libstdc++-v3/include/experimental/internet|  2 ++
 5 files changed, 25 insertions(+), 10 deletions(-)

diff --git a/gcc/cp/constexpr.c b/gcc/cp/constexpr.c
index 82a597d7bad..c92db5d413c 100644
--- a/gcc/cp/constexpr.c
+++ b/gcc/cp/constexpr.c
@@ -870,7 +870,9 @@ maybe_save_constexpr_fundef (tree fun)
   || (DECL_CLONED_FUNCTION_P (fun) && !DECL_DELETING_DESTRUCTOR_P (fun)))
 return;
 
-  if (!is_valid_constexpr_fn (fun, !DECL_GENERATED_P (fun)))
+  bool complain = !DECL_GENERATED_P (fun);
+
+  if (!is_valid_constexpr_fn (fun, complain))
 return;
 
   tree massaged = massage_constexpr_body (fun, DECL_SAVED_TREE (fun));
@@ -883,15 +885,26 @@ maybe_save_constexpr_fundef (tree fun)
 }
 
   bool potential = potential_rvalue_constant_expression (massaged);
-  if (!potential && !DECL_GENERATED_P (fun))
+  if (!potential && complain)
 require_potential_rvalue_constant_expression (massaged);
 
-  if (DECL_CONSTRUCTOR_P (fun)
-  && cx_check_missing_mem_inits (DECL_CONTEXT (fun),
-massaged, !DECL_GENERATED_P (fun)))
-potential = false;
+  if (DECL_CONSTRUCTOR_P (fun) && potential)
+{
+  if (cx_check_missing_mem_inits (DECL_CONTEXT (fun),
+ massaged, complain))
+   potential = false;
+  else if (cxx_dialect > cxx11)
+   {
+ /* What we got from massage_constexpr_body is pretty much just the
+ctor-initializer, also check the body.  */
+ massaged = DECL_SAVED_TREE (fun);
+ potential = potential_rvalue_constant_expression (massaged);
+ if (!potential && complain)
+   require_potential_rvalue_constant_expression (massaged);
+   }
+}
 
-  if (!potential && !DECL_GENERATED_P (fun))
+  if (!potential && complain)
 return;
 
   constexpr_fundef entry = {fun, NULL_TREE, NULL_TREE, NULL_TREE};
diff --git a/gcc/testsuite/g++.dg/cpp1y/constexpr-89285-2.C 
b/gcc/testsuite/g++.dg/cpp1y/constexpr-89285-2.C
index 656bc9cb7f1..ea44daa849e 100644
--- a/gcc/testsuite/g++.dg/cpp1y/constexpr-89285-2.C
+++ b/gcc/testsuite/g++.dg/cpp1y/constexpr-89285-2.C
@@ -10,7 +10,7 @@ struct B {
 int *c = >a;
 while (*c)
   c = reinterpret_cast((reinterpret_cast(c) + *c));
-*c = reinterpret_cast(this) - reinterpret_cast(c);
+*c = reinterpret_cast(this) - reinterpret_cast(c); // { 
dg-error "reinterpret_cast" }
   }
 };
 struct C : A {
diff --git a/gcc/testsuite/g++.dg/cpp1y/constexpr-89285.C 
b/gcc/testsuite/g++.dg/cpp1y/constexpr-89285.C
index 3809e1f7a9f..26aab9b6a50 100644
--- a/gcc/testsuite/g++.dg/cpp1y/constexpr-89285.C
+++ b/gcc/testsuite/g++.dg/cpp1y/constexpr-89285.C
@@ -17,4 +17,4 @@ struct C : A {
   B bar {this};
 };
 
-constexpr C foo {};// { dg-message "expansion of" }
+constexpr C foo {};// { dg-message "" }
diff --git a/libstdc++-v3/src/c++17/memory_resource.cc 
b/libstdc++-v3/src/c++17/memory_resource.cc
index 1ba79903f87..9fc3bb754c1 100644
--- a/libstdc++-v3/src/c++17/memory_resource.cc
+++ b/libstdc++-v3/src/c++17/memory_resource.cc
@@ -603,7 +603,7 @@ namespace pmr
 void* pointer = nullptr;
 aligned_size _M_size;
 
-size_t size() const noexcept
+constexpr size_t size() const noexcept
 {
   if (_M_size.value == size_t(-1)) [[unlikely]]
return size_t(-1);
diff --git a/libstdc++-v3/include/experimental/internet 
b/libstdc++-v3/include/experimental/internet
index 65c97de07d9..95b8cdc9963 100644
--- a/libstdc++-v3/include/experimental/internet
+++ b/libstdc++-v3/include/experimental/internet
@@ -460,7 +460,9 @@ namespace ip
 // constructors:
 constexpr address() noexcept : _M_v4(), _M_is_v4(true) { }
 
+#if __cpp_constexpr_dynamic_alloc
 constexpr
+#endif
 address(const address& __a) noexcept : _M_uninit(), _M_is_v4(__a._M_is_v4)
 {
   if (_M_is_v4)

base-commit: bd95d75f3412e1a7debab7c6c602ba409f274eb5
prerequisite-patch-id: 

[pushed] c++: is_this_parameter and coroutines proxies

2021-11-14 Thread Jason Merrill via Gcc-patches
Compiling coroutines/pr95736.C with the implicit constexpr patch broke
because is_this_parameter didn't recognize the coroutines proxy for 'this'.

Tested x86_64-pc-linux-gnu, applying to trunk.

gcc/cp/ChangeLog:

* semantics.c (is_this_parameter): Check DECL_HAS_VALUE_EXPR_P
instead of is_capture_proxy.
---
 gcc/cp/semantics.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/gcc/cp/semantics.c b/gcc/cp/semantics.c
index 60e0982cc48..15404426bce 100644
--- a/gcc/cp/semantics.c
+++ b/gcc/cp/semantics.c
@@ -11382,7 +11382,8 @@ is_this_parameter (tree t)
 {
   if (!DECL_P (t) || DECL_NAME (t) != this_identifier)
 return false;
-  gcc_assert (TREE_CODE (t) == PARM_DECL || is_capture_proxy (t)
+  gcc_assert (TREE_CODE (t) == PARM_DECL
+ || (TREE_CODE (t) == VAR_DECL && DECL_HAS_VALUE_EXPR_P (t))
  || (cp_binding_oracle && TREE_CODE (t) == VAR_DECL));
   return true;
 }

base-commit: bd95d75f3412e1a7debab7c6c602ba409f274eb5
-- 
2.27.0



[pushed] c++: c++20 constexpr default ctor and array init

2021-11-14 Thread Jason Merrill via Gcc-patches
The implicit constexpr patch revealed that marking the constructor in the
PR70690 testcase as constexpr made the bug reappear, because build_vec_init
assumed that a constexpr default constructor initialized the whole object,
so it was equivalent to value-initialization.  But this is no longer true in
C++20.

Tested x86_64-pc-linux-gnu, applying to trunk.

PR c++/70690

gcc/cp/ChangeLog:

* init.c (build_vec_init): Check default_init_uninitialized_part in
C++20.

gcc/testsuite/ChangeLog:

* g++.dg/init/array41a.C: New test.
---
 gcc/cp/init.c|  7 +--
 gcc/testsuite/g++.dg/init/array41a.C | 27 +++
 2 files changed, 32 insertions(+), 2 deletions(-)
 create mode 100644 gcc/testsuite/g++.dg/init/array41a.C

diff --git a/gcc/cp/init.c b/gcc/cp/init.c
index 771a19bc402..3ba2e3bbe04 100644
--- a/gcc/cp/init.c
+++ b/gcc/cp/init.c
@@ -4470,11 +4470,14 @@ build_vec_init (tree base, tree maxindex, tree init,
 
  We do need to keep going if we're copying an array.  */
 
-  if (try_const && !init)
+  if (try_const && !init
+  && (cxx_dialect < cxx20
+ || !default_init_uninitialized_part (inner_elt_type)))
 /* With a constexpr default constructor, which we checked for when
setting try_const above, default-initialization is equivalent to
value-initialization, and build_value_init gives us something more
-   friendly to maybe_constant_init.  */
+   friendly to maybe_constant_init.  Except in C++20 and up a constexpr
+   constructor need not initialize all the members.  */
 explicit_value_init_p = true;
   if (from_array
   || ((type_build_ctor_call (type) || init || explicit_value_init_p)
diff --git a/gcc/testsuite/g++.dg/init/array41a.C 
b/gcc/testsuite/g++.dg/init/array41a.C
new file mode 100644
index 000..aa9fdc629f2
--- /dev/null
+++ b/gcc/testsuite/g++.dg/init/array41a.C
@@ -0,0 +1,27 @@
+// PR c++/70690
+// { dg-do run { target c++11 } }
+
+struct A {
+  constexpr A() {}
+};
+
+struct APadded : public A {
+  char pad[63];
+};
+
+int f();
+int i = f();
+APadded cache[50];
+APadded *p = cache;
+
+int f()
+{
+  cache[0].pad[0] = 42;
+  return 1;
+}
+
+int main()
+{
+  if (cache[0].pad[0] != 42)
+__builtin_abort();
+}

base-commit: 4df7f8c79835d56928f51f9e674d326300936e8e
-- 
2.27.0



[pushed] c++: don't do constexpr folding in unevaluated context

2021-11-14 Thread Jason Merrill via Gcc-patches
The implicit constexpr patch revealed that we were doing constant evaluation
of arbitrary expressions in unevaluated contexts, leading to failure when we
tried to evaluate e.g. a call to declval.  This is wrong more generally;
only manifestly-constant-evaluated expressions should be evaluated within
an unevaluated operand.

Making this change revealed a case we were failing to mark as manifestly
constant-evaluated.

Tested x86_64-pc-linux-gnu, applying to trunk.

gcc/cp/ChangeLog:

* constexpr.c (maybe_constant_value): Don't evaluate
in an unevaluated operand unless manifestly const-evaluated.
(fold_non_dependent_expr_template): Likewise.
* decl.c (compute_array_index_type_loc): This context is
manifestly constant-evaluated.
---
 gcc/cp/constexpr.c | 7 +++
 gcc/cp/decl.c  | 2 +-
 2 files changed, 8 insertions(+), 1 deletion(-)

diff --git a/gcc/cp/constexpr.c b/gcc/cp/constexpr.c
index 7c27131f506..82a597d7bad 100644
--- a/gcc/cp/constexpr.c
+++ b/gcc/cp/constexpr.c
@@ -7696,6 +7696,10 @@ maybe_constant_value (tree t, tree decl, bool 
manifestly_const_eval)
   return r;
 }
 
+  /* Don't evaluate an unevaluated operand.  */
+  if (cp_unevaluated_operand)
+return t;
+
   uid_sensitive_constexpr_evaluation_checker c;
   r = cxx_eval_outermost_constant_expr (t, true, true, false, false, decl);
   gcc_checking_assert (r == t
@@ -7759,6 +7763,9 @@ fold_non_dependent_expr_template (tree t, tsubst_flags_t 
complain,
  return t;
}
 
+  if (cp_unevaluated_operand && !manifestly_const_eval)
+   return t;
+
   tree r = cxx_eval_outermost_constant_expr (t, true, true,
 manifestly_const_eval,
 false, object);
diff --git a/gcc/cp/decl.c b/gcc/cp/decl.c
index 947bbfc6637..eed478199ea 100644
--- a/gcc/cp/decl.c
+++ b/gcc/cp/decl.c
@@ -11000,7 +11000,7 @@ compute_array_index_type_loc (location_t name_loc, tree 
name, tree size,
cp_convert (ssizetype, integer_one_node,
complain),
complain);
-   itype = maybe_constant_value (itype);
+   itype = maybe_constant_value (itype, NULL_TREE, true);
   }
 
   if (!TREE_CONSTANT (itype))

base-commit: 267318a2857a42922c3ca033dac4690172b17683
-- 
2.27.0



[pushed] c++: constexpr virtual and vbase thunk

2021-11-14 Thread Jason Merrill via Gcc-patches
C++20 allows virtual functions to be constexpr.  I don't think that calling
through a pointer to a vbase subobject is supposed to work in a constant
expression, since an object with virtual bases can't be constant, but the
call shouldn't ICE.

Tested x86_64-pc-linux-gnu, applying to trunk.

gcc/cp/ChangeLog:

* constexpr.c (cxx_eval_thunk_call): Error instead of ICE
on vbase thunk to constexpr function.

gcc/testsuite/ChangeLog:

* g++.dg/cpp2a/constexpr-virtual20.C: New test.
---
 gcc/cp/constexpr.c| 15 -
 .../g++.dg/cpp2a/constexpr-virtual20.C| 22 +++
 2 files changed, 32 insertions(+), 5 deletions(-)
 create mode 100644 gcc/testsuite/g++.dg/cpp2a/constexpr-virtual20.C

diff --git a/gcc/cp/constexpr.c b/gcc/cp/constexpr.c
index 453007c686b..7c27131f506 100644
--- a/gcc/cp/constexpr.c
+++ b/gcc/cp/constexpr.c
@@ -2246,15 +2246,20 @@ cxx_eval_thunk_call (const constexpr_ctx *ctx, tree t, 
tree thunk_fndecl,
 {
   tree function = THUNK_TARGET (thunk_fndecl);
 
-  /* virtual_offset is only set in the presence of virtual bases, which make
- the class non-literal, so we don't need to handle it here.  */
   if (THUNK_VIRTUAL_OFFSET (thunk_fndecl))
 {
-  gcc_assert (!DECL_DECLARED_CONSTEXPR_P (function));
   if (!ctx->quiet)
{
- error ("call to non-%<constexpr%> function %qD", function);
- explain_invalid_constexpr_fn (function);
+ if (!DECL_DECLARED_CONSTEXPR_P (function))
+   {
+ error ("call to non-%<constexpr%> function %qD", function);
+ explain_invalid_constexpr_fn (function);
+   }
+ else
+   /* virtual_offset is only set for virtual bases, which make the
+  class non-literal, so we don't need to handle it here.  */
+   error ("calling constexpr member function %qD through virtual "
+  "base subobject", function);
}
   *non_constant_p = true;
   return t;
diff --git a/gcc/testsuite/g++.dg/cpp2a/constexpr-virtual20.C 
b/gcc/testsuite/g++.dg/cpp2a/constexpr-virtual20.C
new file mode 100644
index 000..3c411fa3fcc
--- /dev/null
+++ b/gcc/testsuite/g++.dg/cpp2a/constexpr-virtual20.C
@@ -0,0 +1,22 @@
+// Test for constexpr call through vbase thunk.
+// { dg-do compile { target c++20 } }
+
+class Rep {
+public:
+  constexpr virtual int foo() { return 1; }
+};
+
+class VBase {
+public:
+  constexpr virtual int foo() { return 2; }
+};
+
+class Main : public Rep, virtual public VBase {
+public:
+  constexpr virtual int foo() { return 5; }
+};
+
+int main() {
+  Main m;
+  static_assert(static_cast()->foo() == 5); // { dg-error "" }
+}

base-commit: adcfd2c45c3523d74279b5fcac1d7c6c34dd1382
-- 
2.27.0



Re: [PATCH 1/2] Add cumulative_args_t variants of TARGET_FUNCTION_ROUND_BOUNDARY and friends

2021-11-14 Thread Richard Biener via Gcc-patches
On Sat, Nov 13, 2021 at 10:43 AM Maxim Blinov  wrote:
>
> The two target hooks responsible for informing GCC about stack
> parameter alignment are `TARGET_FUNCTION_ARG_BOUNDARY` and
> `TARGET_FUNCTION_ARG_ROUND_BOUNDARY`, which currently only consider
> the tree and machine_mode of a specific given argument.
>
> Create two new target hooks suffixed with '_CA', and pass in a third
> `cumulative_args_t` parameter. This enables the backend to make
> alignment decisions based on the context of the whole function rather
> than individual parameters.
>
> The original machine_mode/tree type macros are not removed - they are
> called by the default implementations of `TARGET_...BOUNDARY_CA` and
> `TARGET_...ROUND_BOUNDARY_CA`. This is done with the intention of
> avoiding large mechanical modifications of nearly every backend in
> GCC. There is also a new flag, -fstack-use-cumulative-args, which
> provides a way to completely bypass the new `..._CA` macros. This
> feature is intended for debugging GCC itself.

Just two quick comments without looking at the patch.

Please do not introduce options in the user namespace -f... which are
for debugging only.  I think you should go without this part instead.

Second, you fail to motivate the change.  I cannot make sense of
"This enables the backend to make alignment decisions based on the
context of the whole function rather than individual parameters."

Richard.

>
> gcc/ChangeLog:
>
> * calls.c (initialize_argument_information): Pass `args_so_far`.
> * common.opt: New flag `-fstack-use-cumulative-args`.
> * config.gcc: No platforms currently use ..._CA-hooks: Set
> -fstack-use-cumulative-args to be off by default.
> * target.h (cumulative_args_t): Move declaration from here, to...
> * cumulative-args.h (cumulative_args_t): ...this new file. This is
> to permit backends to include the declaration of cumulative_args_t
> without dragging in circular dependencies.
> * function.c (assign_parm_find_entry_rtl): Provide
> cumulative_args_t to locate_and_pad_parm.
> (gimplify_parameters): Ditto.
> (locate_and_pad_parm): Conditionally call new hooks if we're
> invoked with -fstack-use-cumulative-args.
> * function.h: Include cumulative-args.h.
> (locate_and_pad_parm): Add cumulative_args_t parameter.
> * target.def (function_arg_boundary_ca): Add.
> (function_arg_round_boundary_ca): Ditto.
> * targhooks.c (default_function_arg_boundary_ca): Implement.
> (default_function_arg_round_boundary_ca): Ditto.
> * targhooks.h (default_function_arg_boundary_ca): Declare.
> (default_function_arg_round_boundary_ca): Ditto.
> * doc/invoke.texi (-fstack-use-cumulative-args): Document.
> * doc/tm.texi: Regenerate.
> * doc/tm.texi.in: Ditto.
> ---
>  gcc/calls.c   |  3 +++
>  gcc/common.opt|  4 
>  gcc/config.gcc|  7 +++
>  gcc/cumulative-args.h | 20 
>  gcc/doc/invoke.texi   | 12 
>  gcc/doc/tm.texi   | 20 
>  gcc/doc/tm.texi.in|  4 
>  gcc/function.c| 25 +
>  gcc/function.h|  2 ++
>  gcc/target.def| 24 
>  gcc/target.h  | 17 +
>  gcc/targhooks.c   | 16 
>  gcc/targhooks.h   |  6 ++
>  13 files changed, 140 insertions(+), 20 deletions(-)
>  create mode 100644 gcc/cumulative-args.h
>
> diff --git a/gcc/calls.c b/gcc/calls.c
> index 27b59f26ad3..cef612a6ef4 100644
> --- a/gcc/calls.c
> +++ b/gcc/calls.c
> @@ -1527,6 +1527,7 @@ initialize_argument_information (int num_actuals 
> ATTRIBUTE_UNUSED,
>  #endif
>  reg_parm_stack_space,
>  args[i].pass_on_stack ? 0 : args[i].partial,
> +args_so_far,
>  fndecl, args_size, [i].locate);
>  #ifdef BLOCK_REG_PADDING
>else
> @@ -4205,6 +4206,7 @@ emit_library_call_value_1 (int retval, rtx orgfun, rtx 
> value,
>argvec[count].reg != 0,
>  #endif
>reg_parm_stack_space, 0,
> +  args_so_far,
>NULL_TREE, _size, [count].locate);
>
>if (argvec[count].reg == 0 || argvec[count].partial != 0
> @@ -4296,6 +4298,7 @@ emit_library_call_value_1 (int retval, rtx orgfun, rtx 
> value,
>argvec[count].reg != 0,
>  #endif
>reg_parm_stack_space, argvec[count].partial,
> +  args_so_far,
>NULL_TREE, _size, [count].locate);
>   args_size.constant += argvec[count].locate.size.constant;
>   gcc_assert (!argvec[count].locate.size.var);
> diff --git a/gcc/common.opt b/gcc/common.opt
> index 

Re: [PATCH 2/6] Add returns_zero_on_success/failure attributes

2021-11-14 Thread Prathamesh Kulkarni via Gcc-patches
On Sun, 14 Nov 2021 at 02:07, David Malcolm via Gcc-patches
 wrote:
>
> This patch adds two new attributes.  The followup patch makes use of
> the attributes in -fanalyzer.
>
> gcc/c-family/ChangeLog:
> * c-attribs.c (attr_noreturn_exclusions): Add
> "returns_zero_on_failure" and "returns_zero_on_success".
> (attr_returns_twice_exclusions): Likewise.
> (attr_returns_zero_on_exclusions): New.
> (c_common_attribute_table): Add "returns_zero_on_failure" and
> "returns_zero_on_success".
> (handle_returns_zero_on_attributes): New.
>
> gcc/ChangeLog:
> * doc/extend.texi (Common Function Attributes): Document
> "returns_zero_on_failure" and "returns_zero_on_success".
>
> gcc/testsuite/ChangeLog:
> * c-c++-common/attr-returns-zero-on-1.c: New test.
>
> Signed-off-by: David Malcolm 
> ---
>  gcc/c-family/c-attribs.c  | 37 ++
>  gcc/doc/extend.texi   | 16 +
>  .../c-c++-common/attr-returns-zero-on-1.c | 68 +++
>  3 files changed, 121 insertions(+)
>  create mode 100644 gcc/testsuite/c-c++-common/attr-returns-zero-on-1.c
>
> diff --git a/gcc/c-family/c-attribs.c b/gcc/c-family/c-attribs.c
> index 100c2dabab2..9e03156de5e 100644
> --- a/gcc/c-family/c-attribs.c
> +++ b/gcc/c-family/c-attribs.c
> @@ -153,6 +153,7 @@ static tree handle_argspec_attribute (tree *, tree, tree, 
> int, bool *);
>  static tree handle_fnspec_attribute (tree *, tree, tree, int, bool *);
>  static tree handle_warn_unused_attribute (tree *, tree, tree, int, bool *);
>  static tree handle_returns_nonnull_attribute (tree *, tree, tree, int, bool 
> *);
> +static tree handle_returns_zero_on_attributes (tree *, tree, tree, int, bool 
> *);
>  static tree handle_omp_declare_simd_attribute (tree *, tree, tree, int,
>bool *);
>  static tree handle_omp_declare_variant_attribute (tree *, tree, tree, int,
> @@ -221,6 +222,8 @@ extern const struct attribute_spec::exclusions 
> attr_noreturn_exclusions[] =
>ATTR_EXCL ("pure", true, true, true),
>ATTR_EXCL ("returns_twice", true, true, true),
>ATTR_EXCL ("warn_unused_result", true, true, true),
> +  ATTR_EXCL ("returns_zero_on_failure", true, true, true),
> +  ATTR_EXCL ("returns_zero_on_success", true, true, true),
>ATTR_EXCL (NULL, false, false, false),
>  };
>
> @@ -235,6 +238,8 @@ attr_warn_unused_result_exclusions[] =
>  static const struct attribute_spec::exclusions 
> attr_returns_twice_exclusions[] =
>  {
>ATTR_EXCL ("noreturn", true, true, true),
> +  ATTR_EXCL ("returns_zero_on_failure", true, true, true),
> +  ATTR_EXCL ("returns_zero_on_success", true, true, true),
>ATTR_EXCL (NULL, false, false, false),
>  };
>
> @@ -275,6 +280,16 @@ static const struct attribute_spec::exclusions 
> attr_stack_protect_exclusions[] =
>ATTR_EXCL (NULL, false, false, false),
>  };
>
> +/* Exclusions that apply to the returns_zero_on_* attributes.  */
> +static const struct attribute_spec::exclusions
> +  attr_returns_zero_on_exclusions[] =
> +{
> +  ATTR_EXCL ("noreturn", true, true, true),
> +  ATTR_EXCL ("returns_twice", true, true, true),
> +  ATTR_EXCL ("returns_zero_on_failure", true, true, true),
> +  ATTR_EXCL ("returns_zero_on_success", true, true, true),
> +  ATTR_EXCL (NULL, false, false, false),
> +};
>
>  /* Table of machine-independent attributes common to all C-like languages.
>
> @@ -493,6 +508,12 @@ const struct attribute_spec c_common_attribute_table[] =
>   handle_warn_unused_attribute, NULL },
>{ "returns_nonnull",0, 0, false, true, true, false,
>   handle_returns_nonnull_attribute, NULL },
> +  { "returns_zero_on_failure",0, 0, false, true, true, false,
> + handle_returns_zero_on_attributes,
> + attr_returns_zero_on_exclusions },
> +  { "returns_zero_on_success",0, 0, false, true, true, false,
> + handle_returns_zero_on_attributes,
> + attr_returns_zero_on_exclusions },
>{ "omp declare simd",   0, -1, true,  false, false, false,
>   handle_omp_declare_simd_attribute, NULL },
>{ "omp declare variant base", 0, -1, true,  false, false, false,
> @@ -5660,6 +5681,22 @@ handle_returns_nonnull_attribute (tree *node, tree 
> name, tree, int,
>return NULL_TREE;
>  }
>
> +/* Handle "returns_zero_on_failure" and "returns_zero_on_success" attributes;
> +   arguments as in struct attribute_spec.handler.  */
> +
> +static tree
> +handle_returns_zero_on_attributes (tree *node, tree name, tree, int,
> +  bool *no_add_attrs)
> +{
> +  if (!INTEGRAL_TYPE_P (TREE_TYPE (*node)))
> +{
> +  error ("%qE attribute on a function not returning an integral type",
> +name);
> +  *no_add_attrs = true;
> +}
> +  

[Committed] gcc.dg/uninit-pred-9_b.c: Correct last adjustment for cris-elf

2021-11-14 Thread Hans-Peter Nilsson via Gcc-patches
The change at r12-4790 should have done the same change for
CRIS as was done for powerpc64*-*-*.  (Probably MMIX too but
that may have to wait until the next weekend.)

Committed.

gcc/testsuite:
* gcc.dg/uninit-pred-9_b.c: Correct last adjustment, for CRIS.
---
 gcc/testsuite/gcc.dg/uninit-pred-9_b.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/gcc/testsuite/gcc.dg/uninit-pred-9_b.c 
b/gcc/testsuite/gcc.dg/uninit-pred-9_b.c
index 552ab8b4ff4e..53c4a5399eaa 100644
--- a/gcc/testsuite/gcc.dg/uninit-pred-9_b.c
+++ b/gcc/testsuite/gcc.dg/uninit-pred-9_b.c
@@ -17,10 +17,10 @@ int foo (int n, int l, int m, int r)
 
   if (l > 100)
 if ( (n <= 9) &&  (m < 100)  && (r < 19) )
-  blah(v); /* { dg-bogus "uninitialized" "bogus warning" { xfail 
powerpc64*-*-* } } */
+  blah(v); /* { dg-bogus "uninitialized" "bogus warning" { xfail 
powerpc64*-*-* cris-*-* } } */
 
   if ( (n <= 8) &&  (m < 99)  && (r < 19) )
-  blah(v); /* { dg-bogus "uninitialized" "pr101674" { xfail mmix-*-* 
cris-*-* } } */
+  blah(v); /* { dg-bogus "uninitialized" "pr101674" { xfail mmix-*-* } } */
 
   return 0;
 }
-- 
2.11.0

brgds, H-P


Re: [PATCH] Enhance unordered container merge

2021-11-14 Thread François Dumont via Gcc-patches

On 15/11/21 12:25 am, Jonathan Wakely wrote:

On Sun, 14 Nov 2021 at 13:31, François Dumont via Libstdc++
 wrote:

  libstdc++: Unordered containers merge re-use hash code.

  When merging between 2 unordered containers with same hasher we can
re-use
  the cached hash code if any.

Instead of introducing the _ReuseOrComputeHash type, wouldn't it be
simpler to just overload _M_hash_code?


 // Same hash function, use the cached hash code.
 __hash_code
 _M_hash_code(const _Hash&,
 const _Hash_node_value<_Value, true>& __n) const
 { return __n._M_hash_code; }

   // Compute hash code using a different hash function, _H2
   template
__hash_code
_M_hash_code(const _H2&,
const _Hash_node_value<_Value, __cache_hash_code>& __n) const
{ return this->_M_hash_code(_ExtractKey{}(__n._M_v())); }

The first overload is more specialized, so will be chosen when the
first argument is the same type as _Hash and the __cache_hash_code
boolean is true.


Yes, it is simpler.

Ok to commit ?

François


diff --git a/libstdc++-v3/include/bits/hashtable.h b/libstdc++-v3/include/bits/hashtable.h
index 0e949d73614..6e2d4c10cfe 100644
--- a/libstdc++-v3/include/bits/hashtable.h
+++ b/libstdc++-v3/include/bits/hashtable.h
@@ -1076,7 +1076,8 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
 	{
 	  auto __pos = __i++;
 	  const key_type& __k = _ExtractKey{}(*__pos);
-	  __hash_code __code = this->_M_hash_code(__k);
+	  __hash_code __code
+		= this->_M_hash_code(__src.hash_function(), *__pos._M_cur);
 	  size_type __bkt = _M_bucket_index(__code);
 	  if (_M_find_node(__bkt, __k, __code) == nullptr)
 		{
@@ -1099,14 +1100,15 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
 	  node_type>, "Node types are compatible");
 	  __glibcxx_assert(get_allocator() == __src.get_allocator());
 
+	  __node_ptr __hint = nullptr;
 	  this->reserve(size() + __src.size());
 	  for (auto __i = __src.cbegin(), __end = __src.cend(); __i != __end;)
 	{
 	  auto __pos = __i++;
-	  const key_type& __k = _ExtractKey{}(*__pos);
-	  __hash_code __code = this->_M_hash_code(__k);
+	  __hash_code __code
+		= this->_M_hash_code(__src.hash_function(), *__pos._M_cur);
 	  auto __nh = __src.extract(__pos);
-	  _M_insert_multi_node(nullptr, __code, __nh._M_ptr);
+	  __hint = _M_insert_multi_node(__hint, __code, __nh._M_ptr)._M_cur;
 	  __nh._M_ptr = nullptr;
 	}
 	}
diff --git a/libstdc++-v3/include/bits/hashtable_policy.h b/libstdc++-v3/include/bits/hashtable_policy.h
index c0295b75963..0b5443fc187 100644
--- a/libstdc++-v3/include/bits/hashtable_policy.h
+++ b/libstdc++-v3/include/bits/hashtable_policy.h
@@ -1250,6 +1250,19 @@ namespace __detail
 	  return _M_hash()(__k);
 	}
 
+  __hash_code
+  _M_hash_code(const _Hash&,
+		   const _Hash_node_value<_Value, true>& __n) const
+  { return __n._M_hash_code; }
+
+  // Compute hash code using _Hash as __n _M_hash_code, if present, was
+  // computed using _H2.
+  template
+	__hash_code
+	_M_hash_code(const _H2&,
+		const _Hash_node_value<_Value, __cache_hash_code>& __n) const
+	{ return _M_hash_code(_ExtractKey{}(__n._M_v())); }
+
   std::size_t
   _M_bucket_index(__hash_code __c, std::size_t __bkt_count) const
   { return _RangeHash{}(__c, __bkt_count); }
diff --git a/libstdc++-v3/testsuite/23_containers/unordered_multiset/modifiers/merge.cc b/libstdc++-v3/testsuite/23_containers/unordered_multiset/modifiers/merge.cc
index 1ed2ce234a1..07b8a344169 100644
--- a/libstdc++-v3/testsuite/23_containers/unordered_multiset/modifiers/merge.cc
+++ b/libstdc++-v3/testsuite/23_containers/unordered_multiset/modifiers/merge.cc
@@ -17,6 +17,7 @@
 
 // { dg-do run { target c++17 } }
 
+#include 
 #include 
 #include 
 #include 
@@ -105,6 +106,26 @@ test04()
   VERIFY( c2.empty() );
 }
 
+void
+test05()
+{
+  const std::unordered_multiset c0{ "abcd", "abcd", "efgh", "efgh", "ijkl", "ijkl" };
+  std::unordered_multiset c1 = c0;
+  std::unordered_set c2( c0.begin(), c0.end() );
+
+  c1.merge(c2);
+  VERIFY( c1.size() == (1.5 * c0.size()) );
+  for (auto& i : c1)
+VERIFY( c1.count(i) == (1.5 * c0.count(i)) );
+  VERIFY( c2.empty() );
+
+  c1.clear();
+  c2.insert( c0.begin(), c0.end() );
+  c1.merge(std::move(c2));
+  VERIFY( c1.size() == (0.5 * c0.size()) );
+  VERIFY( c2.empty() );
+}
+
 int
 main()
 {
@@ -112,4 +133,5 @@ main()
   test02();
   test03();
   test04();
+  test05();
 }
diff --git a/libstdc++-v3/testsuite/23_containers/unordered_set/modifiers/merge.cc b/libstdc++-v3/testsuite/23_containers/unordered_set/modifiers/merge.cc
index c9c8a60fd54..0e184b10c60 100644
--- a/libstdc++-v3/testsuite/23_containers/unordered_set/modifiers/merge.cc
+++ b/libstdc++-v3/testsuite/23_containers/unordered_set/modifiers/merge.cc
@@ -17,6 +17,7 @@
 
 // { dg-do run { target c++17 } }
 
+#include 
 #include 
 #include 
 #include 
@@ -125,10 +126,52 @@ test03()
   VERIFY( c2.empty() );
 }
 
+void
+test04()
+{
+  

[committed] VAX: Implement the `-mlra' command-line option

2021-11-14 Thread Maciej W. Rozycki
Add the `-mlra' command-line option for the VAX target, with the 
usual semantics of enabling Local Register Allocation, off by default.

LRA remains unstable with the VAX target, with numerous ICEs throughout 
the testsuite and worse code produced overall where successful, however 
the presence of a command line option to enable it makes it easier to 
experiment with it as the compiler does not have to be rebuilt to flip 
between the old reload and LRA.

gcc/
* config/vax/vax.c (vax_lra_p): New prototype and function.
(TARGET_LRA_P): Wire it.
* config/vax/vax.opt (mlra): New option.
* doc/invoke.texi (Option Summary, VAX Options): Document the 
new option.
---
 gcc/config/vax/vax.c   |   11 ++-
 gcc/config/vax/vax.opt |4 
 gcc/doc/invoke.texi|9 -
 3 files changed, 22 insertions(+), 2 deletions(-)

gcc-vax-mlra.diff
Index: gcc/gcc/config/vax/vax.c
===
--- gcc.orig/gcc/config/vax/vax.c
+++ gcc/gcc/config/vax/vax.c
@@ -62,6 +62,7 @@ static rtx vax_function_arg (cumulative_
 static void vax_function_arg_advance (cumulative_args_t,
  const function_arg_info &);
 static rtx vax_struct_value_rtx (tree, int);
+static bool vax_lra_p (void);
 static void vax_asm_trampoline_template (FILE *);
 static void vax_trampoline_init (rtx, tree, rtx);
 static poly_int64 vax_return_pops_args (tree, tree, poly_int64);
@@ -114,7 +115,7 @@ static HOST_WIDE_INT vax_starting_frame_
 #define TARGET_STRUCT_VALUE_RTX vax_struct_value_rtx
 
 #undef TARGET_LRA_P
-#define TARGET_LRA_P hook_bool_void_false
+#define TARGET_LRA_P vax_lra_p
 
 #undef TARGET_LEGITIMATE_ADDRESS_P
 #define TARGET_LEGITIMATE_ADDRESS_P vax_legitimate_address_p
@@ -1221,6 +1222,14 @@ vax_struct_value_rtx (tree fntype ATTRIB
   return gen_rtx_REG (Pmode, VAX_STRUCT_VALUE_REGNUM);
 }
 
+/* Return true if we use LRA instead of reload pass.  */
+
+static bool
+vax_lra_p (void)
+{
+  return TARGET_LRA;
+}
+
 /* Output integer move instructions.  */
 
 bool
Index: gcc/gcc/config/vax/vax.opt
===
--- gcc.orig/gcc/config/vax/vax.opt
+++ gcc/gcc/config/vax/vax.opt
@@ -42,6 +42,10 @@ munix
 Target RejectNegative Mask(UNIX_ASM)
 Generate code for UNIX assembler.
 
+mlra
+Target Mask(LRA)
+Enable Local Register Allocation.
+
 mvaxc-alignment
 Target RejectNegative Mask(VAXC_ALIGNMENT)
 Use VAXC structure conventions.
Index: gcc/gcc/doc/invoke.texi
===
--- gcc.orig/gcc/doc/invoke.texi
+++ gcc/gcc/doc/invoke.texi
@@ -1364,7 +1364,7 @@ See RS/6000 and PowerPC Options.
 -mbig-switch}
 
 @emph{VAX Options}
-@gccoptlist{-mg  -mgnu  -munix}
+@gccoptlist{-mg  -mgnu  -munix  -mlra}
 
 @emph{Visium Options}
 @gccoptlist{-mdebug  -msim  -mfpu  -mno-fpu  -mhard-float  -msoft-float @gol
@@ -30523,6 +30523,13 @@ GNU assembler is being used.
 @item -mg
 @opindex mg
 Output code for G-format floating-point numbers instead of D-format.
+
+@item -mlra
+@itemx -mno-lra
+@opindex mlra
+@opindex mno-lra
+Enable Local Register Allocation.  This is still experimental for the VAX,
+so by default the compiler uses standard reload.
 @end table
 
 @node Visium Options


Re: [PATCH v2] Check optab before transforming atomic bit test and operations

2021-11-14 Thread Hongtao Liu via Gcc-patches
On Mon, Nov 15, 2021 at 9:37 AM Hongtao Liu  wrote:
>
> On Sat, Nov 13, 2021 at 2:21 AM H.J. Lu via Gcc-patches
>  wrote:
> >
> > On Fri, Nov 12, 2021 at 8:13 AM Jakub Jelinek  wrote:
> > >
> > > On Fri, Nov 12, 2021 at 07:55:26AM -0800, H.J. Lu wrote:
> > > > > I have following patch queued for testing for this...
> > > > >
> > > > > 2021-11-12  Jakub Jelinek  
> > > > >
> > > > > PR target/103205
> > > > > * config/i386/sync.md (atomic_bit_test_and_set,
> > > > > atomic_bit_test_and_complement,
> > > > > atomic_bit_test_and_reset): Use OPTAB_WIDEN instead of
> > > > > OPTAB_DIRECT.
> > > > >
> > > > > * gcc.target/i386/pr103205.c: New test.
> > > >
> > > > Can you include my tests?  Or you can leave out your test and I can 
> > > > check
> > > > in my tests after your fix has been checked in.
> > >
> > > I'd prefer the latter.
> > >
> >
> > Here is the v2 patch on top of yours.
> >
> > --
> > H.J.
>
> It looks like there are many `return;` statements in the if (!bit) branch;
> those returns should never be executed since they're already guarded by
> gimple_nop_atomic_bit_test_and_p, so we'd better also remove those
> returns.
It looks like those returns are used for the (rhs_code == BIT_AND_EXPR) case,
which doesn't change gimple beforehand, so never mind.
>
> --
> BR,
> Hongtao



-- 
BR,
Hongtao


Re: [PATCH v2] Check optab before transforming atomic bit test and operations

2021-11-14 Thread Hongtao Liu via Gcc-patches
On Sat, Nov 13, 2021 at 2:21 AM H.J. Lu via Gcc-patches
 wrote:
>
> On Fri, Nov 12, 2021 at 8:13 AM Jakub Jelinek  wrote:
> >
> > On Fri, Nov 12, 2021 at 07:55:26AM -0800, H.J. Lu wrote:
> > > > I have following patch queued for testing for this...
> > > >
> > > > 2021-11-12  Jakub Jelinek  
> > > >
> > > > PR target/103205
> > > > * config/i386/sync.md (atomic_bit_test_and_set,
> > > > atomic_bit_test_and_complement,
> > > > atomic_bit_test_and_reset): Use OPTAB_WIDEN instead of
> > > > OPTAB_DIRECT.
> > > >
> > > > * gcc.target/i386/pr103205.c: New test.
> > >
> > > Can you include my tests?  Or you can leave out your test and I can check
> > > in my tests after your fix has been checked in.
> >
> > I'd prefer the latter.
> >
>
> Here is the v2 patch on top of yours.
>
> --
> H.J.

It looks like there are many `return;` statements in the if (!bit) branch;
those returns should never be executed since they're already guarded by
gimple_nop_atomic_bit_test_and_p, so we'd better also remove those
returns.

-- 
BR,
Hongtao


[PATCH v5] c++: Add gnu::diagnose_as attribute

2021-11-14 Thread Matthias Kretz
Sorry for taking so long. I hope we can still get this done for GCC 12.

One open question: If we change std::__cxx11::basic_string to 
std::string with this feature, should DWARF strings change or not? I.e. should 
diagnose_as be conditional on (pp->flags & pp_c_flag_gnu_v3)? If these strings 
are only for user consumption, I think the DWARF strings should be affected by 
the attribute...

Oh, and note that the current patch depends on the "c++: Print function 
template parms when relevant" patch I sent on Nov 8th.

On Wednesday, 8 September 2021 04:21:51 CEST Jason Merrill wrote:
> On 7/23/21 4:58 AM, Matthias Kretz wrote:
> > gcc/cp/ChangeLog:
> >  PR c++/89370
> >  * cp-tree.h: Add is_alias_template_p declaration.
> >  * decl2.c (is_alias_template_p): New function. Determines
> >  whether a given TYPE_DECL is actually an alias template that is
> >  still missing its template_info.
> 
> I still think you want to share code with get_underlying_template.  For
> the case where the alias doesn't have DECL_TEMPLATE_INFO yet, you can
> compare to current_template_args ().  Or you could do some initial
> processing that doesn't care about templates in the handler, and then do
> more in cp_parser_alias_declaration after the call to grokfield/start_decl.

I still don't understand how I could make use of get_underlying_template. I.e. 
I don't even understand how get_underlying_template answers any of the 
questions I need answered. I used way too much time trying to make this 
work...
 
> If you still think you need this function, let's call it
> is_renaming_alias_template or renaming_alias_template_p; using both is_
> and _p is redundant.  I don't have a strong preference which.

OK.
 
> >  (is_late_template_attribute): Decls with diagnose_as attribute
> >  are early attributes only if they are alias templates.
> 
> Is there a reason not to apply it early to other templates as well?

Unconditionally returning false for diagnose_as in is_late_template_attribute 
makes renamed class templates print without template parameter list. E.g.

  template  struct [[diagnose_as("foo")]] A;
  using bar [[diagnose_as]] = A;

  template  struct A {
template  struct B {};
using C [[diagnose_as]] = B;
  };

could query for attributes. So IIUC, member types of class templates require 
late attributes.

> >  * error.c (dump_scope): When printing the name of a namespace,
> >  look for the diagnose_as attribute. If found, print the
> >  associated string instead of calling dump_decl.
> 
> Did you decide not to handle this in dump_decl, so we use the
> diagnose_as when referring to the namespace in non-scope contexts as well?

Good question. dump_decl is the more general place for handling the attribute 
and that's where I moved it to.

> > +  if (flag_diagnostics_use_aliases)
> > +{
> > +  tree attr = lookup_attribute ("diagnose_as", DECL_ATTRIBUTES
> > (decl)); +  if (attr && TREE_VALUE (attr))
> > +   {
> > + pp_cxx_ws_string (
> > +   pp, TREE_STRING_POINTER (TREE_VALUE (TREE_VALUE (attr;
> 
> This pattern is used several places outside this function; can we factor
> it into something like
> 
> if (maybe_print_diagnose_as (special))
>/* OK */;

Yes, I added the functions lookup_diagnose_as_attribute and 
dump_diagnose_as_alias to remove code duplication.

> Missing space before (

OK. I think I found and fixed all of them.

> > + if (tmplate)
> > +   TREE_VALUE (*parms) = make_tree_vec (0);
> 
> This could use a comment.

Added.

> >  (dump_aggr_type): If the type has a diagnose_as attribute, print
> >  the associated string instead of printing the original type
> >  name. Print template parms only if the attribute was not applied
> >  to the instantiation / full specialization. Delay call to
> >  dump_scope until the diagnose_as attribute is found. If the
> >  attribute has a second argument, use it to override the context
> >  passed to dump_scope.
> > 
> > + for (int i = 0; i < NUM_TMPL_ARGS (args); ++i)
> > +   {
> > + tree arg = TREE_VEC_ELT (args, i);
> > + while (INDIRECT_TYPE_P (arg))
> > +   arg = TREE_TYPE (arg);
> > + if (WILDCARD_TYPE_P (arg))
> > +   {
> > + tmplate = true;
> > + break;
> > +   }
> > +   }
> 
> I think you want any_dependent_template_args_p (args)

Yes, except that I need `++processing_template_decl` before calling it (and 
decrement after it, of course). Is that acceptable?

> Checking WILDCARD_TYPE_P is generally not what you want; plenty of
> dependent types don't show up specifically as wildcards.  T*, for instance.

Right, that's why I had `while (INDIRECT_TYPE_P (arg)) arg = TREE_TYPE 
(arg);` before the wildcard test. But I 

[PATCH] PR tree-optimization/103216: optimize some A ? (b op CST) : b into b op (A?CST:CST2)

2021-11-14 Thread apinski--- via Gcc-patches
From: Andrew Pinski 

For this PR, we have:
  if (d_5 < 0)
goto ; [INV]
  else
goto ; [INV]

   :
  v_7 = c_4 | -128;

   :
  # v_1 = PHI 

Which PHI-OPT will try to simplify
"(d_5 < 0) ? (c_4 | -128) : c_4" which is not handled currently.
This adds a few patterns which allows to try to see if (a ? CST : CST1)
where CST1 is either 0, 1 or -1 depending on the operator.
Note to optimize this case always, we should check to make sure that
the a?CST:CST1 gets simplified to not include the conditional expression.
The ! flag does not work as we want to have more simplifications than just
when we simplify it to a leaf node (SSA_NAME or CONSTANT). This adds a new
flag ^ to genmatch which says the simplification should happen but not down
to the same kind of node.
We could allow this for !GIMPLE and use fold_* rather than fold_buildN but I
didn't see any use of it for now.

Also all of these patterns need to be done late as other optimizations can be
done without them.

OK? Bootstrapped and tested on x86_64 with no regressions.

gcc/ChangeLog:

* doc/match-and-simplify.texi: Document ^ flag.
* genmatch.c (expr::expr): Add Setting of force_simplify.
(expr): Add force_simplify field.
(expr::gen_transform): Add support for force_simplify field.
(parser::parse_expr): Add parsing of ^ flag for the expr.
* match.pd: New patterns to optimize "a ? (b op CST) : b".
---
 gcc/doc/match-and-simplify.texi | 16 +
 gcc/genmatch.c  | 35 ++--
 gcc/match.pd| 41 +
 3 files changed, 90 insertions(+), 2 deletions(-)

diff --git a/gcc/doc/match-and-simplify.texi b/gcc/doc/match-and-simplify.texi
index e7e5a4f7299..4e3407c0263 100644
--- a/gcc/doc/match-and-simplify.texi
+++ b/gcc/doc/match-and-simplify.texi
@@ -377,6 +377,22 @@ of the @code{vec_cond} expression but only if the actual 
plus
 operations both simplify.  Note this is currently only supported
 for code generation targeting @code{GIMPLE}.
 
+Another modifier for generated expressions is @code{^} which
+tells the machinery to only consider the simplification in case
+the marked expression simplified away from the original code.
+Consider for example
+
+@smallexample
+(simplify
+ (cond @@0 (plus:s @@1 INTEGER_CST@@2) @@1)
+ (plus @@1 (cond^ @@0 @@2 @{ build_zero_cst (type); @})))
+@end smallexample
+
+which moves the inner @code{plus} operation to the outside of the
+@code{cond} expression but only if the actual cond operation simplifies
+away from cond.  Note this is currently only supported for code
+generation targeting @code{GIMPLE}.
+
 As intermediate conversions are often optional there is a way to
 avoid the need to repeat patterns both with and without such
 conversions.  Namely you can mark a conversion as being optional
diff --git a/gcc/genmatch.c b/gcc/genmatch.c
index 95248455ec5..2dca1141df6 100644
--- a/gcc/genmatch.c
+++ b/gcc/genmatch.c
@@ -698,12 +698,13 @@ public:
 : operand (OP_EXPR, loc), operation (operation_),
   ops (vNULL), expr_type (NULL), is_commutative (is_commutative_),
   is_generic (false), force_single_use (false), force_leaf (false),
-  opt_grp (0) {}
+  force_simplify(false), opt_grp (0) {}
   expr (expr *e)
 : operand (OP_EXPR, e->location), operation (e->operation),
   ops (vNULL), expr_type (e->expr_type), is_commutative 
(e->is_commutative),
   is_generic (e->is_generic), force_single_use (e->force_single_use),
-  force_leaf (e->force_leaf), opt_grp (e->opt_grp) {}
+  force_leaf (e->force_leaf), force_simplify(e->force_simplify),
+  opt_grp (e->opt_grp) {}
   void append_op (operand *op) { ops.safe_push (op); }
   /* The operator and its operands.  */
   id_base *operation;
@@ -721,6 +722,9 @@ public:
   /* Whether in the result expression this should be a leaf node
  with any children simplified down to simple operands.  */
   bool force_leaf;
+  /* Whether in the result expression this should be a node
+ with any children simplified down not to use the original operator.  */
+  bool force_simplify;
   /* If non-zero, the group for optional handling.  */
   unsigned char opt_grp;
   virtual void gen_transform (FILE *f, int, const char *, bool, int,
@@ -2527,6 +2531,17 @@ expr::gen_transform (FILE *f, int indent, const char 
*dest, bool gimple,
fprintf (f, ", _o%d[%u]", depth, i);
   fprintf (f, ");\n");
   fprintf_indent (f, indent, "tem_op.resimplify (lseq, valueize);\n");
+  if (force_simplify)
+   {
+ fprintf_indent (f, indent, "if (tem_op.code.is_tree_code ())\n");
+ fprintf_indent (f, indent, "  {\n");
+ indent+=4;
+ fprintf_indent (f, indent, "if (((tree_code)tem_op.code) == %s)\n",
+ opr_name);
+ fprintf_indent (f, indent, "  goto %s;\n", fail_label);
+ indent-=4;
+ fprintf_indent (f, indent, "  }\n");
+   }
   

[PATCH] [Committed] Move some testcases to torture from tree-ssa

2021-11-14 Thread apinski--- via Gcc-patches
From: Andrew Pinski 

While writing up some testcases, I noticed some newer testcases
just had "dg-do compile/run" on them with dg-options of either -O1
or -O2. Since it is always better to run them over all optimization
levels I put them in gcc.c-torture/compile or gcc.c-torture/execute.

Committed after testing to make sure the testcases pass.

gcc/testsuite/ChangeLog:

* gcc.dg/tree-ssa/pr100278.c: Move to ...
* gcc.c-torture/compile/pr100278.c: Here.
Remove dg-do and dg-options.
* gcc.dg/tree-ssa/pr101189.c: Move to ...
* gcc.c-torture/compile/pr101189.c: Here.
Remove dg-do and dg-options.
* gcc.dg/tree-ssa/pr100453.c: Move to ...
* gcc.c-torture/execute/pr100453.c: Here.
Remove dg-do and dg-options.
* gcc.dg/tree-ssa/pr101335.c: Move to ...
* gcc.c-torture/execute/pr101335.c: Here
Remove dg-do and dg-options.
---
 gcc/testsuite/gcc.c-torture/compile/pr100278.c | 15 +++
 gcc/testsuite/gcc.c-torture/compile/pr101189.c | 15 +++
 gcc/testsuite/gcc.c-torture/execute/pr100453.c | 16 
 gcc/testsuite/gcc.c-torture/execute/pr101335.c | 15 +++
 gcc/testsuite/gcc.dg/tree-ssa/pr100278.c   | 17 -
 gcc/testsuite/gcc.dg/tree-ssa/pr100453.c   | 18 --
 gcc/testsuite/gcc.dg/tree-ssa/pr101189.c   | 17 -
 gcc/testsuite/gcc.dg/tree-ssa/pr101335.c   | 17 -
 8 files changed, 61 insertions(+), 69 deletions(-)
 create mode 100644 gcc/testsuite/gcc.c-torture/compile/pr100278.c
 create mode 100644 gcc/testsuite/gcc.c-torture/compile/pr101189.c
 create mode 100644 gcc/testsuite/gcc.c-torture/execute/pr100453.c
 create mode 100644 gcc/testsuite/gcc.c-torture/execute/pr101335.c
 delete mode 100644 gcc/testsuite/gcc.dg/tree-ssa/pr100278.c
 delete mode 100644 gcc/testsuite/gcc.dg/tree-ssa/pr100453.c
 delete mode 100644 gcc/testsuite/gcc.dg/tree-ssa/pr101189.c
 delete mode 100644 gcc/testsuite/gcc.dg/tree-ssa/pr101335.c

diff --git a/gcc/testsuite/gcc.c-torture/compile/pr100278.c 
b/gcc/testsuite/gcc.c-torture/compile/pr100278.c
new file mode 100644
index 000..4631080
--- /dev/null
+++ b/gcc/testsuite/gcc.c-torture/compile/pr100278.c
@@ -0,0 +1,15 @@
+
+void a()
+{
+#if defined __s390__
+  register int b asm("r5");
+#elif defined __x86_64__
+  register int b asm("eax");
+#else
+  volatile int b;
+#endif
+  if (b)
+b = 1;
+  for (; b;)
+;
+}
diff --git a/gcc/testsuite/gcc.c-torture/compile/pr101189.c 
b/gcc/testsuite/gcc.c-torture/compile/pr101189.c
new file mode 100644
index 000..ad83d32
--- /dev/null
+++ b/gcc/testsuite/gcc.c-torture/compile/pr101189.c
@@ -0,0 +1,15 @@
+/* PR tree-optimization/101189  */
+
+static int a, b;
+int main() {
+  int d = 0, e, f = 5;
+  if (a)
+f = 0;
+  for (; f < 4; f++)
+;
+  e = f ^ -f;
+  e && d;
+  if (!e)
+e || b;
+  return 0;
+}
diff --git a/gcc/testsuite/gcc.c-torture/execute/pr100453.c 
b/gcc/testsuite/gcc.c-torture/execute/pr100453.c
new file mode 100644
index 000..853a892
--- /dev/null
+++ b/gcc/testsuite/gcc.c-torture/execute/pr100453.c
@@ -0,0 +1,16 @@
+
+struct a {
+  int b : 4;
+} d;
+static int c, e;
+static const struct a f;
+static void g(const struct a h) {
+  for (; c < 1; c++)
+d = h;
+  e = h.b;
+  c = h.b;
+}
+int main() {
+  g(f);
+  return 0;
+}
diff --git a/gcc/testsuite/gcc.c-torture/execute/pr101335.c 
b/gcc/testsuite/gcc.c-torture/execute/pr101335.c
new file mode 100644
index 000..45a399f
--- /dev/null
+++ b/gcc/testsuite/gcc.c-torture/execute/pr101335.c
@@ -0,0 +1,15 @@
+
+unsigned a = 0x;
+int b;
+int main()
+{
+  int c = ~a;
+  unsigned d = c - 10;
+  if (d > c)
+c = 20;
+  b = -(c | 0);
+  if (b > -8)
+__builtin_abort ();
+  return 0;
+}
+
diff --git a/gcc/testsuite/gcc.dg/tree-ssa/pr100278.c 
b/gcc/testsuite/gcc.dg/tree-ssa/pr100278.c
deleted file mode 100644
index 8d70228..000
--- a/gcc/testsuite/gcc.dg/tree-ssa/pr100278.c
+++ /dev/null
@@ -1,17 +0,0 @@
-/* { dg-do compile } */
-/* { dg-options "-O2" } */
-
-void a()
-{
-#if defined __s390__
-  register int b asm("r5");
-#elif defined __x86_64__
-  register int b asm("eax");
-#else
-  volatile int b;
-#endif
-  if (b)
-b = 1;
-  for (; b;)
-;
-}
diff --git a/gcc/testsuite/gcc.dg/tree-ssa/pr100453.c 
b/gcc/testsuite/gcc.dg/tree-ssa/pr100453.c
deleted file mode 100644
index 0cf0ad2..000
--- a/gcc/testsuite/gcc.dg/tree-ssa/pr100453.c
+++ /dev/null
@@ -1,18 +0,0 @@
-/* { dg-do run } */
-/* { dg-options "-O1" } */
-
-struct a {
-  int b : 4;
-} d;
-static int c, e;
-static const struct a f;
-static void g(const struct a h) {
-  for (; c < 1; c++)
-d = h;
-  e = h.b;
-  c = h.b;
-}
-int main() {
-  g(f);
-  return 0;
-}
diff --git a/gcc/testsuite/gcc.dg/tree-ssa/pr101189.c 
b/gcc/testsuite/gcc.dg/tree-ssa/pr101189.c
deleted file mode 100644
index 5730708..000
--- a/gcc/testsuite/gcc.dg/tree-ssa/pr101189.c
+++ /dev/null
@@ 

Re: [PATCH] Enhance unordered container merge

2021-11-14 Thread Jonathan Wakely via Gcc-patches
On Sun, 14 Nov 2021 at 13:31, François Dumont via Libstdc++
 wrote:
>
>  libstdc++: Unordered containers merge re-use hash code.
>
>  When merging between 2 unordered containers with same hasher we can
> re-use
>  the cached hash code if any.

Instead of introducing the _ReuseOrComputeHash type, wouldn't it be
simpler to just overload _M_hash_code?


// Same hash function, use the cached hash code.
__hash_code
_M_hash_code(const _Hash&,
const _Hash_node_value<_Value, true>& __n) const
{ return __n._M_hash_code; }

  // Compute hash code using a different hash function, _H2
  template<typename _H2>
   __hash_code
   _M_hash_code(const _H2&,
   const _Hash_node_value<_Value, __cache_hash_code>& __n) const
   { return this->_M_hash_code(_ExtractKey{}(__n._M_v())); }

The first overload is more specialized, so will be chosen when the
first argument is the same type as _Hash and the __cache_hash_code
boolean is true.


Use modref kills in tree-ssa-dse

2021-11-14 Thread Jan Hubicka via Gcc-patches
Hi,
this patch extends tree-ssa-dse to use modref kill summary to clear
live_bytes.  This makes it possible to remove calls that are killed
in parts.

I noticed that DSE duplicates the logic of tree-ssa-alias that is matching bases
of memory accesses.  Here operands_equal_p (base1, base, OEP_ADDRESS_OF) is 
used.
So it won't work with mismatching memref offsets.   We probably want to 
commonize
this and add common function that matches bases and returns offset adjustments.
I wonder however if it can catch any cases that the tree-ssa-alias code doesn't?

Other check that stmt_kills_ref_p has and tree-ssa-dse is for 
non-call-exceptions.

Bootstrapped/regtested x86_64-linux, OK?

gcc/ChangeLog:

* ipa-modref.c (get_modref_function_summary): New function.
* ipa-modref.h (get_modref_function_summary): Declare.
* tree-ssa-dse.c (clear_live_bytes_for_ref): Break out from ...
(clear_bytes_written_by): ... here; add handling of modref summary.

gcc/testsuite/ChangeLog:

* gcc.dg/tree-ssa/modref-dse-4.c: New test.

diff --git a/gcc/ipa-modref.c b/gcc/ipa-modref.c
index df4612bbff9..8966f9fd2a4 100644
--- a/gcc/ipa-modref.c
+++ b/gcc/ipa-modref.c
@@ -724,6 +724,22 @@ get_modref_function_summary (cgraph_node *func)
   return r;
 }
 
+/* Get function summary for CALL if it exists, return NULL otherwise.
+   If INTERPOSED is non-NULL set it to true if call may be interposed.  */
+
+modref_summary *
+get_modref_function_summary (gcall *call, bool *interposed)
+{
+  tree callee = gimple_call_fndecl (call);
+  if (!callee)
+return NULL;
+  struct cgraph_node *node = cgraph_node::get (callee);
+  if (!node)
+return NULL;
+  if (interposed)
+*interposed = !node->binds_to_current_def_p ();
+  return get_modref_function_summary (node);
+}
+
 namespace {
 
 /* Construct modref_access_node from REF.  */
diff --git a/gcc/ipa-modref.h b/gcc/ipa-modref.h
index 9e8a30fd80a..72e608864ce 100644
--- a/gcc/ipa-modref.h
+++ b/gcc/ipa-modref.h
@@ -50,6 +50,7 @@ struct GTY(()) modref_summary
 };
 
 modref_summary *get_modref_function_summary (cgraph_node *func);
+modref_summary *get_modref_function_summary (gcall *call, bool *interposed);
 void ipa_modref_c_finalize ();
 void ipa_merge_modref_summary_after_inlining (cgraph_edge *e);
 
diff --git a/gcc/testsuite/gcc.dg/tree-ssa/modref-dse-4.c 
b/gcc/testsuite/gcc.dg/tree-ssa/modref-dse-4.c
new file mode 100644
index 000..81aa7dc587c
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/tree-ssa/modref-dse-4.c
@@ -0,0 +1,26 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -fdump-tree-dse2-details"  } */
+struct a {int a,b,c;};
+__attribute__ ((noinline))
+void
+kill_me (struct a *a)
+{
+  a->a=0;
+  a->b=0;
+  a->c=0;
+}
+__attribute__ ((noinline))
+void
+my_pleasure (struct a *a)
+{
+  a->a=1;
+  a->c=2;
+}
+void
+set (struct a *a)
+{
+  kill_me (a);
+  my_pleasure (a);
+  a->b=1;
+}
+/* { dg-final { scan-tree-dump "Deleted dead store: kill_me" "dse2" } } */
diff --git a/gcc/tree-ssa-dse.c b/gcc/tree-ssa-dse.c
index ce0083a6dab..d2f54b0faad 100644
--- a/gcc/tree-ssa-dse.c
+++ b/gcc/tree-ssa-dse.c
@@ -209,6 +209,24 @@ normalize_ref (ao_ref *copy, ao_ref *ref)
   return true;
 }
 
+/* Update LIVE_BYTES tracking REF for write to WRITE:
+   Verify we have the same base memory address, the write
+   has a known size and overlaps with REF.  */
+static void
+clear_live_bytes_for_ref (sbitmap live_bytes, ao_ref *ref, ao_ref *write)
+{
+  HOST_WIDE_INT start, size;
+
+  if (valid_ao_ref_for_dse (write)
+  && operand_equal_p (write->base, ref->base, OEP_ADDRESS_OF)
+  && known_eq (write->size, write->max_size)
+  && normalize_ref (write, ref)
+  && (write->offset - ref->offset).is_constant ()
+  && write->size.is_constant ())
+bitmap_clear_range (live_bytes, start / BITS_PER_UNIT,
+   size / BITS_PER_UNIT);
+}
+
 /* Clear any bytes written by STMT from the bitmap LIVE_BYTES.  The base
address written by STMT must match the one found in REF, which must
have its base address previously initialized.
@@ -220,20 +238,21 @@ static void
 clear_bytes_written_by (sbitmap live_bytes, gimple *stmt, ao_ref *ref)
 {
   ao_ref write;
+
+  if (gcall *call = dyn_cast  (stmt))
+{
+  bool interposed;
+  modref_summary *summary = get_modref_function_summary (call, 
);
+
+  if (summary && !interposed)
+   for (auto kill : summary->kills)
+ if (kill.get_ao_ref (as_a  (stmt), ))
+   clear_live_bytes_for_ref (live_bytes, ref, );
+}
   if (!initialize_ao_ref_for_dse (stmt, ))
 return;
 
-  /* Verify we have the same base memory address, the write
- has a known size and overlaps with REF.  */
-  HOST_WIDE_INT start, size;
-  if (valid_ao_ref_for_dse ()
-  && operand_equal_p (write.base, ref->base, OEP_ADDRESS_OF)
-  && known_eq (write.size, write.max_size)
-  && normalize_ref (, ref)
-  && (write.offset - ref->offset).is_constant ()
-  && 

[PATCH] libstdc++: Implement P1328 "Making std::type_info::operator== constexpr"

2021-11-14 Thread Jonathan Wakely via Gcc-patches
Not committed yet ...


This feature is present in the C++23 draft. The ARM EABI requires that
the type_info::operator== function can be defined out-of-line (and
suggests that should be the default). With this patch, we fail to
conform to that in C++23 mode. I think we might want to move the logic
from operator== into a separate std::type_info::__equal function, which
can be non-inline even if the actual type_info::operator== function is
constexpr (and so implicitly inline). That isn't done by this patch, but
probably should be.

libstdc++-v3/ChangeLog:

* include/bits/c++config (_GLIBCXX23_CONSTEXPR): Define.
* include/std/version (__cpp_lib_constexpr_typeinfo): Define.
* libsupc++/tinfo.cc: Add #error to ensure non-inline definition
is emitted.
* libsupc++/typeinfo (type_info::name()): Avoid branching.
(type_info::before): Combine different implementations into one.
(type_info::operator==): Likewise.
---
 libstdc++-v3/include/bits/c++config | 10 +++-
 libstdc++-v3/include/std/version|  1 +
 libstdc++-v3/libsupc++/tinfo.cc |  4 ++
 libstdc++-v3/libsupc++/typeinfo | 90 ++---
 4 files changed, 70 insertions(+), 35 deletions(-)

diff --git a/libstdc++-v3/include/bits/c++config 
b/libstdc++-v3/include/bits/c++config
index 4b7fa659300..4ae635c8a56 100644
--- a/libstdc++-v3/include/bits/c++config
+++ b/libstdc++-v3/include/bits/c++config
@@ -175,13 +175,21 @@
 #endif
 
 #ifndef _GLIBCXX20_CONSTEXPR
-# if __cplusplus > 201703L
+# if __cplusplus >= 202002L
 #  define _GLIBCXX20_CONSTEXPR constexpr
 # else
 #  define _GLIBCXX20_CONSTEXPR
 # endif
 #endif
 
+#ifndef _GLIBCXX23_CONSTEXPR
+# if __cplusplus >= 202100L
+#  define _GLIBCXX23_CONSTEXPR constexpr
+# else
+#  define _GLIBCXX23_CONSTEXPR
+# endif
+#endif
+
 #ifndef _GLIBCXX17_INLINE
 # if __cplusplus >= 201703L
 #  define _GLIBCXX17_INLINE inline
diff --git a/libstdc++-v3/include/std/version b/libstdc++-v3/include/std/version
index 0930de82efa..e8d696fa72f 100644
--- a/libstdc++-v3/include/std/version
+++ b/libstdc++-v3/include/std/version
@@ -290,6 +290,7 @@
 #if __cplusplus > 202002L
 // c++2b
 #define __cpp_lib_adaptor_iterator_pair_constructor 202106L
+#define __cpp_lib_constexpr_typeinfo 202106L
 #define __cpp_lib_invoke_r 202106L
 #define __cpp_lib_is_scoped_enum 202011L
 #if __cpp_lib_concepts
diff --git a/libstdc++-v3/libsupc++/tinfo.cc b/libstdc++-v3/libsupc++/tinfo.cc
index a620e23e91c..5356fbbedd3 100644
--- a/libstdc++-v3/libsupc++/tinfo.cc
+++ b/libstdc++-v3/libsupc++/tinfo.cc
@@ -32,6 +32,10 @@ std::type_info::
 
 #if !__GXX_TYPEINFO_EQUALITY_INLINE
 
+#if __cplusplus > 202002L
+# error "this file must be compiled with C++20 or older to define operator=="
+#endif
+
 // We can't rely on common symbols being shared between shared objects.
 bool std::type_info::
 operator== (const std::type_info& arg) const _GLIBCXX_NOEXCEPT
diff --git a/libstdc++-v3/libsupc++/typeinfo b/libstdc++-v3/libsupc++/typeinfo
index 975321f2017..06769e0c243 100644
--- a/libstdc++-v3/libsupc++/typeinfo
+++ b/libstdc++-v3/libsupc++/typeinfo
@@ -38,6 +38,10 @@
 
 #pragma GCC visibility push(default)
 
+#if __cplusplus >= 202101L
+# define __cpp_lib_constexpr_typeinfo 202106L
+#endif
+
 extern "C++" {
 
 namespace __cxxabiv1
@@ -97,42 +101,14 @@ namespace std
 /** Returns an @e implementation-defined byte string; this is not
  *  portable between compilers!  */
 const char* name() const _GLIBCXX_NOEXCEPT
-{ return __name[0] == '*' ? __name + 1 : __name; }
+{ return __name + int(__name[0] == '*'); }
 
-#if !__GXX_TYPEINFO_EQUALITY_INLINE
-// In old abi, or when weak symbols are not supported, there can
-// be multiple instances of a type_info object for one
-// type. Uniqueness must use the _name value, not object address.
-bool before(const type_info& __arg) const _GLIBCXX_NOEXCEPT;
-bool operator==(const type_info& __arg) const _GLIBCXX_NOEXCEPT;
-#else
-  #if !__GXX_MERGED_TYPEINFO_NAMES
-/** Returns true if @c *this precedes @c __arg in the implementation's
+/** Returns true if `*this` precedes `__arg` in the implementation's
  *  collation order.  */
-// Even with the new abi, on systems that support dlopen
-// we can run into cases where type_info names aren't merged,
-// so we still need to do string comparison.
-bool before(const type_info& __arg) const _GLIBCXX_NOEXCEPT
-{ return (__name[0] == '*' && __arg.__name[0] == '*')
-   ? __name < __arg.__name
-   : __builtin_strcmp (__name, __arg.__name) < 0; }
+bool before(const type_info& __arg) const _GLIBCXX_NOEXCEPT;
 
-bool operator==(const type_info& __arg) const _GLIBCXX_NOEXCEPT
-{
-  return ((__name == __arg.__name)
- || (__name[0] != '*' &&
- __builtin_strcmp (__name, __arg.__name) == 0));
-}
-  #else
-// On some targets we can rely on type_info's NTBS being unique,
-// and therefore 

Track nondeterminism and interposable calls in ipa-modref

2021-11-14 Thread Jan Hubicka via Gcc-patches
Hi,
This patch adds tracking of two new flags in ipa-modref: nondeterministic and
calls_interposable.  First is set when function does something that is not
guaranteed to be the same if run again (volatile memory access, volatile asm or
external function call).  Second is set if function calls something that
does not bind to current def.

nondeterministic enables ipa-modref to discover looping pure/const functions
and it now discovers 138 of them during cc1plus link (which about doubles
number of such functions detected late).  We however can do more

 1) We can extend FRE to eliminate redundant calls.
    I filed PR103168 for that.
    A common case are inline functions that are not autodetected as ECF_CONST
    just because they do not bind to local def and can be easily handled.
More tricky is to use modref summary to check what memory locations are
read.
 2) DSE can eliminate redundant stores

The calls_interposable flag currently also improves tree-ssa-structalias
on functions that are not binds_to_current_def since reads_global_memory
is now not cleared by interposable functions.

Bootstrapped/regtested x86_64-linux, will commit it shortly.

gcc/ChangeLog:

* ipa-modref.h (struct modref_summary): Add nondeterministic
and calls_interposable flags.
* ipa-modref.c (modref_summary::modref_summary): Initialize new flags.
(modref_summary::useful_p): Check new flags.
(struct modref_summary_lto): Add nondeterministic and
calls_interposable flags.
(modref_summary_lto::modref_summary_lto): Initialize new flags.
(modref_summary_lto::useful_p): Check new flags.
(modref_summary::dump): Dump new flags.
(modref_summary_lto::dump): Dump new flags.
(ignore_nondeterminism_p): New function.
(merge_call_side_effects): Merge new flags.
(process_fnspec): Likewise.
(analyze_load): Volatile access is nondeterministic.
(analyze_store): Likewise.
(analyze_stmt): Volatile ASM is nondeterministic.
(analyze_function): Clear new flags.
(modref_summaries::duplicate): Duplicate new flags.
(modref_summaries_lto::duplicate): Duplicate new flags.
(modref_write): Stream new flags.
(read_section): Stream new flags.
(propagate_unknown_call): Update new flags.
(modref_propagate_in_scc): Propagate new flags.
* tree-ssa-alias.c (ref_maybe_used_by_call_p_1): Check
calls_interposable.
* tree-ssa-structalias.c (determine_global_memory_access):
Likewise.


diff --git a/gcc/ipa-modref.c b/gcc/ipa-modref.c
index b75ed84135b..4d878f45e30 100644
--- a/gcc/ipa-modref.c
+++ b/gcc/ipa-modref.c
@@ -276,7 +276,8 @@ static GTY(()) fast_function_summary 
 
 modref_summary::modref_summary ()
   : loads (NULL), stores (NULL), retslot_flags (0), static_chain_flags (0),
-writes_errno (false), side_effects (false), global_memory_read (false),
+writes_errno (false), side_effects (false), nondeterministic (false),
+calls_interposable (false), global_memory_read (false),
 global_memory_written (false), try_dse (false)
 {
 }
@@ -332,11 +333,13 @@ modref_summary::useful_p (int ecf_flags, bool check_flags)
   && remove_useless_eaf_flags (static_chain_flags, ecf_flags, false))
 return true;
   if (ecf_flags & (ECF_CONST | ECF_NOVOPS))
-return (!side_effects && (ecf_flags & ECF_LOOPING_CONST_OR_PURE));
+return ((!side_effects || !nondeterministic)
+   && (ecf_flags & ECF_LOOPING_CONST_OR_PURE));
   if (loads && !loads->every_base)
 return true;
   if (ecf_flags & ECF_PURE)
-return (!side_effects && (ecf_flags & ECF_LOOPING_CONST_OR_PURE));
+return ((!side_effects || !nondeterministic)
+   && (ecf_flags & ECF_LOOPING_CONST_OR_PURE));
   return stores && !stores->every_base;
 }
 
@@ -354,8 +357,10 @@ struct GTY(()) modref_summary_lto
   auto_vec GTY((skip)) arg_flags;
   eaf_flags_t retslot_flags;
   eaf_flags_t static_chain_flags;
-  bool writes_errno;
-  bool side_effects;
+  unsigned writes_errno : 1;
+  unsigned side_effects : 1;
+  unsigned nondeterministic : 1;
+  unsigned calls_interposable : 1;
 
   modref_summary_lto ();
   ~modref_summary_lto ();
@@ -367,7 +372,8 @@ struct GTY(()) modref_summary_lto
 
 modref_summary_lto::modref_summary_lto ()
   : loads (NULL), stores (NULL), retslot_flags (0), static_chain_flags (0),
-writes_errno (false), side_effects (false)
+writes_errno (false), side_effects (false), nondeterministic (false),
+calls_interposable (false)
 {
 }
 
@@ -397,11 +403,13 @@ modref_summary_lto::useful_p (int ecf_flags, bool 
check_flags)
   && remove_useless_eaf_flags (static_chain_flags, ecf_flags, false))
 return true;
   if (ecf_flags & (ECF_CONST | ECF_NOVOPS))
-return (!side_effects && (ecf_flags & ECF_LOOPING_CONST_OR_PURE));
+return ((!side_effects || !nondeterministic)
+   && (ecf_flags & ECF_LOOPING_CONST_OR_PURE));
   

[PATCH] Fortran: Mark internal symbols as artificial [PR88009,PR68800]

2021-11-14 Thread Bernhard Reutner-Fischer via Gcc-patches
Hi!

Amend fix for PR88009 to mark all these class components as artificial.

gcc/fortran/ChangeLog:

* class.c (gfc_build_class_symbol, generate_finalization_wrapper,
(gfc_find_derived_vtab, find_intrinsic_vtab): Use stringpool for
names. Mark internal symbols as artificial.
* decl.c (gfc_match_decl_type_spec, gfc_match_end): Fix
indentation.
(gfc_match_derived_decl): Fix indentation. Check extension level
before incrementing refs counter.
* parse.c (parse_derived): Fix style.
* resolve.c (resolve_global_procedure): Likewise.
* symbol.c (gfc_check_conflict): Do not ignore artificial symbols.
(gfc_add_flavor): Reorder condition, cheapest first.
(gfc_new_symbol, gfc_get_sym_tree,
generate_isocbinding_symbol): Fix style.
* trans-expr.c (gfc_trans_subcomponent_assign): Remove
restriction on !artificial.
* match.c (gfc_match_equivalence): Special-case CLASS_DATA for
warnings.

---
gfc_match_equivalence(), too, should not bail-out early on the first
error but should diagnose all errors. I.e. not goto cleanup but set
err=true and continue in order to diagnose all constraints of a
statement. Maybe Sandra or somebody else will eventually find time to
tweak that.

I think it also plugs a very minor leak of name in gfc_find_derived_vtab
so I also tagged it [PR68800]. At least that was the initial
motivation to look at that spot.
We were doing
-  name = xasprintf ("__vtab_%s", tname);
...
  gfc_set_sym_referenced (vtab);
- name = xasprintf ("__vtype_%s", tname);

Bootstrapped and regtested without regressions on x86_64-unknown-linux.
Ok for trunk?
diff --git a/gcc/fortran/match.c b/gcc/fortran/match.c
index 2bf21434a42..94e7dce1675 100644
--- a/gcc/fortran/match.c
+++ b/gcc/fortran/match.c
@@ -5706,11 +5706,22 @@ gfc_match_equivalence (void)
 
  if (!gfc_add_in_equivalence (>attr, sym->name, NULL))
goto cleanup;
- if (sym->ts.type == BT_CLASS
- && CLASS_DATA (sym)
- && !gfc_add_in_equivalence (_DATA (sym)->attr,
- sym->name, NULL))
-   goto cleanup;
+ if (sym->ts.type == BT_CLASS && CLASS_DATA (sym))
+   {
+ bool ret;
+ /* The check above should have seen allocatable and some more.
+But gfc_build_class_symbol clears
+allocatable, pointer, dimension, codimension on the
+base symbol.  Cheat by temporarily pretending our class data
+has the real symbol's attribs.
+  */
+ CLASS_DATA (sym)->attr.artificial = 0;
+ ret = gfc_add_in_equivalence (_DATA (sym)->attr,
+ sym->name, NULL);
+ CLASS_DATA (sym)->attr.artificial = 1;
+ if (!ret)
+   goto cleanup;
+   }
 
  if (sym->attr.in_common)
{
>From 764a41d4afc1a03e1e8a380f4f92242a5bc9bd65 Mon Sep 17 00:00:00 2001
From: Bernhard Reutner-Fischer 
Date: Sun, 7 Nov 2021 11:15:56 +0100
Subject: [PATCH] Fortran: Mark internal symbols as artificial
To: fort...@gcc.gnu.org

Amend fix for PR88009 to mark all these as artificial.

gcc/fortran/ChangeLog:

* class.c (gfc_build_class_symbol, generate_finalization_wrapper,
(gfc_find_derived_vtab, find_intrinsic_vtab): Use stringpool for
names. Mark internal symbols as artificial.
* decl.c (gfc_match_decl_type_spec, gfc_match_end): Fix
indentation.
(gfc_match_derived_decl): Fix indentation. Check extension level
before incrementing refs counter.
* parse.c (parse_derived): Fix style.
* resolve.c (resolve_global_procedure): Likewise.
* symbol.c (gfc_check_conflict): Do not ignore artificial symbols.
(gfc_add_flavor): Reorder condition, cheapest first.
(gfc_new_symbol, gfc_get_sym_tree,
generate_isocbinding_symbol): Fix style.
* trans-expr.c (gfc_trans_subcomponent_assign): Remove
restriction on !artificial.
* match.c (gfc_match_equivalence): Special-case CLASS_DATA for
warnings.

---
gfc_match_equivalence(), too, should not bail-out early on the first
error but should diagnose all errors. I.e. not goto cleanup but set
err=true and continue in order to diagnose all constraints of a
statement.
---
 gcc/fortran/class.c  | 70 +++-
 gcc/fortran/decl.c   | 49 ++--
 gcc/fortran/match.c  | 21 +---
 gcc/fortran/parse.c  |  5 ++-
 gcc/fortran/resolve.c|  2 +-
 gcc/fortran/symbol.c | 20 
 gcc/fortran/trans-expr.c |  2 +-
 7 files changed, 92 insertions(+), 77 deletions(-)

diff --git a/gcc/fortran/class.c b/gcc/fortran/class.c
index 6b017667600..44fccced7b9 100644
--- 

[PATCH v1 2/2] RISC-V: Add instruction fusion (for ventana-vt1)

2021-11-14 Thread Philipp Tomsich
From: Philipp Tomsich 

The Ventana VT1 core supports quad-issue and instruction fusion.
This implements TARGET_SCHED_MACRO_FUSION_P to keep fusible sequences
together and adds idiom matching for the supported fusion cases.

gcc/ChangeLog:

* config/riscv/riscv.c (enum riscv_fusion_pairs): Add symbolic
constants to identify supported fusion patterns.
(struct riscv_tune_param): Add fusible_ops field.
(riscv_macro_fusion_p): Implement.
(riscv_fusion_enabled_p): Implement.
(riscv_macro_fusion_pair_p): Implement and recognize fusible
idioms for Ventana VT1.
(TARGET_SCHED_MACRO_FUSION_P): Point to riscv_macro_fusion_p.
(TARGET_SCHED_MACRO_FUSION_PAIR_P): Point to riscv_macro_fusion_pair_p.

Signed-off-by: Philipp Tomsich 
---

 gcc/config/riscv/riscv.c | 196 +++
 1 file changed, 196 insertions(+)

diff --git a/gcc/config/riscv/riscv.c b/gcc/config/riscv/riscv.c
index 6b918db65e9..8eac52101a3 100644
--- a/gcc/config/riscv/riscv.c
+++ b/gcc/config/riscv/riscv.c
@@ -211,6 +211,19 @@ struct riscv_integer_op {
The worst case is LUI, ADDI, SLLI, ADDI, SLLI, ADDI, SLLI, ADDI.  */
 #define RISCV_MAX_INTEGER_OPS 8
 
+enum riscv_fusion_pairs
+{
+  RISCV_FUSE_NOTHING = 0,
+  RISCV_FUSE_ZEXTW = (1 << 0),
+  RISCV_FUSE_ZEXTH = (1 << 1),
+  RISCV_FUSE_ZEXTWS = (1 << 2),
+  RISCV_FUSE_LDINDEXED = (1 << 3),
+  RISCV_FUSE_LUI_ADDI = (1 << 4),
+  RISCV_FUSE_AUIPC_ADDI = (1 << 5),
+  RISCV_FUSE_LUI_LD = (1 << 6),
+  RISCV_FUSE_AUIPC_LD = (1 << 7),
+};
+
 /* Costs of various operations on the different architectures.  */
 
 struct riscv_tune_param
@@ -224,6 +237,7 @@ struct riscv_tune_param
   unsigned short branch_cost;
   unsigned short memory_cost;
   bool slow_unaligned_access;
+  unsigned int fusible_ops;
 };
 
 /* Information about one micro-arch we know about.  */
@@ -289,6 +303,7 @@ static const struct riscv_tune_param rocket_tune_info = {
   3,   /* branch_cost */
   5,   /* memory_cost */
   true,/* 
slow_unaligned_access */
+  RISCV_FUSE_NOTHING,   /* fusible_ops */
 };
 
 /* Costs to use when optimizing for Sifive 7 Series.  */
@@ -302,6 +317,7 @@ static const struct riscv_tune_param sifive_7_tune_info = {
   4,   /* branch_cost */
   3,   /* memory_cost */
   true,/* 
slow_unaligned_access */
+  RISCV_FUSE_NOTHING,   /* fusible_ops */
 };
 
 /* Costs to use when optimizing for T-HEAD c906.  */
@@ -328,6 +344,7 @@ static const struct riscv_tune_param 
optimize_size_tune_info = {
   1,   /* branch_cost */
   2,   /* memory_cost */
   false,   /* slow_unaligned_access */
+  RISCV_FUSE_NOTHING,   /* fusible_ops */
 };
 
 /* Costs to use when optimizing for Ventana Micro VT1.  */
@@ -341,6 +358,10 @@ static const struct riscv_tune_param ventana_vt1_tune_info 
= {
   4,   /* branch_cost */
   5,   /* memory_cost */
   false,   /* slow_unaligned_access */
+  ( RISCV_FUSE_ZEXTW | RISCV_FUSE_ZEXTH |   /* fusible_ops */
+RISCV_FUSE_ZEXTWS | RISCV_FUSE_LDINDEXED |
+RISCV_FUSE_LUI_ADDI | RISCV_FUSE_AUIPC_ADDI |
+RISCV_FUSE_LUI_LD | RISCV_FUSE_AUIPC_LD )
 };
 
 static tree riscv_handle_fndecl_attribute (tree *, tree, tree, int, bool *);
@@ -4909,6 +4930,177 @@ riscv_issue_rate (void)
   return tune_param->issue_rate;
 }
 
+/* Implement TARGET_SCHED_MACRO_FUSION_P.  Return true if target supports
+   instruction fusion of some sort.  */
+
+static bool
+riscv_macro_fusion_p (void)
+{
+  return tune_param->fusible_ops != RISCV_FUSE_NOTHING;
+}
+
+/* Return true iff the instruction fusion described by OP is enabled.  */
+
+static bool
+riscv_fusion_enabled_p(enum riscv_fusion_pairs op)
+{
+  return tune_param->fusible_ops & op;
+}
+
+/* Implement TARGET_SCHED_MACRO_FUSION_PAIR_P.  Return true if PREV and CURR
+   should be kept together during scheduling.  */
+
+static bool
+riscv_macro_fusion_pair_p (rtx_insn *prev, rtx_insn *curr)
+{
+  rtx prev_set = single_set (prev);
+  rtx curr_set = single_set (curr);
+  /* prev and curr are simple SET insns i.e. no flag setting or branching.  */
+  bool simple_sets_p = prev_set && curr_set && !any_condjump_p (curr);
+
+  if (!riscv_macro_fusion_p ())
+return false;
+
+  if (simple_sets_p && (riscv_fusion_enabled_p (RISCV_FUSE_ZEXTW) ||
+   riscv_fusion_enabled_p (RISCV_FUSE_ZEXTH)))
+{
+  /* We are trying to match the following:
+  prev (slli) == (set (reg:DI rD)
+  

[PATCH v1 1/2] RISC-V: Add basic support for the Ventana-VT1 core

2021-11-14 Thread Philipp Tomsich
From: Philipp Tomsich 

The Ventana-VT1 core is compatible with rv64gc and Zb[abcs].
This introduces a placeholder -mcpu=ventana-vt1, so tooling and
scripts don't need to change once full support (pipeline, tuning,
etc.) will become public later.

gcc/ChangeLog:

* config/riscv/riscv-cores.def (RISCV_CORE): Add ventana-vt1.
* config/riscv/riscv-opts.h (enum riscv_microarchitecture_type): Add 
ventana_vt1.
* config/riscv/riscv.c: Add tune-info for ventana-vt1.
* config/riscv/riscv.md (tune): Add ventana_vt1.
* doc/invoke.texi: Add ventana-vt1.

Signed-off-by: Philipp Tomsich 
---

 gcc/config/riscv/riscv-cores.def |  2 ++
 gcc/config/riscv/riscv-opts.h|  3 ++-
 gcc/config/riscv/riscv.c | 14 ++
 gcc/config/riscv/riscv.md|  2 +-
 gcc/doc/invoke.texi  |  4 ++--
 5 files changed, 21 insertions(+), 4 deletions(-)

diff --git a/gcc/config/riscv/riscv-cores.def b/gcc/config/riscv/riscv-cores.def
index bf5aaba49c3..f6f225d3c5f 100644
--- a/gcc/config/riscv/riscv-cores.def
+++ b/gcc/config/riscv/riscv-cores.def
@@ -46,4 +46,6 @@ RISCV_CORE("sifive-s76",  "rv64imafdc", "sifive-7-series")
 RISCV_CORE("sifive-u54",  "rv64imafdc", "sifive-5-series")
 RISCV_CORE("sifive-u74",  "rv64imafdc", "sifive-7-series")
 
+RISCV_CORE("ventana-vt1", "rv64imafdc_zba_zbb_zbc_zbs","ventana-vt1")
+
 #undef RISCV_CORE
diff --git a/gcc/config/riscv/riscv-opts.h b/gcc/config/riscv/riscv-opts.h
index 2efc4b80f1f..32d6a9db1bd 100644
--- a/gcc/config/riscv/riscv-opts.h
+++ b/gcc/config/riscv/riscv-opts.h
@@ -52,7 +52,8 @@ extern enum riscv_isa_spec_class riscv_isa_spec;
 /* Keep this list in sync with define_attr "tune" in riscv.md.  */
 enum riscv_microarchitecture_type {
   generic,
-  sifive_7
+  sifive_7,
+  ventana_vt1
 };
 extern enum riscv_microarchitecture_type riscv_microarchitecture;
 
diff --git a/gcc/config/riscv/riscv.c b/gcc/config/riscv/riscv.c
index df66abeb6ce..6b918db65e9 100644
--- a/gcc/config/riscv/riscv.c
+++ b/gcc/config/riscv/riscv.c
@@ -330,6 +330,19 @@ static const struct riscv_tune_param 
optimize_size_tune_info = {
   false,   /* slow_unaligned_access */
 };
 
+/* Costs to use when optimizing for Ventana Micro VT1.  */
+static const struct riscv_tune_param ventana_vt1_tune_info = {
+  {COSTS_N_INSNS (4), COSTS_N_INSNS (5)},  /* fp_add */
+  {COSTS_N_INSNS (4), COSTS_N_INSNS (5)},  /* fp_mul */
+  {COSTS_N_INSNS (20), COSTS_N_INSNS (20)},/* fp_div */
+  {COSTS_N_INSNS (4), COSTS_N_INSNS (4)},  /* int_mul */
+  {COSTS_N_INSNS (6), COSTS_N_INSNS (6)},  /* int_div */
+  4,   /* issue_rate */
+  4,   /* branch_cost */
+  5,   /* memory_cost */
+  false,   /* slow_unaligned_access */
+};
+
 static tree riscv_handle_fndecl_attribute (tree *, tree, tree, int, bool *);
 static tree riscv_handle_type_attribute (tree *, tree, tree, int, bool *);
 
@@ -366,6 +379,7 @@ static const struct riscv_tune_info riscv_tune_info_table[] 
= {
   { "sifive-5-series", generic, _tune_info },
   { "sifive-7-series", sifive_7, _7_tune_info },
   { "thead-c906", generic, _c906_tune_info },
+  { "ventana-vt1", ventana_vt1, _vt1_tune_info },
   { "size", generic, _size_tune_info },
 };
 
diff --git a/gcc/config/riscv/riscv.md b/gcc/config/riscv/riscv.md
index b06a26bffb3..be7ccc753a4 100644
--- a/gcc/config/riscv/riscv.md
+++ b/gcc/config/riscv/riscv.md
@@ -270,7 +270,7 @@ (define_attr "cannot_copy" "no,yes" (const_string "no"))
 ;; Microarchitectures we know how to tune for.
 ;; Keep this in sync with enum riscv_microarchitecture.
 (define_attr "tune"
-  "generic,sifive_7"
+  "generic,sifive_7,ventana_vt1"
   (const (symbol_ref "((enum attr_tune) riscv_microarchitecture)")))
 
 ;; Describe a user's asm statement.
diff --git a/gcc/doc/invoke.texi b/gcc/doc/invoke.texi
index 99cdeb90c7c..b5934183a88 100644
--- a/gcc/doc/invoke.texi
+++ b/gcc/doc/invoke.texi
@@ -27358,14 +27358,14 @@ by particular CPU name.
 Permissible values for this option are: @samp{sifive-e20}, @samp{sifive-e21},
 @samp{sifive-e24}, @samp{sifive-e31}, @samp{sifive-e34}, @samp{sifive-e76},
 @samp{sifive-s21}, @samp{sifive-s51}, @samp{sifive-s54}, @samp{sifive-s76},
-@samp{sifive-u54}, and @samp{sifive-u74}.
+@samp{sifive-u54}, @samp{sifive-u74}, and @samp{ventana-vt1} .
 
 @item -mtune=@var{processor-string}
 @opindex mtune
 Optimize the output for the given processor, specified by microarchitecture or
 particular CPU name.  Permissible values for this option are: @samp{rocket},
 @samp{sifive-3-series}, @samp{sifive-5-series}, @samp{sifive-7-series},
-@samp{size}, and all valid options for @option{-mcpu=}.
+@samp{ventana-vt1}, @samp{size}, and all valid options for @option{-mcpu=}.
 
 When @option{-mtune=} is not specified, use the setting from @option{-mcpu},
 the 

[PATCH v1 0/2] Basic support for the Ventana VT1 w/ instruction fusion

2021-11-14 Thread Philipp Tomsich


This series provides support for the Ventana VT1 (a 4-way superscalar
rv64gc_zba_zbb_zbc_zbs core) including support for the supported
instruction fusion patterns.

This includes the addition of the fusion-aware scheduling
infrastructure for RISC-V and implements idiom recognition for the
fusion patterns supported by VT1.


Philipp Tomsich (2):
  RISC-V: Add basic support for the Ventana-VT1 core
  RISC-V: Add instruction fusion (for ventana-vt1)

 gcc/config/riscv/riscv-cores.def |   2 +
 gcc/config/riscv/riscv-opts.h|   3 +-
 gcc/config/riscv/riscv.c | 210 +++
 gcc/config/riscv/riscv.md|   2 +-
 gcc/doc/invoke.texi  |   4 +-
 5 files changed, 217 insertions(+), 4 deletions(-)

-- 
2.32.0



Re: [PATCH] libgccjit: add some reflection functions in the jit C api

2021-11-14 Thread Antoni Boucher via Gcc-patches
David: PING

Le mardi 12 octobre 2021 à 22:09 -0400, Antoni Boucher a écrit :
> David: PING
> 
> Le lundi 27 septembre 2021 à 20:53 -0400, Antoni Boucher a écrit :
> > I fixed an issue (it would show an error message when
> > gcc_jit_type_dyncast_function_ptr_type was called on a type
> > different
> > than a function pointer type).
> > 
> > Here's the updated patch.
> > 
> > Le vendredi 18 juin 2021 à 16:37 -0400, David Malcolm a écrit :
> > > On Fri, 2021-06-18 at 15:41 -0400, Antoni Boucher wrote:
> > > > I have write access now.
> > > 
> > > Great.
> > > 
> > > > I'm not sure how I'm supposed to send my patches:
> > > > should I put it in personal branches and you'll merge them?
> > > 
> > > Please send them to this mailing list for review; once they're
> > > approved
> > > you can merge them.
> > > 
> > > > 
> > > > And for the MAINTAINERS file, should I just push to master
> > > > right
> > > > away,
> > > > after sending it to the mailing list?
> > > 
> > > I think people just push the MAINTAINERS change and then let the
> > > list
> > > know, since it makes a good test that write access is working
> > > correctly.
> > > 
> > > Dave
> > > 
> > > > 
> > > > Thanks for your help!
> > > > 
> > > > Le vendredi 18 juin 2021 à 12:09 -0400, David Malcolm a écrit :
> > > > > On Fri, 2021-06-18 at 11:55 -0400, Antoni Boucher wrote:
> > > > > > Le vendredi 11 juin 2021 à 14:00 -0400, David Malcolm a
> > > > > > écrit :
> > > > > > > On Fri, 2021-06-11 at 08:15 -0400, Antoni Boucher wrote:
> > > > > > > > Thank you for your answer.
> > > > > > > > I attached the updated patch.
> > > > > > > 
> > > > > > > BTW you (or possibly me) dropped the mailing lists; was
> > > > > > > that
> > > > > > > deliberate?
> > > > > > 
> > > > > > Oh, my bad.
> > > > > > 
> > > > > 
> > > > > [...]
> > > > > 
> > > > > 
> > > > > > > 
> > > > > > > 
> > > > > > > > I have signed the FSF copyright attribution.
> > > > > > > 
> > > > > > > I can push changes on your behalf, but I'd prefer it if
> > > > > > > you
> > > > > > > did
> > > > > > > it,
> > > > > > > especially given that you have various other patches you
> > > > > > > want
> > > > > > > to
> > > > > > > get
> > > > > > > in.
> > > > > > > 
> > > > > > > Instructions on how to get push rights to the git repo
> > > > > > > are
> > > > > > > here:
> > > > > > >   https://gcc.gnu.org/gitwrite.html
> > > > > > > 
> > > > > > > I can sponsor you.
> > > > > > 
> > > > > > Thanks.
> > > > > > I did sign up to get push rights.
> > > > > > Have you accepted my request to get those?
> > > > > 
> > > > > I did, but I didn't see any kind of notification.  Did you
> > > > > get
> > > > > an
> > > > > email
> > > > > about it?
> > > > > 
> > > > > 
> > > > > Dave
> > > > > 
> > > > 
> > > > 
> > > 
> > > 
> > 
> 
> 



[committed] VAX: Add the `setmemhi' instruction

2021-11-14 Thread Maciej W. Rozycki
The MOVC5 machine instruction has `memset' semantics if encoded with a 
zero source length[1]:

"4. MOVC5 with a zero source length operand is the preferred way
to fill a block of memory with the fill character."

Use that instruction to implement the `setmemhi' instruction then.  Use 
the AP register in the register deferred mode for the source address to 
yield the shortest possible encoding of the otherwise unused operand, 
observing that the address is never dereferenced if the source length is 
zero.

The use of this instruction yields steadily better performance, at least 
with the Mariah VAX implementation, for a variable-length `memset' call 
expanded inline as a single MOVC5 operation compared to an equivalent 
libcall invocation:

Length:   1, time elapsed:  0.971789 (builtin),  2.847303 (libcall)
Length:   2, time elapsed:  0.907904 (builtin),  2.728259 (libcall)
Length:   3, time elapsed:  1.038311 (builtin),  2.917245 (libcall)
Length:   4, time elapsed:  0.775305 (builtin),  2.686088 (libcall)
Length:   7, time elapsed:  1.112331 (builtin),  2.992968 (libcall)
Length:   8, time elapsed:  0.856882 (builtin),  2.764885 (libcall)
Length:  15, time elapsed:  1.256086 (builtin),  3.096660 (libcall)
Length:  16, time elapsed:  1.001962 (builtin),  2.888131 (libcall)
Length:  31, time elapsed:  1.590456 (builtin),  3.774164 (libcall)
Length:  32, time elapsed:  1.288909 (builtin),  3.629622 (libcall)
Length:  63, time elapsed:  3.430285 (builtin),  5.269789 (libcall)
Length:  64, time elapsed:  3.265147 (builtin),  5.113156 (libcall)
Length: 127, time elapsed:  6.438772 (builtin),  8.268305 (libcall)
Length: 128, time elapsed:  6.268991 (builtin),  8.114557 (libcall)
Length: 255, time elapsed: 12.417338 (builtin), 14.259678 (libcall)

(times given in seconds per 100 `memset' invocations for the given 
length made in a loop).  It is clear from these figures that hardware 
does data coalescence for consecutive bytes rather than naively copying 
them one by one, as for lengths that are powers of 2 the figures are 
consistently lower than ones for their respective next lower lengths.

The use of MOVC5 also requires at least 4 bytes less in terms of machine 
code as it avoids encoding the address of `memset' needed for the CALLS 
instruction used to make a libcall, as well as extra PUSHL instructions 
needed to pass arguments to the call as those can be encoded directly as 
the respective operands of the MOVC5 instruction.

It is perhaps worth noting too that for constant lengths we prefer to 
emit up to 5 individual MOVx instructions rather than a single MOVC5 
instruction to clear memory and for consistency we copy this behavior 
here for filling memory with another value too, even though there may be 
a performance advantage with a string copy in comparison to a piecemeal 
copy, e.g.:

Length:  40, time elapsed:  2.183192 (string),   2.638878 (piecemeal)

But this is something for another change as it will have to be carefully 
evaluated.

[1] DEC STD 032-0 "VAX Architecture Standard", Digital Equipment
Corporation, A-DS-EL-00032-00-0 Rev J, December 15, 1989, Section
3.10 "Character-String Instructions", p. 3-163

gcc/
* config/vax/vax.h (SET_RATIO): New macro.
* config/vax/vax.md (UNSPEC_SETMEM_FILL): New constant.
(setmemhi): New expander.
(setmemhi1): New insn and splitter.
(*setmemhi1): New insn.

gcc/testsuite/
* gcc.target/vax/setmem.c: New test.
---
Regression-tested with no change in results.  Committed.
---
 gcc/config/vax/vax.h  |1 
 gcc/config/vax/vax.md |   64 ++
 gcc/testsuite/gcc.target/vax/setmem.c |   22 +++
 3 files changed, 87 insertions(+)

gcc-vax-setmem.diff
Index: gcc/gcc/config/vax/vax.h
===
--- gcc.orig/gcc/config/vax/vax.h
+++ gcc/gcc/config/vax/vax.h
@@ -433,6 +433,7 @@ enum reg_class { NO_REGS, ALL_REGS, LIM_
move-instruction pairs, we will do a cpymem or libcall instead.  */
 #define MOVE_RATIO(speed) ((speed) ? 6 : 3)
 #define CLEAR_RATIO(speed) ((speed) ? 6 : 2)
+#define SET_RATIO(speed) ((speed) ? 6 : 2)
 
 /* Nonzero if access to memory by bytes is slow and undesirable.  */
 #define SLOW_BYTE_ACCESS 0
Index: gcc/gcc/config/vax/vax.md
===
--- gcc.orig/gcc/config/vax/vax.md
+++ gcc/gcc/config/vax/vax.md
@@ -32,6 +32,12 @@
   VUNSPEC_PEM  ; 'procedure_entry_mask' insn.
 ])
 
+;; UNSPEC usage:
+
+(define_c_enum "unspec" [
+  UNSPEC_SETMEM_FILL   ; 'fill' operand to 'setmem' insn.
+])
+
 (define_constants
   [(VAX_AP_REGNUM 12)  ; Register 12 contains the argument pointer
(VAX_FP_REGNUM 13)  ; Register 13 contains the frame pointer
@@ -438,6 +444,64 @@
(clobber (reg:CC VAX_PSL_REGNUM))]
   "reload_completed"
   "movc3 %2,%1,%0")
+
+;; This is here to accept 4 

Re: [PATCH,FORTRAN] Fix memory leak in finalization wrappers

2021-11-14 Thread Bernhard Reutner-Fischer via Gcc-patches
On Sun, 7 Nov 2021 13:32:34 +0100
Mikael Morin  wrote:

> > btw.. Just because it's vaguely related.
> > I think f8add009ce300f24b75e9c2e2cc5dd944a020c28 for
> > PR fortran/88009 (ICE in find_intrinsic_vtab, at fortran/class.c:2761)
> > is incomplete in that i think all the internal class helpers should be
> > flagged artificial. All these symbols built in gfc_build_class_symbol,
> > generate_finalization_wrapper, gfc_find_derived_vtab etc.
> > Looking at the history it seems the artificial bit often was forgotten.  
> 
> I guess so, yes...
> 
> > And most importantly i think it is not correct to ignore artificial in
> > gfc_check_conflict!
> >   
> Well, it’s not correct to throw errors at users for things they haven’t 
> written and that they don’t control.

oops, i forgot to add the hunk to the patch to drain complaints to
the user 1).

Of course we don't want the error to be user-visible, but i think we do
want to check_conflicts (e.g. gfortran.dg/pr95587.f90 regresses via an
unspecific Unclassifiable statement; I assume we should copy all or at
least some sym attribs to the corresponding CLASS_DATA attribs which i
think makes sense for consistency anyway).

1)
diff --git a/gcc/fortran/symbol.c b/gcc/fortran/symbol.c
index 1a1e4551355..9df23f314df 100644
--- a/gcc/fortran/symbol.c
+++ b/gcc/fortran/symbol.c
@@ -898,6 +898,10 @@ gfc_check_conflict (symbol_attribute *attr, const char 
*name, locus *where)
   return true;
 
 conflict:
+  /* It would be wrong to complain about artificial code.  */
+  if (attr->artificial)
+return false;
+
   if (name == NULL)
 gfc_error ("%s attribute conflicts with %s attribute at %L",
   a1, a2, where);


Re: Basic kill analysis for modref

2021-11-14 Thread Jan Hubicka via Gcc-patches
> > 
> > I think you want get_addr_base_and_unit_offset here.  All
> > variable indexed addresses are in separate stmts.  That also means
> > you can eventually work with just byte sizes/offsets?
> 
> Will do.  The access range in modref summary is bit based (since we want
> to disambiguate bitfields like we do in rest of alias oracle) but indeed
> this part can be in bytes.

Actually after the unification I can just use get_ao_ref which will call
ao_ref_init_from_ptr_and_range that has all the logic I need in there.
I also noticed that I ended up duplicating the code matching bases
and ranges which is done already twice in the function - once for store
targets and once for MEMSET and friends.  The latter copy lacked overflow
checks so I took the first copy and moved it to helper function.  This
makes the gimple part of patch really straightforward: just build ao_ref
if possible and then pass it to this function.

I also added statistics.

I have bootstrapped/regtested on x86_64-linux the updated patch and
committed it so I can break out the patches that depend on it.
I have patch improving the kill tracking at modref side and also the
kill oracle itself can use fnspec and does not need to special case
mem* functions.

For cc1plus LTO link I now get:

Alias oracle query stats:
  refs_may_alias_p: 76106130 disambiguations, 100928932 queries
  on_includes: 12539931 disambiguations, 39864841 queries
  ref_maybe_used_by_call_p: 625857 disambiguations, 77138089 queries
  call_may_clobber_ref_p: 366420 disambiguations, 369293 queries
  stmt_kills_ref_p: 107503 kills, 5699589 queries
  nonoverlapping_component_refs_p: 0 disambiguations, 26176 queries
  nonoverlapping_refs_since_match_p: 30339 disambiguations, 65400 must 
overlaps, 96698 queries
  aliasing_component_refs_p: 57500 disambiguations, 15464678 queries
  TBAA oracle: 28248334 disambiguations 104710521 queries
   15220245 are in alias set 0
   8905994 queries asked about the same object
   98 queries asked about the same alias set
   0 access volatile
   50371110 are dependent in the DAG
   1964740 are aritificially in conflict with void *

Modref stats:  
  modref kill: 52 kills, 6655 queries
  modref use: 25204 disambiguations, 692151 queries
  modref clobber: 2309709 disambiguations, 21877806 queries
  5320532 tbaa queries (0.243193 per modref query)
  761785 base compares (0.034820 per modref query)

PTA query stats:
  pt_solution_includes: 12539931 disambiguations, 39864841 queries
  pt_solutions_intersect: 1713075 disambiguations, 14023484 queries

Newly we get statistics of the kill oracle itself:
  stmt_kills_ref_p: 107503 kills, 5699589 queries
and the modref part:
  modref kill: 52 kills, 6655 queries
So an improvement over 1 kill using modref I had before. Still not
really great.

Honza

gcc/ChangeLog:

* ipa-modref-tree.c (modref_access_node::update_for_kills): New
member function.
(modref_access_node::merge_for_kills): Likewise.
(modref_access_node::insert_kill): Likewise.
* ipa-modref-tree.h (modref_access_node::update_for_kills,
modref_access_node::merge_for_kills, 
modref_access_node::insert_kill):
Declare.
(modref_access_node::useful_for_kill): New member function.
* ipa-modref.c (modref_summary::useful_p): Release useless kills.
(lto_modref_summary): Add kills.
(modref_summary::dump): Dump kills.
(record_access): Add modref_access_node parameter.
(record_access_lto): Likewise.
(merge_call_side_effects): Merge kills.
(analyze_call): Add ALWAYS_EXECUTED param and pass it around.
(struct summary_ptrs): Add always_executed field.
(analyze_load): Update.
(analyze_store): Update; record kills.
(analyze_stmt): Add always_executed; record kills in clobbers.
(analyze_function): Track always_executed.
(modref_summaries::duplicate): Duplicate kills.
(update_signature): Release kills.
* ipa-modref.h (struct modref_summary): Add kills.
* tree-ssa-alias.c (alias_stats): Add kill stats.
(dump_alias_stats): Dump kill stats.
(store_kills_ref_p): Break out from ...
(stmt_kills_ref_p): Use it; handle modref info based kills.

gcc/testsuite/ChangeLog:

2021-11-14  Jan Hubicka  

* gcc.dg/tree-ssa/modref-dse-3.c: New test.


diff --git a/gcc/ipa-modref-tree.c b/gcc/ipa-modref-tree.c
index 6fc2b7298f4..bbe23a5a211 100644
--- a/gcc/ipa-modref-tree.c
+++ b/gcc/ipa-modref-tree.c
@@ -638,6 +638,185 @@ modref_access_node::get_ao_ref (const gcall *stmt, ao_ref 
*ref) const
   return true;
 }
 
+/* Return true A is a subkill.  */
+bool
+modref_access_node::contains_for_kills (const modref_access_node &a) const
+{
+  poly_int64 aoffset_adj = 0;
+
+  

Re: [PATCH][_GLIBCXX_DEBUG] Code cleanup/simplification

2021-11-14 Thread Jonathan Wakely via Gcc-patches
On Sat, 13 Nov 2021 at 13:19, François Dumont via Libstdc++
 wrote:
>
>  libstdc++: [_GLIBCXX_DEBUG] Remove _Safe_container<>::_M_safe()
>
>  Container code cleanup to get rid of _Safe_container<>::_M_safe()
> and just
>  _Safe:: calls which use normal inheritance. Also remove several
> usages of _M_base()
>  which can be most of the time omitted and sometimes replaced with
> explicit _Base::
>  calls.
>
>  libstdc++-v3/ChangeLog:
>
>  * include/debug/safe_container.h
> (_Safe_container<>::_M_safe): Remove.
>  * include/debug/deque
> (deque::operator=(initializer_list<>)): Replace
>  _M_base() call with _Base:: call.
>  (deque::operator[](size_type)): Likewise.
>  * include/debug/forward_list (forward_list(forward_list&&,
> const allocator_type&):
>  Remove _M_safe() and _M_base() calls.
>  (forward_list::operator=(initializer_list<>)): Remove
> _M_base() calls.
>  (forward_list::splice_after, forward_list::merge): Likewise.
>  * include/debug/list (list(list&&, const allocator_type&)):
>  Remove _M_safe() and _M_base() calls.
>  (list::operator=(initializer_list<>)): Remove _M_base() calls.
>  (list::splice, list::merge): Likewise.
>  * include/debug/map.h (map(map&&, const allocator_type&)):
>  Remove _M_safe() and _M_base() calls.
>  (map::operator=(initializer_list<>)): Remove _M_base() calls.
>  * include/debug/multimap.h (multimap(multimap&&, const
> allocator_type&)):
>  Remove _M_safe() and _M_base() calls.
>  (multimap::operator=(initializer_list<>)): Remove _M_base()
> calls.
>  * include/debug/set.h (set(set&&, const allocator_type&)):
>  Remove _M_safe() and _M_base() calls.
>  (set::operator=(initializer_list<>)): Remove _M_base() calls.
>  * include/debug/multiset.h (multiset(multiset&&, const
> allocator_type&)):
>  Remove _M_safe() and _M_base() calls.
>  (multiset::operator=(initializer_list<>)): Remove _M_base()
> calls.
>  * include/debug/string (basic_string(basic_string&&, const
> allocator_type&)):
>  Remove _M_safe() and _M_base() calls.
>  (basic_string::operator=(initializer_list<>)): Remove
> _M_base() call.
>  (basic_string::operator=(const _CharT*),
> basic_string::operator=(_CharT)): Likewise.
>  (basic_string::operator[](size_type),
> basic_string::operator+=(const basic_string&)): Likewise.
>  (basic_string::operator+=(const _Char*),
> basic_string::operator+=(_CharT)): Likewise.
>  * include/debug/unordered_map
> (unordered_map(unordered_map&&, const allocator_type&)):
>  Remove _M_safe() and _M_base() calls.
>  (unordered_map::operator=(initializer_list<>),
> unordered_map::merge): Remove _M_base() calls.
>  (unordered_multimap(unordered_multimap&&, const
> allocator_type&)):
>  Remove _M_safe() and _M_base() calls.
> (unordered_multimap::operator=(initializer_list<>),
> unordered_multimap::merge):
>  Remove _M_base() calls.
>  * include/debug/unordered_set
> (unordered_set(unordered_set&&, const allocator_type&)):
>  Remove _M_safe() and _M_base() calls.
>  (unordered_set::operator=(initializer_list<>),
> unordered_set::merge): Remove _M_base() calls.
>  (unordered_multiset(unordered_multiset&&, const
> allocator_type&)):
>  Remove _M_safe() and _M_base() calls.
> (unordered_multiset::operator=(initializer_list<>),
> unordered_multiset::merge):
>  Remove _M_base() calls.
>  * include/debug/vector (vector(vector&&, const
> allocator_type&):
>  Remove _M_safe() and _M_base() calls.
>  (vector::operator=(initializer_list<>)): Remove _M_base()
> calls.
>  (vector::operator[](size_type)): Likewise.
>
> Tested under Linux x86_64 _GLIBCXX_DEBUG mode -std=gnu++14 and -std=gnu++98.
>
> Ok to commit ?

Looks good, please commit, thanks.


Re: [PATCH] rs6000: Fix a handful of 32-bit built-in function problems in the new support

2021-11-14 Thread Segher Boessenkool
Hi!

On Sun, Nov 14, 2021 at 08:17:41AM -0600, Bill Schmidt wrote:
> On 11/11/21 10:50 AM, Bill Schmidt wrote:
> > On 11/11/21 7:11 AM, Segher Boessenkool wrote:
> >> void f(long x) { __builtin_set_texasr(x); }
> >>
> >> built with -m32 -mpowerpc64 gives (in the expand dump):
> >>
> >> void f (long int x)
> >> {
> >>   long long unsigned int _1;
> >>
> >> ;;   basic block 2, loop depth 0
> >> ;;pred:   ENTRY
> >>   _1 = (long long unsigned int) x_2(D);
> >>   __builtin_set_texasr (_1); [tail call]
> >>   return;
> >> ;;succ:   EXIT
> >>
> >> }
> >>
> >> The builtins have a "long long" argument in the existing code, in this
> >> configuration.  And this is not the same as "long" here.
> > Hm, strange.  I'll have to go back and revisit this.  Something subtle 
> > going on.
> >
> So, we have one of the more bizarre API situations here that I've ever seen.
> 
> We have three 64-bit HTM registers:  TEXASR, TFHAR, and TFIAR.  We also have 
> the
> 32-bit TEXASRU, which is the upper half of TEXASR.  The documented interfaces 
> for
> reading and modifying these registers are:
> 
>   unsigned long __builtin_get_texasr (void);
>   unsigned long __builtin_get_texasru (void);
>   unsigned long __builtin_get_tfhar (void);
>   unsigned long __builtin_get_tfiar (void);
> 
>   void __builtin_set_texasr (unsigned long);
>   void __builtin_set_texasru (unsigned long);
>   void __builtin_set_tfhar (unsigned long);
>   void __builtin_set_tfiar (unsigned long);
> 
> In reality, these interfaces are defined this way for pure 32-bit and pure 
> 64-bit,
> but for -m32 -mpowerpc64 we have some grotesque hackery that overrides the
> expected interfaces to be:
> 
>   unsigned long long __builtin_get_texasr (void);
>   unsigned long long __builtin_get_texasru (void);
>   unsigned long long __builtin_get_tfhar (void);
>   unsigned long long __builtin_get_tfiar (void);
> 
>   void __builtin_set_texasr (unsigned long long);
>   void __builtin_set_texasru (unsigned long long);
>   void __builtin_set_tfhar (unsigned long long);
>   void __builtin_set_tfiar (unsigned long long);

Yes.  Everything in -m32 -mpowerpc64 should follow the 32-bit ABI.  If
you consider these builtins part of the ABI (are they documented there?)
then this is simply a bug.

> An undocumented conditional API is a really, really bad idea, given that it
> forces users of this interface for general code to #ifdef on the -m32
> -mpowerpc64 condition.  Not to mention treating 32-bit registers the same as
> 64-bit ones, and only modifying half the register on a 32-bit system.  (Is HTM
> even supported on a 32-bit system?)

There are no pure 32 bit CPUs that implement HTM, to my knowledge.  But
of course HTM works fine with SF=0 (that is the reason TEXASRU exists!
Compare to TB and TBU).

> It would have likely been better to have one consistent interface, using
> int for TEXASRU and long long for the others, even though that requires
> dealing with two registers for the 32-bit case; but that's all water under
> the bridge.  We have what we have.

"long" for the others, actually.

TFHAR and TFIAR hold code addresses.  TEXASR gets only the low 32 bits
of the register read, that is why TEXASRU exists :-)

> If I sound irritated, it's because, just for this case, I'll have to add a
> bunch of extra machinery to track up to two prototypes for each builtin
> function, and perform conditional initialization when it applies.  The one
> good thing is that these already have a builtin attribute "htmspr" that I
> can key off of to do the extra processing.

Another option might be to finally fix this.  There still are shipping
CPUs that support HTM ;-)

And essentially no one uses -m32 -mpowerpc64 on Linux or AIX.  On Linux
because ucontext_t and jmp_buf do not deal with the high half of the
registers, and iiuc on AIX the kernel doesn't deal with it in context
switches even.  Darwin does use it, but afaik no one runs Darwin on a
CPU with HTM.

> And somebody ought to fix the misleading documentation...

Yes.

Do you want to fix this mess?  I will take a patch using "long" for
all these registers and builtins (just like we have for essentially all
other SPRs!)


Segher


Re: [COMMITTED] Do not pass NULL to memset in ssa_global_cache.

2021-11-14 Thread Aldy Hernandez via Gcc-patches
Ok, done.

Pushed.

Aldy

On Sun, Nov 14, 2021 at 2:53 PM Martin Liška  wrote:
>
> On 11/14/21 14:15, Aldy Hernandez wrote:
> > The code computing ranges in PHIs in the path solver reuses the
> > temporary ssa_global_cache by calling its clear method.  Calling it on
> > an empty cache causes us to call memset with NULL.
> >
> > [The testcase doesn't fail without the patch.  I suppose it needs some
> > ubsan magic, or to live somewhere else?]
>
> Well, the actual test-case is the compiler itself as the source code.
> Anyway, the UBSAN error happens for thousands of test-cases when run
> during bootstrap-ubsan.mk config file. That said, I would remove
> the added test-case.
>
> Cheers,
> Martin
>
> >
> > Tested on x86-64 Linux.
> >
> > gcc/ChangeLog:
> >
> >   PR tree-optimization/103229
> >   * gimple-range-cache.cc (ssa_global_cache::clear): Do not pass
> >   null value to memset.
> >
> > gcc/testsuite/ChangeLog:
> >
> >   * gcc.dg/pr103229.c: New test.
> > ---
> >   gcc/gimple-range-cache.cc   |  3 ++-
> >   gcc/testsuite/gcc.dg/pr103229.c | 10 ++
> >   2 files changed, 12 insertions(+), 1 deletion(-)
> >   create mode 100644 gcc/testsuite/gcc.dg/pr103229.c
> >
> > diff --git a/gcc/gimple-range-cache.cc b/gcc/gimple-range-cache.cc
> > index a63e20e7e49..b347edeb474 100644
> > --- a/gcc/gimple-range-cache.cc
> > +++ b/gcc/gimple-range-cache.cc
> > @@ -651,7 +651,8 @@ ssa_global_cache::clear_global_range (tree name)
> >   void
> >   ssa_global_cache::clear ()
> >   {
> > -  memset (m_tab.address(), 0, m_tab.length () * sizeof (irange *));
> > +  if (m_tab.address ())
> > +memset (m_tab.address(), 0, m_tab.length () * sizeof (irange *));
> >   }
> >
> >   // Dump the contents of the global cache to F.
> > diff --git a/gcc/testsuite/gcc.dg/pr103229.c 
> > b/gcc/testsuite/gcc.dg/pr103229.c
> > new file mode 100644
> > index 000..96ef9aff67c
> > --- /dev/null
> > +++ b/gcc/testsuite/gcc.dg/pr103229.c
> > @@ -0,0 +1,10 @@
> > +// { dg-do compile }
> > +// { dg-options "-O -w" }
> > +
> > +int main() {
> > +  int i;
> > +  for (; i;)
> > +;
> > +
> > +  return 0;
> > +}
> >
>
From 8a601f9bc45f9faaa91f18d58ba71b141acff701 Mon Sep 17 00:00:00 2001
From: Aldy Hernandez 
Date: Sun, 14 Nov 2021 16:17:36 +0100
Subject: [PATCH] Remove gcc.dg/pr103229.c

gcc/testsuite/ChangeLog:

	* gcc.dg/pr103229.c: Removed.
---
 gcc/testsuite/gcc.dg/pr103229.c | 10 --
 1 file changed, 10 deletions(-)
 delete mode 100644 gcc/testsuite/gcc.dg/pr103229.c

diff --git a/gcc/testsuite/gcc.dg/pr103229.c b/gcc/testsuite/gcc.dg/pr103229.c
deleted file mode 100644
index 96ef9aff67c..000
--- a/gcc/testsuite/gcc.dg/pr103229.c
+++ /dev/null
@@ -1,10 +0,0 @@
-// { dg-do compile }
-// { dg-options "-O -w" }
-
-int main() {
-  int i;
-  for (; i;)
-;
-
-  return 0;
-}
-- 
2.31.1



Re: [PATCH] rs6000: Fix a handful of 32-bit built-in function problems in the new support

2021-11-14 Thread Bill Schmidt via Gcc-patches


On 11/11/21 10:50 AM, Bill Schmidt wrote:
> On 11/11/21 7:11 AM, Segher Boessenkool wrote:
>> void f(long x) { __builtin_set_texasr(x); }
>>
>> built with -m32 -mpowerpc64 gives (in the expand dump):
>>
>> void f (long int x)
>> {
>>   long long unsigned int _1;
>>
>> ;;   basic block 2, loop depth 0
>> ;;pred:   ENTRY
>>   _1 = (long long unsigned int) x_2(D);
>>   __builtin_set_texasr (_1); [tail call]
>>   return;
>> ;;succ:   EXIT
>>
>> }
>>
>> The builtins have a "long long" argument in the existing code, in this
>> configuration.  And this is not the same as "long" here.
> Hm, strange.  I'll have to go back and revisit this.  Something subtle going 
> on.
>
So, we have one of the more bizarre API situations here that I've ever seen.

We have three 64-bit HTM registers:  TEXASR, TFHAR, and TFIAR.  We also have the
32-bit TEXASRU, which is the upper half of TEXASR.  The documented interfaces for
reading and modifying these registers are:

  unsigned long __builtin_get_texasr (void);
  unsigned long __builtin_get_texasru (void);
  unsigned long __builtin_get_tfhar (void);
  unsigned long __builtin_get_tfiar (void);

  void __builtin_set_texasr (unsigned long);
  void __builtin_set_texasru (unsigned long);
  void __builtin_set_tfhar (unsigned long);
  void __builtin_set_tfiar (unsigned long);

In reality, these interfaces are defined this way for pure 32-bit and pure 
64-bit,
but for -m32 -mpowerpc64 we have some grotesque hackery that overrides the
expected interfaces to be:

  unsigned long long __builtin_get_texasr (void);
  unsigned long long __builtin_get_texasru (void);
  unsigned long long __builtin_get_tfhar (void);
  unsigned long long __builtin_get_tfiar (void);

  void __builtin_set_texasr (unsigned long long);
  void __builtin_set_texasru (unsigned long long);
  void __builtin_set_tfhar (unsigned long long);
  void __builtin_set_tfiar (unsigned long long);

An undocumented conditional API is a really, really bad idea, given that it
forces users of this interface for general code to #ifdef on the -m32
-mpowerpc64 condition.  Not to mention treating 32-bit registers the same as
64-bit ones, and only modifying half the register on a 32-bit system.  (Is HTM
even supported on a 32-bit system?)

It would have likely been better to have one consistent interface, using
int for TEXASRU and long long for the others, even though that requires
dealing with two registers for the 32-bit case; but that's all water under
the bridge.  We have what we have.

If I sound irritated, it's because, just for this case, I'll have to add a
bunch of extra machinery to track up to two prototypes for each builtin
function, and perform conditional initialization when it applies.  The one
good thing is that these already have a builtin attribute "htmspr" that I
can key off of to do the extra processing.

And somebody ought to fix the misleading documentation...

Thanks,
Bill



Re: [PATCH 0/6] RFC: adding support to GCC for detecting trust boundaries

2021-11-14 Thread Miguel Ojeda via Gcc-patches
On Sat, Nov 13, 2021 at 9:37 PM David Malcolm  wrote:
>
>   #define __user __attribute__((untrusted))
>
> where my patched GCC treats
>   T *
> vs
>   T __attribute__((untrusted)) *
> as being different types and thus the C frontend can complain (even without
> -fanalyzer) about e.g.:

This one sounds similar to the `Untrusted` wrapper I suggested for
the Rust side -- we would have a method to "extract and trust" the
value (instead of a cast).

> Patch 2 in the kit adds:
>   __attribute__((returns_zero_on_success))
> and
>   __attribute__((returns_nonzero_on_success))
> as hints to the analyzer that it's worth bifurcating the analysis of
> such functions (to explore failure vs success, and thus to better
> explore error-handling paths).  It's also a hint to the human reader of
> the source code.

These two sound quite nice to have for most C projects. Would it be
useful to generalize to different values than 0/non-0? e.g.
`returns_on_success(0)` and `returns_on_failure(0)`.

Cheers,
Miguel


Re: [COMMITTED] Do not pass NULL to memset in ssa_global_cache.

2021-11-14 Thread Martin Liška

On 11/14/21 14:15, Aldy Hernandez wrote:

The code computing ranges in PHIs in the path solver reuses the
temporary ssa_global_cache by calling its clear method.  Calling it on
an empty cache causes us to call memset with NULL.

[The testcase doesn't fail without the patch.  I suppose it needs some
ubsan magic, or to live somewhere else?]


Well, the actual test-case is the compiler itself as the source code.
Anyway, the UBSAN error happens for thousands of test-cases when run
during bootstrap-ubsan.mk config file. That said, I would remove
the added test-case.

Cheers,
Martin



Tested on x86-64 Linux.

gcc/ChangeLog:

PR tree-optimization/103229
* gimple-range-cache.cc (ssa_global_cache::clear): Do not pass
null value to memset.

gcc/testsuite/ChangeLog:

* gcc.dg/pr103229.c: New test.
---
  gcc/gimple-range-cache.cc   |  3 ++-
  gcc/testsuite/gcc.dg/pr103229.c | 10 ++
  2 files changed, 12 insertions(+), 1 deletion(-)
  create mode 100644 gcc/testsuite/gcc.dg/pr103229.c

diff --git a/gcc/gimple-range-cache.cc b/gcc/gimple-range-cache.cc
index a63e20e7e49..b347edeb474 100644
--- a/gcc/gimple-range-cache.cc
+++ b/gcc/gimple-range-cache.cc
@@ -651,7 +651,8 @@ ssa_global_cache::clear_global_range (tree name)
  void
  ssa_global_cache::clear ()
  {
-  memset (m_tab.address(), 0, m_tab.length () * sizeof (irange *));
+  if (m_tab.address ())
+memset (m_tab.address(), 0, m_tab.length () * sizeof (irange *));
  }
  
  // Dump the contents of the global cache to F.

diff --git a/gcc/testsuite/gcc.dg/pr103229.c b/gcc/testsuite/gcc.dg/pr103229.c
new file mode 100644
index 000..96ef9aff67c
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/pr103229.c
@@ -0,0 +1,10 @@
+// { dg-do compile }
+// { dg-options "-O -w" }
+
+int main() {
+  int i;
+  for (; i;)
+;
+
+  return 0;
+}





[PATCH] Enhance unordered container merge

2021-11-14 Thread François Dumont via Gcc-patches

    libstdc++: Unordered containers merge re-use hash code.

    When merging between 2 unordered containers with same hasher we can 
re-use

    the cached hash code if any.

    Use previous insert iterator as a hint for the next insert in case 
of multi container.



    * include/bits/hashtable_policy.h 
(_Hash_code_base<>::_ReuseOrComputeHash<>): New.
(_Hash_code_base<>::_M_hash_code<_H2>(const _H2&, const 
_Hash_node_value<>&)): New.
    * include/bits/hashtable.h (_Hashtable<>::_M_merge_unique): 
Use latter.

    (_Hashtable<>::_M_merge_multi): Likewise.
    * 
testsuite/23_containers/unordered_multiset/modifiers/merge.cc (test05): 
New test.
    * testsuite/23_containers/unordered_set/modifiers/merge.cc 
(test04): New test.


Tested under Linux x86_64.

Ok to commit ?

François

diff --git a/libstdc++-v3/include/bits/hashtable.h b/libstdc++-v3/include/bits/hashtable.h
index 0e949d73614..6e2d4c10cfe 100644
--- a/libstdc++-v3/include/bits/hashtable.h
+++ b/libstdc++-v3/include/bits/hashtable.h
@@ -1076,7 +1076,8 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
 	{
 	  auto __pos = __i++;
 	  const key_type& __k = _ExtractKey{}(*__pos);
-	  __hash_code __code = this->_M_hash_code(__k);
+	  __hash_code __code
+		= this->_M_hash_code(__src.hash_function(), *__pos._M_cur);
 	  size_type __bkt = _M_bucket_index(__code);
 	  if (_M_find_node(__bkt, __k, __code) == nullptr)
 		{
@@ -1099,14 +1100,15 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
 	  node_type>, "Node types are compatible");
 	  __glibcxx_assert(get_allocator() == __src.get_allocator());
 
+	  __node_ptr __hint = nullptr;
 	  this->reserve(size() + __src.size());
 	  for (auto __i = __src.cbegin(), __end = __src.cend(); __i != __end;)
 	{
 	  auto __pos = __i++;
-	  const key_type& __k = _ExtractKey{}(*__pos);
-	  __hash_code __code = this->_M_hash_code(__k);
+	  __hash_code __code
+		= this->_M_hash_code(__src.hash_function(), *__pos._M_cur);
 	  auto __nh = __src.extract(__pos);
-	  _M_insert_multi_node(nullptr, __code, __nh._M_ptr);
+	  __hint = _M_insert_multi_node(__hint, __code, __nh._M_ptr)._M_cur;
 	  __nh._M_ptr = nullptr;
 	}
 	}
diff --git a/libstdc++-v3/include/bits/hashtable_policy.h b/libstdc++-v3/include/bits/hashtable_policy.h
index c0295b75963..95a1c45e634 100644
--- a/libstdc++-v3/include/bits/hashtable_policy.h
+++ b/libstdc++-v3/include/bits/hashtable_policy.h
@@ -1217,6 +1217,26 @@ namespace __detail
   friend struct _Local_iterator_base<_Key, _Value, _ExtractKey,
 	 _Hash, _RangeHash, _Unused, false>;
 
+  template
+	struct _ReuseOrComputeHash
+	{
+	  std::size_t
+	  operator()(const _Hash_node_value<_Value, __with_cache>& __n) const
+	  { return _M_hash_code_base._M_hash_code(_ExtractKey{}(__n._M_v())); }
+
+	  const _Hash_code_base& _M_hash_code_base;
+	};
+
+  template
+	struct _ReuseOrComputeHash<_Hn, _Hn, true>
+	{
+	  _ReuseOrComputeHash(const _Hash_code_base&) { }
+
+	  std::size_t
+	  operator()(const _Hash_node_value<_Value, true>& __n) const
+	  { return __n._M_hash_code; }
+	};
+
 public:
   typedef _Hash	hasher;
 
@@ -1250,6 +1270,12 @@ namespace __detail
 	  return _M_hash()(__k);
 	}
 
+  template
+	__hash_code
+	_M_hash_code(const _H2&,
+		const _Hash_node_value<_Value, __cache_hash_code>& __n) const
+	{ return _ReuseOrComputeHash<_Hash, _H2, __cache_hash_code>{ *this }(__n); }
+
   std::size_t
   _M_bucket_index(__hash_code __c, std::size_t __bkt_count) const
   { return _RangeHash{}(__c, __bkt_count); }
diff --git a/libstdc++-v3/testsuite/23_containers/unordered_multiset/modifiers/merge.cc b/libstdc++-v3/testsuite/23_containers/unordered_multiset/modifiers/merge.cc
index 1ed2ce234a1..07b8a344169 100644
--- a/libstdc++-v3/testsuite/23_containers/unordered_multiset/modifiers/merge.cc
+++ b/libstdc++-v3/testsuite/23_containers/unordered_multiset/modifiers/merge.cc
@@ -17,6 +17,7 @@
 
 // { dg-do run { target c++17 } }
 
+#include 
 #include 
 #include 
 #include 
@@ -105,6 +106,26 @@ test04()
   VERIFY( c2.empty() );
 }
 
+void
+test05()
+{
+  const std::unordered_multiset c0{ "abcd", "abcd", "efgh", "efgh", "ijkl", "ijkl" };
+  std::unordered_multiset c1 = c0;
+  std::unordered_set c2( c0.begin(), c0.end() );
+
+  c1.merge(c2);
+  VERIFY( c1.size() == (1.5 * c0.size()) );
+  for (auto& i : c1)
+VERIFY( c1.count(i) == (1.5 * c0.count(i)) );
+  VERIFY( c2.empty() );
+
+  c1.clear();
+  c2.insert( c0.begin(), c0.end() );
+  c1.merge(std::move(c2));
+  VERIFY( c1.size() == (0.5 * c0.size()) );
+  VERIFY( c2.empty() );
+}
+
 int
 main()
 {
@@ -112,4 +133,5 @@ main()
   test02();
   test03();
   test04();
+  test05();
 }
diff --git a/libstdc++-v3/testsuite/23_containers/unordered_set/modifiers/merge.cc b/libstdc++-v3/testsuite/23_containers/unordered_set/modifiers/merge.cc
index c9c8a60fd54..0e184b10c60 100644
--- 

[COMMITTED] Do not pass NULL to memset in ssa_global_cache.

2021-11-14 Thread Aldy Hernandez via Gcc-patches
The code computing ranges in PHIs in the path solver reuses the
temporary ssa_global_cache by calling its clear method.  Calling it on
an empty cache causes us to call memset with NULL.

[The testcase doesn't fail without the patch.  I suppose it needs some
ubsan magic, or to live somewhere else?]

Tested on x86-64 Linux.

gcc/ChangeLog:

PR tree-optimization/103229
* gimple-range-cache.cc (ssa_global_cache::clear): Do not pass
null value to memset.

gcc/testsuite/ChangeLog:

* gcc.dg/pr103229.c: New test.
---
 gcc/gimple-range-cache.cc   |  3 ++-
 gcc/testsuite/gcc.dg/pr103229.c | 10 ++
 2 files changed, 12 insertions(+), 1 deletion(-)
 create mode 100644 gcc/testsuite/gcc.dg/pr103229.c

diff --git a/gcc/gimple-range-cache.cc b/gcc/gimple-range-cache.cc
index a63e20e7e49..b347edeb474 100644
--- a/gcc/gimple-range-cache.cc
+++ b/gcc/gimple-range-cache.cc
@@ -651,7 +651,8 @@ ssa_global_cache::clear_global_range (tree name)
 void
 ssa_global_cache::clear ()
 {
-  memset (m_tab.address(), 0, m_tab.length () * sizeof (irange *));
+  if (m_tab.address ())
+memset (m_tab.address(), 0, m_tab.length () * sizeof (irange *));
 }
 
 // Dump the contents of the global cache to F.
diff --git a/gcc/testsuite/gcc.dg/pr103229.c b/gcc/testsuite/gcc.dg/pr103229.c
new file mode 100644
index 000..96ef9aff67c
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/pr103229.c
@@ -0,0 +1,10 @@
+// { dg-do compile }
+// { dg-options "-O -w" }
+
+int main() {
+  int i;
+  for (; i;)
+;
+
+  return 0;
+}
-- 
2.31.1



Re: [PATCH] tsan: remove not needed -ldl in options

2021-11-14 Thread Jan Hubicka via Gcc-patches
> Tested and pushed to master as obvious.
> 
> Martin
> 
> gcc/testsuite/ChangeLog:
> 
>   * c-c++-common/tsan/free_race.c: Remove unnecessary -ldl.
>   * c-c++-common/tsan/free_race2.c: Likewise.

Thank you, I cut it from the other testcase and forgot to remove
it.  Patch is OK.

Honza


[PATCH] tsan: remove not needed -ldl in options

2021-11-14 Thread Martin Liška

Tested and pushed to master as obvious.

Martin

gcc/testsuite/ChangeLog:

* c-c++-common/tsan/free_race.c: Remove unnecessary -ldl.
* c-c++-common/tsan/free_race2.c: Likewise.
---
 gcc/testsuite/c-c++-common/tsan/free_race.c  | 2 +-
 gcc/testsuite/c-c++-common/tsan/free_race2.c | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/gcc/testsuite/c-c++-common/tsan/free_race.c 
b/gcc/testsuite/c-c++-common/tsan/free_race.c
index 831c23e8859..390ec307dd8 100644
--- a/gcc/testsuite/c-c++-common/tsan/free_race.c
+++ b/gcc/testsuite/c-c++-common/tsan/free_race.c
@@ -1,5 +1,5 @@
 /* { dg-shouldfail "tsan" } */
-/* { dg-additional-options "-ldl -fno-ipa-modref" } */
+/* { dg-additional-options "-fno-ipa-modref" } */
 
 #include 
 
diff --git a/gcc/testsuite/c-c++-common/tsan/free_race2.c b/gcc/testsuite/c-c++-common/tsan/free_race2.c

index a74d9dc3940..89d12c3175b 100644
--- a/gcc/testsuite/c-c++-common/tsan/free_race2.c
+++ b/gcc/testsuite/c-c++-common/tsan/free_race2.c
@@ -1,5 +1,5 @@
 /* { dg-shouldfail "tsan" } */
-/* { dg-additional-options "-ldl -fno-ipa-modref" } */
+/* { dg-additional-options "-fno-ipa-modref" } */
 
 #include 
 
--

2.33.1



Re: Cleanup handling of modref access_nodes in tree-ssa-alias and tree-ssa-dse

2021-11-14 Thread Jan Hubicka via Gcc-patches
Hi,
this is the variant I committed.  Commonizing the code exposed that I can
drop memory walking when parameter passed is NULL (under assumption of
flag_delete_null_pointer_checks) since it can not point to useful
memory.  This was already done in tree-ssa-alias, but not in
tree-ssa-dse.  This needed bit of testsuite compensation for cases where
we optimize out invalid memory accesses.

Bootstrapped/regtested x86_64-linux, committed.

gcc/ChangeLog:

2021-11-14  Jan Hubicka  

* ipa-modref-tree.c (modref_access_node::get_call_arg): New member
function.
(modref_access_node::get_ao_ref): Likewise.
* ipa-modref-tree.h (modref_access_node::get_call_arg): Declare.
(modref_access_node::get_ao_ref): Declare.
* tree-ssa-alias.c (modref_may_conflict): Use new accessors.
* tree-ssa-dse.c (dse_optimize_call): Use new accessors.

gcc/testsuite/ChangeLog:

2021-11-14  Jan Hubicka  

* c-c++-common/asan/null-deref-1.c: Update template.
* c-c++-common/tsan/free_race.c: Update template.
* c-c++-common/tsan/free_race2.c: Update template.
* gcc.dg/ipa/ipa-sra-4.c: Update template.

diff --git a/gcc/ipa-modref-tree.c b/gcc/ipa-modref-tree.c
index 70ec71c3808..6fc2b7298f4 100644
--- a/gcc/ipa-modref-tree.c
+++ b/gcc/ipa-modref-tree.c
@@ -25,6 +25,8 @@ along with GCC; see the file COPYING3.  If not see
 #include "tree.h"
 #include "ipa-modref-tree.h"
 #include "selftest.h"
+#include "tree-ssa-alias.h"
+#include "gimple.h"
 
 /* Return true if both accesses are the same.  */
 bool
@@ -603,6 +605,39 @@ modref_access_node::dump (FILE *out)
   fprintf (out, "\n");
 }
 
+/* Return tree corresponding to parameter of the range in STMT.  */
+tree
+modref_access_node::get_call_arg (const gcall *stmt) const
+{
+  if (parm_index == MODREF_UNKNOWN_PARM)
+return NULL;
+  if (parm_index == MODREF_STATIC_CHAIN_PARM)
+return gimple_call_chain (stmt);
+  /* MODREF_RETSLOT_PARM should not happen in access trees since the store
+ is seen explicitly in the caller.  */
+  gcc_checking_assert (parm_index >= 0);
+  if (parm_index >= (int)gimple_call_num_args (stmt))
+return NULL;
+  return gimple_call_arg (stmt, parm_index);
+}
+
+/* Return tree corresponding to parameter of the range in STMT.  */
+bool
+modref_access_node::get_ao_ref (const gcall *stmt, ao_ref *ref) const
+{
+  tree arg;
+
+  if (!parm_offset_known || !(arg = get_call_arg (stmt)))
+return false;
+  poly_offset_int off = (poly_offset_int)offset
+   + ((poly_offset_int)parm_offset << LOG2_BITS_PER_UNIT);
+  poly_int64 off2;
+  if (!off.to_shwi (&off2))
+return false;
+  ao_ref_init_from_ptr_and_range (ref, arg, true, off2, size, max_size);
+  return true;
+}
+
 #if CHECKING_P
 
 namespace selftest {
diff --git a/gcc/ipa-modref-tree.h b/gcc/ipa-modref-tree.h
index 1fafd59debe..2fcabe480bd 100644
--- a/gcc/ipa-modref-tree.h
+++ b/gcc/ipa-modref-tree.h
@@ -77,7 +77,7 @@ struct GTY(()) modref_access_node
  This has to be limited in order to keep dataflow finite.  */
   unsigned char adjustments;
 
-  /* Return true if access node holds no useful info.  */
+  /* Return true if access node holds some useful info.  */
   bool useful_p () const
 {
   return parm_index != MODREF_UNKNOWN_PARM;
@@ -88,10 +88,13 @@ struct GTY(()) modref_access_node
   bool operator == (modref_access_node ) const;
   /* Return true if range info is useful.  */
   bool range_info_useful_p () const;
+  /* Return tree corresponding to parameter of the range in STMT.  */
+  tree get_call_arg (const gcall *stmt) const;
+  /* Build ao_ref corresponding to the access and return true if succesful.  */
+  bool get_ao_ref (const gcall *stmt, class ao_ref *ref) const;
   /* Insert A into vector ACCESSES.  Limit size of vector to MAX_ACCESSES and
  if RECORD_ADJUSTMENT is true keep track of adjustment counts.
- Return 0 if nothing changed, 1 is insertion suceeded and -1 if
- failed.  */
+ Return 0 if nothing changed, 1 is insertion suceeded and -1 if failed.  */
   static int insert (vec  *,
 modref_access_node a, size_t max_accesses,
 bool record_adjustments);
diff --git a/gcc/testsuite/c-c++-common/asan/null-deref-1.c 
b/gcc/testsuite/c-c++-common/asan/null-deref-1.c
index bae016d6419..c967b29b9e2 100644
--- a/gcc/testsuite/c-c++-common/asan/null-deref-1.c
+++ b/gcc/testsuite/c-c++-common/asan/null-deref-1.c
@@ -1,5 +1,5 @@
 /* { dg-do run } */
-/* { dg-options "-fno-omit-frame-pointer -fno-shrink-wrap" } */
+/* { dg-options "-fno-omit-frame-pointer -fno-shrink-wrap -fno-ipa-modref" } */
 /* { dg-additional-options "-mno-omit-leaf-frame-pointer" { target { i?86-*-* 
x86_64-*-* } } } */
 /* { dg-shouldfail "asan" } */
 
diff --git a/gcc/testsuite/c-c++-common/tsan/free_race.c 
b/gcc/testsuite/c-c++-common/tsan/free_race.c
index 258f7b7420d..831c23e8859 100644
--- a/gcc/testsuite/c-c++-common/tsan/free_race.c
+++ 

Re: [r12-5236 Regression] FAIL: gcc.dg/tree-prof/merge_block.c scan-tree-dump-not optimized "Invalid sum" on Linux/x86_64

2021-11-14 Thread Jan Hubicka via Gcc-patches
> On Linux/x86_64,
> 
> 5aa91072e24c1e16a5ec641b48b64c9c9f199f13 is the first bad commit
> commit 5aa91072e24c1e16a5ec641b48b64c9c9f199f13
> Author: Jan Hubicka 
> Date:   Sat Nov 13 22:25:23 2021 +0100
> 
> Implement DSE of dead functions calls storing memory.
> 
> caused
> 
> FAIL: c-c++-common/tsan/free_race2.c   -O2  execution test
> FAIL: c-c++-common/tsan/free_race.c   -O2  execution test
> FAIL: gcc.dg/ipa/ipa-sra-4.c scan-ipa-dump-times sra "Will split parameter" 2
> FAIL: gcc.dg/tree-prof/merge_block.c scan-tree-dump-not optimized "Invalid 
> sum"
> 
> with GCC configured with
> 
> ../../gcc/configure 
> --prefix=/local/skpandey/gccwork/toolwork/gcc-bisect-master/master/r12-5236/usr
>  --enable-clocale=gnu --with-system-zlib --with-demangler-in-ld 
> --with-fpmath=sse --enable-languages=c,c++,fortran --enable-cet --without-isl 
> --enable-libmpx x86_64-linux --disable-bootstrap
> 
> To reproduce:
> 
> $ cd {build_dir}/gcc && make check 
> RUNTESTFLAGS="tsan.exp=c-c++-common/tsan/free_race2.c 
> --target_board='unix{-m64}'"
> $ cd {build_dir}/gcc && make check 
> RUNTESTFLAGS="tsan.exp=c-c++-common/tsan/free_race2.c 
> --target_board='unix{-m64\ -march=cascadelake}'"
> $ cd {build_dir}/gcc && make check 
> RUNTESTFLAGS="tsan.exp=c-c++-common/tsan/free_race.c 
> --target_board='unix{-m64}'"
> $ cd {build_dir}/gcc && make check 
> RUNTESTFLAGS="tsan.exp=c-c++-common/tsan/free_race.c 
> --target_board='unix{-m64\ -march=cascadelake}'"
> $ cd {build_dir}/gcc && make check 
> RUNTESTFLAGS="ipa.exp=gcc.dg/ipa/ipa-sra-4.c --target_board='unix{-m32}'"
> $ cd {build_dir}/gcc && make check 
> RUNTESTFLAGS="ipa.exp=gcc.dg/ipa/ipa-sra-4.c --target_board='unix{-m32\ 
> -march=cascadelake}'"
> $ cd {build_dir}/gcc && make check 
> RUNTESTFLAGS="ipa.exp=gcc.dg/ipa/ipa-sra-4.c --target_board='unix{-m64}'"
> $ cd {build_dir}/gcc && make check 
> RUNTESTFLAGS="ipa.exp=gcc.dg/ipa/ipa-sra-4.c --target_board='unix{-m64\ 
> -march=cascadelake}'"

In these two cases we do DSE and the testcase is no longer getting what
it wants (invalid store or SRA transform).  I have a patch adding
-fno-ipa-modref and will commit it.

> $ cd {build_dir}/gcc && make check 
> RUNTESTFLAGS="tree-prof.exp=gcc.dg/tree-prof/merge_block.c 
> --target_board='unix{-m32}'"
> $ cd {build_dir}/gcc && make check 
> RUNTESTFLAGS="tree-prof.exp=gcc.dg/tree-prof/merge_block.c 
> --target_board='unix{-m32\ -march=cascadelake}'"
> $ cd {build_dir}/gcc && make check 
> RUNTESTFLAGS="tree-prof.exp=gcc.dg/tree-prof/merge_block.c 
> --target_board='unix{-m64}'"
> $ cd {build_dir}/gcc && make check 
> RUNTESTFLAGS="tree-prof.exp=gcc.dg/tree-prof/merge_block.c 
> --target_board='unix{-m64\ -march=cascadelake}'"

This seems to be a real bug in complete loop unrolling.  I am not sure what
broke here, but cunrolli now does not get frequencies right.

Honza


Re: [PATCH] Ada, Darwin : Use DSYMUTIL_FOR_TARGET in libgnat/gnarl builds.

2021-11-14 Thread Arnaud Charlet via Gcc-patches
> Most of the time we get away with using the dsymutil that is
> installed with the latest Xcode, however for some cross-compilation
> cases that does not work.
> 
> We now have the ability to specify the correct dsymutil to use for
> the toolchain (--with-dsymutil=) and we should use that specified
> tool for debug link.  Fixes cross-compilers from x86-64 to powerpc.
> 
> Tested on x86_64, i686 and with a cross from x86_64 -> powerpc, and
> with a bootstrap on x86_64-linux.
> 
> OK for master?

OK, thanks!


Cleanup handling of modref access_nodes in tree-ssa-alias and tree-ssa-dse

2021-11-14 Thread Jan Hubicka via Gcc-patches
Hi,
this patch implements the cleanup suggested by Richard to move the code
getting the tree op from access_node and stmt to a common place.  I also unified
the logic to build ao_ref.  While I was at it I also replaced FOR_EACH_* by
range-based for loops since they read better.

Bootstrapped/regtested x86_64-linux, will commit it shortly.
Honza

gcc/ChangeLog:

2021-11-14  Jan Hubicka  

* ipa-modref-tree.c (modref_access_node::get_call_arg): New member
function.
(modref_access_node::get_ao_ref): Likewise.
* ipa-modref-tree.h (modref_access_node::get_call_arg): Declare.
(modref_access_node::get_ao_ref): Declare.
* tree-ssa-alias.c (modref_may_conflict): Use new accessors.
* tree-ssa-dse.c (dse_optimize_call): Use new accessors.

diff --git a/gcc/ipa-modref-tree.c b/gcc/ipa-modref-tree.c
index 70ec71c3808..6fc2b7298f4 100644
--- a/gcc/ipa-modref-tree.c
+++ b/gcc/ipa-modref-tree.c
@@ -25,6 +25,8 @@ along with GCC; see the file COPYING3.  If not see
 #include "tree.h"
 #include "ipa-modref-tree.h"
 #include "selftest.h"
+#include "tree-ssa-alias.h"
+#include "gimple.h"
 
 /* Return true if both accesses are the same.  */
 bool
@@ -603,6 +605,39 @@ modref_access_node::dump (FILE *out)
   fprintf (out, "\n");
 }
 
+/* Return tree corresponding to parameter of the range in STMT.  */
+tree
+modref_access_node::get_call_arg (const gcall *stmt) const
+{
+  if (parm_index == MODREF_UNKNOWN_PARM)
+return NULL;
+  if (parm_index == MODREF_STATIC_CHAIN_PARM)
+return gimple_call_chain (stmt);
+  /* MODREF_RETSLOT_PARM should not happen in access trees since the store
+ is seen explicitly in the caller.  */
+  gcc_checking_assert (parm_index >= 0);
+  if (parm_index >= (int)gimple_call_num_args (stmt))
+return NULL;
+  return gimple_call_arg (stmt, parm_index);
+}
+
+/* Return tree corresponding to parameter of the range in STMT.  */
+bool
+modref_access_node::get_ao_ref (const gcall *stmt, ao_ref *ref) const
+{
+  tree arg;
+
+  if (!parm_offset_known || !(arg = get_call_arg (stmt)))
+return false;
+  poly_offset_int off = (poly_offset_int)offset
+   + ((poly_offset_int)parm_offset << LOG2_BITS_PER_UNIT);
+  poly_int64 off2;
+  if (!off.to_shwi ())
+return false;
+  ao_ref_init_from_ptr_and_range (ref, arg, true, off2, size, max_size);
+  return true;
+}
+
 #if CHECKING_P
 
 namespace selftest {
diff --git a/gcc/ipa-modref-tree.h b/gcc/ipa-modref-tree.h
index 1fafd59debe..2fcabe480bd 100644
--- a/gcc/ipa-modref-tree.h
+++ b/gcc/ipa-modref-tree.h
@@ -77,7 +77,7 @@ struct GTY(()) modref_access_node
  This has to be limited in order to keep dataflow finite.  */
   unsigned char adjustments;
 
-  /* Return true if access node holds no useful info.  */
+  /* Return true if access node holds some useful info.  */
   bool useful_p () const
 {
   return parm_index != MODREF_UNKNOWN_PARM;
@@ -88,10 +88,13 @@ struct GTY(()) modref_access_node
   bool operator == (modref_access_node ) const;
   /* Return true if range info is useful.  */
   bool range_info_useful_p () const;
+  /* Return tree corresponding to parameter of the range in STMT.  */
+  tree get_call_arg (const gcall *stmt) const;
+  /* Build ao_ref corresponding to the access and return true if succesful.  */
+  bool get_ao_ref (const gcall *stmt, class ao_ref *ref) const;
   /* Insert A into vector ACCESSES.  Limit size of vector to MAX_ACCESSES and
  if RECORD_ADJUSTMENT is true keep track of adjustment counts.
- Return 0 if nothing changed, 1 is insertion suceeded and -1 if
- failed.  */
+ Return 0 if nothing changed, 1 is insertion suceeded and -1 if failed.  */
   static int insert (vec  *,
 modref_access_node a, size_t max_accesses,
 bool record_adjustments);
diff --git a/gcc/tree-ssa-alias.c b/gcc/tree-ssa-alias.c
index 2965902912f..ba055730558 100644
--- a/gcc/tree-ssa-alias.c
+++ b/gcc/tree-ssa-alias.c
@@ -2535,13 +2535,10 @@ refs_output_dependent_p (tree store1, tree store2)
IF TBAA_P is true, use TBAA oracle.  */
 
 static bool
-modref_may_conflict (const gimple *stmt,
+modref_may_conflict (const gcall *stmt,
 modref_tree  *tt, ao_ref *ref, bool tbaa_p)
 {
   alias_set_type base_set, ref_set;
-  modref_base_node  *base_node;
-  modref_ref_node  *ref_node;
-  size_t i, j, k;
 
   if (tt->every_base)
 return true;
@@ -2554,7 +2551,7 @@ modref_may_conflict (const gimple *stmt,
   ref_set = ao_ref_alias_set (ref);
 
   int num_tests = 0, max_tests = param_modref_max_tests;
-  FOR_EACH_VEC_SAFE_ELT (tt->bases, i, base_node)
+  for (auto base_node : tt->bases)
 {
   if (tbaa_p && flag_strict_aliasing)
{
@@ -2569,7 +2566,7 @@ modref_may_conflict (const gimple *stmt,
   if (base_node->every_ref)
return true;
 
-  FOR_EACH_VEC_SAFE_ELT (base_node->refs, j, ref_node)
+  for (auto ref_node : base_node->refs)
{
  /* Do not repeat same test as before.  */