[PATCH] i386: Fix wrong codegen for -mrelax-cmpxchg-loop

2021-11-17 Thread Hongyu Wang via Gcc-patches
Hi Uros,

For -mrelax-cmpxchg-loop introduced by PR 103069/r12-5265, it would
produce an infinite loop. The correct code should be

.L84:
movl(%rdi), %ecx
movl%eax, %edx
orl %esi, %edx
cmpl%eax, %ecx
jne .L82
lock cmpxchgl   %edx, (%rdi)
jne .L84
movl%r8d, %eax  <<< retval is missing in previous impl
ret
.L82:
rep nop
jmp .L84

Adjust the corresponding expander to fix this issue, and fix the runtime
test so that the problem is exposed.

Bootstrapped/regtested on x86_64-pc-linux-gnu{-m32,}.
Ok for master?

gcc/ChangeLog:

* config/i386/i386-expand.c (ix86_expand_atomic_fetch_op_loop):
Adjust generated cfg to avoid infinite loop.

gcc/testsuite/ChangeLog:

* gcc.target/i386/pr103069-2.c: Adjust.
---
 gcc/config/i386/i386-expand.c  |  7 ++-
 gcc/testsuite/gcc.target/i386/pr103069-2.c | 11 ++-
 2 files changed, 12 insertions(+), 6 deletions(-)

diff --git a/gcc/config/i386/i386-expand.c b/gcc/config/i386/i386-expand.c
index 3e4de64ec24..0d5d1a0e205 100644
--- a/gcc/config/i386/i386-expand.c
+++ b/gcc/config/i386/i386-expand.c
@@ -23143,13 +23143,14 @@ void ix86_expand_atomic_fetch_op_loop (rtx target, 
rtx mem, rtx val,
   bool doubleword)
 {
   rtx old_reg, new_reg, old_mem, success, oldval, new_mem;
-  rtx_code_label *loop_label, *pause_label;
+  rtx_code_label *loop_label, *pause_label, *done_label;
   machine_mode mode = GET_MODE (target);
 
   old_reg = gen_reg_rtx (mode);
   new_reg = old_reg;
   loop_label = gen_label_rtx ();
   pause_label = gen_label_rtx ();
+  done_label = gen_label_rtx ();
   old_mem = copy_to_reg (mem);
   emit_label (loop_label);
   emit_move_insn (old_reg, old_mem);
@@ -23207,11 +23208,15 @@ void ix86_expand_atomic_fetch_op_loop (rtx target, 
rtx mem, rtx val,
   GET_MODE (success), 1, loop_label,
   profile_probability::guessed_never ());
 
+  emit_jump_insn (gen_jump (done_label));
+  emit_barrier ();
+
   /* If mem is not expected, pause and loop back.  */
   emit_label (pause_label);
   emit_insn (gen_pause ());
   emit_jump_insn (gen_jump (loop_label));
   emit_barrier ();
+  emit_label (done_label);
 }
 
 #include "gt-i386-expand.h"
diff --git a/gcc/testsuite/gcc.target/i386/pr103069-2.c 
b/gcc/testsuite/gcc.target/i386/pr103069-2.c
index 8ac824cc8e8..b3f2235fd55 100644
--- a/gcc/testsuite/gcc.target/i386/pr103069-2.c
+++ b/gcc/testsuite/gcc.target/i386/pr103069-2.c
@@ -1,5 +1,5 @@
-/* PR target/103068 */
-/* { dg-do compile } */
+/* PR target/103069 */
+/* { dg-do run } */
 /* { dg-additional-options "-O2 -march=x86-64 -mtune=generic" } */ 
 
 #include 
@@ -37,13 +37,14 @@ FUNC_ATOMIC_RELAX (char, xor)
 #define TEST_ATOMIC_FETCH_LOGIC(TYPE, OP) \
 { \
   TYPE a = 11, b = 101, res, exp; \
+  TYPE c = 11, d = 101;\
   res = relax_##TYPE##_##OP##_fetch (, b); \
-  exp = f_##TYPE##_##OP##_fetch (, b);  \
+  exp = f_##TYPE##_##OP##_fetch (, d);  \
   if (res != exp) \
 abort (); \
-  a = 21, b = 92; \
+  a = c = 21, b = d = 92; \
   res = relax_##TYPE##_fetch_##OP (, b); \
-  exp = f_##TYPE##_fetch_##OP (, b);  \
+  exp = f_##TYPE##_fetch_##OP (, d);  \
   if (res != exp) \
 abort (); \
 }
-- 
2.18.1



Re: [PATCH 4/4] Darwin, Ada : Add loader path as a default rpath.

2021-11-17 Thread Arnaud Charlet via Gcc-patches
> Allow the Ada runtimes to find GCC runtimes relative to their non-
> standard install positions.
> 
> gcc/ada/
>   * gcc-interface/Makefile.in: Add @loader_path runpaths to the
>   libgnat and libgnarl shared library builds.

OK, thanks.


[PATCH] Don't allow mask/sse/mmx mov in TLS code sequences.

2021-11-17 Thread liuhongt via Gcc-patches
Following a change in the assembler (see [1]), this patch disallows
mask/sse/mmx mov in TLS code sequences which require integer MOV instructions.

[1] 
https://sourceware.org/git/?p=binutils-gdb.git;a=patch;h=d7e3e627027fcf37d63e284144fe27ff4eba36b5

Bootstrapped and regtested on x86_64-pc-linux-gnu{-m32,}.
Ok for trunk and GCC11 upstream branch?

gcc/ChangeLog:

PR target/103275
* config/i386/i386-protos.h (ix86_notls_memory): Declare.
* config/i386/i386.c (ix86_notls_memory): New function.
* config/i386/i386.md (*movsi_internal): Don't allow
mask/sse/mmx move in TLS code sequences.
(*movdi_internal): Ditto.

gcc/testsuite/ChangeLog:

* gcc.target/i386/pr103275.c: New test.
---
 gcc/config/i386/constraints.md   |   5 +
 gcc/config/i386/i386-protos.h|   1 +
 gcc/config/i386/i386.c   |  34 ++
 gcc/config/i386/i386.md  |  18 +--
 gcc/testsuite/gcc.target/i386/pr103275.c | 148 +++
 5 files changed, 197 insertions(+), 9 deletions(-)
 create mode 100644 gcc/testsuite/gcc.target/i386/pr103275.c

diff --git a/gcc/config/i386/constraints.md b/gcc/config/i386/constraints.md
index 87cceac4cfb..489c76164a1 100644
--- a/gcc/config/i386/constraints.md
+++ b/gcc/config/i386/constraints.md
@@ -186,6 +186,11 @@ (define_special_memory_constraint "Bc"
   (and (match_operand 0 "memory_operand")
(match_test "constant_address_p (XEXP (op, 0))")))
 
+(define_special_memory_constraint "Bk"
+  "@internal notls memory operand."
+  (and (match_operand 0 "memory_operand")
+   (match_test "ix86_notls_memory (op)")))
+
 (define_special_memory_constraint "Bn"
   "@internal Memory operand without REX prefix."
   (match_operand 0 "norex_memory_operand"))
diff --git a/gcc/config/i386/i386-protos.h b/gcc/config/i386/i386-protos.h
index 7e05510c679..1fb09be8b7e 100644
--- a/gcc/config/i386/i386-protos.h
+++ b/gcc/config/i386/i386-protos.h
@@ -243,6 +243,7 @@ extern unsigned int ix86_get_callcvt (const_tree);
 #endif
 
 extern rtx ix86_tls_module_base (void);
+extern bool ix86_notls_memory (rtx);
 extern bool ix86_tls_address_pattern_p (rtx);
 extern rtx ix86_rewrite_tls_address (rtx);
 
diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c
index c246c8736f5..f1b7f57b0ca 100644
--- a/gcc/config/i386/i386.c
+++ b/gcc/config/i386/i386.c
@@ -11628,6 +11628,40 @@ legitimize_tls_address (rtx x, enum tls_model model, 
bool for_mov)
   return dest;
 }
 
+/* Return true if it's not tls memory,
+   NB: it's different from ix86_tls_address_pattern_p since it also matches
+   gottpoff/gotntpoff.
+   It's used to prevent KMOV/VMOV in TLS code sequences which require integer
+   MOV instructions, refer to PR103275.  */
+bool
+ix86_notls_memory (rtx mem)
+{
+  gcc_assert (MEM_P (mem));
+
+  rtx addr = XEXP (mem, 0);
+  subrtx_var_iterator::array_type array;
+  FOR_EACH_SUBRTX_VAR (iter, array, addr, ALL)
+{
+  rtx op = *iter;
+  if (GET_CODE (op) == UNSPEC)
+   switch (XINT (op, 1))
+ {
+ case UNSPEC_GOTNTPOFF:
+   return false;
+ case UNSPEC_TPOFF:
+   if (!TARGET_64BIT)
+ return false;
+   break;
+ default:
+   break;
+ }
+  /* Should iter.skip_subrtxes ();
+if there's no inner UNSPEC in addr???.  */
+}
+
+  return true;
+}
+
 /* Return true if OP refers to a TLS address.  */
 bool
 ix86_tls_address_pattern_p (rtx op)
diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md
index 7b2de60706d..9feba81974b 100644
--- a/gcc/config/i386/i386.md
+++ b/gcc/config/i386/i386.md
@@ -2164,9 +2164,9 @@ (define_split
 
 (define_insn "*movdi_internal"
   [(set (match_operand:DI 0 "nonimmediate_operand"
-"=r  ,o  ,r,r  ,r,m ,*y,*y,?*y,?m,?r,?*y,*v,*v,*v,m ,m,?r 
,?*Yd,?r,?*v,?*y,?*x,*k,*k ,*r,*m,*k")
+"=r  ,o  ,r,r  ,r,m ,*y,*y,?*y,?m,?r,?*y,*v,*v,*v,m ,m,?r 
,?*Yd,?r,?*v,?*y,?*x,*k,*k,*k ,*r,*m,*k")
(match_operand:DI 1 "general_operand"
-"riFo,riF,Z,rem,i,re,C ,*y,m  ,*y,*y,r  ,C ,*v,m ,*v,v,*Yd,r   ,*v,r  ,*x 
,*y ,*r,*km,*k,*k,CBC"))]
+"riFo,riF,Z,rem,i,re,C ,*y,Bk ,*y,*y,r  ,C ,*v,Bk,*v,v,*Yd,r   ,*v,r  ,*x 
,*y ,*r,*k,*Bk,*k,*k,CBC"))]
   "!(MEM_P (operands[0]) && MEM_P (operands[1]))
&& ix86_hardreg_mov_ok (operands[0], operands[1])"
 {
@@ -2228,7 +2228,7 @@ (define_insn "*movdi_internal"
   [(set (attr "isa")
  (cond [(eq_attr "alternative" "0,1,17,18")
  (const_string "nox64")
-   (eq_attr "alternative" "2,3,4,5,10,11,23,25")
+   (eq_attr "alternative" "2,3,4,5,10,11,23,26")
  (const_string "x64")
(eq_attr "alternative" "19,20")
  (const_string "x64_sse2")
@@ -2249,9 +2249,9 @@ (define_insn "*movdi_internal"
  (const_string "ssemov")
(eq_attr "alternative" "21,22")
  (const_string "ssecvt")
-   (eq_attr "alternative" "23,24,25,26")
+   (eq_attr 

Re: [PATCH] Fix PR target/103100 -mstrict-align and memset on not aligned buffers

2021-11-17 Thread Andrew Pinski via Gcc-patches
On Wed, Nov 17, 2021 at 1:39 AM Richard Sandiford via Gcc-patches
 wrote:
>
> apinski--- via Gcc-patches  writes:
> > From: Andrew Pinski 
> >
> > The problem here is with -mstrict-align, aarch64_expand_setmem needs
> > to check the alignment of the mode to make sure we can use it for
> > doing the stores.
> >
> > gcc/ChangeLog:
> >
> >   PR target/103100
> >   * config/aarch64/aarch64.c (aarch64_expand_setmem):
> >   Add check for alignment of the mode if STRICT_ALIGNMENT is true.
> > ---
> >  gcc/config/aarch64/aarch64.c | 4 +++-
> >  1 file changed, 3 insertions(+), 1 deletion(-)
> >
> > diff --git a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c
> > index fdf05505846..2c00583e12c 100644
> > --- a/gcc/config/aarch64/aarch64.c
> > +++ b/gcc/config/aarch64/aarch64.c
> > @@ -23738,7 +23738,9 @@ aarch64_expand_setmem (rtx *operands)
> >over writing.  */
> >opt_scalar_int_mode mode_iter;
> >FOR_EACH_MODE_IN_CLASS (mode_iter, MODE_INT)
> > - if (GET_MODE_BITSIZE (mode_iter.require ()) <= MIN (n, copy_limit))
> > + if (GET_MODE_BITSIZE (mode_iter.require ()) <= MIN (n, copy_limit)
> > + && (!STRICT_ALIGNMENT
> > + || MEM_ALIGN (dst) >= GET_MODE_ALIGNMENT (mode_iter.require 
> > (
>
> Sorry for the slow review.  I think instead we should keep
> track of the alignment of the start byte.  This will be MEM_ALIGN
> for the first iteration but could decrease after writing some bytes.
>
> The net effect should be the same in practice.  It just seems
> more robust.

So looking into this loop further, I think it really needs a rewrite :).
Currently it is not a greedy loop, instead it iterates for each copy
it does and loops over the modes each time too.
Let me rewrite the loop so it is better.

Thanks,
Andrew


>
> Thanks,
> Richard
>
> > cur_mode = mode_iter.require ();
> >
> >gcc_assert (cur_mode != BLKmode);


[PATCH] Reduce cost of aligned sse register store.

2021-11-17 Thread liuhongt via Gcc-patches
Make them be equal to cost of unaligned ones to avoid odd alignment
peeling.

Impact for SPEC2017 on CLX:
fprate:
  503.bwaves_rBuildSame
  507.cactuBSSN_r -0.22
  508.namd_r  -0.02
  510.parest_r-0.28
  511.povray_r-0.20
  519.lbm_r   BuildSame
  521.wrf_r   -0.58
  526.blender_r   -0.30
  527.cam4_r   1.07
  538.imagick_r0.01
  544.nab_r   -0.09
  549.fotonik3d_r BuildSame
  554.roms_r  BuildSame
intrate:
  500.perlbench_r -0.25
  502.gcc_r   -0.15
  505.mcf_r   BuildSame
  520.omnetpp_r1.03
  523.xalancbmk_r -0.13
  525.x264_r  -0.05
  531.deepsjeng_r -0.27
  541.leela_r -0.24
  548.exchange2_r -0.06
  557.xz_r-0.10
  999.specrand_ir  2.69

Bootstrapped and regtested on x86_64-linux-gnu{-m32,}.
Ready to push to trunk.

gcc/ChangeLog:

PR target/102543
* config/i386/x86-tune-costs.h (skylake_cost): Reduce cost of
storing 256/512-bit SSE register to be equal to cost of
unaligned store to avoid odd alignment peeling.
(icelake_cost): Ditto.

gcc/testsuite/ChangeLog:

* gcc.target/i386/pr102543.c: New test.
---
 gcc/config/i386/x86-tune-costs.h |  4 +--
 gcc/testsuite/gcc.target/i386/pr102543.c | 35 
 2 files changed, 37 insertions(+), 2 deletions(-)
 create mode 100644 gcc/testsuite/gcc.target/i386/pr102543.c

diff --git a/gcc/config/i386/x86-tune-costs.h b/gcc/config/i386/x86-tune-costs.h
index dd5563d2e64..60d50c97fca 100644
--- a/gcc/config/i386/x86-tune-costs.h
+++ b/gcc/config/i386/x86-tune-costs.h
@@ -1903,7 +1903,7 @@ struct processor_costs skylake_cost = {
   {6, 6, 6},   /* cost of storing integer registers */
   {6, 6, 6, 10, 20},   /* cost of loading SSE register
   in 32bit, 64bit, 128bit, 256bit and 
512bit */
-  {8, 8, 8, 12, 24},   /* cost of storing SSE register
+  {8, 8, 8, 8, 16},/* cost of storing SSE register
   in 32bit, 64bit, 128bit, 256bit and 
512bit */
   {6, 6, 6, 10, 20},   /* cost of unaligned loads.  */
   {8, 8, 8, 8, 16},/* cost of unaligned stores.  */
@@ -2029,7 +2029,7 @@ struct processor_costs icelake_cost = {
   {6, 6, 6},   /* cost of storing integer registers */
   {6, 6, 6, 10, 20},   /* cost of loading SSE register
   in 32bit, 64bit, 128bit, 256bit and 
512bit */
-  {8, 8, 8, 12, 24},   /* cost of storing SSE register
+  {8, 8, 8, 8, 16},/* cost of storing SSE register
   in 32bit, 64bit, 128bit, 256bit and 
512bit */
   {6, 6, 6, 10, 20},   /* cost of unaligned loads.  */
   {8, 8, 8, 8, 16},/* cost of unaligned stores.  */
diff --git a/gcc/testsuite/gcc.target/i386/pr102543.c 
b/gcc/testsuite/gcc.target/i386/pr102543.c
new file mode 100644
index 000..893eb9a5902
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr102543.c
@@ -0,0 +1,35 @@
+/* PR target/102543 */
+/* { dg-do compile } */
+/* { dg-options "-Ofast -march=skylake-avx512 -fdump-tree-optimized" } */
+/* { dg-final { scan-tree-dump-not "MEM\\\[" "optimized" } } */
+
+struct a
+{
+  int a[100];
+};
+typedef struct a misaligned_t __attribute__ ((aligned (8)));
+typedef struct a aligned_t __attribute__ ((aligned (32)));
+
+__attribute__ ((used))
+__attribute__ ((noinline))
+void
+t(void *a, int misaligned, aligned_t *d)
+{
+  int i,v;
+  for (i=0;i<100;i++)
+{
+  if (misaligned)
+   v=((misaligned_t *)a)->a[i];
+  else
+   v=((aligned_t *)a)->a[i];
+  d->a[i]+=v;
+}
+}
+struct b {int v; misaligned_t m;aligned_t aa;} b;
+aligned_t d;
+int
+main()
+{
+  t(, 1, );
+  return 0;
+}
-- 
2.18.2



Re: [PATCH] c++: template-id ADL and partial instantiation [PR99911]

2021-11-17 Thread Jason Merrill via Gcc-patches

On 11/10/21 11:53, Patrick Palka wrote:

Here when partially instantiating the call get(T{}) with T=N::A
(for which earlier unqualified name lookup for 'get' found nothing)
the arguments after substitution are no longer dependent but the callee
still is, so perform_koenig_lookup postpones ADL.  But then we go on to
diagnose the unresolved template name anyway, as if ADL was already
performed and failed.

This patch fixes this by avoiding the error path in question when the
template arguments of an unresolved template-id are dependent, which
mirrors the dependence check in perform_koenig_lookup.


This change is OK.


In passing, this
patch also disables the -fpermissive fallback that performs a second
unqualified lookup in the template-id ADL case; this fallback seems to be
intended for legacy code and shouldn't be used for C++20 template-id ADL.


Why wouldn't we want the more helpful diagnostic?


Bootstrapped and regtested on x86_64-pc-linux-gnu, does this look OK for
trunk and perhaps 11?

PR c++/99911

gcc/cp/ChangeLog:

* pt.c (tsubst_copy_and_build) : Don't diagnose
name lookup failure if the arguments to an unresolved template
name are still dependent.  Disable the -fpermissive fallback for
template-id ADL.

gcc/testsuite/ChangeLog:

* g++.dg/cpp2a/fn-template24.C: New test.
---
  gcc/cp/pt.c|  6 --
  gcc/testsuite/g++.dg/cpp2a/fn-template24.C | 16 
  2 files changed, 20 insertions(+), 2 deletions(-)
  create mode 100644 gcc/testsuite/g++.dg/cpp2a/fn-template24.C

diff --git a/gcc/cp/pt.c b/gcc/cp/pt.c
index 991a20a85d4..4beddf9caf8 100644
--- a/gcc/cp/pt.c
+++ b/gcc/cp/pt.c
@@ -20427,12 +20427,14 @@ tsubst_copy_and_build (tree t,
if (function != NULL_TREE
&& (identifier_p (function)
|| (TREE_CODE (function) == TEMPLATE_ID_EXPR
-   && identifier_p (TREE_OPERAND (function, 0
+   && identifier_p (TREE_OPERAND (function, 0))
+   && !any_dependent_template_arguments_p (TREE_OPERAND
+   (function, 1
&& !any_type_dependent_arguments_p (call_args))
  {
if (TREE_CODE (function) == TEMPLATE_ID_EXPR)
  function = TREE_OPERAND (function, 0);
-   if (koenig_p && (complain & tf_warning_or_error))
+   else if (koenig_p && (complain & tf_warning_or_error))
  {
/* For backwards compatibility and good diagnostics, try
   the unqualified lookup again if we aren't in SFINAE
diff --git a/gcc/testsuite/g++.dg/cpp2a/fn-template24.C 
b/gcc/testsuite/g++.dg/cpp2a/fn-template24.C
new file mode 100644
index 000..b444ac6a273
--- /dev/null
+++ b/gcc/testsuite/g++.dg/cpp2a/fn-template24.C
@@ -0,0 +1,16 @@
+// PR c++/99911
+// { dg-do compile { target c++20 } }
+
+namespace N {
+  struct A { };
+  template void get(A);
+};
+
+template
+auto f() {
+  return [](U) { get(T{}); };
+}
+
+int main() {
+  f()(0);
+}





Re: [PATCH] c++: unqual lookup performed twice w/ template-id ADL [PR102670]

2021-11-17 Thread Jason Merrill via Gcc-patches

On 11/3/21 12:04, Patrick Palka wrote:

Here we're incorrectly performing unqualified lookup of 'adl' again at
substitution time for the call adl(t) (for which name lookup at parse
time found nothing) which causes us to reject the testcase because the
second unqualified lookup finds the later-declared variable template
'adl', leading to confusion.  Fixed thusly.

The testcase concepts-recursive-sat1.C needed to be adjusted to use ADL
proper instead of relying on this incorrect behavior.

Bootstrapped and regtested on x86_64-pc-linux-gnu, does this look OK for
trunk and perhaps 11 given it's a C++20 bugfix?


OK for trunk.  Not for 11, I think, as it also affects non-C++20 code.


PR c++/102670

gcc/cp/ChangeLog:

* pt.c (tsubst_copy_and_build) : When looking
for an identifier callee in the koenig_p case, also look through
TEMPLATE_ID_EXPR.  Use tsubst_copy to substitute through the
template arguments of the template-id.

gcc/testsuite/ChangeLog:

* g++.dg/cpp2a/concepts-recursive-sat1.C:
* g++.dg/cpp2a/fn-template23.C: New test.
---
  gcc/cp/pt.c   | 11 +-
  .../g++.dg/cpp2a/concepts-recursive-sat1.C| 15 +---
  gcc/testsuite/g++.dg/cpp2a/fn-template23.C| 36 +++
  3 files changed, 56 insertions(+), 6 deletions(-)
  create mode 100644 gcc/testsuite/g++.dg/cpp2a/fn-template23.C

diff --git a/gcc/cp/pt.c b/gcc/cp/pt.c
index 66040035b2f..40f84648ed2 100644
--- a/gcc/cp/pt.c
+++ b/gcc/cp/pt.c
@@ -20256,7 +20256,10 @@ tsubst_copy_and_build (tree t,
/*done=*/false,
/*address_p=*/false);
  }
-   else if (koenig_p && identifier_p (function))
+   else if (koenig_p
+&& (identifier_p (function)
+|| (TREE_CODE (function) == TEMPLATE_ID_EXPR
+&& identifier_p (TREE_OPERAND (function, 0)
  {
/* Do nothing; calling tsubst_copy_and_build on an identifier
   would incorrectly perform unqualified lookup again.
@@ -20269,6 +20272,12 @@ tsubst_copy_and_build (tree t,
   FIXME but doing that causes c++/15272, so we need to stop
   using IDENTIFIER_NODE in that situation.  */
qualified_p = false;
+
+   if (TREE_CODE (function) == TEMPLATE_ID_EXPR)
+ /* Use tsubst_copy to substitute through the template arguments
+of the template-id without performing unqualified lookup on
+the template name.  */
+ function = tsubst_copy (function, args, complain, in_decl);
  }
else
  {
diff --git a/gcc/testsuite/g++.dg/cpp2a/concepts-recursive-sat1.C 
b/gcc/testsuite/g++.dg/cpp2a/concepts-recursive-sat1.C
index 22696c30d81..4c178b77946 100644
--- a/gcc/testsuite/g++.dg/cpp2a/concepts-recursive-sat1.C
+++ b/gcc/testsuite/g++.dg/cpp2a/concepts-recursive-sat1.C
@@ -3,16 +3,21 @@
  template
  concept Foo = requires(T t) { foo(t); }; // { dg-error "template 
instantiation depth" }
  
-template

-  requires Foo
-int foo(T t)
+namespace ns
  {
-  return foo(t);
+  struct S { };
+
+  template
+requires Foo
+  int foo(T t)
+  {
+return foo(t);
+  }
  }
  
  int main(int, char**)

  {
-  return foo<1>(1);
+  return ns::foo<1>(ns::S{});
  }
  
  // { dg-prune-output "compilation terminated" }

diff --git a/gcc/testsuite/g++.dg/cpp2a/fn-template23.C 
b/gcc/testsuite/g++.dg/cpp2a/fn-template23.C
new file mode 100644
index 000..b85d4c96dab
--- /dev/null
+++ b/gcc/testsuite/g++.dg/cpp2a/fn-template23.C
@@ -0,0 +1,36 @@
+// PR c++/102670
+// { dg-do compile { target c++20 } }
+
+namespace ns {
+  struct S { };
+
+  template
+  constexpr int adl(const S &) {
+return I;
+  }
+}
+
+namespace redirect {
+  template
+  concept can_call_adl = requires(T t) {
+adl(t);
+  };
+
+  template
+  struct adl_fn {
+template T>
+constexpr decltype(auto) operator()(T t) const {
+  return adl(t);
+}
+  };
+
+  namespace {
+template
+constexpr inline adl_fn adl{};
+  }
+}
+
+int main() {
+  static_assert(redirect::can_call_adl);
+  redirect::adl<3>(ns::S{});
+}





Re: [PATCH] c++: designated init of char array by string constant [PR55227]

2021-11-17 Thread will wray via Gcc-patches
V2 Patch
https://gcc.gnu.org/bugzilla/attachment.cgi?id=51828

On Wed, Nov 17, 2021 at 10:06 PM will wray  wrote:
>
> Thanks for the review Marek;
> I'll post the updated patch in a follow-on message and on bugzilla.
>
> On Mon, Nov 15, 2021 at 8:03 PM Marek Polacek  wrote:
>
> > I also noticed the C++ FE rejects
> >
> >   struct A { char x[4]; };
> >   struct B { struct A a; };
> >   struct B b = { .a.x = "abc" };
> > but the C FE accepts it.  But that's for another time.
>
> Yes, the nested case is invalid for C++, valid for C.
> c.f. cppreference aggregate init.
>
> > > +  reshape_iter stripd = {};
> >
> > Since the previous variables spell it "stripped" maybe call it 
> > stripped_iter.
>
> I've left it as "stripd"; the top level reshape_iter is just "d", non-verbose,
> so "stripped_d" would be inappropriately over-verbose.
>
> > > @@ -6836,7 +6838,8 @@ reshape_init_r (tree type, reshape_iter *d, tree 
> > > first_initializer_p,
> > >array types (one value per array element).  */
> > >if (TREE_CODE (stripped_str_init) == STRING_CST)
> > >   {
> > > -   if (has_designator_problem (d, complain))
> >
> > So the logic here is that...
>
> Yes, you get the logic exactly... took me a few rounds to get it.
>
> > Nice to finally remove this, but let's keep this part of the comment.
>
> Agreed, and reinstated.
>
> > BRACE_ENCLOSED_INITIALIZER_P checks that it gets a CONSTRUCTOR so you
> > can remove the first check.
>
> Nice, thanks; missed that.
>
> > > +// { dg-do compile }
> > > +// { dg-options "-pedantic" }
> >
> > FWIW, if you remove the dg-options, -pedantic-errors will be used so you 
> > could
> > drop it and then use dg-error instead of dg-warning below but this is OK 
> > too.
>
> I'd copied that from another desigN.C test, now I've copied the simpler:
>
> +// { dg-options "" }
>
> and removed all of the noisy dg-warning tests
>
> > We should probably test more:
> > - nested structs
> > - anonymous unions
> > - test when the initializer is too long
> > - multidim arrays:
>
> Cut-n-paste'd your multidim array test, and added a couple more
>
> > Hope this is useful...
>
> Very useful, thanks again


Re: [PATCH] i386: add alias for f*mul_*ch intrinsics

2021-11-17 Thread Hongtao Liu via Gcc-patches
On Tue, Nov 16, 2021 at 4:35 PM Hongtao Liu  wrote:
>
> On Tue, Nov 16, 2021 at 4:23 PM Kong, Lingling via Gcc-patches
>  wrote:
> >
> > Hi,
> >
> > This patch is to add alias for f*mul_*ch intrinsics.
> >
> > Ok for master?
> This patch just adds some macro definitions (new aliases for
> intrinsic) to the header file, and I think this should be low risk.
> And considering that the intel intrinsic guide has been updated with
> those aliases, it would be inconvenient if they were not in the
> latest gcc, so I think we should install this.
> Ok if there are no other objections.
I've installed the patch.
> >
> > gcc/ChangeLog:
> >
> > * config/i386/avx512fp16intrin.h (_mm512_mul_pch): Add alias for 
> > _mm512_fmul_pch.
> > (_mm512_mask_mul_pch): Likewise.
> > (_mm512_maskz_mul_pch): Likewise.
> > (_mm512_mul_round_pch): Likewise.
> > (_mm512_mask_mul_round_pch): Likewise.
> > (_mm512_maskz_mul_round_pch): Likewise.
> > (_mm512_cmul_pch): Likewise.
> > (_mm512_mask_cmul_pch): Likewise.
> > (_mm512_maskz_cmul_pch): Likewise.
> > (_mm512_cmul_round_pch): Likewise.
> > (_mm512_mask_cmul_round_pch): Likewise.
> > (_mm512_maskz_cmul_round_pch): Likewise.
> > (_mm_mul_sch): Likewise.
> > (_mm_mask_mul_sch): Likewise.
> > (_mm_maskz_mul_sch): Likewise.
> > (_mm_mul_round_sch): Likewise.
> > (_mm_mask_mul_round_sch): Likewise.
> > (_mm_maskz_mul_round_sch): Likewise.
> > (_mm_cmul_sch): Likewise.
> > (_mm_mask_cmul_sch): Likewise.
> > (_mm_maskz_cmul_sch): Likewise.
> > (_mm_cmul_round_sch): Likewise.
> > (_mm_mask_cmul_round_sch): Likewise.
> > (_mm_maskz_cmul_round_sch): Likewise.
> > * config/i386/avx512fp16vlintrin.h (_mm_mul_pch): Likewise.
> > (_mm_mask_mul_pch): Likewise.
> > (_mm_maskz_mul_pch): Likewise.
> > (_mm256_mul_pch): Likewise.
> > (_mm256_mask_mul_pch): Likewise.
> > (_mm256_maskz_mul_pch): Likewise.
> > (_mm_cmul_pch): Likewise.
> > (_mm_mask_cmul_pch): Likewise.
> > (_mm_maskz_cmul_pch): Likewise.
> > (_mm256_cmul_pch): Likewise.
> > (_mm256_mask_cmul_pch): Likewise.
> > (_mm256_maskz_cmul_pch): Likewise.
> >
> > gcc/testsuite/ChangeLog:
> >
> > * gcc.target/i386/avx512fp16-vfcmulcph-1a.c: Add new test for alias.
> > * gcc.target/i386/avx512fp16-vfcmulcsh-1a.c: Likewise.
> > * gcc.target/i386/avx512fp16-vfmulcph-1a.c: Likewise.
> > * gcc.target/i386/avx512fp16-vfmulcsh-1a.c: Likewise.
> > * gcc.target/i386/avx512fp16vl-vfcmulcph-1a.c: Likewise.
> > * gcc.target/i386/avx512fp16vl-vfmulcph-1a.c: Likewise.
> > ---
> >  gcc/config/i386/avx512fp16intrin.h| 39 +++
> >  gcc/config/i386/avx512fp16vlintrin.h  | 17 
> >  .../gcc.target/i386/avx512fp16-vfcmulcph-1a.c | 19 ++---  
> > .../gcc.target/i386/avx512fp16-vfcmulcsh-1a.c | 19 ++---  
> > .../gcc.target/i386/avx512fp16-vfmulcph-1a.c  | 19 ++---  
> > .../gcc.target/i386/avx512fp16-vfmulcsh-1a.c  | 19 ++---
> >  .../i386/avx512fp16vl-vfcmulcph-1a.c  | 20 +++---
> >  .../i386/avx512fp16vl-vfmulcph-1a.c   | 20 +++---
> >  8 files changed, 136 insertions(+), 36 deletions(-)
> >
> > diff --git a/gcc/config/i386/avx512fp16intrin.h 
> > b/gcc/config/i386/avx512fp16intrin.h
> > index 44c5e24f234..fe73e693897 100644
> > --- a/gcc/config/i386/avx512fp16intrin.h
> > +++ b/gcc/config/i386/avx512fp16intrin.h
> > @@ -7162,6 +7162,45 @@ _mm512_set1_pch (_Float16 _Complex __A)
> >return (__m512h) _mm512_set1_ps (u.b);  }
> >
> > +// intrinsics below are alias for f*mul_*ch #define _mm512_mul_pch(A,
> > +B) _mm512_fmul_pch ((A), (B))
> > +#define _mm512_mask_mul_pch(W, U, A, B)
> >   \
> > +  _mm512_mask_fmul_pch ((W), (U), (A), (B)) #define
> > +_mm512_maskz_mul_pch(U, A, B) _mm512_maskz_fmul_pch ((U), (A), (B))
> > +#define _mm512_mul_round_pch(A, B, R) _mm512_fmul_round_pch ((A), (B), (R))
> > +#define _mm512_mask_mul_round_pch(W, U, A, B, R) \
> > +  _mm512_mask_fmul_round_pch ((W), (U), (A), (B), (R))
> > +#define _mm512_maskz_mul_round_pch(U, A, B, R)   \
> > +  _mm512_maskz_fmul_round_pch ((U), (A), (B), (R))
> > +
> > +#define _mm512_cmul_pch(A, B) _mm512_fcmul_pch ((A), (B))
> > +#define _mm512_mask_cmul_pch(W, U, A, B) \
> > +  _mm512_mask_fcmul_pch ((W), (U), (A), (B)) #define
> > +_mm512_maskz_cmul_pch(U, A, B) _mm512_maskz_fcmul_pch ((U), (A), (B))
> > +#define _mm512_cmul_round_pch(A, B, R) _mm512_fcmul_round_pch ((A), (B), 
> > (R))
> > +#define _mm512_mask_cmul_round_pch(W, U, A, B, R)\
> > +  _mm512_mask_fcmul_round_pch ((W), (U), (A), (B), (R))
> > +#define _mm512_maskz_cmul_round_pch(U, A, B, R) 

Re: [PATCH] c++: designated init of char array by string constant [PR55227]

2021-11-17 Thread will wray via Gcc-patches
Thanks for the review Marek;
I'll post the updated patch in a follow-on message and on bugzilla.

On Mon, Nov 15, 2021 at 8:03 PM Marek Polacek  wrote:

> I also noticed the C++ FE rejects
>
>   struct A { char x[4]; };
>   struct B { struct A a; };
>   struct B b = { .a.x = "abc" };
> but the C FE accepts it.  But that's for another time.

Yes, the nested case is invalid for C++, valid for C.
c.f. cppreference aggregate init.

> > +  reshape_iter stripd = {};
>
> Since the previous variables spell it "stripped" maybe call it stripped_iter.

I've left it as "stripd"; the top level reshape_iter is just "d", non-verbose,
so "stripped_d" would be inappropriately over-verbose.

> > @@ -6836,7 +6838,8 @@ reshape_init_r (tree type, reshape_iter *d, tree 
> > first_initializer_p,
> >array types (one value per array element).  */
> >if (TREE_CODE (stripped_str_init) == STRING_CST)
> >   {
> > -   if (has_designator_problem (d, complain))
>
> So the logic here is that...

Yes, you get the logic exactly... took me a few rounds to get it.

> Nice to finally remove this, but let's keep this part of the comment.

Agreed, and reinstated.

> BRACE_ENCLOSED_INITIALIZER_P checks that it gets a CONSTRUCTOR so you
> can remove the first check.

Nice, thanks; missed that.

> > +// { dg-do compile }
> > +// { dg-options "-pedantic" }
>
> FWIW, if you remove the dg-options, -pedantic-errors will be used so you could
> drop it and then use dg-error instead of dg-warning below but this is OK too.

I'd copied that from another desigN.C test, now I've copied the simpler:

+// { dg-options "" }

and removed all of the noisy dg-warning tests

> We should probably test more:
> - nested structs
> - anonymous unions
> - test when the initializer is too long
> - multidim arrays:

Cut-n-paste'd your multidim array test, and added a couple more

> Hope this is useful...

Very useful, thanks again


Re: [PATCH] restore ancient -Waddress for weak symbols [PR33925]

2021-11-17 Thread Martin Sebor via Gcc-patches

On 11/17/21 12:21 PM, Martin Sebor wrote:

On 11/17/21 11:31 AM, Jason Merrill wrote:

On 11/16/21 20:11, Martin Sebor wrote:

On 11/16/21 1:23 PM, Jason Merrill wrote:

On 10/23/21 19:06, Martin Sebor wrote:

On 10/4/21 3:37 PM, Jason Merrill wrote:

On 10/4/21 14:42, Martin Sebor wrote:

While resolving the recent -Waddress enhancement request
(PR102103) I came across a 2007 problem report about GCC 4 having
stopped warning for using the address of inline functions in
equality comparisons with null.  With inline functions being
commonplace in C++ this seems like an important use case for
the warning.

The change that resulted in suppressing the warning in these
cases was introduced inadvertently in a fix for PR 22252.

To restore the warning, the attached patch enhances
the decl_with_nonnull_addr_p() function to return true also for
weak symbols for which a definition has been provided.


I think you probably want to merge this function with 
fold-const.c:maybe_nonzero_address, which already handles more cases.


maybe_nonzero_address() doesn't behave quite like
decl_with_nonnull_addr_p() expects and I'm reluctant to muck
around with the former too much since it's used for codegen,
while the latter just for warnings.  (There is even a case
where the functions don't behave the same, and would result
in different warnings between C and C++ without some extra
help.)

So in the attached revision I just have maybe_nonzero_address()
call decl_with_nonnull_addr_p() and then refine the failing
(or uncertain) cases separately, with some overlap between
them.

Since I worked on this someone complained that some instances
of the warning newly enhanced under PR102103 aren't suppressed
in code resulting from macro expansion.  Since it's trivial,
I include the fix for that report in this patch as well.



+   allocated stroage might have a null address.  */


typo.

OK with that fixed.


After retesting the patch before committing I noticed it triggers
a regression in weak/weak-3.c that I missed the first time around.
Here's the test case:

extern void * ffoo1f (void);
void * foo1f (void)
{
   if (ffoo1f) /* { dg-warning "-Waddress" } */
 ffoo1f ();
   return 0;
}

void * ffoox1f (void) { return (void *)0; }
extern void * ffoo1f (void)  __attribute__((weak, alias ("ffoox1f")));

The unexpected error is:

a.c: At top level:
a.c:1:15: error: ‘ffoo1f’ declared weak after being used
 1 | extern void * ffoo1f (void);
   |   ^~

The error is caused by the new call to maybe_nonzero_address()
made from decl_with_nonnull_addr_p().  The call registers
the symbol as used.

So unless the error is desirable for this case I think it's
best to go back to the originally proposed solution.  I attach
it for reference and will plan to commit it tomorrow unless I
hear otherwise.


Hmm, the error seems correct to me: we tested whether the address is 
nonzero in the dg-warning line, and presumably evaluating that test 
could depend on the absence of weak.


Sorry, I don't know enough yet to judge this.


I've created a test case involving just a weak symbol (no alias)
that shows that the front end folds to true a test of the address
of a symbol subsequently declared weak:
  https://gcc.gnu.org/bugzilla/show_bug.cgi?id=103310

Clang and ICC do the same thing here; only Clang and GCC issue
a warning that the inequality is folded to true (here's a live
link to it: https://godbolt.org/z/a8Tx9Psee).

This doesn't seem ideal but I wouldn't call it a serious problem.

The case in weak-3.c is different: there the weak symbol is
an alias for a locally defined function.  There, the alias cannot
become null and so folding the test is safe and giving an error
for it would be a regression.  I would tend to view issuing
a hard error in this case a more serious problem than the first
(especially after reading the discussion below), but YMMV.
The weak-3.c test was added along with a fix for PR 6343.
Here's a discussion of the problem:
  https://gcc.gnu.org/legacy-ml/gcc/2002-04/msg00838.html

Please let me know which of the alternatives below you prefer
or if you want something else.


Since the error is unrelated to what I'm fixing I would prefer
not to introduce it in the same patch.  I'm happy to open
a separate bug for the missing error for the test case above,
look some more into why it isn't issued, and if it's decided
the error is intended either add the call back to trigger it
or do whatever else may be more appropriate.

Are you okay with me going ahead and committing the most recent
patch as is?

If not, do you want me to commit the previous version and change
the weak-3.c test to expect the error?

Martin




PS I don't know enough about the logic behind issuing this error
in other situations to tell for sure that it's wrong in this one
but I see no difference in the emitted code for a case in the same
test that declares the alias first, before taking its address and
that's accepted and this one.  I also 

[PATCH] doc, d: Add note that D front end now requires GDC installed in order to bootstrap.

2021-11-17 Thread Iain Buclaw via Gcc-patches
Hi,

As asked for, this adds the documentation note in install.texi about the
upcoming bootstrap requirements.

Obviously this will be applied alongside the patch posted previously:

https://gcc.gnu.org/pipermail/gcc-patches/2021-October/582917.html

Final batch of testing before proceeding has taken a bit longer than I
expected.  Currently bootstrapping on sparcv9-sun-solaris2.11, and will
push forward once I have confirmed that it works as well as the current
C++ implementation of the D front end.

OK for mainline?  Any improvements on wording?

Thanks,
Iain.

---
gcc/ChangeLog:

* doc/install.texi (Prerequisites): Add note that D front end now
requires GDC installed in order to bootstrap.
(Building): Add D compiler section, referencing prerequisites.
---
 gcc/doc/install.texi | 28 
 1 file changed, 28 insertions(+)

diff --git a/gcc/doc/install.texi b/gcc/doc/install.texi
index 094469b9a4e..6f999a2fd5a 100644
--- a/gcc/doc/install.texi
+++ b/gcc/doc/install.texi
@@ -289,6 +289,25 @@ Ada runtime libraries. You can check that your build 
environment is clean
 by verifying that @samp{gnatls -v} lists only one explicit path in each
 section.
 
+@item @anchor{GDC-prerequisite}GDC
+
+In order to build GDC, the D compiler, you need a working GDC
+compiler (GCC version 9.1 or later), as the D front end is written in D.
+
+Versions of GDC prior to 12 can be built with an ISO C++11 compiler, which can
+then be installed and used to bootstrap newer versions of the D front end.
+
+It is strongly recommended to use an older version of GDC to build GDC. More
+recent versions of GDC than the version built are not guaranteed to work and
+will often fail during the build with compilation errors relating to
+deprecations or removed features.
+
+Note that @command{configure} does not test whether the GDC installation works
+and has a sufficiently recent version.  Though the implementation of the D
+front end does not make use of any GDC-specific extensions, or novel features
+of the D language, if too old a GDC version is installed and
+@option{--enable-languages=d} is used, the build will fail.
+
 @item A ``working'' POSIX compatible shell, or GNU bash
 
 Necessary when running @command{configure} because some
@@ -2977,6 +2996,15 @@ and network filesystems.
 @uref{prerequisites.html#GNAT-prerequisite,,GNAT prerequisites}.
 @end ifhtml
 
+@section Building the D compiler
+
+@ifnothtml
+@ref{GDC-prerequisite}.
+@end ifnothtml
+@ifhtml
+@uref{prerequisites.html#GDC-prerequisite,,GDC prerequisites}.
+@end ifhtml
+
 @section Building with profile feedback
 
 It is possible to use profile feedback to optimize the compiler itself.  This
-- 
2.30.2



[PATCH v2] Fix PR tree-optimization/103228 and 55177: folding of (type) X op CST where type is a nop convert

2021-11-17 Thread apinski--- via Gcc-patches
From: Andrew Pinski 

Currently we fold (type) X op CST into (type) (X op ((type-x) CST)) when the 
conversion widens
but not when the conversion is a nop. For the same reason why we move the 
widening conversion
(the possibility of removing an extra conversion), we should do the same if the 
conversion is a
nop.

Committed as approved with the comment change.

PR tree-optimization/103228
PR tree-optimization/55177

gcc/ChangeLog:

* match.pd ((type) X bitop CST): Also do this
transformation for nop conversions.

gcc/testsuite/ChangeLog:

* gcc.dg/tree-ssa/pr103228-1.c: New test.
* gcc.dg/tree-ssa/pr55177-1.c: New test.
---
 gcc/match.pd   |  6 --
 gcc/testsuite/gcc.dg/tree-ssa/pr103228-1.c | 11 +++
 gcc/testsuite/gcc.dg/tree-ssa/pr55177-1.c  | 14 ++
 3 files changed, 29 insertions(+), 2 deletions(-)
 create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/pr103228-1.c
 create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/pr55177-1.c

diff --git a/gcc/match.pd b/gcc/match.pd
index cd8f349f618..4dc66fb47f2 100644
--- a/gcc/match.pd
+++ b/gcc/match.pd
@@ -1616,8 +1616,10 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
  Restrict it to GIMPLE to avoid endless recursions.  */
&& (bitop != BIT_AND_EXPR || GIMPLE)
&& (/* That's a good idea if the conversion widens the operand, thus
- after hoisting the conversion the operation will be narrower.  */
-  TYPE_PRECISION (TREE_TYPE (@0)) < TYPE_PRECISION (type)
+ after hoisting the conversion the operation will be narrower.
+ It is also a good if the conversion is a nop as moves the
+ conversion to one side; allowing for combining of the 
conversions.  */
+  TYPE_PRECISION (TREE_TYPE (@0)) <= TYPE_PRECISION (type)
   /* It's also a good idea if the conversion is to a non-integer
  mode.  */
   || GET_MODE_CLASS (TYPE_MODE (type)) != MODE_INT
diff --git a/gcc/testsuite/gcc.dg/tree-ssa/pr103228-1.c 
b/gcc/testsuite/gcc.dg/tree-ssa/pr103228-1.c
new file mode 100644
index 000..a7539819cf2
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/tree-ssa/pr103228-1.c
@@ -0,0 +1,11 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -fdump-tree-optimized" } */
+int f(int a, int b)
+{
+  b|=1u;
+  b|=2;
+  return b;
+}
+/* { dg-final { scan-tree-dump-times "\\\| 3" 1 "optimized"} } */
+/* { dg-final { scan-tree-dump-times "\\\| 1" 0 "optimized"} } */
+/* { dg-final { scan-tree-dump-times "\\\| 2" 0 "optimized"} } */
diff --git a/gcc/testsuite/gcc.dg/tree-ssa/pr55177-1.c 
b/gcc/testsuite/gcc.dg/tree-ssa/pr55177-1.c
new file mode 100644
index 000..de1a264345c
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/tree-ssa/pr55177-1.c
@@ -0,0 +1,14 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -fdump-tree-optimized" } */
+extern int x;
+
+void foo(void)
+{
+  int a = __builtin_bswap32(x);
+  a &= 0x5a5b5c5d;
+  x = __builtin_bswap32(a);
+}
+
+/* { dg-final { scan-tree-dump-times "__builtin_bswap32" 0 "optimized"} } */
+/* { dg-final { scan-tree-dump-times "& 1566333786" 1 "optimized"} } */
+/* { dg-final { scan-tree-dump-times "& 1515936861" 0 "optimized"} } */
-- 
2.17.1



Re: [PATCH 3/4] Darwin : Allow for configuring Darwin to use embedded runpath.

2021-11-17 Thread Iain Sandoe via Gcc-patches



> On 17 Nov 2021, at 22:50, Joseph Myers  wrote:
> 
> On Wed, 17 Nov 2021, Iain Sandoe via Gcc-patches wrote:
> 
>>  * libtool.m4: Add 'enable-darwin-at-runpath'.  Act  on the
>>  enable flag to alter Darwin libraries to use @rpath names.
> 
> To confirm: has this been sent to upstream libtool (which has recently 
> acquired a new maintainer, so hopefully might become a bit more active at 
> dealing with the backlog of patches)?

No, it has not:
(a) I was not aware of the new maintainer
(b) the background problem does seem a pretty GCC-specific issue.

If we are going to try and reconcile GCC’s local libtool for more generic Darwin
use, then I fear that is going to take quite significant work over a period of 
time
(since it will need much more wide testing than GCC).

Having said that, it would be good to have a more up-to-date upstream libtool;
the current source has a lot of ancient content - as with all things it’s just a
question of resources.   It’s unlikely to be on my radar in time for GCC12.

Iain



Re: [PATCH] rs6000: Builtins test changes for BFP scalar tests

2021-11-17 Thread Bill Schmidt via Gcc-patches


On 11/17/21 3:32 PM, Segher Boessenkool wrote:
> On Wed, Nov 17, 2021 at 02:58:54PM -0600, Bill Schmidt wrote:
>> Hi!  This patch is broken out of the previous patch for all the builtins test
>> suite adjustments.  Here we have some slight changes in error messages due to
>> how the internals have changed between the old and new builtins methods.
>>
>> For scalar-extract-exp-2.c we change:
>>   error: '__builtin_vec_scalar_extract_exp is not supported in this compiler 
>> configuration'
>>
>> to:
>>   error: '__builtin_vsx_scalar_extract_exp' requires the '-mcpu=power9' 
>> option and either the '-m64' or '-mpowerpc64' option
>>   note: builtin '__builtin_vec_scalar_extract_exp' requires builtin 
>> '__builtin_vsx_scalar_extract_exp'
> I don't like that at all.  The user didn't write the _vsx thing, and it
> isn't documented either (neither is the _vec one, but that is a separate
> issue, specific to this builtin).

I feel like I haven't explained this well.  This kind of thing has been in
existence forever even in the old builtins code.  The combination of the
error showing the internal builtin name, and the note tying the overload
name to the internal builtin name, has been there all along.  The name of
the internal builtin is pretty meaningless.  The only thing that's interesting
in this case is that we previously didn't get this *for this specific case*
because the old code went to a generic fallback.  But in many other cases
you get exactly this same kind of error message for the old code.

>
>> The new message provides more information.  In both cases, it is less than
>> ideal that we don't refer to scalar_extract_exp, which is referenced in
>> the source line, but this is because scalar_extract_exp is #define'd to
>> __builtin_vec_scalar_extract_exp, so it's unavoidable.  Certainly this is no
>> worse than before, and arguably better.
> It is a macro, enough said there
>
> The __builtin_ implementation should be documented (in the GCC manual,
> if not elsewhere).  The warnings should talk about _vec, because the
> _vsx thing only exists as implementation detail, and we should never
> talk about those.  We don't have errors about adddi3 either!
>
>>   error: '__builtin_vsx_scalar_extract_sig' requires the '-mcpu=power9' 
>> option and either the '-m64' or '-mpowerpc64' option
>>   note: builtin '__builtin_vec_scalar_extract_sig' requires builtin 
>> '__builtin_vsx_scalar_extract_sig'
> The rhs in the note does not *exist*, as far as the user is concerned.
> One builtin requiring another is all gobbledygook.

As stated above, this isn't something new that I've added.  That's what
we already do.  It's how the overload error messages have always been.

I haven't been able to eradicate everything awful here...

Thanks,
Bill

>
>> For scalar-test-neg-{2,3,5}.c, we actually change the test case.  This is
>> because we deliberately removed some undocumented and pointless  
>> overloads,
>> where each overload mapped to a single builtin.  These were:
>>  __builtin_vec_scalar_test_neg_sp
>>  __builtin_vec_scalar_test_neg_dp
>>  __builtin_vec_scalar_test_neg_qp
>> which are redundant with the "real" overload:
>>  __builtin_vec_scalar_test_neg
>> The latter maps to three builtins of the appropriate type.
> Yes.  And the new ones are undocumented and useless just as well, they
> just have better names.
>
>
> Segher


Re: [PATCH 2/2] libcpp: capture and underline ranges in -Wbidi-chars= [PR103026]

2021-11-17 Thread Marek Polacek via Gcc-patches
On Wed, Nov 17, 2021 at 05:45:15PM -0500, David Malcolm wrote:
> This patch converts the bidi::vec to use a struct so that we can
> capture location_t values for the bidirectional control characters.

Thanks for these improvements.  I noticed a few nits, but nothing that
needs to be fixed immediately.

> --- a/libcpp/lex.c
> +++ b/libcpp/lex.c
> @@ -1172,11 +1172,34 @@ namespace bidi {
>/* All the UTF-8 encodings of bidi characters start with E2.  */
>constexpr uchar utf8_start = 0xe2;
>  
> +  struct context
> +  {
> +context () {}
> +context (location_t loc, kind k, bool pdf, bool ucn)
> +: m_loc (loc), m_kind (k), m_pdf (pdf), m_ucn (ucn)
> +{
> +}
> +
> +kind get_pop_kind () const
> +{
> +  return m_pdf ? kind::PDF : kind::PDI;
> +}
> +bool ucn_p () const
> +{
> +  return m_ucn;
> +}
> +
> +location_t m_loc;
> +kind m_kind;
> +unsigned m_pdf : 1;
> +unsigned m_ucn : 1;

Should these members be private:, since we have getters for them?

> +  };
> +
>/* A vector holding currently open bidi contexts.  We use a char for
>   each context, its LSB is 1 if it represents a PDF context, 0 if it
>   represents a PDI context.  The next bit is 1 if this context was open
>   by a bidi character written as a UCN, and 0 when it was UTF-8.  */

Looks like this comment needs to be updated now.

> -  semi_embedded_vec  vec;
> +  semi_embedded_vec  vec;
>  
>/* Close the whole comment/identifier/string literal/character constant
>   context.  */
> @@ -1193,19 +1216,19 @@ namespace bidi {
>  vec.truncate (len - 1);
>}
>  
> -  /* Return the context of the Ith element.  */
> -  kind ctx_at (unsigned int i)
> +  /* Return the pop kind of the context of the Ith element.  */
> +  kind pop_kind_at (unsigned int i)
>{
> -return (vec[i] & 1) ? kind::PDF : kind::PDI;
> +return vec[i].get_pop_kind ();
>}
>  
> -  /* Return which context is currently opened.  */
> +  /* Return the pop kind of the context that is currently opened.  */
>kind current_ctx ()
>{
>  unsigned int len = vec.count ();
>  if (len == 0)
>return kind::NONE;
> -return ctx_at (len - 1);
> +return vec[len - 1].get_pop_kind ();
>}
>  
>/* Return true if the current context comes from a UCN origin, that is,
> @@ -1214,11 +1237,19 @@ namespace bidi {
>{
>  unsigned int len = vec.count ();
>  gcc_checking_assert (len > 0);
> -return (vec[len - 1] >> 1) & 1;
> +return vec[len - 1].m_ucn;
>}
>  
> -  /* We've read a bidi char, update the current vector as necessary.  */
> -  void on_char (kind k, bool ucn_p)
> +  location_t current_ctx_loc ()
> +  {
> +unsigned int len = vec.count ();
> +gcc_checking_assert (len > 0);
> +return vec[len - 1].m_loc;
> +  }
> +
> +  /* We've read a bidi char, update the current vector as necessary.
> + LOC is only valid when K is not kind::NONE.  */
> +  void on_char (kind k, bool ucn_p, location_t loc)
>{
>  switch (k)
>{
> @@ -1226,12 +1257,12 @@ namespace bidi {
>case kind::RLE:
>case kind::LRO:
>case kind::RLO:
> - vec.push (ucn_p ? 3u : 1u);
> + vec.push (context (loc, k, true, ucn_p));
>   break;
>case kind::LRI:
>case kind::RLI:
>case kind::FSI:
> - vec.push (ucn_p ? 2u : 0u);
> + vec.push (context (loc, k, false, ucn_p));
>   break;
>/* PDF terminates the scope of the last LRE, RLE, LRO, or RLO
>whose scope has not yet been terminated.  */
> @@ -1245,7 +1276,7 @@ namespace bidi {
>yet been terminated.  */
>case kind::PDI:
>   for (int i = vec.count () - 1; i >= 0; --i)
> -   if (ctx_at (i) == kind::PDI)
> +   if (pop_kind_at (i) == kind::PDI)
>   {
> vec.truncate (i);
> break;
> @@ -1295,10 +1326,47 @@ namespace bidi {
>}
>  }
>  
> +/* Get location_t for the range of bytes [START, START + NUM_BYTES)
> +   within the current line in FILE, with the caret at START.  */
> +
> +static location_t
> +get_location_for_byte_range_in_cur_line (cpp_reader *pfile,
> +  const unsigned char *const start,
> +  size_t num_bytes)
> +{
> +  gcc_checking_assert (num_bytes > 0);
> +
> +  /* CPP_BUF_COLUMN and linemap_position_for_column both refer
> + to offsets in bytes, but CPP_BUF_COLUMN is 0-based,
> + whereas linemap_position_for_column is 1-based.  */
> +
> +  /* Get 0-based offsets within the line.  */
> +  size_t start_offset = CPP_BUF_COLUMN (pfile->buffer, start);
> +  size_t end_offset = start_offset + num_bytes - 1;
> +
> +  /* Now convert to location_t, where "columns" are 1-based byte offsets.  */
> +  location_t start_loc = linemap_position_for_column (pfile->line_table,
> +   start_offset + 1);
> +  location_t end_loc = 

Re: [RFC] c++: Print function template parms when relevant (was: [PATCH v4] c++: Add gnu::diagnose_as attribute)

2021-11-17 Thread Matthias Kretz
On Wednesday, 17 November 2021 19:25:46 CET Jason Merrill wrote:
> On 11/17/21 04:04, Matthias Kretz wrote:
> > On Wednesday, 17 November 2021 07:09:18 CET Jason Merrill wrote:
> >>> -  if (CHECKING_P)
> >>> -SET_NON_DEFAULT_TEMPLATE_ARGS_COUNT (a, TREE_VEC_LENGTH (a));
> >>> +  SET_NON_DEFAULT_TEMPLATE_ARGS_COUNT (a, nondefault);
> >> 
> >> should have been
> >> 
> >> if (CHECKING_P || nondefault != TREE_VEC_LENGTH (a))
> >> 
> >> SET_NON_DEFAULT_TEMPLATE_ARGS_COUNT (a, nondefault);
> > 
> > TBH, I don't understand the purpose of CHECKING_P here, or rather it makes
> > me nervous because AFAIU I'm only testing with CHECKING_P enabled. Why
> > make behavior dependent on CHECKING_P? I expected CHECKING_P to basically
> > only add more assertions.
> 
> The idea when NON_DEFAULT_TEMPLATE_ARGS_COUNT was added years back was
> to leave the TREE_CHAIN null when !CHECKING_P and treat that as
> equivalent to TREE_VEC_LENGTH (args).  But perhaps you're right that
> it's not a savings worth the complexity.

Thanks, now I understand.

> >>> (copy_template_args): Jason?
> >> 
> >> Only copy the non-default template args count on TREE_VECs that should
> >> have it.
> > 
> > Why not simply set the count on all args? Is it a performance concern? The
> > INTEGER_CST the TREE_CHAIN has to point to exists anyway, so it's not
> > wasting any memory, right?
> 
> In this case the TREE_VEC we're excluding is the one wrapping multiple
> levels of template args; it doesn't contain args directly, so setting
> NON_DEFAULT_ARGS_COUNT on it doesn't make sense.

Right, I had already added a `gcc_assert (!TMPL_ARGS_HAVE_MULTIPLE_LEVELS 
(args))` to my new set_non_default_template_args_count function and found cp/
constraint.cc:2896 (get_mapped_args), which calls 
SET_NON_DEFAULT_TEMPLATE_ARGS_COUNT on the outer TREE_VEC. Was this supposed 
to apply to all inner TREE_VECs? Or is deleting the line the correct fix?

> >>> +  /* Pretty print only template instantiations. Don't pretty print
> >>> explicit
> >>> + specializations like 'template <> void fun (int)'.
> >> 
> >> This seems like a significant change of behavior unrelated to printing
> >> default template arguments.  What's the rationale for handling
> >> specializations differently from instantiations?
> > 
> > Right, this is about "The general idea of this change is to print template
> > parms wherever they would appear in the source code as well".
> > 
> > Initially, the change to print function template arguments/parameters only
> > if the args were explicitly specified led to printing 'void fun (T)
> > [with T = ...]' or 'template <> void fun (int)'. Both are not telling the
> > full story, even if the former is how the function would be called.
> 
> and the latter is how I expect the specialization to be declared, not
> with the deducible template argument made explicit.

You're right. From my tests:

template 
  [[deprecated]] void f4(a);

template <>
  [[deprecated]] void f4(int);

template <>
  [[deprecated]] void f4(float);

  f4(1.);  // { dg-warning "'void f4\\(a\\) .with a = double.'" }
  f4(1);   // { dg-warning "'void f4\\(int\\)'" }
  f4(1.f); // { dg-warning "'void f4\\(float\\)'" }

So how it's printed depends on how the specialization is declared. It just 
falls out that way and it didn't seem awfully wrong... ;)

> > But if the reader
> > should quickly recognize what code is getting called, it is helpful to see
> > right away that a function template specialization is called. (It might
> > also reveal an implementation detail of a library, so it's not 100%
> > obvious how to choose here.) Also, saying 'T = int' is kind of wrong.
> > Yes, 'int' was deduced. But there's no T in fun:
> > 
> > template  void fun (T);
> > template <> void fun (int);
> 
> There's a T in the template, and as you said above, that's how it's
> called (and mangled).
> 
> > __FUNCTION__ was 'fun' all the time, but __PRETTY_FUNCTION__ was
> > 'void
> > fun(T) [with T = int]'.
> 
> Isn't that true for instantiations, as well?

No, instantiations don't have template args/parms in __FUNCTION__.

> > It's more consistent that __PRETTY_FUNCTION__ contains __FUNCTION__, IMHO
> 
> I suppose, but I don't see that as a strong enough motivation to mix
> this up.

What about

template  void f();
template <> void f();

With -fpretty-templates shouldn't it print as 'void f() [with T = float]' 
and 'void f()'? Yes, it's probably too subtle for most users to notice 
the difference. But I find it's more consistent this way.

> > so it would have to be at least 'void fun(T) [with T
> > = int]'. But that's strange: How it uses T and int for the same type. So I
> > settled on 'void fun(int)'.
> > 
> >> I also don't understand the purpose of TFF_AS_PRIMARY.
> > 
> > dump_function_decl generalizes the TEMPLATE_DECL (if flag_pretty_templates
> > is true) and, before this change, passes the generalized TEMPLATE_DECL to
> > dump_type (... DECL_CONTEXT (t) ...) and 

Re: [PATCH 3/4] Darwin : Allow for configuring Darwin to use embedded runpath.

2021-11-17 Thread Joseph Myers
On Wed, 17 Nov 2021, Iain Sandoe via Gcc-patches wrote:

>   * libtool.m4: Add 'enable-darwin-at-runpath'.  Act  on the
>   enable flag to alter Darwin libraries to use @rpath names.

To confirm: has this been sent to upstream libtool (which has recently 
acquired a new maintainer, so hopefully might become a bit more active at 
dealing with the backlog of patches)?

-- 
Joseph S. Myers
jos...@codesourcery.com


[PATCH 2/2] libcpp: capture and underline ranges in -Wbidi-chars= [PR103026]

2021-11-17 Thread David Malcolm via Gcc-patches
This patch converts the bidi::vec to use a struct so that we can
capture location_t values for the bidirectional control characters.

Before:

  Wbidi-chars-1.c: In function ‘main’:
  Wbidi-chars-1.c:6:43: warning: unpaired UTF-8 bidirectional control character 
detected [-Wbidi-chars=]
  6 | /* } if (isAdmin)  begin admins 
only */
|   
^
  Wbidi-chars-1.c:9:28: warning: unpaired UTF-8 bidirectional control character 
detected [-Wbidi-chars=]
  9 | /* end admins only  { */
|^

After:

  Wbidi-chars-1.c: In function ‘main’:
  Wbidi-chars-1.c:6:43: warning: unpaired UTF-8 bidirectional control 
characters detected [-Wbidi-chars=]
  6 | /* } if (isAdmin)  begin admins 
only */
|   
^
|   |   |   
|
|   |   |   
end of bidirectional context
|   U+202E (RIGHT-TO-LEFT OVERRIDE) U+2066 (LEFT-TO-RIGHT 
ISOLATE)
  Wbidi-chars-1.c:9:28: warning: unpaired UTF-8 bidirectional control 
characters detected [-Wbidi-chars=]
  9 | /* end admins only  { */
|    ^
||  ||
||  |end of bidirectional 
context
||  U+2066 (LEFT-TO-RIGHT ISOLATE)
|U+202E (RIGHT-TO-LEFT OVERRIDE)

Successfully bootstrapped & regrtested on x86_64-pc-linux-gnu.
Pushed to trunk as r12-5356-gbef32d4a28595e933f24fef378cf052a30b674a7.

Signed-off-by: David Malcolm 

gcc/testsuite/ChangeLog:
PR preprocessor/103026
* c-c++-common/Wbidi-chars-ranges.c: New test.

libcpp/ChangeLog:
PR preprocessor/103026
* lex.c (struct bidi::context): New.
(bidi::vec): Convert to a vec of context rather than unsigned
char.
(bidi::ctx_at): Rename to...
(bidi::pop_kind_at): ...this and reimplement for above change.
(bidi::current_ctx): Update for change to vec.
(bidi::current_ctx_ucn_p): Likewise.
(bidi::current_ctx_loc): New.
(bidi::on_char): Update for usage of context struct.  Add "loc"
param and pass it when pushing contexts.
(get_location_for_byte_range_in_cur_line): New.
(get_bidi_utf8): Rename to...
(get_bidi_utf8_1): ...this, reintroducing...
(get_bidi_utf8): ...as a wrapper, setting *OUT when the result is
not NONE.
(get_bidi_ucn): Rename to...
(get_bidi_ucn_1): ...this, reintroducing...
(get_bidi_ucn): ...as a wrapper, setting *OUT when the result is
not NONE.
(class unpaired_bidi_rich_location): New.
(maybe_warn_bidi_on_close): Use unpaired_bidi_rich_location when
reporting on unpaired bidi chars.  Split into singular vs plural
spellings.
(maybe_warn_bidi_on_char): Pass in a location_t rather than a
const uchar * and use it when emitting warnings, and when calling
bidi::on_char.
(_cpp_skip_block_comment): Capture location when kind is not NONE
and pass it to maybe_warn_bidi_on_char.
(skip_line_comment): Likewise.
(forms_identifier_p): Likewise.
(lex_raw_string): Likewise.
(lex_string): Likewise.

Signed-off-by: David Malcolm 
---
 .../c-c++-common/Wbidi-chars-ranges.c |  54 
 libcpp/lex.c  | 251 ++
 2 files changed, 257 insertions(+), 48 deletions(-)
 create mode 100644 gcc/testsuite/c-c++-common/Wbidi-chars-ranges.c

diff --git a/gcc/testsuite/c-c++-common/Wbidi-chars-ranges.c 
b/gcc/testsuite/c-c++-common/Wbidi-chars-ranges.c
new file mode 100644
index 000..298750a2a64
--- /dev/null
+++ b/gcc/testsuite/c-c++-common/Wbidi-chars-ranges.c
@@ -0,0 +1,54 @@
+/* PR preprocessor/103026 */
+/* { dg-do compile } */
+/* { dg-options "-Wbidi-chars=unpaired -fdiagnostics-show-caret" } */
+/* Verify that we escape and underline pertinent bidirectional
+   control characters when quoting the source.  */
+
+int test_unpaired_bidi () {
+int isAdmin = 0;
+/*‮ } ⁦if (isAdmin)⁩ ⁦ begin admins only */
+/* { dg-warning "bidirectional" "" { target *-*-* } .-1 } */
+#if 0
+   { dg-begin-multiline-output "" }
+ /* } if (isAdmin)  begin admins only */
+   ^
+   |   |   |
+   |   |   end 
of bidirectional context
+   U+202E (RIGHT-TO-LEFT OVERRIDE) U+2066 (LEFT-TO-RIGHT ISOLATE)
+   { dg-end-multiline-output "" }

[committed] libcpp: escape non-ASCII source bytes in -Wbidi-chars= [PR103026]

2021-11-17 Thread David Malcolm via Gcc-patches
This flags rich_locations associated with -Wbidi-chars= so that
non-ASCII bytes will be escaped when printing the source lines
(using the diagnostics support I added in
r12-4825-gbd5e882cf6e0def3dd1bc106075d59a303fe0d1e).

In particular, this ensures that the printed source lines will
be pure ASCII, and thus the visual ordering of the characters
will be the same as the logical ordering.

Before:

  Wbidi-chars-1.c: In function ‘main’:
  Wbidi-chars-1.c:6:43: warning: unpaired UTF-8 bidirectional control character 
detected [-Wbidi-chars=]
  6 | /*‮ } ⁦if (isAdmin)⁩ ⁦ begin admins only */
|   ^
  Wbidi-chars-1.c:9:28: warning: unpaired UTF-8 bidirectional control character 
detected [-Wbidi-chars=]
  9 | /* end admins only ‮ { ⁦*/
|^

  Wbidi-chars-11.c:6:15: warning: UTF-8 vs UCN mismatch when closing a context 
by "U+202C (POP DIRECTIONAL FORMATTING)" [-Wbidi-chars=]
  6 | int LRE_‪_PDF_\u202c;
|   ^
  Wbidi-chars-11.c:8:19: warning: UTF-8 vs UCN mismatch when closing a context 
by "U+202C (POP DIRECTIONAL FORMATTING)" [-Wbidi-chars=]
  8 | int LRE_\u202a_PDF_‬_;
|   ^
  Wbidi-chars-11.c:10:28: warning: UTF-8 vs UCN mismatch when closing a context 
by "U+202C (POP DIRECTIONAL FORMATTING)" [-Wbidi-chars=]
 10 | const char *s1 = "LRE_‪_PDF_\u202c";
|^
  Wbidi-chars-11.c:12:33: warning: UTF-8 vs UCN mismatch when closing a context 
by "U+202C (POP DIRECTIONAL FORMATTING)" [-Wbidi-chars=]
 12 | const char *s2 = "LRE_\u202a_PDF_‬";
| ^

After:

  Wbidi-chars-1.c: In function ‘main’:
  Wbidi-chars-1.c:6:43: warning: unpaired UTF-8 bidirectional control character 
detected [-Wbidi-chars=]
  6 | /* } if (isAdmin)  begin admins 
only */
|   
^
  Wbidi-chars-1.c:9:28: warning: unpaired UTF-8 bidirectional control character 
detected [-Wbidi-chars=]
  9 | /* end admins only  { */
|^

  Wbidi-chars-11.c:6:15: warning: UTF-8 vs UCN mismatch when closing a context 
by "U+202C (POP DIRECTIONAL FORMATTING)" [-Wbidi-chars=]
  6 | int LRE__PDF_\u202c;
|   ^
  Wbidi-chars-11.c:8:19: warning: UTF-8 vs UCN mismatch when closing a context 
by "U+202C (POP DIRECTIONAL FORMATTING)" [-Wbidi-chars=]
  8 | int LRE_\u202a_PDF__;
|   ^
  Wbidi-chars-11.c:10:28: warning: UTF-8 vs UCN mismatch when closing a context 
by "U+202C (POP DIRECTIONAL FORMATTING)" [-Wbidi-chars=]
 10 | const char *s1 = "LRE__PDF_\u202c";
|^
  Wbidi-chars-11.c:12:33: warning: UTF-8 vs UCN mismatch when closing a context 
by "U+202C (POP DIRECTIONAL FORMATTING)" [-Wbidi-chars=]
 12 | const char *s2 = "LRE_\u202a_PDF_";
| ^

Successfully bootstrapped & regrtested on x86_64-pc-linux-gnu.
Pushed to trunk as r12-5355-g1a7f2c0774129750fdf73e9f1b78f0ce983c9ab3.

libcpp/ChangeLog:
PR preprocessor/103026
* lex.c (maybe_warn_bidi_on_close): Use a rich_location
and call set_escape_on_output (true) on it.
(maybe_warn_bidi_on_char): Likewise.

Signed-off-by: David Malcolm 
---
 libcpp/lex.c | 29 +
 1 file changed, 17 insertions(+), 12 deletions(-)

diff --git a/libcpp/lex.c b/libcpp/lex.c
index 6a4fbce6030..8290bc637cd 100644
--- a/libcpp/lex.c
+++ b/libcpp/lex.c
@@ -1427,9 +1427,11 @@ maybe_warn_bidi_on_close (cpp_reader *pfile, const uchar 
*p)
   const location_t loc
= linemap_position_for_column (pfile->line_table,
   CPP_BUF_COLUMN (pfile->buffer, p));
-  cpp_warning_with_line (pfile, CPP_W_BIDIRECTIONAL, loc, 0,
-"unpaired UTF-8 bidirectional control character "
-"detected");
+  rich_location rich_loc (pfile->line_table, loc);
+  rich_loc.set_escape_on_output (true);
+  cpp_warning_at (pfile, CPP_W_BIDIRECTIONAL, _loc,
+ "unpaired UTF-8 bidirectional control character "
+ "detected");
 }
   /* We're done with this context.  */
   bidi::on_close ();
@@ -1454,6 +1456,9 @@ maybe_warn_bidi_on_char (cpp_reader *pfile, const uchar 
*p, bidi::kind kind,
   const location_t loc
= linemap_position_for_column (pfile->line_table,
   CPP_BUF_COLUMN (pfile->buffer, p));
+  rich_location rich_loc (pfile->line_table, loc);
+  rich_loc.set_escape_on_output (true);
+
   /* It seems excessive to warn about a PDI/PDF that is closing
 an opened context because we've already warned about the
 opening character.  Except warn when we have a UCN x UTF-8
@@ -1462,20 

Re: [PATCH 2/6] Add returns_zero_on_success/failure attributes

2021-11-17 Thread Joseph Myers
On Wed, 17 Nov 2021, Prathamesh Kulkarni via Gcc-patches wrote:

> More generally, would it be a good idea to provide attributes for
> mod/ref analysis?
> So sth like:
> void foo(void) __attribute__((modifies(errno)));
> which would state that foo modifies errno, but neither reads nor
> modifies any other global var.
> and
> void bar(void) __attribute__((reads(errno)))
> which would state that bar only reads errno, and doesn't modify or
> read any other global var.

Many math.h functions are const except for possibly setting errno, 
possibly raising floating-point exceptions (which might have other effects 
when using alternate exception handling) and possibly reading the rounding 
mode.  To represent that, it might be useful for such attributes to be 
able to describe state (such as the floating-point environment) that 
doesn't correspond to a C identifier.  (errno tends to be a macro, so 
referring to it as such in an attribute may be awkward as well.)

(See also  with 
some proposals for features to describe const/pure-like properties of 
functions.)

-- 
Joseph S. Myers
jos...@codesourcery.com


[PATCH v4] x86: Add -mharden-sls=[none|all|return|indirect-branch]

2021-11-17 Thread H.J. Lu via Gcc-patches
On Wed, Nov 17, 2021 at 12:09 PM Uros Bizjak  wrote:
>
> On Wed, Nov 17, 2021 at 9:02 PM H.J. Lu  wrote:
> >
> > On Wed, Nov 17, 2021 at 7:53 AM Uros Bizjak  wrote:
> > >
> > > On Wed, Nov 17, 2021 at 4:35 PM H.J. Lu  wrote:
> > > >
> > > > Add -mharden-sls= to mitigate against straight line speculation (SLS)
> > > > for function return and indirect branch by adding an INT3 instruction
> > > > after function return and indirect branch.
> > > >
> > > > gcc/
> > > >
> > > > PR target/102952
> > > > * config/i386/i386-opts.h (harden_sls): New enum.
> > > > * config/i386/i386.c (output_indirect_thunk): Mitigate against
> > > > SLS for function return.
> > > > (ix86_output_function_return): Likewise.
> > > > (ix86_output_jmp_thunk_or_indirect): Mitigate against indirect
> > > > branch.
> > > > (ix86_output_indirect_jmp): Likewise.
> > > > (ix86_output_call_insn): Likewise.
> > > > * config/i386/i386.opt: Add -mharden-sls=.
> > > > * doc/invoke.texi: Document -mharden-sls=.
> > > >
> > > > gcc/testsuite/
> > > >
> > > > PR target/102952
> > > > * gcc.target/i386/harden-sls-1.c: New test.
> > > > * gcc.target/i386/harden-sls-2.c: Likewise.
> > > > * gcc.target/i386/harden-sls-3.c: Likewise.
> > > > * gcc.target/i386/harden-sls-4.c: Likewise.
> > > > * gcc.target/i386/harden-sls-5.c: Likewise.
>
> OK, with a small nit below.
>
> Thanks,
> Uros.
>
> +mharden-sls=
> +Target RejectNegative Joined Enum(harden_sls) Var(ix86_harden_sls)
> Init(harden_sls_none)
> +Generate code to mitigate against straight line speculation.
> +
> +Enum
> +Name(harden_sls) Type(enum harden_sls)
> +Known choices for mitigation against straight line speculation with
> -mharden-sls=:
> +
> +EnumValue
> +Enum(harden_sls) String(none) Value(harden_sls_none)
> +
> +EnumValue
> +Enum(harden_sls) String(all) Value(harden_sls_all)
>
> Please move the above enum to the last enum.
>
> +
> +EnumValue
> +Enum(harden_sls) String(return) Value(harden_sls_return)
> +
> +EnumValue
> +Enum(harden_sls) String(indirect-branch) Value(harden_sls_indirect_branch)
> +

This is the v4 patch I am checking in.

Thanks.

-- 
H.J.
From 7408038b8f28de44d4b323f7f81b140a78c7689e Mon Sep 17 00:00:00 2001
From: "H.J. Lu" 
Date: Wed, 27 Oct 2021 07:48:54 -0700
Subject: [PATCH v4] x86: Add -mharden-sls=[none|all|return|indirect-branch]

Add -mharden-sls= to mitigate against straight line speculation (SLS)
for function return and indirect branch by adding an INT3 instruction
after function return and indirect branch.

gcc/

	PR target/102952
	* config/i386/i386-opts.h (harden_sls): New enum.
	* config/i386/i386.c (output_indirect_thunk): Mitigate against
	SLS for function return.
	(ix86_output_function_return): Likewise.
	(ix86_output_jmp_thunk_or_indirect): Mitigate against indirect
	branch.
	(ix86_output_indirect_jmp): Likewise.
	(ix86_output_call_insn): Likewise.
	* config/i386/i386.opt: Add -mharden-sls=.
	* doc/invoke.texi: Document -mharden-sls=.

gcc/testsuite/

	PR target/102952
	* gcc.target/i386/harden-sls-1.c: New test.
	* gcc.target/i386/harden-sls-2.c: Likewise.
	* gcc.target/i386/harden-sls-3.c: Likewise.
	* gcc.target/i386/harden-sls-4.c: Likewise.
	* gcc.target/i386/harden-sls-5.c: Likewise.
---
 gcc/config/i386/i386-opts.h  |  7 +++
 gcc/config/i386/i386.c   | 21 +---
 gcc/config/i386/i386.opt | 20 +++
 gcc/doc/invoke.texi  | 10 +-
 gcc/testsuite/gcc.target/i386/harden-sls-1.c | 14 +
 gcc/testsuite/gcc.target/i386/harden-sls-2.c | 14 +
 gcc/testsuite/gcc.target/i386/harden-sls-3.c | 14 +
 gcc/testsuite/gcc.target/i386/harden-sls-4.c | 16 +++
 gcc/testsuite/gcc.target/i386/harden-sls-5.c | 17 
 9 files changed, 125 insertions(+), 8 deletions(-)
 create mode 100644 gcc/testsuite/gcc.target/i386/harden-sls-1.c
 create mode 100644 gcc/testsuite/gcc.target/i386/harden-sls-2.c
 create mode 100644 gcc/testsuite/gcc.target/i386/harden-sls-3.c
 create mode 100644 gcc/testsuite/gcc.target/i386/harden-sls-4.c
 create mode 100644 gcc/testsuite/gcc.target/i386/harden-sls-5.c

diff --git a/gcc/config/i386/i386-opts.h b/gcc/config/i386/i386-opts.h
index 04e4ad608fb..171d3106d0a 100644
--- a/gcc/config/i386/i386-opts.h
+++ b/gcc/config/i386/i386-opts.h
@@ -121,4 +121,11 @@ enum instrument_return {
   instrument_return_nop5
 };
 
+enum harden_sls {
+  harden_sls_none = 0,
+  harden_sls_return = 1 << 0,
+  harden_sls_indirect_branch = 1 << 1,
+  harden_sls_all = harden_sls_return | harden_sls_indirect_branch
+};
+
 #endif
diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c
index a5bfb9efca9..c246c8736f5 100644
--- a/gcc/config/i386/i386.c
+++ b/gcc/config/i386/i386.c
@@ -5914,6 +5914,8 @@ output_indirect_thunk (unsigned int regno)
 }
 
   fputs 

Re: [PATCH] rs6000: Builtins test changes for BFP scalar tests

2021-11-17 Thread Segher Boessenkool
On Wed, Nov 17, 2021 at 02:58:54PM -0600, Bill Schmidt wrote:
> Hi!  This patch is broken out of the previous patch for all the builtins test
> suite adjustments.  Here we have some slight changes in error messages due to
> how the internals have changed between the old and new builtins methods.
> 
> For scalar-extract-exp-2.c we change:
>   error: '__builtin_vec_scalar_extract_exp is not supported in this compiler 
> configuration'
> 
> to:
>   error: '__builtin_vsx_scalar_extract_exp' requires the '-mcpu=power9' 
> option and either the '-m64' or '-mpowerpc64' option
>   note: builtin '__builtin_vec_scalar_extract_exp' requires builtin 
> '__builtin_vsx_scalar_extract_exp'

I don't like that at all.  The user didn't write the _vsx thing, and it
isn't documented either (neither is the _vec one, but that is a separate
issue, specific to this builtin).

> The new message provides more information.  In both cases, it is less than
> ideal that we don't refer to scalar_extract_exp, which is referenced in
> the source line, but this is because scalar_extract_exp is #define'd to
> __builtin_vec_scalar_extract_exp, so it's unavoidable.  Certainly this is no
> worse than before, and arguably better.

It is a macro, enough said there

The __builtin_ implementation should be documented (in the GCC manual,
if not elsewhere).  The warnings should talk about _vec, because the
_vsx thing only exists as implementation detail, and we should never
talk about those.  We don't have errors about adddi3 either!

>   error: '__builtin_vsx_scalar_extract_sig' requires the '-mcpu=power9' 
> option and either the '-m64' or '-mpowerpc64' option
>   note: builtin '__builtin_vec_scalar_extract_sig' requires builtin 
> '__builtin_vsx_scalar_extract_sig'

The rhs in the note does not *exist*, as far as the user is concerned.
One builtin requiring another is all gobbledygook.

> For scalar-test-neg-{2,3,5}.c, we actually change the test case.  This is
> because we deliberately removed some undocumented and pointless   
> overloads,
> where each overload mapped to a single builtin.  These were:
>   __builtin_vec_scalar_test_neg_sp
>   __builtin_vec_scalar_test_neg_dp
>   __builtin_vec_scalar_test_neg_qp
> which are redundant with the "real" overload:
>   __builtin_vec_scalar_test_neg
> The latter maps to three builtins of the appropriate type.

Yes.  And the new ones are undocumented and useless just as well, they
just have better names.


Segher


[PATCH] PR fortran/101329 - ICE: Invalid expression in gfc_element_size

2021-11-17 Thread Harald Anlauf via Gcc-patches
Dear Fortranners,

as NULL() is not interoperable, we have to reject it.
Confirmed by NAG.  Other compilers show "interesting behavior".

Obvious patch by Steve.  Regtested on x86_64-pc-linux-gnu.

OK for mainline?

Thanks,
Harald

From 52a3ee53f0a12e897c4651fa8378e045653b9fd3 Mon Sep 17 00:00:00 2001
From: Harald Anlauf 
Date: Wed, 17 Nov 2021 22:21:24 +0100
Subject: [PATCH] Fortran: NULL() is not interoperable

gcc/fortran/ChangeLog:

	PR fortran/101329
	* check.c (is_c_interoperable): Reject NULL() as it is not
	interoperable.

gcc/testsuite/ChangeLog:

	PR fortran/101329
	* gfortran.dg/pr101329.f90: New test.

Co-authored-by: Steven G. Kargl 
---
 gcc/fortran/check.c|  6 ++
 gcc/testsuite/gfortran.dg/pr101329.f90 | 13 +
 2 files changed, 19 insertions(+)
 create mode 100644 gcc/testsuite/gfortran.dg/pr101329.f90

diff --git a/gcc/fortran/check.c b/gcc/fortran/check.c
index ffa07b510cd..5a5aca10ebe 100644
--- a/gcc/fortran/check.c
+++ b/gcc/fortran/check.c
@@ -5223,6 +5223,12 @@ is_c_interoperable (gfc_expr *expr, const char **msg, bool c_loc, bool c_f_ptr)
 {
   *msg = NULL;

+  if (expr->expr_type == EXPR_NULL)
+{
+  *msg = "NULL() is not interoperable";
+  return false;
+}
+
   if (expr->ts.type == BT_CLASS)
 {
   *msg = "Expression is polymorphic";
diff --git a/gcc/testsuite/gfortran.dg/pr101329.f90 b/gcc/testsuite/gfortran.dg/pr101329.f90
new file mode 100644
index 000..b82210d4e28
--- /dev/null
+++ b/gcc/testsuite/gfortran.dg/pr101329.f90
@@ -0,0 +1,13 @@
+! { dg-do compile }
+! PR fortran/101329 - ICE: Invalid expression in gfc_element_size
+
+program p
+  use iso_c_binding
+  implicit none
+  integer(c_int), pointer :: ip4
+  integer(c_int64_t), pointer :: ip8
+  print *, c_sizeof (c_null_ptr) ! valid
+  print *, c_sizeof (null ())! { dg-error "is not interoperable" }
+  print *, c_sizeof (null (ip4)) ! { dg-error "is not interoperable" }
+  print *, c_sizeof (null (ip8)) ! { dg-error "is not interoperable" }
+end
--
2.26.2



Fix gamess miscompare

2021-11-17 Thread Jan Hubicka via Gcc-patches
Hi,
this patch fixes a bug in streaming in the modref access tree that now causes a
failure of the gamess benchmark.  The bug is quite old (present in GCC11 release) but it
needs quite an interesting series of events to manifest. In particular
 1) At lto time ISRA turns some parameters passed by reference to scalar
 2) At lto time modref computes summaries for old parameters and then updates
them but does so quite stupidly believing that the loads from parameters
are now unknown loads (rather than optimized out).
This renders the summary not very useful since it thinks every memory aliasing
int is now accessed (as opposed to a parameter dereference)
 3) At stream in we notice too early that summary is useless, set every_access
flag and drop the list.  However while reading rest of the summary we
overwrite the flag back to 0 which makes us to lose part of summary.
 4) right selection of partitions needs to be done to avoid late modref from
recalculating and thus fixing the summary.

This patch fixes the stream in bug, however we also should fix updating of
summaries.  Martin, would it be possible to extend get_original_index by a "deref"
parameter that would be set to true when a reference was turned to scalar?

Bootstrapped/regtested x86_64-linux. Committed.

gcc/ChangeLog:

2021-11-17  Jan Hubicka  

PR ipa/103246
* ipa-modref.c (read_modref_records): Fix streaming in of every_access
flag.

diff --git a/gcc/ipa-modref.c b/gcc/ipa-modref.c
index 9ceecdd479f..c94f0589d44 100644
--- a/gcc/ipa-modref.c
+++ b/gcc/ipa-modref.c
@@ -3460,10 +3460,10 @@ read_modref_records (lto_input_block *ib, struct 
data_in *data_in,
  size_t every_access = streamer_read_uhwi (ib);
  size_t naccesses = streamer_read_uhwi (ib);
 
- if (nolto_ref_node)
-   nolto_ref_node->every_access = every_access;
- if (lto_ref_node)
-   lto_ref_node->every_access = every_access;
+ if (nolto_ref_node && every_access)
+   nolto_ref_node->collapse ();
+ if (lto_ref_node && every_access)
+   lto_ref_node->collapse ();
 
  for (size_t k = 0; k < naccesses; k++)
{


[PATCH 3/4] Darwin : Allow for configuring Darwin to use embedded runpath.

2021-11-17 Thread Iain Sandoe via Gcc-patches
Recent Darwin versions place constraints on the use of run paths
specified in environment variables.  This breaks some assumptions
in the GCC build.

This change allows the user to configure a Darwin build to use
'@rpath/libraryname.dylib' in library names and then to add an
embedded runpath to executables (and libraries with dependents).

The embedded runpath is added by default unless:

1. the user adds an explicit -rpath / -Wl,-rpath,
2. the user adds '-nodefaultrpath'.

For an installed compiler, it means that any executable built with
that compiler will reference the runtimes installed with the
compiler (equivalent to hard-coding the library path into the name
of the library).

During build-time configurations  any "-B" entries will be added to
the runpath thus the newly-built libraries will be found by exes.

Since the install name is set in libtool, that decision needs to be
available here (but might also cause dependent ones in Makefiles,
so we need to export a conditional).

This facility is not available for Darwin 8 or earlier, however the
existing environment variable runpath does work there.

We default this on for systems where the external DYLD_LIBRARY_PATH
does not work and off for Darwin 8 or earlier.  For systems that can
use either method, if the value is unset, we use the default (which
is currently DYLD_LIBRARY_PATH).

ChangeLog:

* configure: Regenerate.
* configure.ac: Do not add default runpaths to GCC exes
when we are building -static-libstdc++/-static-libgcc (the
default).
* libtool.m4: Add 'enable-darwin-at-runpath'.  Act  on the
enable flag to alter Darwin libraries to use @rpath names.

gcc/ChangeLog:

* aclocal.m4: Regenerate.
* configure: Regenerate.

libatomic/ChangeLog:

* Makefile.am: Handle Darwin rpaths.
* Makefile.in: Regenerate.
* configure: Regenerate.
* testsuite/Makefile.in: Regenerate.

libcc1/ChangeLog:

* Makefile.am: Handle Darwin rpaths.
* Makefile.in: Regenerate.
* configure: Regenerate.

libffi/ChangeLog:

* Makefile.am: Handle Darwin rpaths.
* Makefile.in: Regenerate.
* configure: Regenerate.

libgcc/ChangeLog:

* config/t-slibgcc-darwin: Generate libgcc_s
with an @rpath name.

libgfortran/ChangeLog:

* Makefile.am: Handle Darwin rpaths.
* Makefile.in: Regenerate.
* configure: Regenerate.
* configure.ac: Handle Darwin rpaths

libgomp/ChangeLog:

* Makefile.am: Handle Darwin rpaths.
* Makefile.in: Regenerate.
* configure: Regenerate.

libhsail-rt/ChangeLog:

* configure: Regenerate.

libitm/ChangeLog:

* Makefile.am: Handle Darwin rpaths.
* Makefile.in: Regenerate.
* configure: Regenerate.

libobjc/ChangeLog:

* configure: Regenerate.
* configure.ac: Handle Darwin rpaths.

liboffloadmic/ChangeLog:

* configure: Regenerate.
* plugin/Makefile.in: Regenerate.
* plugin/aclocal.m4: Regenerate.
* plugin/configure: Regenerate.

libphobos/ChangeLog:

* configure: Regenerate.
* libdruntime/Makefile.am: Handle Darwin rpaths.
* libdruntime/Makefile.in: Regenerate.
* src/Makefile.am: Handle Darwin rpaths.
* src/Makefile.in: Regenerate.

libquadmath/ChangeLog:

* Makefile.am: Handle Darwin rpaths.
* Makefile.in: Regenerate.
* configure: Regenerate.
* configure.ac: Handle Darwin rpaths.

libsanitizer/ChangeLog:

* asan/Makefile.am: Handle Darwin rpaths.
* asan/Makefile.in: Regenerate.
* configure: Regenerate.
* hwasan/Makefile.am: Handle Darwin rpaths.
* hwasan/Makefile.in: Regenerate.
* lsan/Makefile.am: Handle Darwin rpaths.
* lsan/Makefile.in: Regenerate.
* tsan/Makefile.am: Handle Darwin rpaths.
* tsan/Makefile.in: Regenerate.
* ubsan/Makefile.am: Handle Darwin rpaths.
* ubsan/Makefile.in: Regenerate.

libssp/ChangeLog:

* Makefile.am: Handle Darwin rpaths.
* Makefile.in: Regenerate.
* configure: Regenerate.

libstdc++-v3/ChangeLog:

* configure: Regenerate.
* src/Makefile.am: Handle Darwin rpaths.
* src/Makefile.in: Regenerate.

Darwin, libtool : Provide a mechanism to enable embedded rpaths.

We need to be able to build libraries with install names that begin
with @rpath so that we can use rpaths in DSOs that depend on
them.  Since the install name is set in libtool, that decision needs
to be available here (but might also cause dependent ones in
Makefiles, so we need to export a conditional).
---
 configure |   5 +
 configure.ac  |   5 +
 gcc/aclocal.m4|  50 +++
 gcc/configure | 157 +++--
 libatomic/Makefile.am |   6 +-
 libatomic/Makefile.in 

[PATCH 0/4] Darwin: Replace environment runpath with embedded [PR88590].

2021-11-17 Thread Iain Sandoe via Gcc-patches
This is a fairly long explanation of the problems being addressed by
the patch set.  Most of the changes are Darwin-specific - a change to
the libtool component allowing for this @rpath and some minor additions
to makefiles where libtool is not in use.  At present, this seems pretty
specific to the GCC build; since we depend on accessing newly-built
components during the bootstrap.

There are additional details relevant to each patch in its own commit
message.

=

Darwin builds shared libraries with information on the runpath as part
of the library name.  For example, /installation/path/for/libfoo.dylib.

That is regarded as two components; the 'runpath' /installation/path/for/
and the library name libfoo.dylib.

This means that (at runtime) two libraries with the same name can be
disambiguated by their runpaths, and potentially used by the same exe.

= Problem #1

That is fine and works well; until we disturb the assumptions by
overriding the library runpath with an environment one: DYLD_LIBRARY_PATH.

Now the library runpath(s) can be discarded and the libraries are first
searched on the basis of that provided by the environment; two libraries
with the same name are no longer distinct (if a library with that name is
found in the environment path).

This causes problems in configuring, building and testing GCC because we
set the runpath environment at a very high level so that it applies to
stage1+ target configures and stage2+ host configures.  This is needed so
that executables built during those configures get the newly-built libgcc_s
when the target defaults to using a shared libgcc.

However, it also means that every tool that is used during the configure
has its libgcc_s (or any of the newly-built bootstrapped libs) overridden
to use the new one(s) - which might be buggy.

In the testsuite it is more serious - since more target libs come into
play - especially libstdc++.  Several system tools on Darwin use(d) libc++
and that has caused wrong or crashed test output.  In principle,
LD_LIBRARY_PATH on Linux has the same issue - although perhaps there is
less tendency to default to use of shared dependent libs.

Ideally, one would have several environment paths, and some way to use
the appropriate one at the appropriate time.  I experimented with this
as a solution to both this and the following problem, but it proved
unrealistic - since the process would have to be applied to all relevant
OSS projects using auto-tools to be safe - and mostly the uninstalled
use of libraries is a GCC build-time issue.

= Problem #2

A change in security policy for Darwin means that DYLD_LIBRARY_PATH is
now removed from the environment for all system tools (e.g. /usr/sh, env
etc).  This means that all realistic build steps that use any system
utility (like sh) will no longer see the environment runpath and the
only ones available will be those in the libraries.

This breaks GCC's configuration since the steps mentioned above are now
not seeing the newly-built shared libraries, but actually much older ones
installed on the system.  It means that for all Darwin15+ we misconfigure
libstdc++.

/bin/sh is hardwired into autoconf, one cannot use CONFIG_SHELL to work
around this - because /bin/sh is invoked first, and then passes control to
CONFIG_SHELL.

A second problem is that we cannot bump the SO name for libgcc_s (which
I need to do to solve an EH problem) - since the new SO name is not
available on the system, and therefore none of the stage1+ target configures
will succeed.  This is because the eventual install path is correctly
encoded into the built library, but it is not present at the install
position (and, in general, cannot be installed - since that might not even
be a suitable path on the build system).

This has also meant that we could not do in-tree testing without first
installing the target libraries (which is mostly inconvenient rather than
a show-stopper, but still).

= Tested solution.

Darwin has the ability to make the runpaths install-position independent.

One sets the library runpath to @rpath/ which essentially means "use the
runpath available at the time we want to load this".

One can then add (potentially multiple) runpaths to the executable, the
built library can be put anywhere convenient - providing we can put that
path into the exe.

For GCC's build, test and install process this means that we need at each
stage to build exes with the runpaths that are relevant to the positions
of the dependent libraries.

To do this, we add an rpath for each of the startfile paths.  While we are
building/testing GCC these correspond to (for example gcc/  or 
/libstdc++/src/.libs etc) and then, after the compiler is installed
at its intended install path - these become /compiler/installation/path/lib
etc.

I have tested this widely on i686, powerpc, x86_64 and aarch64 Darwin over
more than a year.

So patch 1 : provides a spec that expands to -rpath xxx for each xxx in the
startfiles 

[PATCH 4/4] Darwin, Ada : Add loader path as a default rpath.

2021-11-17 Thread Iain Sandoe via Gcc-patches
Allow the Ada runtimes to find GCC runtimes relative to their non-
standard install positions.

gcc/ada/
* gcc-interface/Makefile.in: Add @loader_path runpaths to the
libgnat and libgnarl shared library builds.

---
 gcc/ada/gcc-interface/Makefile.in | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/gcc/ada/gcc-interface/Makefile.in 
b/gcc/ada/gcc-interface/Makefile.in
index 53d0739470a..bffe9de4c89 100644
--- a/gcc/ada/gcc-interface/Makefile.in
+++ b/gcc/ada/gcc-interface/Makefile.in
@@ -788,6 +788,7 @@ gnatlib-shared-darwin:
$(GNATRTL_NONTASKING_OBJS) $(LIBGNAT_OBJS) \
$(SO_OPTS) \

-Wl,-install_name,@rpath/libgnat$(hyphen)$(LIBRARY_VERSION)$(soext) \
+   -Wl,-rpath,@loader_path \
$(MISCLIB)
cd $(RTSDIR); `echo "$(GCC_FOR_TARGET)" \
 | sed -e 's,\./xgcc,../../xgcc,' -e 's,-B\./,-B../../,'` 
-dynamiclib $(PICFLAG_FOR_TARGET) \
@@ -795,6 +796,7 @@ gnatlib-shared-darwin:
$(GNATRTL_TASKING_OBJS) \
$(SO_OPTS) \

-Wl,-install_name,@rpath/libgnarl$(hyphen)$(LIBRARY_VERSION)$(soext) \
+   -Wl,-rpath,@loader_path \
$(THREADSLIB) -Wl,libgnat$(hyphen)$(LIBRARY_VERSION)$(soext)
cd $(RTSDIR); $(LN_S) libgnat$(hyphen)$(LIBRARY_VERSION)$(soext) \
libgnat$(soext)
-- 
2.24.3 (Apple Git-128)



[PATCH 2/4] Darwin : Handle rpaths given on the command line.

2021-11-17 Thread Iain Sandoe via Gcc-patches
We want to produce a situation where a default rpath can be added
to each executable (or dylib), but that can be overridden by any
specific rpath provided by the user.

gcc/ChangeLog:

* config.gcc: Include rpath.opt
* config/darwin-driver.c (darwin_driver_init): Detect cases
where the user has added rpaths via a -Wl or -Xlinker command
and suppress default rpaths in that case.
* config/darwin.h (DRIVER_SELF_SPECS): Handle -rpath.
(DARWIN_RPATH_SPEC): New.
* config/darwin.opt: Add nodefaultrpath option.
---
 gcc/config/darwin-driver.c | 18 ++
 gcc/config/darwin.h| 11 ++-
 gcc/config/darwin.opt  |  4 
 3 files changed, 32 insertions(+), 1 deletion(-)

diff --git a/gcc/config/darwin-driver.c b/gcc/config/darwin-driver.c
index 4f0c6bad61f..ccc288f20ce 100644
--- a/gcc/config/darwin-driver.c
+++ b/gcc/config/darwin-driver.c
@@ -281,6 +281,7 @@ darwin_driver_init (unsigned int *decoded_options_count,
   const char *vers_string = NULL;
   bool seen_version_min = false;
   bool seen_sysroot_p = false;
+  bool seen_rpath_p = false;
 
   for (i = 1; i < *decoded_options_count; i++)
 {
@@ -349,6 +350,13 @@ darwin_driver_init (unsigned int *decoded_options_count,
  seen_sysroot_p = true;
  break;
 
+   case OPT_Xlinker:
+   case OPT_Wl_:
+ gcc_checking_assert ((*decoded_options)[i].arg);
+ if (strncmp ((*decoded_options)[i].arg, "-rpath", 6) == 0)
+   seen_rpath_p = true;
+ break;
+
default:
  break;
}
@@ -474,4 +482,14 @@ darwin_driver_init (unsigned int *decoded_options_count,
  &(*decoded_options)[*decoded_options_count - 1]);
 }
 }
+
+  if (seen_rpath_p)
+{
+  ++*decoded_options_count;
+  *decoded_options = XRESIZEVEC (struct cl_decoded_option,
+*decoded_options,
+*decoded_options_count);
+  generate_option (OPT_nodefaultrpath, NULL, 1, CL_DRIVER,
+  &(*decoded_options)[*decoded_options_count - 1]);
+}
 }
diff --git a/gcc/config/darwin.h b/gcc/config/darwin.h
index 7ed01efa694..4423933890b 100644
--- a/gcc/config/darwin.h
+++ b/gcc/config/darwin.h
@@ -384,6 +384,7 @@ extern GTY(()) int darwin_ms_struct;
 DARWIN_NOPIE_SPEC \
 DARWIN_RDYNAMIC \
 DARWIN_NOCOMPACT_UNWIND \
+"%{!r:%{!nostdlib:%{!rpath:%{!nodefaultrpath:%(darwin_rpaths) " \
 "}}} % 10.5 mmacosx-version-min= -lcrt1.o)\
@@ -542,6 +544,13 @@ extern GTY(()) int darwin_ms_struct;
 "%{!static:%:version-compare(< 10.6 mmacosx-version-min= -lbundle1.o)  \
   %{fgnu-tm: -lcrttms.o}}"
 
+/* A default rpath, that picks up dependent libraries installed in the same 
+   director as one being loaded.  */
+#define DARWIN_RPATH_SPEC \
+  "%:version-compare(>= 10.5 mmacosx-version-min= -rpath) \
+   %:version-compare(>= 10.5 mmacosx-version-min= @loader_path) \
+   %P "
+
 #ifdef HAVE_AS_MMACOSX_VERSION_MIN_OPTION
 /* Emit macosx version (but only major).  */
 #define ASM_MMACOSX_VERSION_MIN_SPEC \
diff --git a/gcc/config/darwin.opt b/gcc/config/darwin.opt
index d1d1f816912..021d67b17c7 100644
--- a/gcc/config/darwin.opt
+++ b/gcc/config/darwin.opt
@@ -233,6 +233,10 @@ no_dead_strip_inits_and_terms
 Driver RejectNegative
 (Obsolete) Current linkers never dead-strip these items, so the option is not 
needed.
 
+nodefaultrpath
+Driver RejectNegative
+Do not add a default rpath to executables, modules or dynamic libraries.
+
 nofixprebinding
 Driver RejectNegative
 (Obsolete after 10.3.9) Set MH_NOPREFIXBINDING, in an executable.
-- 
2.24.3 (Apple Git-128)



[PATCH 1/4] Driver : Provide a spec to insert rpaths for compiler lib dirs.

2021-11-17 Thread Iain Sandoe via Gcc-patches
This provides a spec to insert "-rpath DDD" for each DDD corresponding
to a compiler startfile directory.  This allows a target to use @rpath
as the install path for libraries, and have the compiler provide the
necessary rpath to handle this.

gcc/ChangeLog:

* gcc.c (RUNPATH_OPTION): New.
(do_spec_1): Provide '%P' as a spec to insert rpaths for
each compiler startfile path.
---
 gcc/gcc.c | 18 ++
 1 file changed, 18 insertions(+)

diff --git a/gcc/gcc.c b/gcc/gcc.c
index 506c2acc282..7b52d0bcbfd 100644
--- a/gcc/gcc.c
+++ b/gcc/gcc.c
@@ -572,6 +572,7 @@ or with constant text in a single argument.
  %l process LINK_SPEC as a spec.
  %L process LIB_SPEC as a spec.
  %M Output multilib_os_dir.
+ %POutput a RUNPATH_OPTION for each directory in startfile_prefixes.
  %G process LIBGCC_SPEC as a spec.
  %R Output the concatenation of target_system_root and
 target_sysroot_suffix.
@@ -1191,6 +1192,10 @@ proper position among the other output files.  */
 # define SYSROOT_HEADERS_SUFFIX_SPEC ""
 #endif
 
+#ifndef RUNPATH_OPTION
+# define RUNPATH_OPTION "-rpath"
+#endif
+
 static const char *asm_debug = ASM_DEBUG_SPEC;
 static const char *asm_debug_option = ASM_DEBUG_OPTION_SPEC;
 static const char *cpp_spec = CPP_SPEC;
@@ -6130,6 +6135,19 @@ do_spec_1 (const char *spec, int inswitch, const char 
*soft_matched_part)
}
break;
 
+ case 'P':
+   {
+ struct spec_path_info info;
+
+ info.option = RUNPATH_OPTION;
+ info.append_len = 0;
+ info.omit_relative = false;
+ info.separate_options = true;
+
+ for_each_path (_prefixes, true, 0, spec_path, );
+   }
+   break;
+
  case 'e':
/* %efoo means report an error with `foo' as error message
   and don't execute any more commands for this file.  */
-- 
2.24.3 (Apple Git-128)



[PATCH] rs6000: Builtins test changes for BFP scalar tests

2021-11-17 Thread Bill Schmidt via Gcc-patches
Hi!  This patch is broken out of the previous patch for all the builtins test
suite adjustments.  Here we have some slight changes in error messages due to
how the internals have changed between the old and new builtins methods.

For scalar-extract-exp-2.c we change:
  error: '__builtin_vec_scalar_extract_exp' is not supported in this compiler 
configuration

to:
  error: '__builtin_vsx_scalar_extract_exp' requires the '-mcpu=power9' option 
and either the '-m64' or '-mpowerpc64' option
  note: builtin '__builtin_vec_scalar_extract_exp' requires builtin 
'__builtin_vsx_scalar_extract_exp'

The new message provides more information.  In both cases, it is less than
ideal that we don't refer to scalar_extract_exp, which is referenced in
the source line, but this is because scalar_extract_exp is #define'd to
__builtin_vec_scalar_extract_exp, so it's unavoidable.  Certainly this is no
worse than before, and arguably better.

The cases for:
scalar-insert-exp-2.c
scalar-insert-exp-5.c
scalar-insert-exp-8.c
are all similar.

For scalar-extract-sig-2.c we again change:
  error: '__builtin_vec_scalar_extract_sig' is not supported in this compiler 
configuration

to:
  error: '__builtin_vsx_scalar_extract_sig' requires the '-mcpu=power9' option 
and either the '-m64' or '-mpowerpc64' option
  note: builtin '__builtin_vec_scalar_extract_sig' requires builtin 
'__builtin_vsx_scalar_extract_sig'

Here it is clearer because there is no #define to muddy things up, and
again the new message is arguably better than the old.

For scalar-test-neg-{2,3,5}.c, we actually change the test case.  This is
because we deliberately removed some undocumented and pointless overloads,
where each overload mapped to a single builtin.  These were:
__builtin_vec_scalar_test_neg_sp
__builtin_vec_scalar_test_neg_dp
__builtin_vec_scalar_test_neg_qp
which are redundant with the "real" overload:
__builtin_vec_scalar_test_neg
The latter maps to three builtins of the appropriate type.

The revised test case uses the "real" overload instead, and otherwise the
changes to the error messages are the same as for all the other cases.

2021-11-17  Bill Schmidt  

gcc/testsuite/
* gcc.target/powerpc/bfp/scalar-extract-exp-2.c: Adjust error
message.
* gcc.target/powerpc/bfp/scalar-extract-sig-2.c: Likewise.
* gcc.target/powerpc/bfp/scalar-insert-exp-2.c: Likewise.
* gcc.target/powerpc/bfp/scalar-insert-exp-5.c: Likewise.
* gcc.target/powerpc/bfp/scalar-insert-exp-8.c: Likewise.
* gcc.target/powerpc/bfp/scalar-test-neg-2.c: Likewise.
* gcc.target/powerpc/bfp/scalar-test-neg-3.c: Likewise.
* gcc.target/powerpc/bfp/scalar-test-neg-5.c: Likewise.
---
 gcc/testsuite/gcc.target/powerpc/bfp/scalar-extract-exp-2.c | 2 +-
 gcc/testsuite/gcc.target/powerpc/bfp/scalar-extract-sig-2.c | 2 +-
 gcc/testsuite/gcc.target/powerpc/bfp/scalar-insert-exp-2.c  | 2 +-
 gcc/testsuite/gcc.target/powerpc/bfp/scalar-insert-exp-5.c  | 2 +-
 gcc/testsuite/gcc.target/powerpc/bfp/scalar-insert-exp-8.c  | 2 +-
 gcc/testsuite/gcc.target/powerpc/bfp/scalar-test-neg-2.c| 2 +-
 gcc/testsuite/gcc.target/powerpc/bfp/scalar-test-neg-3.c| 2 +-
 gcc/testsuite/gcc.target/powerpc/bfp/scalar-test-neg-5.c| 2 +-
 8 files changed, 8 insertions(+), 8 deletions(-)

diff --git a/gcc/testsuite/gcc.target/powerpc/bfp/scalar-extract-exp-2.c 
b/gcc/testsuite/gcc.target/powerpc/bfp/scalar-extract-exp-2.c
index 922180675fc..53b67c95cf9 100644
--- a/gcc/testsuite/gcc.target/powerpc/bfp/scalar-extract-exp-2.c
+++ b/gcc/testsuite/gcc.target/powerpc/bfp/scalar-extract-exp-2.c
@@ -14,7 +14,7 @@ get_exponent (double *p)
 {
   double source = *p;
 
-  return scalar_extract_exp (source);  /* { dg-error 
"'__builtin_vec_scalar_extract_exp' is not supported in this compiler 
configuration" } */
+  return scalar_extract_exp (source);  /* { dg-error 
"'__builtin_vsx_scalar_extract_exp' requires the" } */
 }
 
 
diff --git a/gcc/testsuite/gcc.target/powerpc/bfp/scalar-extract-sig-2.c 
b/gcc/testsuite/gcc.target/powerpc/bfp/scalar-extract-sig-2.c
index e24d4bd23fe..39ee74c94dc 100644
--- a/gcc/testsuite/gcc.target/powerpc/bfp/scalar-extract-sig-2.c
+++ b/gcc/testsuite/gcc.target/powerpc/bfp/scalar-extract-sig-2.c
@@ -12,5 +12,5 @@ get_significand (double *p)
 {
   double source = *p;
 
-  return __builtin_vec_scalar_extract_sig (source); /* { dg-error 
"'__builtin_vec_scalar_extract_sig' is not supported in this compiler 
configuration" } */
+  return __builtin_vec_scalar_extract_sig (source); /* { dg-error 
"'__builtin_vsx_scalar_extract_sig' requires the" } */
 }
diff --git a/gcc/testsuite/gcc.target/powerpc/bfp/scalar-insert-exp-2.c 
b/gcc/testsuite/gcc.target/powerpc/bfp/scalar-insert-exp-2.c
index feb943104da..efd69725905 100644
--- a/gcc/testsuite/gcc.target/powerpc/bfp/scalar-insert-exp-2.c
+++ b/gcc/testsuite/gcc.target/powerpc/bfp/scalar-insert-exp-2.c
@@ -16,5 +16,5 @@ 

Re: [PATCH v2] x86: Remove "%!" before ret

2021-11-17 Thread Uros Bizjak via Gcc-patches
On Wed, Nov 17, 2021 at 9:33 PM H.J. Lu  wrote:
>
> On Wed, Nov 17, 2021 at 11:46 AM Uros Bizjak  wrote:
> >
> > On Wed, Nov 17, 2021 at 8:44 PM H.J. Lu  wrote:
> > >
> > > Before MPX was removed, "%!" was mapped to
> > >
> > > case '!':
> > >   if (ix86_bnd_prefixed_insn_p (current_output_insn))
> > > fputs ("bnd ", file);
> > >   return;
> > >
> > > After CET was added and MPX was removed, "%!" was mapped to
> > >
> > >case '!':
> > >   if (ix86_notrack_prefixed_insn_p (current_output_insn))
> > > fputs ("notrack ", file);
> > >   return;
> > >
> > > ix86_notrack_prefixed_insn_p always returns false on ret since the
> > > notrack prefix is only for indirect branches.  Remove the unused "%!"
> > > before ret.
> > >
> > > PR target/103307
> > > * config/i386/i386.c (ix86_code_end): Remove "%!" before ret.
> > > (ix86_output_function_return): Likewise.
> > > * config/i386/i386.md (simple_return_pop_internal): Likewise.
> > > ---
> > >  gcc/config/i386/i386.c  | 4 ++--
> > >  gcc/config/i386/i386.md | 2 +-
> > >  2 files changed, 3 insertions(+), 3 deletions(-)
> > >
> > > diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c
> > > index 73c4d5115bb..95d238e9efc 100644
> > > --- a/gcc/config/i386/i386.c
> > > +++ b/gcc/config/i386/i386.c
> > > @@ -6116,7 +6116,7 @@ ix86_code_end (void)
> > >xops[0] = gen_rtx_REG (Pmode, regno);
> > >xops[1] = gen_rtx_MEM (Pmode, stack_pointer_rtx);
> > >output_asm_insn ("mov%z0\t{%1, %0|%0, %1}", xops);
> > > -  output_asm_insn ("%!ret", NULL);
> > > +  output_asm_insn ("ret", NULL);
> >
> > This can use fputs.
>
> Fixed.   Here is the v2 patch.

OK.

Thanks,
Uros.


[PATCH v2] x86: Remove "%!" before ret

2021-11-17 Thread H.J. Lu via Gcc-patches
On Wed, Nov 17, 2021 at 11:46 AM Uros Bizjak  wrote:
>
> On Wed, Nov 17, 2021 at 8:44 PM H.J. Lu  wrote:
> >
> > Before MPX was removed, "%!" was mapped to
> >
> > case '!':
> >   if (ix86_bnd_prefixed_insn_p (current_output_insn))
> > fputs ("bnd ", file);
> >   return;
> >
> > After CET was added and MPX was removed, "%!" was mapped to
> >
> >case '!':
> >   if (ix86_notrack_prefixed_insn_p (current_output_insn))
> > fputs ("notrack ", file);
> >   return;
> >
> > ix86_notrack_prefixed_insn_p always returns false on ret since the
> > notrack prefix is only for indirect branches.  Remove the unused "%!"
> > before ret.
> >
> > PR target/103307
> > * config/i386/i386.c (ix86_code_end): Remove "%!" before ret.
> > (ix86_output_function_return): Likewise.
> > * config/i386/i386.md (simple_return_pop_internal): Likewise.
> > ---
> >  gcc/config/i386/i386.c  | 4 ++--
> >  gcc/config/i386/i386.md | 2 +-
> >  2 files changed, 3 insertions(+), 3 deletions(-)
> >
> > diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c
> > index 73c4d5115bb..95d238e9efc 100644
> > --- a/gcc/config/i386/i386.c
> > +++ b/gcc/config/i386/i386.c
> > @@ -6116,7 +6116,7 @@ ix86_code_end (void)
> >xops[0] = gen_rtx_REG (Pmode, regno);
> >xops[1] = gen_rtx_MEM (Pmode, stack_pointer_rtx);
> >output_asm_insn ("mov%z0\t{%1, %0|%0, %1}", xops);
> > -  output_asm_insn ("%!ret", NULL);
> > +  output_asm_insn ("ret", NULL);
>
> This can use fputs.

Fixed.   Here is the v2 patch.

> Uros.
>
> >final_end_function ();
> >init_insn_lengths ();
> >free_after_compilation (cfun);
> > @@ -16278,7 +16278,7 @@ ix86_output_function_return (bool long_p)
> >  }
> >
> >if (!long_p)
> > -return "%!ret";
> > +return "ret";
> >
> >return "rep%; ret";
> >  }
> > diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md
> > index 73d15de88b2..7b2de60706d 100644
> > --- a/gcc/config/i386/i386.md
> > +++ b/gcc/config/i386/i386.md
> > @@ -14705,7 +14705,7 @@ (define_insn_and_split "simple_return_pop_internal"
> >[(simple_return)
> > (use (match_operand:SI 0 "const_int_operand"))]
> >"reload_completed"
> > -  "%!ret\t%0"
> > +  "ret\t%0"
> >"&& cfun->machine->function_return_type != indirect_branch_keep"
> >[(const_int 0)]
> >"ix86_split_simple_return_pop_internal (operands[0]); DONE;"
> > --
> > 2.33.1
> >



-- 
H.J.
From 594391d282f0066cb046dd06062e3efad8c74a08 Mon Sep 17 00:00:00 2001
From: "H.J. Lu" 
Date: Wed, 17 Nov 2021 11:41:12 -0800
Subject: [PATCH v2] x86: Remove "%!" before ret

Before MPX was removed, "%!" was mapped to

case '!':
  if (ix86_bnd_prefixed_insn_p (current_output_insn))
fputs ("bnd ", file);
  return;

After CET was added and MPX was removed, "%!" was mapped to

   case '!':
  if (ix86_notrack_prefixed_insn_p (current_output_insn))
fputs ("notrack ", file);
  return;

ix86_notrack_prefixed_insn_p always returns false on ret since the
notrack prefix is only for indirect branches.  Remove the unused "%!"
before ret.

	PR target/103307
	* config/i386/i386.c (ix86_code_end): Remove "%!" before ret.
	(ix86_output_function_return): Likewise.
	* config/i386/i386.md (simple_return_pop_internal): Likewise.
---
 gcc/config/i386/i386.c  | 4 ++--
 gcc/config/i386/i386.md | 2 +-
 2 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c
index c9129ae25e4..a5bfb9efca9 100644
--- a/gcc/config/i386/i386.c
+++ b/gcc/config/i386/i386.c
@@ -6115,7 +6115,7 @@ ix86_code_end (void)
   xops[0] = gen_rtx_REG (Pmode, regno);
   xops[1] = gen_rtx_MEM (Pmode, stack_pointer_rtx);
   output_asm_insn ("mov%z0\t{%1, %0|%0, %1}", xops);
-  output_asm_insn ("%!ret", NULL);
+  fputs ("\tret\n", asm_out_file);
   final_end_function ();
   init_insn_lengths ();
   free_after_compilation (cfun);
@@ -16273,7 +16273,7 @@ ix86_output_function_return (bool long_p)
 }
 
   if (!long_p)
-return "%!ret";
+return "ret";
 
   return "rep%; ret";
 }
diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md
index 73d15de88b2..7b2de60706d 100644
--- a/gcc/config/i386/i386.md
+++ b/gcc/config/i386/i386.md
@@ -14705,7 +14705,7 @@ (define_insn_and_split "simple_return_pop_internal"
   [(simple_return)
(use (match_operand:SI 0 "const_int_operand"))]
   "reload_completed"
-  "%!ret\t%0"
+  "ret\t%0"
   "&& cfun->machine->function_return_type != indirect_branch_keep"
   [(const_int 0)]
   "ix86_split_simple_return_pop_internal (operands[0]); DONE;"
-- 
2.33.1



Re: [PATCH] Fortran: Mark internal symbols as artificial [PR88009,PR68800]

2021-11-17 Thread Harald Anlauf via Gcc-patches

Do you have testcases/reproducers demonstrating that the patch actually
fixes the issues you're describing?

Am 17.11.21 um 09:12 schrieb Bernhard Reutner-Fischer via Gcc-patches:

On Tue, 16 Nov 2021 21:46:32 +0100
Harald Anlauf via Fortran  wrote:


Hi Bernhard,

I'm trying to understand your patch.  What does it really try to solve?


Compiler generated symbols should be marked artificial.
The fix for PR88009 ( f8add009ce300f24b75e9c2e2cc5dd944a020c28 ,
r9-5194 ) added artificial just to the _final component and left out all the 
rest.
Note that the majority of compiler generated symbols in class.c
already had artificial set properly.
The proposed patch amends the other generated symbols to be marked
artificial, too.

The other parts fix memory leaks.



PR88009 is closed and seems to have nothing to do with this.


Well it marked only _final as artificial and forgot to adjust the
others as well.
We can remove the reference to PR88009 if you prefer?

thanks!


Harald

Am 14.11.21 um 23:17 schrieb Bernhard Reutner-Fischer via Fortran:

Hi!

Amend fix for PR88009 to mark all these class components as artificial.

gcc/fortran/ChangeLog:

  * class.c (gfc_build_class_symbol, generate_finalization_wrapper)
  (gfc_find_derived_vtab, find_intrinsic_vtab): Use stringpool for
  names. Mark internal symbols as artificial.
  * decl.c (gfc_match_decl_type_spec, gfc_match_end): Fix
  indentation.
  (gfc_match_derived_decl): Fix indentation. Check extension level
  before incrementing refs counter.
  * parse.c (parse_derived): Fix style.
  * resolve.c (resolve_global_procedure): Likewise.
  * symbol.c (gfc_check_conflict): Do not ignore artificial symbols.
  (gfc_add_flavor): Reorder condition, cheapest first.
  (gfc_new_symbol, gfc_get_sym_tree,
  generate_isocbinding_symbol): Fix style.
  * trans-expr.c (gfc_trans_subcomponent_assign): Remove
  restriction on !artificial.
  * match.c (gfc_match_equivalence): Special-case CLASS_DATA for
  warnings.

---
gfc_match_equivalence(), too, should not bail-out early on the first
error but should diagnose all errors. I.e. not goto cleanup but set
err=true and continue in order to diagnose all constraints of a
statement. Maybe Sandra or somebody else will eventually find time to
tweak that.

I think it also plugs a very minor leak of name in gfc_find_derived_vtab
so I also tagged it [PR68800]. At least that was the initial
motivation to look at that spot.
We were doing
-  name = xasprintf ("__vtab_%s", tname);
...
gfc_set_sym_referenced (vtab);
- name = xasprintf ("__vtype_%s", tname);

Bootstrapped and regtested without regressions on x86_64-unknown-linux.
Ok for trunk?
   











Re: [PATCH] rs6000: Better error messages for power8/9-vector builtins

2021-11-17 Thread Segher Boessenkool
On Tue, Nov 16, 2021 at 11:12:35AM -0600, Bill Schmidt wrote:
> Hi!  During a previous patch review, Segher asked that I provide better
> messages when builtins are unavailable because they require both a minimum
> CPU and the enablement of VSX instructions.  This patch does just that.
> 
> Bootstrapped and tested on powerpc64le-linux-gnu with no regressions.
> Is this okay for trunk?

It is.  Thank you!


Segher


Re: [PATCH] rs6000: Better error messages for power8/9-vector builtins

2021-11-17 Thread Paul A. Clarke via Gcc-patches
On Wed, Nov 17, 2021 at 02:00:02PM -0600, Segher Boessenkool wrote:
> On Wed, Nov 17, 2021 at 11:45:02AM -0600, Paul A. Clarke wrote:
> > I guess I'm being pedantic.  "requires -mcpu=power8 and -mvsx" is not
> > accurate from a user's point of view, as "-mcpu=power8" is sufficient,
> > since "-mvsx" is enabled when "-mcpu=power8" is specified.
> 
> To be really pedantic, -mcpu=power8 isn't required either: anything that
> enables the subset of ISA 2.07 that is needed is enough already.  But we
> don't want to encourage users to use those interfaces.
> 
> > The real "requires" is "-mcpu=power8" and no "-mno-vsx".
> 
> And no -mno-altivec.  And and and.  There is a huge web.
> 
> > It's not a strong objection, since specifying "-mno-vsx" should be
> > uncommon.  (Right?)  And, specifying "-mcpu=power8 -mvsx" is harmless.
> 
> Maybe the warning could say "requires -mcpu=power8 (and -mvsx)"?  Is
> that clearer, to your eye?

Hrm. No, but let me withdraw my expression of concern. Both "power8" and
"vsx" are required, and those two options get that explicitly.
That "-mcpu=power8" also pulls in "-mvsx" is a subtlety that is
perhaps not terribly relevant.

Thanks for entertaining my concern, but we've spent too much time on it
already.  :-)

PC


Re: [PATCH v3] x86: Add -mharden-sls=[none|all|return|indirect-branch]

2021-11-17 Thread Uros Bizjak via Gcc-patches
On Wed, Nov 17, 2021 at 9:02 PM H.J. Lu  wrote:
>
> On Wed, Nov 17, 2021 at 7:53 AM Uros Bizjak  wrote:
> >
> > On Wed, Nov 17, 2021 at 4:35 PM H.J. Lu  wrote:
> > >
> > > Add -mharden-sls= to mitigate against straight line speculation (SLS)
> > > for function return and indirect branch by adding an INT3 instruction
> > > after function return and indirect branch.
> > >
> > > gcc/
> > >
> > > PR target/102952
> > > * config/i386/i386-opts.h (harden_sls): New enum.
> > > * config/i386/i386.c (output_indirect_thunk): Mitigate against
> > > SLS for function return.
> > > (ix86_output_function_return): Likewise.
> > > (ix86_output_jmp_thunk_or_indirect): Mitigate against indirect
> > > branch.
> > > (ix86_output_indirect_jmp): Likewise.
> > > (ix86_output_call_insn): Likewise.
> > > * config/i386/i386.opt: Add -mharden-sls=.
> > > * doc/invoke.texi: Document -mharden-sls=.
> > >
> > > gcc/testsuite/
> > >
> > > PR target/102952
> > > * gcc.target/i386/harden-sls-1.c: New test.
> > > * gcc.target/i386/harden-sls-2.c: Likewise.
> > > * gcc.target/i386/harden-sls-3.c: Likewise.
> > > * gcc.target/i386/harden-sls-4.c: Likewise.
> > > * gcc.target/i386/harden-sls-5.c: Likewise.

OK, with a small nit below.

Thanks,
Uros.

+mharden-sls=
+Target RejectNegative Joined Enum(harden_sls) Var(ix86_harden_sls)
Init(harden_sls_none)
+Generate code to mitigate against straight line speculation.
+
+Enum
+Name(harden_sls) Type(enum harden_sls)
+Known choices for mitigation against straight line speculation with
-mharden-sls=:
+
+EnumValue
+Enum(harden_sls) String(none) Value(harden_sls_none)
+
+EnumValue
+Enum(harden_sls) String(all) Value(harden_sls_all)

Please move the above enum to the last enum.

+
+EnumValue
+Enum(harden_sls) String(return) Value(harden_sls_return)
+
+EnumValue
+Enum(harden_sls) String(indirect-branch) Value(harden_sls_indirect_branch)
+


Re: [PATCH] rs6000: Better error messages for power8/9-vector builtins

2021-11-17 Thread David Edelsohn via Gcc-patches
On Wed, Nov 17, 2021 at 3:02 PM Segher Boessenkool
 wrote:
>
> > It's not a strong objection, since specifying "-mno-vsx" should be
> > uncommon.  (Right?)  And, specifying "-mcpu=power8 -mvsx" is harmless.
>
> Maybe the warning could say "requires -mcpu=power8 (and -mvsx)"?  Is
> that clearer, to your eye?

Maybe "requires -mcpu=power8 with VSX" or "requires -mcpu=power8 with
VSX enabled"?

Thanks, David


[PATCH v3] x86: Add -mharden-sls=[none|all|return|indirect-branch]

2021-11-17 Thread H.J. Lu via Gcc-patches
On Wed, Nov 17, 2021 at 7:53 AM Uros Bizjak  wrote:
>
> On Wed, Nov 17, 2021 at 4:35 PM H.J. Lu  wrote:
> >
> > Add -mharden-sls= to mitigate against straight line speculation (SLS)
> > for function return and indirect branch by adding an INT3 instruction
> > after function return and indirect branch.
> >
> > gcc/
> >
> > PR target/102952
> > * config/i386/i386-opts.h (harden_sls): New enum.
> > * config/i386/i386.c (output_indirect_thunk): Mitigate against
> > SLS for function return.
> > (ix86_output_function_return): Likewise.
> > (ix86_output_jmp_thunk_or_indirect): Mitigate against indirect
> > branch.
> > (ix86_output_indirect_jmp): Likewise.
> > (ix86_output_call_insn): Likewise.
> > * config/i386/i386.opt: Add -mharden-sls=.
> > * doc/invoke.texi: Document -mharden-sls=.
> >
> > gcc/testsuite/
> >
> > PR target/102952
> > * gcc.target/i386/harden-sls-1.c: New test.
> > * gcc.target/i386/harden-sls-2.c: Likewise.
> > * gcc.target/i386/harden-sls-3.c: Likewise.
> > * gcc.target/i386/harden-sls-4.c: Likewise.
> > * gcc.target/i386/harden-sls-5.c: Likewise.
> > ---
> >  gcc/config/i386/i386-opts.h  |  7 ++
> >  gcc/config/i386/i386.c   | 23 ++--
> >  gcc/config/i386/i386.opt | 20 +
> >  gcc/doc/invoke.texi  | 10 -
> >  gcc/testsuite/gcc.target/i386/harden-sls-1.c | 14 
> >  gcc/testsuite/gcc.target/i386/harden-sls-2.c | 14 
> >  gcc/testsuite/gcc.target/i386/harden-sls-3.c | 14 
> >  gcc/testsuite/gcc.target/i386/harden-sls-4.c | 16 ++
> >  gcc/testsuite/gcc.target/i386/harden-sls-5.c | 17 +++
> >  9 files changed, 127 insertions(+), 8 deletions(-)
> >  create mode 100644 gcc/testsuite/gcc.target/i386/harden-sls-1.c
> >  create mode 100644 gcc/testsuite/gcc.target/i386/harden-sls-2.c
> >  create mode 100644 gcc/testsuite/gcc.target/i386/harden-sls-3.c
> >  create mode 100644 gcc/testsuite/gcc.target/i386/harden-sls-4.c
> >  create mode 100644 gcc/testsuite/gcc.target/i386/harden-sls-5.c
> >
> > diff --git a/gcc/config/i386/i386-opts.h b/gcc/config/i386/i386-opts.h
> > index 04e4ad608fb..171d3106d0a 100644
> > --- a/gcc/config/i386/i386-opts.h
> > +++ b/gcc/config/i386/i386-opts.h
> > @@ -121,4 +121,11 @@ enum instrument_return {
> >instrument_return_nop5
> >  };
> >
> > +enum harden_sls {
> > +  harden_sls_none = 0,
> > +  harden_sls_return = 1 << 0,
> > +  harden_sls_indirect_branch = 1 << 1,
> > +  harden_sls_all = harden_sls_return | harden_sls_indirect_branch
> > +};
> > +
> >  #endif
> > diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c
> > index 73c4d5115bb..8bbf6ae9875 100644
> > --- a/gcc/config/i386/i386.c
> > +++ b/gcc/config/i386/i386.c
> > @@ -5914,6 +5914,8 @@ output_indirect_thunk (unsigned int regno)
> >  }
> >
> >fputs ("\tret\n", asm_out_file);
> > +  if ((ix86_harden_sls & harden_sls_return))
> > +fputs ("\tint3\n", asm_out_file);
> >  }
> >
> >  /* Output a funtion with a call and return thunk for indirect branch.
> > @@ -15984,6 +15986,8 @@ ix86_output_jmp_thunk_or_indirect (const char 
> > *thunk_name, const int regno)
> >fprintf (asm_out_file, "\tjmp\t");
> >assemble_name (asm_out_file, thunk_name);
> >putc ('\n', asm_out_file);
> > +  if ((ix86_harden_sls & harden_sls_indirect_branch))
> > +   fputs ("\tint3\n", asm_out_file);
> >  }
> >else
> >  output_indirect_thunk (regno);
> > @@ -16206,10 +16210,10 @@ ix86_output_indirect_jmp (rtx call_op)
> > gcc_unreachable ();
> >
> >ix86_output_indirect_branch (call_op, "%0", true);
> > -  return "";
> >  }
> >else
> > -return "%!jmp\t%A0";
> > > +output_asm_insn ("%!jmp\t%A0", &call_op);
> > +  return (ix86_harden_sls & harden_sls_indirect_branch) ? "int3" : "";
> >  }
> >
> >  /* Output return instrumentation for current function if needed.  */
> > @@ -16277,10 +16281,10 @@ ix86_output_function_return (bool long_p)
> >return "";
> >  }
> >
> > -  if (!long_p)
> > -return "%!ret";
> > -
> > -  return "rep%; ret";
> > +  if ((ix86_harden_sls & harden_sls_return))
> > +long_p = false;
>
> Is the above really needed? This will change "rep ret" to a "[notrack]
> ret" when SLS hardening is in effect, with a conditional [notrack]
> prefix, even when long ret was requested.

Fixed in the v3 patch.

> On a related note, "notrack ret" does not assemble for me, the
> assembler reports:
>
> notrack.s:1: Error: expecting indirect branch instruction after `notrack'
>
> Can you please clarify the above change?

I opened:

https://gcc.gnu.org/bugzilla/show_bug.cgi?id=103307

Here is the v3 patch.


-- 
H.J.
From ed5e4a06b0488bff1fcdf218d93b54e0abf7ff3b Mon Sep 17 00:00:00 2001
From: "H.J. Lu" 
Date: Wed, 27 Oct 2021 

Re: [PATCH] rs6000: Better error messages for power8/9-vector builtins

2021-11-17 Thread Segher Boessenkool
On Wed, Nov 17, 2021 at 11:45:02AM -0600, Paul A. Clarke wrote:
> I guess I'm being pedantic.  "requires -mcpu=power8 and -mvsx" is not
> accurate from a user's point of view, as "-mcpu=power8" is sufficient,
> since "-mvsx" is enabled when "-mcpu=power8" is specified.

To be really pedantic, -mcpu=power8 isn't required either: anything that
enables the subset of ISA 2.07 that is needed is enough already.  But we
don't want to encourage users to use those interfaces.

> The real "requires" is "-mcpu=power8" and no "-mno-vsx".

And no -mno-altivec.  And and and.  There is a huge web.

> It's not a strong objection, since specifying "-mno-vsx" should be
> uncommon.  (Right?)  And, specifying "-mcpu=power8 -mvsx" is harmless.

Maybe the warning could say "requires -mcpu=power8 (and -mvsx)"?  Is
that clearer, to your eye?


Segher


Re: [PATCH] c++: implicit dummy object in requires clause [PR103198]

2021-11-17 Thread Patrick Palka via Gcc-patches
On Wed, 17 Nov 2021, Jason Merrill wrote:

> On 11/11/21 20:25, Patrick Palka wrote:
> > In the testcase below satisfaction misbehaves for f and g ultimately
> > because find_template_parameters fails to notice that the constraint
> > 'val.x' depends on the template parameters of the class template.
> > In contrast, satisfaction works just fine for h.
> > 
> > The problem seems to come down to a difference in how any_template_parm_r
> > handles 'this' vs a dummy object: we walk TREE_TYPE of the former but
> > not the latter, and this causes us to miss the tparm dependencies in
> > f/g's constraints since in their case the implicit object parameter
> > through which we access 'val' is a dummy object.  (For h, since we know
> > it's a non-static member function when parsing its trailing constraints,
> > the implicit object parameter is 'this' instead of a dummy object.)
> > 
> > This patch fixes this inconsistency by making any_template_parm_r also
> > walk into the TREE_TYPE of a dummy object, as is already done for
> > 'this'.
> > 
> > Bootstrapped and regtested on x86_64-pc-linux-gnu, also tested on
> > cmcstl2 and range-v3, does this look OK for trunk and 11?
> > 
> > PR c++/103198
> > 
> > gcc/cp/ChangeLog:
> > 
> > * pt.c (any_template_parm_r): Walk the TREE_TYPE of a dummy
> > object.
> 
> Should we handle CONVERT_EXPR with the various casts in cp_walk_subtrees?

This seems to work well too.  But I'm not sure about doing this since
IIUC cp_walk_subtrees is generally supposed to walk subtrees that are
explicitly written in the source code, but when a CONVERT_EXPR
corresponds to an implicit conversion then the target type doesn't
explicitly appear anywhere.

> 
> > gcc/testsuite/ChangeLog:
> > 
> > * g++.dg/cpp2a/concepts-this1.C: New test.
> > ---
> >   gcc/cp/pt.c |  5 
> >   gcc/testsuite/g++.dg/cpp2a/concepts-this1.C | 30 +
> >   2 files changed, 35 insertions(+)
> >   create mode 100644 gcc/testsuite/g++.dg/cpp2a/concepts-this1.C
> > 
> > diff --git a/gcc/cp/pt.c b/gcc/cp/pt.c
> > index 82bf7dc26f6..fa55857d783 100644
> > --- a/gcc/cp/pt.c
> > +++ b/gcc/cp/pt.c
> > @@ -10766,6 +10766,11 @@ any_template_parm_r (tree t, void *data)
> > WALK_SUBTREE (TREE_TYPE (t));
> > break;
> >   +case CONVERT_EXPR:
> > +  if (is_dummy_object (t))
> > +   WALK_SUBTREE (TREE_TYPE (t));
> > +  break;
> > +
> >   default:
> > break;
> >   }
> > diff --git a/gcc/testsuite/g++.dg/cpp2a/concepts-this1.C
> > b/gcc/testsuite/g++.dg/cpp2a/concepts-this1.C
> > new file mode 100644
> > index 000..d717028201a
> > --- /dev/null
> > +++ b/gcc/testsuite/g++.dg/cpp2a/concepts-this1.C
> > @@ -0,0 +1,30 @@
> > +// PR c++/103198
> > +// { dg-do compile { target c++20 } }
> > +
> > > +template<class T>
> > +struct A {
> > +  T val;
> > +
> > > +  template<class U>
> > +requires requires { val.x; }
> > +  void f(U);
> > +
> > +  static void g(int)
> > +requires requires { val.x; };
> > +
> > +  void h(int)
> > +requires requires { val.x; };
> > +};
> > +
> > +struct B { int x; };
> > +struct C { };
> > +
> > +int main() {
> > > +  A<B>().f(0);
> > > +  A<B>().g(0);
> > > +  A<B>().h(0);
> > > +
> > > +  A<C>().f(0); // { dg-error "no match" }
> > > +  A<C>().g(0); // { dg-error "no match" }
> > > +  A<C>().h(0); // { dg-error "no match" }
> > +}
> > 
> 
> 



[PATCH] i386: Redefine indirect_thunks_used as HARD_REG_SET.

2021-11-17 Thread Uros Bizjak via Gcc-patches
Change indirect_thunks_used to HARD_REG_SET to avoid recalculations
of correct register numbers and allow usage of SET/TEST_HARD_REG_BIT
accessors.

2021-11-17  Uroš Bizjak  

gcc/ChangeLog:

* config/i386/i386.c (indirect_thunks_used): Redefine as HARD_REG_SET.
(ix86_code_end): Use TEST_HARD_REG_BIT on indirect_thunks_used.
(ix86_output_indirect_branch_via_reg): Use SET_HARD_REG_BIT
on indirect_thunks_used.
(ix86_output_indirect_function_return): Ditto.

Bootstrapped and regression tested on x86_64-linux-gnu {,-m32}.

Pushed to master.

Uros.
diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c
index 0c5439dc7a7..c9129ae25e4 100644
--- a/gcc/config/i386/i386.c
+++ b/gcc/config/i386/i386.c
@@ -5733,7 +5733,7 @@ static bool indirect_thunk_needed = false;
 
 /* Bit masks of integer registers, which contain branch target, used
by call thunk functions.  */
-static int indirect_thunks_used;
+static HARD_REG_SET indirect_thunks_used;
 
 /* True if return thunk function is needed.  */
 static bool indirect_return_needed = false;
@@ -6030,8 +6030,7 @@ ix86_code_end (void)
 
   for (regno = FIRST_REX_INT_REG; regno <= LAST_REX_INT_REG; regno++)
 {
-  unsigned int i = regno - FIRST_REX_INT_REG + LAST_INT_REG + 1;
-  if ((indirect_thunks_used & (1 << i)))
+  if (TEST_HARD_REG_BIT (indirect_thunks_used, regno))
output_indirect_thunk_function (indirect_thunk_prefix_none,
regno, false);
 }
@@ -6041,7 +6040,7 @@ ix86_code_end (void)
   char name[32];
   tree decl;
 
-  if ((indirect_thunks_used & (1 << regno)))
+  if (TEST_HARD_REG_BIT (indirect_thunks_used, regno))
output_indirect_thunk_function (indirect_thunk_prefix_none,
regno, false);
 
@@ -16014,12 +16013,8 @@ ix86_output_indirect_branch_via_reg (rtx call_op, bool 
sibcall_p)
   != indirect_branch_thunk_inline)
 {
   if (cfun->machine->indirect_branch_type == indirect_branch_thunk)
-   {
- int i = regno;
- if (i >= FIRST_REX_INT_REG)
-   i -= (FIRST_REX_INT_REG - LAST_INT_REG - 1);
- indirect_thunks_used |= 1 << i;
-   }
+   SET_HARD_REG_BIT (indirect_thunks_used, regno);
+
   indirect_thunk_name (thunk_name_buf, regno, need_prefix, false);
   thunk_name = thunk_name_buf;
 }
@@ -16307,7 +16302,7 @@ ix86_output_indirect_function_return (rtx ret_op)
  if (need_thunk)
{
  indirect_return_via_cx = true;
- indirect_thunks_used |= 1 << CX_REG;
+ SET_HARD_REG_BIT (indirect_thunks_used, CX_REG);
}
  fprintf (asm_out_file, "\tjmp\t");
  assemble_name (asm_out_file, thunk_name);


Re: [PATCH] x86: Remove "%!" before ret

2021-11-17 Thread Uros Bizjak via Gcc-patches
On Wed, Nov 17, 2021 at 8:44 PM H.J. Lu  wrote:
>
> Before MPX was removed, "%!" was mapped to
>
> case '!':
>   if (ix86_bnd_prefixed_insn_p (current_output_insn))
> fputs ("bnd ", file);
>   return;
>
> After CET was added and MPX was removed, "%!" was mapped to
>
>case '!':
>   if (ix86_notrack_prefixed_insn_p (current_output_insn))
> fputs ("notrack ", file);
>   return;
>
> ix86_notrack_prefixed_insn_p always returns false on ret since the
> notrack prefix is only for indirect branches.  Remove the unused "%!"
> before ret.
>
> PR target/103307
> * config/i386/i386.c (ix86_code_end): Remove "%!" before ret.
> (ix86_output_function_return): Likewise.
> * config/i386/i386.md (simple_return_pop_internal): Likewise.
> ---
>  gcc/config/i386/i386.c  | 4 ++--
>  gcc/config/i386/i386.md | 2 +-
>  2 files changed, 3 insertions(+), 3 deletions(-)
>
> diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c
> index 73c4d5115bb..95d238e9efc 100644
> --- a/gcc/config/i386/i386.c
> +++ b/gcc/config/i386/i386.c
> @@ -6116,7 +6116,7 @@ ix86_code_end (void)
>xops[0] = gen_rtx_REG (Pmode, regno);
>xops[1] = gen_rtx_MEM (Pmode, stack_pointer_rtx);
>output_asm_insn ("mov%z0\t{%1, %0|%0, %1}", xops);
> -  output_asm_insn ("%!ret", NULL);
> +  output_asm_insn ("ret", NULL);

This can use fputs.

Uros.

>final_end_function ();
>init_insn_lengths ();
>free_after_compilation (cfun);
> @@ -16278,7 +16278,7 @@ ix86_output_function_return (bool long_p)
>  }
>
>if (!long_p)
> -return "%!ret";
> +return "ret";
>
>return "rep%; ret";
>  }
> diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md
> index 73d15de88b2..7b2de60706d 100644
> --- a/gcc/config/i386/i386.md
> +++ b/gcc/config/i386/i386.md
> @@ -14705,7 +14705,7 @@ (define_insn_and_split "simple_return_pop_internal"
>[(simple_return)
> (use (match_operand:SI 0 "const_int_operand"))]
>"reload_completed"
> -  "%!ret\t%0"
> +  "ret\t%0"
>"&& cfun->machine->function_return_type != indirect_branch_keep"
>[(const_int 0)]
>"ix86_split_simple_return_pop_internal (operands[0]); DONE;"
> --
> 2.33.1
>


[PATCH] x86: Remove "%!" before ret

2021-11-17 Thread H.J. Lu via Gcc-patches
Before MPX was removed, "%!" was mapped to

case '!':
  if (ix86_bnd_prefixed_insn_p (current_output_insn))
fputs ("bnd ", file);
  return;

After CET was added and MPX was removed, "%!" was mapped to

   case '!':
  if (ix86_notrack_prefixed_insn_p (current_output_insn))
fputs ("notrack ", file);
  return;

ix86_notrack_prefixed_insn_p always returns false on ret since the
notrack prefix is only for indirect branches.  Remove the unused "%!"
before ret.

PR target/103307
* config/i386/i386.c (ix86_code_end): Remove "%!" before ret.
(ix86_output_function_return): Likewise.
* config/i386/i386.md (simple_return_pop_internal): Likewise.
---
 gcc/config/i386/i386.c  | 4 ++--
 gcc/config/i386/i386.md | 2 +-
 2 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c
index 73c4d5115bb..95d238e9efc 100644
--- a/gcc/config/i386/i386.c
+++ b/gcc/config/i386/i386.c
@@ -6116,7 +6116,7 @@ ix86_code_end (void)
   xops[0] = gen_rtx_REG (Pmode, regno);
   xops[1] = gen_rtx_MEM (Pmode, stack_pointer_rtx);
   output_asm_insn ("mov%z0\t{%1, %0|%0, %1}", xops);
-  output_asm_insn ("%!ret", NULL);
+  output_asm_insn ("ret", NULL);
   final_end_function ();
   init_insn_lengths ();
   free_after_compilation (cfun);
@@ -16278,7 +16278,7 @@ ix86_output_function_return (bool long_p)
 }
 
   if (!long_p)
-return "%!ret";
+return "ret";
 
   return "rep%; ret";
 }
diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md
index 73d15de88b2..7b2de60706d 100644
--- a/gcc/config/i386/i386.md
+++ b/gcc/config/i386/i386.md
@@ -14705,7 +14705,7 @@ (define_insn_and_split "simple_return_pop_internal"
   [(simple_return)
(use (match_operand:SI 0 "const_int_operand"))]
   "reload_completed"
-  "%!ret\t%0"
+  "ret\t%0"
   "&& cfun->machine->function_return_type != indirect_branch_keep"
   [(const_int 0)]
   "ix86_split_simple_return_pop_internal (operands[0]); DONE;"
-- 
2.33.1



Re: [PATCH v1 2/2] RISC-V: Add instruction fusion (for ventana-vt1)

2021-11-17 Thread Philipp Tomsich
On Wed, 17 Nov 2021 at 20:40, Palmer Dabbelt  wrote:

> [This is my first time trying my Rivos address on the lists, so sorry if
> something goes off the rails.]
>
> On Wed, 17 Nov 2021 06:05:04 PST (-0800), gcc-patches@gcc.gnu.org wrote:
> > Hi Philipp:
> >
> > Thanks for the patch, I like this approach, that can easily configure
> > different capabilities for each core :)
> >
> > So there are only a few minor comments for this patch.
> >
> > On Mon, Nov 15, 2021 at 5:49 AM Philipp Tomsich
> >  wrote:
> >>
> >> From: Philipp Tomsich 
> >>
> >> The Ventana VT1 core supports quad-issue and instruction fusion.
> >> This implemented TARGET_SCHED_MACRO_FUSION_P to keep fusible sequences
> >> together and adds idiom matcheing for the supported fusion cases.
>
> There's a typo at "matcheing".
>
> >>
> >> gcc/ChangeLog:
> >>
> >> * config/riscv/riscv.c (enum riscv_fusion_pairs): Add symbolic
> >> constants to identify supported fusion patterns.
> >> (struct riscv_tune_param): Add fusible_op field.
> >> (riscv_macro_fusion_p): Implement.
> >> (riscv_fusion_enabled_p): Implement.
> >> (riscv_macro_fusion_pair_p): Implement and recognize fusible
> >> idioms for Ventana VT1.
> >> (TARGET_SCHED_MACRO_FUSION_P): Point to riscv_macro_fusion_p.
> >> (TARGET_SCHED_MACRO_FUSION_PAIR_P): Point to
> riscv_macro_fusion_pair_p.
> >>
> >> Signed-off-by: Philipp Tomsich 
>
> This doesn't match the From (though admittedly I'm pretty new to the SoB
> stuff in GCC, so I'm not sure if that's even a rule here).
>

I noticed that I hadn't reset the authors and that patman had inserted a
Signed-off-by: for that reason, right after I sent this out.
Given that it's all me and there's both individual assignment paperwork and
company disclaimers on file for all of the email-addresses, this should be
fine.

>> ---
> >>
> >>  gcc/config/riscv/riscv.c | 196 +++
> >>  1 file changed, 196 insertions(+)
> >>
> >> diff --git a/gcc/config/riscv/riscv.c b/gcc/config/riscv/riscv.c
> >> index 6b918db65e9..8eac52101a3 100644
> >> --- a/gcc/config/riscv/riscv.c
> >> +++ b/gcc/config/riscv/riscv.c
> >> @@ -211,6 +211,19 @@ struct riscv_integer_op {
> >> The worst case is LUI, ADDI, SLLI, ADDI, SLLI, ADDI, SLLI, ADDI.  */
> >>  #define RISCV_MAX_INTEGER_OPS 8
> >>
> >> +enum riscv_fusion_pairs
> >> +{
> >> +  RISCV_FUSE_NOTHING = 0,
> >> +  RISCV_FUSE_ZEXTW = (1 << 0),
> >> +  RISCV_FUSE_ZEXTH = (1 << 1),
> >> +  RISCV_FUSE_ZEXTWS = (1 << 2),
> >> +  RISCV_FUSE_LDINDEXED = (1 << 3),
> >
> > RISCV_FUSE_LDINDEXED -> RISCV_FUSE_LD_INDEXED
> >
> > Could you add some comment for above enums, like that:
> > /* slli rx, rx, 32 + srli rx, rx, 32 */
> > RISCV_FUSE_ZEXTW
> >
> > So that we could know what kind of instruction will be fused for this
> enum.
> >
> >> +  RISCV_FUSE_LUI_ADDI = (1 << 4),
> >> +  RISCV_FUSE_AUIPC_ADDI = (1 << 5),
> >> +  RISCV_FUSE_LUI_LD = (1 << 6),
> >> +  RISCV_FUSE_AUIPC_LD = (1 << 7),
> >> +};
> >> +
> >>  /* Costs of various operations on the different architectures.  */
> >>
> >>  struct riscv_tune_param
> >> @@ -224,6 +237,7 @@ struct riscv_tune_param
> >>unsigned short branch_cost;
> >>unsigned short memory_cost;
> >>bool slow_unaligned_access;
> >> +  unsigned int fusible_ops;
> >>  };
> >>
> >>  /* Information about one micro-arch we know about.  */
> >> @@ -289,6 +303,7 @@ static const struct riscv_tune_param
> rocket_tune_info = {
> >>3,   /* branch_cost */
> >>5,   /* memory_cost */
> >>true,/*
> slow_unaligned_access */
> >> +  RISCV_FUSE_NOTHING,   /* fusible_ops */
> >>  };
>
> There's some tab/space issues here (and in the below ones).  They align
> when merged, but the new lines are spaces-only and the old ones have
> internal spaces mixed with tabs (IIRC that's to the GCC style, if not we
> should fix these to at least be consistent).
>
> >>
> >>  /* Costs to use when optimizing for Sifive 7 Series.  */
> >> @@ -302,6 +317,7 @@ static const struct riscv_tune_param
> sifive_7_tune_info = {
> >>4,   /* branch_cost */
> >>3,   /* memory_cost */
> >>true,/*
> slow_unaligned_access */
> >> +  RISCV_FUSE_NOTHING,   /* fusible_ops */
> >>  };
> >>
> >>  /* Costs to use when optimizing for T-HEAD c906.  */
> >> @@ -328,6 +344,7 @@ static const struct riscv_tune_param
> optimize_size_tune_info = {
> >>1,   /* branch_cost */
> >>2,   /* memory_cost */
> >>false,   /*
> slow_unaligned_access */
> >> +  RISCV_FUSE_NOTHING,   /* 

Re: [PATCH v1 2/2] RISC-V: Add instruction fusion (for ventana-vt1)

2021-11-17 Thread Palmer Dabbelt
[This is my first time trying my Rivos address on the lists, so sorry if 
something goes off the rails.]


On Wed, 17 Nov 2021 06:05:04 PST (-0800), gcc-patches@gcc.gnu.org wrote:

Hi Philipp:

Thanks for the patch, I like this approach, that can easily configure
different capabilities for each core :)

So there are only a few minor comments for this patch.

On Mon, Nov 15, 2021 at 5:49 AM Philipp Tomsich
 wrote:


From: Philipp Tomsich 

The Ventana VT1 core supports quad-issue and instruction fusion.
This implemented TARGET_SCHED_MACRO_FUSION_P to keep fusible sequences
together and adds idiom matcheing for the supported fusion cases.


There's a typo at "matcheing".



gcc/ChangeLog:

* config/riscv/riscv.c (enum riscv_fusion_pairs): Add symbolic
constants to identify supported fusion patterns.
(struct riscv_tune_param): Add fusible_op field.
(riscv_macro_fusion_p): Implement.
(riscv_fusion_enabled_p): Implement.
(riscv_macro_fusion_pair_p): Implement and recognize fusible
idioms for Ventana VT1.
(TARGET_SCHED_MACRO_FUSION_P): Point to riscv_macro_fusion_p.
(TARGET_SCHED_MACRO_FUSION_PAIR_P): Point to riscv_macro_fusion_pair_p.

Signed-off-by: Philipp Tomsich 


This doesn't match the From (though admittedly I'm pretty new to the SoB 
stuff in GCC, so I'm not sure if that's even a rule here).



---

 gcc/config/riscv/riscv.c | 196 +++
 1 file changed, 196 insertions(+)

diff --git a/gcc/config/riscv/riscv.c b/gcc/config/riscv/riscv.c
index 6b918db65e9..8eac52101a3 100644
--- a/gcc/config/riscv/riscv.c
+++ b/gcc/config/riscv/riscv.c
@@ -211,6 +211,19 @@ struct riscv_integer_op {
The worst case is LUI, ADDI, SLLI, ADDI, SLLI, ADDI, SLLI, ADDI.  */
 #define RISCV_MAX_INTEGER_OPS 8

+enum riscv_fusion_pairs
+{
+  RISCV_FUSE_NOTHING = 0,
+  RISCV_FUSE_ZEXTW = (1 << 0),
+  RISCV_FUSE_ZEXTH = (1 << 1),
+  RISCV_FUSE_ZEXTWS = (1 << 2),
+  RISCV_FUSE_LDINDEXED = (1 << 3),


RISCV_FUSE_LDINDEXED -> RISCV_FUSE_LD_INDEXED

Could you add some comment for above enums, like that:
/* slli rx, rx, 32 + srli rx, rx, 32 */
RISCV_FUSE_ZEXTW

So that we could know what kind of instruction will be fused for this enum.


+  RISCV_FUSE_LUI_ADDI = (1 << 4),
+  RISCV_FUSE_AUIPC_ADDI = (1 << 5),
+  RISCV_FUSE_LUI_LD = (1 << 6),
+  RISCV_FUSE_AUIPC_LD = (1 << 7),
+};
+
 /* Costs of various operations on the different architectures.  */

 struct riscv_tune_param
@@ -224,6 +237,7 @@ struct riscv_tune_param
   unsigned short branch_cost;
   unsigned short memory_cost;
   bool slow_unaligned_access;
+  unsigned int fusible_ops;
 };

 /* Information about one micro-arch we know about.  */
@@ -289,6 +303,7 @@ static const struct riscv_tune_param rocket_tune_info = {
   3,   /* branch_cost */
   5,   /* memory_cost */
   true,/* 
slow_unaligned_access */
+  RISCV_FUSE_NOTHING,   /* fusible_ops */
 };


There's some tab/space issues here (and in the below ones).  They align 
when merged, but the new lines are spaces-only and the old ones have 
internal spaces mixed with tabs (IIRC that's to the GCC style, if not we 
should fix these to at least be consistent).




 /* Costs to use when optimizing for Sifive 7 Series.  */
@@ -302,6 +317,7 @@ static const struct riscv_tune_param sifive_7_tune_info = {
   4,   /* branch_cost */
   3,   /* memory_cost */
   true,/* 
slow_unaligned_access */
+  RISCV_FUSE_NOTHING,   /* fusible_ops */
 };

 /* Costs to use when optimizing for T-HEAD c906.  */
@@ -328,6 +344,7 @@ static const struct riscv_tune_param 
optimize_size_tune_info = {
   1,   /* branch_cost */
   2,   /* memory_cost */
   false,   /* slow_unaligned_access */
+  RISCV_FUSE_NOTHING,   /* fusible_ops */
 };

 /* Costs to use when optimizing for Ventana Micro VT1.  */
@@ -341,6 +358,10 @@ static const struct riscv_tune_param ventana_vt1_tune_info 
= {
   4,   /* branch_cost */
   5,   /* memory_cost */
   false,   /* slow_unaligned_access */
+  ( RISCV_FUSE_ZEXTW | RISCV_FUSE_ZEXTH |   /* fusible_ops */
+RISCV_FUSE_ZEXTWS | RISCV_FUSE_LDINDEXED |
+RISCV_FUSE_LUI_ADDI | RISCV_FUSE_AUIPC_ADDI |
+RISCV_FUSE_LUI_LD | RISCV_FUSE_AUIPC_LD )
 };

 static tree riscv_handle_fndecl_attribute (tree *, tree, tree, int, bool *);
@@ -4909,6 +4930,177 @@ riscv_issue_rate (void)
   return tune_param->issue_rate;
 }

+/* Implement TARGET_SCHED_MACRO_FUSION_P.  Return true if 

[PATCH] i386: Introduce LEGACY_SSE_REGNO_P predicate

2021-11-17 Thread Uros Bizjak via Gcc-patches
Introduce LEGACY_SSE_REGNO_P predicate to simplify a couple of places.

No functional changes.

2021-11-17  Uroš Bizjak  

gcc/ChangeLog:

* config/i386/i386.h (LEGACY_SSE_REGNO_P): New predicate.
(SSE_REGNO_P): Use LEGACY_SSE_REGNO_P predicate.
* config/i386/i386.c (zero_all_vector_registers):
Use LEGACY_SSE_REGNO_P predicate.
(ix86_register_priority): Use REX_INT_REGNO_P, REX_SSE_REGNO_P
and EXT_REX_SSE_REGNO_P predicates.
(ix86_hard_regno_call_part_clobbered): Use REX_SSE_REGNO_P
and LEGACY_SSE_REGNO_P predicates.

Bootstrapped and regression tested on x86_64-linux-gnu {,-m32}.

Pushed to master.

Uros.
diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c
index 73c4d5115bb..0c5439dc7a7 100644
--- a/gcc/config/i386/i386.c
+++ b/gcc/config/i386/i386.c
@@ -3665,7 +3665,7 @@ zero_all_vector_registers (HARD_REG_SET 
need_zeroed_hardregs)
 return NULL;
 
   for (unsigned int regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
-if ((IN_RANGE (regno, FIRST_SSE_REG, LAST_SSE_REG)
+if ((LEGACY_SSE_REGNO_P (regno)
 || (TARGET_64BIT
 && (REX_SSE_REGNO_P (regno)
 || (TARGET_AVX512F && EXT_REX_SSE_REGNO_P (regno)))))
@@ -19089,15 +19089,13 @@ ix86_register_priority (int hard_regno)
 return 0;
   if (hard_regno == BP_REG)
 return 1;
-  /* New x86-64 int registers result in bigger code size.  Discourage
- them.  */
-  if (IN_RANGE (hard_regno, FIRST_REX_INT_REG, LAST_REX_INT_REG))
+  /* New x86-64 int registers result in bigger code size.  Discourage them.  */
+  if (REX_INT_REGNO_P (hard_regno))
 return 2;
-  /* New x86-64 SSE registers result in bigger code size.  Discourage
- them.  */
-  if (IN_RANGE (hard_regno, FIRST_REX_SSE_REG, LAST_REX_SSE_REG))
+  /* New x86-64 SSE registers result in bigger code size.  Discourage them.  */
+  if (REX_SSE_REGNO_P (hard_regno))
 return 2;
-  if (IN_RANGE (hard_regno, FIRST_EXT_REX_SSE_REG, LAST_EXT_REX_SSE_REG))
+  if (EXT_REX_SSE_REGNO_P (hard_regno))
 return 1;
   /* Usage of AX register results in smaller code.  Prefer it.  */
   if (hard_regno == AX_REG)
@@ -19974,9 +19972,8 @@ ix86_hard_regno_call_part_clobbered (unsigned int 
abi_id, unsigned int regno,
   /* Special ABI for vzeroupper which only clobber higher part of sse regs.  */
   if (abi_id == ABI_VZEROUPPER)
   return (GET_MODE_SIZE (mode) > 16
- && ((TARGET_64BIT
-  && (IN_RANGE (regno, FIRST_REX_SSE_REG, LAST_REX_SSE_REG)))
- || (IN_RANGE (regno, FIRST_SSE_REG, LAST_SSE_REG))));
+ && ((TARGET_64BIT && REX_SSE_REGNO_P (regno))
+ || LEGACY_SSE_REGNO_P (regno)));
 
   return SSE_REGNO_P (regno) && GET_MODE_SIZE (mode) > 16;
 }
diff --git a/gcc/config/i386/i386.h b/gcc/config/i386/i386.h
index e35c79c192c..2fda1e0686e 100644
--- a/gcc/config/i386/i386.h
+++ b/gcc/config/i386/i386.h
@@ -1409,10 +1409,13 @@ enum reg_class
 
 #define SSE_REG_P(X) (REG_P (X) && SSE_REGNO_P (REGNO (X)))
 #define SSE_REGNO_P(N) \
-  (IN_RANGE ((N), FIRST_SSE_REG, LAST_SSE_REG) \
+  (LEGACY_SSE_REGNO_P (N)  \
|| REX_SSE_REGNO_P (N)  \
|| EXT_REX_SSE_REGNO_P (N))
 
+#define LEGACY_SSE_REGNO_P(N) \
+  IN_RANGE ((N), FIRST_SSE_REG, LAST_SSE_REG)
+
 #define REX_SSE_REGNO_P(N) \
   IN_RANGE ((N), FIRST_REX_SSE_REG, LAST_REX_SSE_REG)
 


Re: [PATCH] restore ancient -Waddress for weak symbols [PR33925]

2021-11-17 Thread Martin Sebor via Gcc-patches

On 11/17/21 11:31 AM, Jason Merrill wrote:

On 11/16/21 20:11, Martin Sebor wrote:

On 11/16/21 1:23 PM, Jason Merrill wrote:

On 10/23/21 19:06, Martin Sebor wrote:

On 10/4/21 3:37 PM, Jason Merrill wrote:

On 10/4/21 14:42, Martin Sebor wrote:

While resolving the recent -Waddress enhancement request (PR
102103) I came across a 2007 problem report about GCC 4 having
stopped warning for using the address of inline functions in
equality comparisons with null.  With inline functions being
commonplace in C++ this seems like an important use case for
the warning.

The change that resulted in suppressing the warning in these
cases was introduced inadvertently in a fix for PR 22252.

To restore the warning, the attached patch enhances
the decl_with_nonnull_addr_p() function to return true also for
weak symbols for which a definition has been provided.


I think you probably want to merge this function with 
fold-const.c:maybe_nonzero_address, which already handles more cases.


maybe_nonzero_address() doesn't behave quite like
decl_with_nonnull_addr_p() expects and I'm reluctant to muck
around with the former too much since it's used for codegen,
while the latter just for warnings.  (There is even a case
where the functions don't behave the same, and would result
in different warnings between C and C++ without some extra
help.)

So in the attached revision I just have maybe_nonzero_address()
call decl_with_nonnull_addr_p() and then refine the failing
(or uncertain) cases separately, with some overlap between
them.

Since I worked on this someone complained that some instances
of the warning newly enhanced under PR102103 aren't suppressed
in code resulting from macro expansion.  Since it's trivial,
I include the fix for that report in this patch as well.



+   allocated stroage might have a null address.  */


typo.

OK with that fixed.


After retesting the patch before committing I noticed it triggers
a regression in weak/weak-3.c that I missed the first time around.
Here's the test case:

extern void * ffoo1f (void);
void * foo1f (void)
{
   if (ffoo1f) /* { dg-warning "-Waddress" } */
 ffoo1f ();
   return 0;
}

void * ffoox1f (void) { return (void *)0; }
extern void * ffoo1f (void)  __attribute__((weak, alias ("ffoox1f")));

The unexpected error is:

a.c: At top level:
a.c:1:15: error: ‘ffoo1f’ declared weak after being used
 1 | extern void * ffoo1f (void);
   |   ^~

The error is caused by the new call to maybe_nonzero_address()
made from decl_with_nonnull_addr_p().  The call registers
the symbol as used.

So unless the error is desirable for this case I think it's
best to go back to the originally proposed solution.  I attach
it for reference and will plan to commit it tomorrow unless I
hear otherwise.


Hmm, the error seems correct to me: we tested whether the address is 
nonzero in the dg-warning line, and presumably evaluating that test 
could depend on the absence of weak.


Sorry, I don't know enough yet to judge this.

Since the error is unrelated to what I'm fixing I would prefer
not to introduce it in the same patch.  I'm happy to open
a separate bug for the missing error for the test case above,
look some more into why it isn't issued, and if it's decided
the error is intended either add the call back to trigger it
or do whatever else may be more appropriate).

Are you okay with me going ahead and committing the most recent
patch as is?

If not, do you want me to commit the previous version and change
the weak-3.c test to expect the error?

Martin




PS I don't know enough about the logic behind issuing this error
in other situations to tell for sure that it's wrong in this one
but I see no difference in the emitted code for a case in the same
test that declares the alias first, before taking its address and
that's accepted and this one.  I also checked that both Clang and
ICC accept the code either way, so I'm inclined to think the error
would be a bug.






Re: [PATCH v2] rs6000: Test case adjustments for new builtins

2021-11-17 Thread Segher Boessenkool
On Wed, Nov 17, 2021 at 07:52:38AM -0600, Bill Schmidt wrote:
> >>  - For int_128bit-runnable.c, I chose not to do gimple folding on the 
> >> 128-bit
> >>comparison operations in the new implementation, because doing so 
> >> results in
> >>bad code that splits things into two 64-bit values.  That needs separate
> >>attention; but the point here is, when I did that, I started generating
> >>more of the vcmpequq, vcmpgtsq, and vcmpgtuq instructions.
> > And you now get worse code (albeit in some cases no longer invalid)?
> 
> No, sorry that this wasn't more clear.  The "old" builtins code performs
> gimple folding on 128-bit compares.  This results in correct but very
> inefficient code.  The "new" builtins code has removed the gimple folding
> for 128-bit compares.  This results in directly generating vcmpequq and
> friends, which is the efficient code we're looking for.  This test case
> then needs modification to show we're doing better.  I'll submit this
> separately.

Hrm.  Folding should always be a good thing to do; and folding should
never split an operation on a 128-bit datum into two operations on
64-bit things.  That kind of optimisation cannot be sanely done on
Gimple level: the abstractions are not close enough to the hardware for
that, and the instruction stream is not close at all to what the
eventual machine insns will be.  We have an RTL pass that does this
("subreg"), it runs almost immediately after expand (and two more
times, even again after the split pass).

So there is a generic bug that you counteract with a target bug :-(

> >> --- a/gcc/testsuite/gcc.target/powerpc/bfp/scalar-extract-exp-2.c
> >> +++ b/gcc/testsuite/gcc.target/powerpc/bfp/scalar-extract-exp-2.c
> >> @@ -14,7 +14,7 @@ get_exponent (double *p)
> >>  {
> >>double source = *p;
> >>  
> >> -  return scalar_extract_exp (source); /* { dg-error 
> >> "'__builtin_vec_scalar_extract_exp' is not supported in this compiler 
> >> configuration" } */
> >> +  return scalar_extract_exp (source); /* { dg-error 
> >> "'__builtin_vsx_scalar_extract_exp' requires the" } */
> >>  }
> > The testcase uses __builtin_vec_scalar_extract_exp, so this is not okay.
> 
> Sorry, this is a case of my bad eyesight not identifying this had changed.
> As with the test case (cmpb-3.c) in the 32-bit patch, this error message
> isn't all that the user sees.  There is also a "note" diagnostic that ties
> the generic overload name to the specific underlying builtin name so that
> confusion is avoided.  I'll just submit these separately with a full
> explanation.

Can't you go just two inches further and report the actual builtin used
by the user (which even is documented!), and not cause any confusion?

> > It is not okay to blindly adjust the testcases to accept what the new
> > code does.  This is a regression.  It is okay to have it regressed for a
> > while.  It is also okay to xfail things, if there is no expectation it
> > can be fixed before the next release (or some other suitably big time
> > frame, this isn't an exact science).
> 
> This isn't really a regression, as I'll describe with each patch.

Looking forward to it :-)

> >> --- a/gcc/testsuite/gcc.target/powerpc/byte-in-set-2.c
> >> +++ b/gcc/testsuite/gcc.target/powerpc/byte-in-set-2.c
> >> @@ -10,5 +10,5 @@
> >>  int
> >>  test_byte_in_set (unsigned char b, unsigned long long set_members)
> >>  {
> >> -  return __builtin_byte_in_set (b, set_members); /* { dg-warning 
> >> "implicit declaration of function" } */
> >> +  return __builtin_byte_in_set (b, set_members); /* { dg-error 
> >> "'__builtin_scalar_byte_in_set' requires the" } */
> >>  }
> > Huh.  How can the old warning ever have fired?  Was the builtin not
> > declared on 32-bit before?  Ouch.
> 
> I'll remind myself what changed here, but yes, that's what it looks like --
> an inadvertent problem with the old logic for 32-bit.

In general it is better to always have all builtins (and other
interfaces) declared internally, so that you can give much better error
messages (and so that you get errors if there are conflicts, etc.)

There can be exceptions, but this is not a case like that :-)  (So your
change is great :-) )

> >> --- a/gcc/testsuite/gcc.target/powerpc/pr80315-2.c
> >> +++ b/gcc/testsuite/gcc.target/powerpc/pr80315-2.c
> >> @@ -10,6 +10,6 @@ main ()
> >>int mask;
> >>  
> >>/* Argument 2 must be 0 or 1.  Argument 3 must be in range 0..15.  */
> >> -  res = __builtin_crypto_vshasigmad (test, 1, 0xff); /* { dg-error 
> >> {argument 3 must be in the range \[0, 15\]} } */
> >> +  res = __builtin_crypto_vshasigmad (test, 1, 0xff); /* { dg-error 
> >> {argument 3 must be a 4-bit unsigned literal} } */
> >>return 0;
> >>  }
> > Hrm, make this say "must be a literal between 0 and 15, inclusive" like
> > the other errors?
> 
> The "n-bit unsigned literal" is the usual case.  I'll provide more explanation
> in the separate patch.

We should use the same formulation always.  I like the 

Re: [PATCH] c++: implicit dummy object in requires clause [PR103198]

2021-11-17 Thread Jason Merrill via Gcc-patches

On 11/11/21 20:25, Patrick Palka wrote:

In the testcase below satisfaction misbehaves for f and g ultimately
because find_template_parameters fails to notice that the constraint
'val.x' depends on the template parameters of the class template.
In contrast, satisfaction works just fine for h.

The problem seems to come down to a difference in how any_template_parm_r
handles 'this' vs a dummy object: we walk TREE_TYPE of the former but
not the latter, and this causes us to miss the tparm dependencies in
f/g's constraints since in their case the implicit object parameter
through which we access 'val' is a dummy object.  (For h, since we know
it's a non-static member function when parsing its trailing constraints,
the implicit object parameter is 'this' instead of a dummy object.)

This patch fixes this inconsistency by making any_template_parm_r also
walk into the TREE_TYPE of a dummy object, as is already done for
'this'.

Bootstrapped and regtested on x86_64-pc-linux-gnu, also tested on
cmcstl2 and range-v3, does this look OK for trunk and 11?

PR c++/103198

gcc/cp/ChangeLog:

* pt.c (any_template_parm_r): Walk the TREE_TYPE of a dummy
object.


Should we handle CONVERT_EXPR with the various casts in cp_walk_subtrees?


gcc/testsuite/ChangeLog:

* g++.dg/cpp2a/concepts-this1.C: New test.
---
  gcc/cp/pt.c |  5 
  gcc/testsuite/g++.dg/cpp2a/concepts-this1.C | 30 +
  2 files changed, 35 insertions(+)
  create mode 100644 gcc/testsuite/g++.dg/cpp2a/concepts-this1.C

diff --git a/gcc/cp/pt.c b/gcc/cp/pt.c
index 82bf7dc26f6..fa55857d783 100644
--- a/gcc/cp/pt.c
+++ b/gcc/cp/pt.c
@@ -10766,6 +10766,11 @@ any_template_parm_r (tree t, void *data)
WALK_SUBTREE (TREE_TYPE (t));
break;
  
+case CONVERT_EXPR:

+  if (is_dummy_object (t))
+   WALK_SUBTREE (TREE_TYPE (t));
+  break;
+
  default:
break;
  }
diff --git a/gcc/testsuite/g++.dg/cpp2a/concepts-this1.C 
b/gcc/testsuite/g++.dg/cpp2a/concepts-this1.C
new file mode 100644
index 000..d717028201a
--- /dev/null
+++ b/gcc/testsuite/g++.dg/cpp2a/concepts-this1.C
@@ -0,0 +1,30 @@
+// PR c++/103198
+// { dg-do compile { target c++20 } }
+
+template<class T>
+struct A {
+  T val;
+
+  template<class U>
+requires requires { val.x; }
+  void f(U);
+
+  static void g(int)
+requires requires { val.x; };
+
+  void h(int)
+requires requires { val.x; };
+};
+
+struct B { int x; };
+struct C { };
+
+int main() {
+  A<B>().f(0);
+  A<B>().g(0);
+  A<B>().h(0);
+
+  A<C>().f(0); // { dg-error "no match" }
+  A<C>().g(0); // { dg-error "no match" }
+  A<C>().h(0); // { dg-error "no match" }
+}





Re: [PATCH v3] c-family: Add __builtin_assoc_barrier

2021-11-17 Thread Jason Merrill via Gcc-patches

On 11/11/21 03:49, Matthias Kretz wrote:

On Wednesday, 8 September 2021 15:49:27 CET Matthias Kretz wrote:

On Wednesday, 8 September 2021 15:44:28 CEST Jason Merrill wrote:

On 9/8/21 5:37 AM, Matthias Kretz wrote:

On Tuesday, 7 September 2021 19:36:22 CEST Jason Merrill wrote:

case PAREN_EXPR:
-  RETURN (finish_parenthesized_expr (RECUR (TREE_OPERAND (t,
0))));
+  if (REF_PARENTHESIZED_P (t))
+   RETURN (finish_parenthesized_expr (RECUR (TREE_OPERAND (t,
0))));
+  else
+   RETURN (RECUR (TREE_OPERAND (t, 0)));


I think you need to build a new PAREN_EXPR in the assoc barrier case as
well, for it to have any effect in templates.


My intent was to ignore __builtin_assoc_barrier in templates / constexpr
evaluation since it's not affected by -fassociative-math anyway. Or do
you
mean something else?


I agree about constexpr, but why wouldn't template instantiations be
affected by -fassociative-math like any other function?


Oh, that seems like a major misunderstanding on my part. I assumed
tsubst_copy_and_build would evaluate the expressions in template arguments
🤦. I'll expand the test and will fix.


Sorry for the long delay. New patch is attached. OK for trunk?


OK.


New builtin to enable explicit use of PAREN_EXPR in C & C++ code.

Signed-off-by: Matthias Kretz 

gcc/testsuite/ChangeLog:

 * c-c++-common/builtin-assoc-barrier-1.c: New test.

gcc/cp/ChangeLog:

 * constexpr.c (cxx_eval_constant_expression): Handle PAREN_EXPR
 via cxx_eval_constant_expression.
 * cp-objcp-common.c (names_builtin_p): Handle
 RID_BUILTIN_ASSOC_BARRIER.
 * cp-tree.h: Adjust TREE_LANG_FLAG documentation to include
 PAREN_EXPR in REF_PARENTHESIZED_P.
 (REF_PARENTHESIZED_P): Add PAREN_EXPR.
 * parser.c (cp_parser_postfix_expression): Handle
 RID_BUILTIN_ASSOC_BARRIER.
 * pt.c (tsubst_copy_and_build): If the PAREN_EXPR is not a
 parenthesized initializer, build a new PAREN_EXPR.
 * semantics.c (force_paren_expr): Simplify conditionals. Set
 REF_PARENTHESIZED_P on PAREN_EXPR.
 (maybe_undo_parenthesized_ref): Test PAREN_EXPR for
 REF_PARENTHESIZED_P.

gcc/c-family/ChangeLog:

 * c-common.c (c_common_reswords): Add __builtin_assoc_barrier.
 * c-common.h (enum rid): Add RID_BUILTIN_ASSOC_BARRIER.

gcc/c/ChangeLog:

 * c-decl.c (names_builtin_p): Handle RID_BUILTIN_ASSOC_BARRIER.
 * c-parser.c (c_parser_postfix_expression): Likewise.

gcc/ChangeLog:

 * doc/extend.texi: Document __builtin_assoc_barrier.
---
  gcc/c-family/c-common.c   |  1 +
  gcc/c-family/c-common.h   |  2 +-
  gcc/c/c-decl.c|  1 +
  gcc/c/c-parser.c  | 20 ++
  gcc/cp/constexpr.c|  8 +++
  gcc/cp/cp-objcp-common.c  |  1 +
  gcc/cp/cp-tree.h  | 12 ++--
  gcc/cp/parser.c   | 14 
  gcc/cp/pt.c   | 10 ++-
  gcc/cp/semantics.c| 23 ++
  gcc/doc/extend.texi   | 18 +
  .../c-c++-common/builtin-assoc-barrier-1.c| 71 +++
  12 files changed, 158 insertions(+), 23 deletions(-)
  create mode 100644 gcc/testsuite/c-c++-common/builtin-assoc-barrier-1.c






Re: [PATCH] restore ancient -Waddress for weak symbols [PR33925]

2021-11-17 Thread Jason Merrill via Gcc-patches

On 11/16/21 20:11, Martin Sebor wrote:

On 11/16/21 1:23 PM, Jason Merrill wrote:

On 10/23/21 19:06, Martin Sebor wrote:

On 10/4/21 3:37 PM, Jason Merrill wrote:

On 10/4/21 14:42, Martin Sebor wrote:

While resolving the recent -Waddress enhancement request (PR
102103) I came across a 2007 problem report about GCC 4 having
stopped warning for using the address of inline functions in
equality comparisons with null.  With inline functions being
commonplace in C++ this seems like an important use case for
the warning.

The change that resulted in suppressing the warning in these
cases was introduced inadvertently in a fix for PR 22252.

To restore the warning, the attached patch enhances
the decl_with_nonnull_addr_p() function to return true also for
weak symbols for which a definition has been provided.


I think you probably want to merge this function with 
fold-const.c:maybe_nonzero_address, which already handles more cases.


maybe_nonzero_address() doesn't behave quite like
decl_with_nonnull_addr_p() expects and I'm reluctant to muck
around with the former too much since it's used for codegen,
while the latter just for warnings.  (There is even a case
where the functions don't behave the same, and would result
in different warnings between C and C++ without some extra
help.)

So in the attached revision I just have maybe_nonzero_address()
call decl_with_nonnull_addr_p() and then refine the failing
(or uncertain) cases separately, with some overlap between
them.

Since I worked on this someone complained that some instances
of the warning newly enhanced under PR102103 aren't suppressed
in code resulting from macro expansion.  Since it's trivial,
I include the fix for that report in this patch as well.



+   allocated stroage might have a null address.  */


typo.

OK with that fixed.


After retesting the patch before committing I noticed it triggers
a regression in weak/weak-3.c that I missed the first time around.
Here's the test case:

extern void * ffoo1f (void);
void * foo1f (void)
{
   if (ffoo1f) /* { dg-warning "-Waddress" } */
     ffoo1f ();
   return 0;
}

void * ffoox1f (void) { return (void *)0; }
extern void * ffoo1f (void)  __attribute__((weak, alias ("ffoox1f")));

The unexpected error is:

a.c: At top level:
a.c:1:15: error: ‘ffoo1f’ declared weak after being used
     1 | extern void * ffoo1f (void);
   |   ^~

The error is caused by the new call to maybe_nonzero_address()
made from decl_with_nonnull_addr_p().  The call registers
the symbol as used.

So unless the error is desirable for this case I think it's
best to go back to the originally proposed solution.  I attach
it for reference and will plan to commit it tomorrow unless I
hear otherwise.


Hmm, the error seems correct to me: we tested whether the address is 
nonzero in the dg-warning line, and presumably evaluating that test 
could depend on the absence of weak.



PS I don't know enough about the logic behind issuing this error
in other situations to tell for sure that it's wrong in this one
but I see no difference in the emitted code for a case in the same
test that declares the alias first, before taking its address and
that's accepted and this one.  I also checked that both Clang and
ICC accept the code either way, so I'm inclined to think the error
would be a bug.




Re: [RFC] c++: Print function template parms when relevant (was: [PATCH v4] c++: Add gnu::diagnose_as attribute)

2021-11-17 Thread Jason Merrill via Gcc-patches

On 11/17/21 04:04, Matthias Kretz wrote:

On Wednesday, 17 November 2021 07:09:18 CET Jason Merrill wrote:

-  if (CHECKING_P)
-SET_NON_DEFAULT_TEMPLATE_ARGS_COUNT (a, TREE_VEC_LENGTH (a));
+  SET_NON_DEFAULT_TEMPLATE_ARGS_COUNT (a, nondefault);


should have been

if (CHECKING_P || nondefault != TREE_VEC_LENGTH (a))
SET_NON_DEFAULT_TEMPLATE_ARGS_COUNT (a, nondefault);


TBH, I don't understand the purpose of CHECKING_P here, or rather it makes me
nervous because AFAIU I'm only testing with CHECKING_P enabled. Why make
behavior dependent on CHECKING_P? I expected CHECKING_P to basically only add
more assertions.


The idea when NON_DEFAULT_TEMPLATE_ARGS_COUNT was added years back was 
to leave the TREE_CHAIN null when !CHECKING_P and treat that as 
equivalent to TREE_VEC_LENGTH (args).  But perhaps you're right that 
it's not a savings worth the complexity.



(copy_template_args): Jason?


Only copy the non-default template args count on TREE_VECs that should
have it.


Why not simply set the count on all args? Is it a performance concern? The
INTEGER_CST the TREE_CHAIN has to point to exists anyway, so it's not wasting
any memory, right?


In this case the TREE_VEC we're excluding is the one wrapping multiple 
levels of template args; it doesn't contain args directly, so setting 
NON_DEFAULT_ARGS_COUNT on it doesn't make sense.



+  /* Pretty print only template instantiations. Don't pretty print
explicit
+ specializations like 'template <> void fun (int)'.


This seems like a significant change of behavior unrelated to printing
default template arguments.  What's the rationale for handling
specializations differently from instantiations?


Right, this is about "The general idea of this change is to print template
parms wherever they would appear in the source code as well".

Initially, the change to print function template arguments/parameters only if
the args were explicitly specified lead to printing 'void fun (T) [with T =
...]' or 'template <> void fun (int)'. Both are not telling the full story,
even if the former is how the function would be called.


and the latter is how I expect the specialization to be declared, not 
with the deducible template argument made explicit.



But if the reader
should quickly recognize what code is getting called, it is helpful to see
right away that a function template specialization is called. (It might also
reveal an implementation detail of a library, so it's not 100% obvious how to
choose here.) Also, saying 'T = int' is kind of wrong. Yes, 'int' was deduced.
But there's no T in fun:

template <class T> void fun (T);
template <> void fun (int);


There's a T in the template, and as you said above, that's how it's 
called (and mangled).



__FUNCTION__ was 'fun' all the time, but __PRETTY_FUNCTION__ was 'void
fun(T) [with T = int]'.


Isn't that true for instantiations, as well?


It's more consistent that __PRETTY_FUNCTION__ contains __FUNCTION__, IMHO


I suppose, but I don't see that as a strong enough motivation to mix 
this up.



so it would have to be at least 'void fun(T) [with T
= int]'. But that's strange: How it uses T and int for the same type. So I
settled on 'void fun(int)'.


I also don't understand the purpose of TFF_AS_PRIMARY.


dump_function_decl generalizes the TEMPLATE_DECL (if flag_pretty_templates is
true) and, before this change, passes the generalized TEMPLATE_DECL to
dump_type (... DECL_CONTEXT (t) ...) and dump_function_name (... t ...).
That's why the whole template is printed as primary template (i.e. with
template parms instead of template args, as is needed for
flag_pretty_templates). But this drops the count of non-default template args.


Ah, you're trying to omit defaulted parms from the ?  I'm not sure 
that's necessary, leaving them out of the [with ...] list should be 
sufficient.



To retain the count, dump_type and dump_function_name need to be called with
the original TEMPLATE_DECL. But if I do this, pretty-templates is broken.
'template  struct A { template  void f(T, U); };' would
print as 'A::f(T, U) [with U = float, T = int]'. To get back to
'A::f(T, U) [with U = float, T = int]' I needed to tell
dump_template_parms that even though the template args are there, it should
print only the template parms. The most obvious way to do that was to carry it
through via flags.

Note that this creates another problem. Given

template  struct Outer {
   template  struct A;
   template  struct A {
 void f();
   };
};

we want to print e.g. 'void Outer::A::f() [with X = int, T0 =
int]', but certainly not 'void Outer::A::f() [with X = int, T0 =
int]'. However, specialized_t holds A which is printed as A
with TFF_AS_PRIMARY. Only most_general_template of the function's
TEMPLATE_DECL can give us A as DECL_CONTEXT.

I have a solution in the diagnose_as patch, where I had to solve a similar
problem for the diagnose_as attribute (dump_template_scope).


+/* Print function template parameters if:
+   1. t is template, 

Re: [PATCH] rs6000: Better error messages for power8/9-vector builtins

2021-11-17 Thread Paul A. Clarke via Gcc-patches
On Wed, Nov 17, 2021 at 11:00:07AM -0600, Bill Schmidt via Gcc-patches wrote:
> On 11/17/21 10:54 AM, Paul A. Clarke wrote:
> > On Tue, Nov 16, 2021 at 11:12:35AM -0600, Bill Schmidt via Gcc-patches 
> > wrote:
> >> Hi!  During a previous patch review, Segher asked that I provide better
> >> messages when builtins are unavailable because they require both a minimum
> >> CPU and the enablement of VSX instructions.  This patch does just that.
> > ...
> >> gcc/
> >>* config/rs6000/rs6000-call.c (rs6000_invalid_new_builtin): Change
> >>error messages for ENB_P8V and ENB_P9V.
> >> ---
> >>  gcc/config/rs6000/rs6000-call.c | 6 --
> >>  1 file changed, 4 insertions(+), 2 deletions(-)
> >>
> >> diff --git a/gcc/config/rs6000/rs6000-call.c 
> >> b/gcc/config/rs6000/rs6000-call.c
> >> index 85fec80c6d7..035266eb001 100644
> >> --- a/gcc/config/rs6000/rs6000-call.c
> >> +++ b/gcc/config/rs6000/rs6000-call.c
> >> @@ -11943,7 +11943,8 @@ rs6000_invalid_new_builtin (enum 
> >> rs6000_gen_builtins fncode)
> >>error ("%qs requires the %qs option", name, "-mcpu=power8");
> >>break;
> >>  case ENB_P8V:
> >> -  error ("%qs requires the %qs option", name, "-mpower8-vector");
> >> +  error ("%qs requires the %qs and %qs options", name, "-mcpu=power8",
> >> +   "-mvsx");
> > "-mcpu=power8" itself enables "-mvsx", doesn't it?
> 
> Of course, but it can be disabled with -mno-vsx.  Then you get this error.
> You won't get it unless you deliberately did something strange with the
> compile options.
> 
> >
> >>break;
> >>  case ENB_P9:
> >>error ("%qs requires the %qs option", name, "-mcpu=power9");
> >> @@ -11953,7 +11954,8 @@ rs6000_invalid_new_builtin (enum 
> >> rs6000_gen_builtins fncode)
> >> name, "-mcpu=power9", "-m64", "-mpowerpc64");
> >>break;
> >>  case ENB_P9V:
> >> -  error ("%qs requires the %qs option", name, "-mpower9-vector");
> >> +  error ("%qs requires the %qs and %qs options", name, "-mcpu=power9",
> >> +   "-mvsx");
> > Similarly, "-mcpu=power9" itself enables "-mvsx", doesn't it?
> >
> > Are you trying to also say "don't use -mno-vsx"?  If so, maybe s/and/with/
> > would be slightly less confusing? This is going to be awkward unless it can
> > be more precise, like two messages depending on actual context:
> > - with "-mcpu=power8 -mno-vsx:  "...requires -mvsx".
> > - without "-mcpu=power8":  "...requires -mcpu=power8".
> 
> This seems like a YMMV situation...I don't see the confusion myself.

I guess I'm being pedantic.  "requires -mcpu=power8 and -mvsx" is not
accurate from a user's point of view, as "-mcpu=power8" is sufficient,
since "-mvsx" is enabled when "-mcpu=power8" is specified.

The real "requires" is "-mcpu=power8" and no "-mno-vsx".

(I'm just picturing myself fumbling around in a Makefile written by
somebody else. ;-)

It's not a strong objection, since specifying "-mno-vsx" should be
uncommon.  (Right?)  And, specifying "-mcpu=power8 -mvsx" is harmless.

PC


[committed] libstdc++: Use std::construct_at in net::ip::address

2021-11-17 Thread Jonathan Wakely via Gcc-patches
Tested powerpc64le-linux, pushed to trunk.


Using placement-new isn't valid in constant expressions, so this
replaces it with std::construct_at (via the std::_Construct function
that is usable before C++20).

libstdc++-v3/ChangeLog:

* include/experimental/internet (address): Use std::_Construct
to initialize union members.
---
 libstdc++-v3/include/experimental/internet | 8 
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/libstdc++-v3/include/experimental/internet 
b/libstdc++-v3/include/experimental/internet
index 95b8cdc9963..5e2ef00c16f 100644
--- a/libstdc++-v3/include/experimental/internet
+++ b/libstdc++-v3/include/experimental/internet
@@ -466,9 +466,9 @@ namespace ip
 address(const address& __a) noexcept : _M_uninit(), _M_is_v4(__a._M_is_v4)
 {
   if (_M_is_v4)
-   ::new (std::addressof(_M_v4)) address_v4(__a.to_v4());
+   std::_Construct(std::addressof(_M_v4), __a.to_v4());
   else
-   ::new (std::addressof(_M_v6)) address_v6(__a.to_v6());
+   std::_Construct(std::addressof(_M_v6), __a.to_v6());
 }
 
 constexpr
@@ -491,7 +491,7 @@ namespace ip
 address&
 operator=(const address_v4& __a) noexcept
 {
-  ::new (std::addressof(_M_v4)) address_v4(__a);
+  std::_Construct(std::addressof(_M_v4), __a);
   _M_is_v4 = true;
   return *this;
 }
@@ -499,7 +499,7 @@ namespace ip
 address&
 operator=(const address_v6& __a) noexcept
 {
-  ::new (std::addressof(_M_v6)) address_v6(__a);
+  std::_Construct(std::addressof(_M_v6), __a);
   _M_is_v4 = false;
   return *this;
 }
-- 
2.31.1



[committed] libstdc++: Simplify std::string constructors

2021-11-17 Thread Jonathan Wakely via Gcc-patches
Tested powerpc64le-linux, pushed to trunk.


Several std::basic_string constructors dispatch to one of the
two-argument overloads of _M_construct, which then dispatches again to
_M_construct_aux to detect whether the arguments are iterators or not.
That then dispatches to one of _M_construct(size_type, char_type) or
_M_construct(Iter, Iter, iterator_traits<Iter>::iterator_category{}).

For most of those constructors this is a waste of time, because we know
the arguments are already iterators. For basic_string(const CharT*) and
basic_string(initializer_list) we know that we call _M_construct with
two pointers, and for basic_string(const basic_string&) we call it with
two const_iterators.  Those constructors can call the three-argument
overload of _M_construct with the iterator category tag right away,
without the intermediate dispatching.

The case where this doesn't apply is basic_string(InputIter, InputIter),
but for C++11 and later this is constrained so we know it's an iterator
here as well. We can restrict the dispatching in this constructor to
only be done for C++98 and to call _M_construct_aux directly, which
allows us to remove the two-argument _M_construct(InputIter, InputIter)
overload entirely.

N.B. When calling the three-arg _M_construct with pointers or string
iterators, we pass forward_iterator_tag not random_access_iterator_tag.
This is because it makes no difference which overload gets called, and
simplifies overload resolution to not have to do a base-to-derived
check. If we ever add a new overload of _M_construct for random access
iterators we would have to revisit this, but that seems unlikely.

This patch also moves the __is_null_pointer checks from the three-arg
_M_construct into the constructors where a null pointer argument is
actually possible. This avoids redundant checks where we know we have a
non-null pointer, or don't have a pointer at all.

Finally, this patch replaces some try-blocks with an RAII type, so that
memory is deallocated during unwinding. This avoids the overhead of
catching and rethrowing an exception.

libstdc++-v3/ChangeLog:

* include/bits/basic_string.h (_M_construct_aux): Only define
for C++98. Remove constexpr.
(_M_construct_aux_2): Likewise.
(_M_construct(InputIter, InputIter)): Remove.
(basic_string(const basic_string&)): Call _M_construct with
iterator category argument.
(basic_string(const basic_string&, size_type, const Alloc&)):
Likewise.
(basic_string(const basic_string&, size_type, size_type)):
Likewise.
(basic_string(const charT*, size_type, const Alloc&)): Likewise.
Check for null pointer.
(basic_string(const charT*, const Alloc&)): Likewise.
(basic_string(initializer_list, const Alloc&)): Call
_M_construct with iterator category argument.
(basic_string(const basic_string&, const Alloc&)): Likewise.
(basic_string(basic_string&&, const Alloc&)): Likewise.
(basic_string(_InputIter, _InputIter, const Alloc&)): Likewise
for C++11 and later, call _M_construct_aux for C++98.
* include/bits/basic_string.tcc
(_M_construct(I, I, input_iterator_tag)): Replace try-block with
RAII type.
(_M_construct(I, I, forward_iterator_tag)): Likewise. Remove
__is_null_pointer check.
---
 libstdc++-v3/include/bits/basic_string.h   | 61 +++
 libstdc++-v3/include/bits/basic_string.tcc | 69 --
 2 files changed, 74 insertions(+), 56 deletions(-)

diff --git a/libstdc++-v3/include/bits/basic_string.h 
b/libstdc++-v3/include/bits/basic_string.h
index 9d281f5daf2..d29c9cdc410 100644
--- a/libstdc++-v3/include/bits/basic_string.h
+++ b/libstdc++-v3/include/bits/basic_string.h
@@ -262,10 +262,10 @@ _GLIBCXX_BEGIN_NAMESPACE_CXX11
   _M_destroy(size_type __size) throw()
   { _Alloc_traits::deallocate(_M_get_allocator(), _M_data(), __size + 1); }
 
+#if __cplusplus < 201103L || defined _GLIBCXX_DEFINING_STRING_INSTANTIATIONS
   // _M_construct_aux is used to implement the 21.3.1 para 15 which
   // requires special behaviour if _InIterator is an integral type
   template
-   _GLIBCXX20_CONSTEXPR
 void
 _M_construct_aux(_InIterator __beg, _InIterator __end,
 std::__false_type)
@@ -277,24 +277,14 @@ _GLIBCXX_BEGIN_NAMESPACE_CXX11
   // _GLIBCXX_RESOLVE_LIB_DEFECTS
   // 438. Ambiguity in the "do the right thing" clause
   template
-   _GLIBCXX20_CONSTEXPR
 void
 _M_construct_aux(_Integer __beg, _Integer __end, std::__true_type)
{ _M_construct_aux_2(static_cast(__beg), __end); }
 
-  _GLIBCXX20_CONSTEXPR
   void
   _M_construct_aux_2(size_type __req, _CharT __c)
   { _M_construct(__req, __c); }
-
-  template
-   _GLIBCXX20_CONSTEXPR
-void
-_M_construct(_InIterator __beg, _InIterator __end)
-   {
- typedef typename 

[committed] libstdc++: Set active member of union in std::string [PR103295]

2021-11-17 Thread Jonathan Wakely via Gcc-patches
Tested powerpc64le-linux, pushed to trunk.


Clang diagnoses that the new constexpr std::string constructors are not
usable in constant expressions, because they start to write to members
of the union without setting an active member.

This adds a new helper function which returns the address of the local
buffer after making it the active member.

This doesn't fix all problems with Clang, because it still refuses to
write to memory returned by the allocator.

libstdc++-v3/ChangeLog:

PR libstdc++/103295
* include/bits/basic_string.h (_M_use_local_data()): New
member function to make local buffer the active member.
(assign(const basic_string&)): Use it.
* include/bits/basic_string.tcc (_M_construct, reserve()):
Likewise.
---
 libstdc++-v3/include/bits/basic_string.h   | 15 ++-
 libstdc++-v3/include/bits/basic_string.tcc | 10 --
 2 files changed, 22 insertions(+), 3 deletions(-)

diff --git a/libstdc++-v3/include/bits/basic_string.h 
b/libstdc++-v3/include/bits/basic_string.h
index 0b7d6c0a981..9d281f5daf2 100644
--- a/libstdc++-v3/include/bits/basic_string.h
+++ b/libstdc++-v3/include/bits/basic_string.h
@@ -325,6 +325,19 @@ _GLIBCXX_BEGIN_NAMESPACE_CXX11
   _M_get_allocator() const
   { return _M_dataplus; }
 
+  // Ensure that _M_local_buf is the active member of the union.
+  __attribute__((__always_inline__))
+  _GLIBCXX14_CONSTEXPR
+  pointer
+  _M_use_local_data() _GLIBCXX_NOEXCEPT
+  {
+#if __cpp_lib_is_constant_evaluated
+   if (__builtin_is_constant_evaluated())
+ _M_local_buf[0] = _CharT();
+#endif
+   return _M_local_data();
+  }
+
 private:
 
 #ifdef _GLIBCXX_DISAMBIGUATE_REPLACE_INST
@@ -1487,7 +1500,7 @@ _GLIBCXX_BEGIN_NAMESPACE_CXX11
if (__str.size() <= _S_local_capacity)
  {
_M_destroy(_M_allocated_capacity);
-   _M_data(_M_local_data());
+   _M_data(_M_use_local_data());
_M_set_length(0);
  }
else
diff --git a/libstdc++-v3/include/bits/basic_string.tcc 
b/libstdc++-v3/include/bits/basic_string.tcc
index 5743770b42a..5a51f7e21b5 100644
--- a/libstdc++-v3/include/bits/basic_string.tcc
+++ b/libstdc++-v3/include/bits/basic_string.tcc
@@ -170,9 +170,11 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
size_type __len = 0;
size_type __capacity = size_type(_S_local_capacity);
 
+   pointer __p = _M_use_local_data();
+
while (__beg != __end && __len < __capacity)
  {
-   _M_data()[__len++] = *__beg;
+   __p[__len++] = *__beg;
++__beg;
  }
 
@@ -223,6 +225,8 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
_M_data(_M_create(__dnew, size_type(0)));
_M_capacity(__dnew);
  }
+   else
+ _M_use_local_data();
 
// Check for out_of_range and length_error exceptions.
__try
@@ -247,6 +251,8 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
  _M_data(_M_create(__n, size_type(0)));
  _M_capacity(__n);
}
+  else
+   _M_use_local_data();
 
   if (__n)
this->_S_assign(_M_data(), __n, __c);
@@ -355,7 +361,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
 
   if (__length <= size_type(_S_local_capacity))
{
- this->_S_copy(_M_local_data(), _M_data(), __length + 1);
+ this->_S_copy(_M_use_local_data(), _M_data(), __length + 1);
  _M_destroy(__capacity);
  _M_data(_M_local_data());
}
-- 
2.31.1



[committed] libstdc++: Fix std::type_info::before for ARM [PR103240]

2021-11-17 Thread Jonathan Wakely via Gcc-patches
Tested powerpc64le-linux, and briefly checked on armv7hl-linux-gnueabi,
pushed to trunk.


The r179236 fix for std::type_info::operator== should also have been
applied to std::type_info::before. Otherwise two distinct types can
compare equivalent due to using a string comparison, when they should do
a pointer comparison.

libstdc++-v3/ChangeLog:

PR libstdc++/103240
* libsupc++/tinfo2.cc (type_info::before): Use unadjusted name
to check for the '*' prefix.
* testsuite/util/testsuite_shared.cc: Add type_info object for
use in new test.
* testsuite/18_support/type_info/103240.cc: New test.
---
 libstdc++-v3/libsupc++/tinfo2.cc  |  5 ++-
 .../testsuite/18_support/type_info/103240.cc  | 36 +++
 .../testsuite/util/testsuite_shared.cc| 12 +++
 3 files changed, 52 insertions(+), 1 deletion(-)
 create mode 100644 libstdc++-v3/testsuite/18_support/type_info/103240.cc

diff --git a/libstdc++-v3/libsupc++/tinfo2.cc b/libstdc++-v3/libsupc++/tinfo2.cc
index b587cfd037b..d02021fe538 100644
--- a/libstdc++-v3/libsupc++/tinfo2.cc
+++ b/libstdc++-v3/libsupc++/tinfo2.cc
@@ -36,7 +36,10 @@ type_info::before (const type_info ) const 
_GLIBCXX_NOEXCEPT
 #if __GXX_MERGED_TYPEINFO_NAMES
   return name () < arg.name ();
 #else
-  return (name ()[0] == '*') ? name () < arg.name ()
+  /* The name() method will strip any leading '*' prefix. Therefore
+ take care to look at __name rather than name() when looking for
+ the "pointer" prefix.  */
+  return (__name[0] == '*') ? name () < arg.name ()
 :  __builtin_strcmp (name (), arg.name ()) < 0;
 #endif
 }
diff --git a/libstdc++-v3/testsuite/18_support/type_info/103240.cc 
b/libstdc++-v3/testsuite/18_support/type_info/103240.cc
new file mode 100644
index 000..3d5968ac25c
--- /dev/null
+++ b/libstdc++-v3/testsuite/18_support/type_info/103240.cc
@@ -0,0 +1,36 @@
+// { dg-do run }
+// { dg-require-sharedlib "" }
+// { dg-options "./testsuite_shared.so" }
+
+#include 
+#include 
+
+namespace __gnu_test
+{
+namespace
+{
+  struct S { };
+  struct T { };
+}
+
+// Defined in testsuite_shared.so, referring to private type in that library
+// with the same mangled name as __gnu_testS defined here.
+extern const std::type_info& pr103240_private_S;
+}
+
+const std::type_info& private_S = __gnu_test::pr103240_private_S;
+const std::type_info& local_S = typeid(__gnu_test::S);
+const std::type_info& local_T = typeid(__gnu_test::T);
+
+int main()
+{
+  VERIFY( local_S == local_S );
+  VERIFY( ! local_S.before(local_S) );
+
+  VERIFY( local_S != local_T );
+  VERIFY( local_S.before(local_T) || local_T.before(local_S) );
+
+  VERIFY( local_S != private_S );
+  // PR libstdc++/103240
+  VERIFY( local_S.before(private_S) || private_S.before(local_S) );
+}
diff --git a/libstdc++-v3/testsuite/util/testsuite_shared.cc 
b/libstdc++-v3/testsuite/util/testsuite_shared.cc
index c4a7ed4abe5..8c10534c511 100644
--- a/libstdc++-v3/testsuite/util/testsuite_shared.cc
+++ b/libstdc++-v3/testsuite/util/testsuite_shared.cc
@@ -23,6 +23,9 @@
 #include 
 #include 
 #include 
+#if __cpp_rtti
+# include 
+#endif
 
 namespace __gnu_test
 {
@@ -130,4 +133,13 @@ try_function_random_fail()
   }
 #endif
 
+#if __cpp_rtti
+// PR libstdc++/103240
+namespace
+{
+  struct S { };
+}
+const std::type_info& pr103240_private_S = typeid(S);
+#endif
+
 } // end namepace __gnu_test
-- 
2.31.1



[PATCH] DWARF: Match behaviour of .cfi_xxx when doing manual frame output.

2021-11-17 Thread Iain Sandoe via Gcc-patches
At present, for several reasons, it is not possible to switch
Darwin to use .cfi instructions for frame output.

When GCC uses .cfi_ instructions, the behaviour w.r.t frame
sections (for a target with unwind frames by default):

(no options ) .eh_frame
(-g ) .eh_frame
(-g -fno-unwind-tables -fno-asynchronous-unwind-tables) .debug_frame
(   -fno-unwind-tables -fno-asynchronous-unwind-tables) ---

However, for a target which outputs the FDEs "manually" (using
output_call_frame_info()) we have:

(no options ) __eh_frame
(-g ) __eh_frame *and* __debug_frame
(-g -fno-unwind-tables -fno-asynchronous-unwind-tables) __debug_frame
(   -fno-unwind-tables -fno-asynchronous-unwind-tables) ---

The first two cases are, of course, the most common and the extra
frame table is (a) a waste of space and (b) actually triggers a bug
when used with the LLVM assembler [with assertions enabled] for
Mach-O when we have hot/cold partitioning on, since that emits
Letext{.cold}0 labels *after* the __DWARF,__debug_frame and the
assembler is set up to reject switches to non-debug sections after the
first __DWARF debug one has been seen.

The following patch makes the manual output of frame data follow the
same pattern as the .cfi instructions.

(a) From testing on Darwin which uses the 'manual frame output' I see
around 200Mb saving on gcc/ for master (5%).
(b) Since Darwin defaults to unwind frames for all languages, we see
only eh_frame sections before the "real debug" is emitted, so that
the LLVM constraint is avoided.

On testing on x86_64 and powerpc64le Linux, I see only a single test
that would need amendment (it counts the number of references to the
start/end local labels).

Since the majority of targets are using .cfi instructions, it is hard
to get wider testing.

It would be possible, of course, to wrap the change in a target hook
but it's not clear that we need to.

Is there some case that I've missed?
If not, OK for master? (The testcase amendments are not attached here,
but they are simple.)

thanks,
Iain

Signed-off-by: Iain Sandoe 

gcc/ChangeLog:

* dwarf2out.c (output_call_frame_info): Output the FDEs when
either EH or debug support is needed.
(dwarf2out_frame_finish): When either EH or debug support is
needed, call output_call_frame_info().
---
 gcc/dwarf2out.c | 15 ++-
 1 file changed, 6 insertions(+), 9 deletions(-)

diff --git a/gcc/dwarf2out.c b/gcc/dwarf2out.c
index e1d6a79ecd7..96307d6747a 100644
--- a/gcc/dwarf2out.c
+++ b/gcc/dwarf2out.c
@@ -283,7 +283,7 @@ static GTY(()) dw_die_ref decltype_auto_die;
 
 /* Forward declarations for functions defined in this file.  */
 
-static void output_call_frame_info (int);
+static void output_call_frame_info (bool, bool);
 
 /* Personality decl of current unit.  Used only when assembler does not support
personality CFI.  */
@@ -750,7 +750,7 @@ fde_needed_for_eh_p (dw_fde_ref fde)
location of saved registers.  */
 
 static void
-output_call_frame_info (int for_eh)
+output_call_frame_info (bool for_eh, bool for_debug)
 {
   unsigned int i;
   dw_fde_ref fde;
@@ -795,7 +795,7 @@ output_call_frame_info (int for_eh)
targetm.asm_out.emit_unwind_label (asm_out_file, fde->decl, 1, 1);
}
 
-  if (!any_eh_needed)
+  if (!any_eh_needed && !for_debug)
return;
 }
 
@@ -1271,12 +1271,9 @@ void
 dwarf2out_frame_finish (void)
 {
   /* Output call frame information.  */
-  if (targetm.debug_unwind_info () == UI_DWARF2)
-output_call_frame_info (0);
-
-  /* Output another copy for the unwinder.  */
-  if (do_eh_frame)
-output_call_frame_info (1);
+  if (targetm.debug_unwind_info () == UI_DWARF2 || do_eh_frame)
+output_call_frame_info (do_eh_frame,
+   targetm.debug_unwind_info () == UI_DWARF2);
 }
 
 static void var_location_switch_text_section (void);
-- 
2.24.3 (Apple Git-128)



Re: [PATCH v5 1/1] [ARM] Add support for TLS register based stack protector canary access

2021-11-17 Thread Ard Biesheuvel via Gcc-patches
(+ Ramana)

On Mon, 15 Nov 2021 at 19:04, Ard Biesheuvel  wrote:
>
> Add support for accessing the stack canary value via the TLS register,
> so that multiple threads running in the same address space can use
> distinct canary values. This is intended for the Linux kernel running in
> SMP mode, where processes entering the kernel are essentially threads
> running the same program concurrently: using a global variable for the
> canary in that context is problematic because it can never be rotated,
> and so the OS is forced to use the same value as long as it remains up.
>
> Using the TLS register to index the stack canary helps with this, as it
> allows each CPU to context switch the TLS register along with the rest
> of the process, permitting each process to use its own value for the
> stack canary.
>
> 2021-11-15 Ard Biesheuvel 
>
> * config/arm/arm-opts.h (enum stack_protector_guard): New
> * config/arm/arm-protos.h (arm_stack_protect_tls_canary_mem):
> New
> * config/arm/arm.c (TARGET_STACK_PROTECT_GUARD): Define
> (arm_option_override_internal): Handle and put in error checks
> for stack protector guard options.
> (arm_option_reconfigure_globals): Likewise
> (arm_stack_protect_tls_canary_mem): New
> (arm_stack_protect_guard): New
> * config/arm/arm.md (stack_protect_set): New
> (stack_protect_set_tls): Likewise
> (stack_protect_test): Likewise
> (stack_protect_test_tls): Likewise
> (reload_tp_hard): Likewise
> * config/arm/arm.opt (-mstack-protector-guard): New
> (-mstack-protector-guard-offset): New.
> * doc/invoke.texi: Document new options
>
> gcc/testsuite/ChangeLog:
>
> * gcc.target/arm/stack-protector-7.c: New test.
> * gcc.target/arm/stack-protector-8.c: New test.
>
> Signed-off-by: Ard Biesheuvel 
> ---
>  gcc/config/arm/arm-opts.h|  6 ++
>  gcc/config/arm/arm-protos.h  |  2 +
>  gcc/config/arm/arm.c | 55 +++
>  gcc/config/arm/arm.md| 71 +++-
>  gcc/config/arm/arm.opt   | 22 ++
>  gcc/doc/invoke.texi  | 11 +++
>  gcc/testsuite/gcc.target/arm/stack-protector-7.c | 10 +++
>  gcc/testsuite/gcc.target/arm/stack-protector-8.c |  5 ++
>  8 files changed, 180 insertions(+), 2 deletions(-)
>
> diff --git a/gcc/config/arm/arm-opts.h b/gcc/config/arm/arm-opts.h
> index 5c4b62f404f7..581ba3c4fbbb 100644
> --- a/gcc/config/arm/arm-opts.h
> +++ b/gcc/config/arm/arm-opts.h
> @@ -69,4 +69,10 @@ enum arm_tls_type {
>TLS_GNU,
>TLS_GNU2
>  };
> +
> +/* Where to get the canary for the stack protector.  */
> +enum stack_protector_guard {
> +  SSP_TLSREG,  /* per-thread canary in TLS register */
> +  SSP_GLOBAL   /* global canary */
> +};
>  #endif
> diff --git a/gcc/config/arm/arm-protos.h b/gcc/config/arm/arm-protos.h
> index 9b1f61394ad7..d8d605920c97 100644
> --- a/gcc/config/arm/arm-protos.h
> +++ b/gcc/config/arm/arm-protos.h
> @@ -195,6 +195,8 @@ extern void arm_split_atomic_op (enum rtx_code, rtx, rtx, 
> rtx, rtx, rtx, rtx);
>  extern rtx arm_load_tp (rtx);
>  extern bool arm_coproc_builtin_available (enum unspecv);
>  extern bool arm_coproc_ldc_stc_legitimate_address (rtx);
> +extern rtx arm_stack_protect_tls_canary_mem (bool);
> +
>
>  #if defined TREE_CODE
>  extern void arm_init_cumulative_args (CUMULATIVE_ARGS *, tree, rtx, tree);
> diff --git a/gcc/config/arm/arm.c b/gcc/config/arm/arm.c
> index a5b403eb3e49..e5077348ce07 100644
> --- a/gcc/config/arm/arm.c
> +++ b/gcc/config/arm/arm.c
> @@ -829,6 +829,9 @@ static const struct attribute_spec arm_attribute_table[] =
>
>  #undef TARGET_MD_ASM_ADJUST
>  #define TARGET_MD_ASM_ADJUST arm_md_asm_adjust
> +
> +#undef TARGET_STACK_PROTECT_GUARD
> +#define TARGET_STACK_PROTECT_GUARD arm_stack_protect_guard
>
>  /* Obstack for minipool constant handling.  */
>  static struct obstack minipool_obstack;
> @@ -3176,6 +3179,26 @@ arm_option_override_internal (struct gcc_options *opts,
>if (TARGET_THUMB2_P (opts->x_target_flags))
>  opts->x_inline_asm_unified = true;
>
> +  if (arm_stack_protector_guard == SSP_GLOBAL
> +  && opts->x_arm_stack_protector_guard_offset_str)
> +{
> +  error ("incompatible options %'-mstack-protector-guard=global%' and"
> +"%'-mstack-protector-guard-offset=%qs%'",
> +arm_stack_protector_guard_offset_str);
> +}
> +
> +  if (opts->x_arm_stack_protector_guard_offset_str)
> +{
> +  char *end;
> +  const char *str = arm_stack_protector_guard_offset_str;
> +  errno = 0;
> +  long offs = strtol (arm_stack_protector_guard_offset_str, , 0);
> +  if (!*str || *end || errno)
> +   error ("%qs is not a valid offset in %qs", str,
> +  "-mstack-protector-guard-offset=");
> +  

Re: [PATCH] rs6000: Better error messages for power8/9-vector builtins

2021-11-17 Thread Bill Schmidt via Gcc-patches
On 11/17/21 10:54 AM, Paul A. Clarke wrote:
> On Tue, Nov 16, 2021 at 11:12:35AM -0600, Bill Schmidt via Gcc-patches wrote:
>> Hi!  During a previous patch review, Segher asked that I provide better
>> messages when builtins are unavailable because they require both a minimum
>> CPU and the enablement of VSX instructions.  This patch does just that.
> ...
>> gcc/
>>  * config/rs6000/rs6000-call.c (rs6000_invalid_new_builtin): Change
>>  error messages for ENB_P8V and ENB_P9V.
>> ---
>>  gcc/config/rs6000/rs6000-call.c | 6 --
>>  1 file changed, 4 insertions(+), 2 deletions(-)
>>
>> diff --git a/gcc/config/rs6000/rs6000-call.c 
>> b/gcc/config/rs6000/rs6000-call.c
>> index 85fec80c6d7..035266eb001 100644
>> --- a/gcc/config/rs6000/rs6000-call.c
>> +++ b/gcc/config/rs6000/rs6000-call.c
>> @@ -11943,7 +11943,8 @@ rs6000_invalid_new_builtin (enum rs6000_gen_builtins 
>> fncode)
>>error ("%qs requires the %qs option", name, "-mcpu=power8");
>>break;
>>  case ENB_P8V:
>> -  error ("%qs requires the %qs option", name, "-mpower8-vector");
>> +  error ("%qs requires the %qs and %qs options", name, "-mcpu=power8",
>> + "-mvsx");
> "-mcpu=power8" itself enables "-mvsx", doesn't it?

Of course, but it can be disabled with -mno-vsx.  Then you get this error.
You won't get it unless you deliberately did something strange with the
compile options.

>
>>break;
>>  case ENB_P9:
>>error ("%qs requires the %qs option", name, "-mcpu=power9");
>> @@ -11953,7 +11954,8 @@ rs6000_invalid_new_builtin (enum rs6000_gen_builtins 
>> fncode)
>>   name, "-mcpu=power9", "-m64", "-mpowerpc64");
>>break;
>>  case ENB_P9V:
>> -  error ("%qs requires the %qs option", name, "-mpower9-vector");
>> +  error ("%qs requires the %qs and %qs options", name, "-mcpu=power9",
>> + "-mvsx");
> Similarly, "-mcpu=power9" itself enables "-mvsx", doesn't it?
>
> Are you trying to also say "don't use -mno-vsx"?  If so, maybe s/and/with/
> would be slightly less confusing? This is going to be awkward unless it can
> be more precise, like two messages depending on actual context:
> - with "-mcpu=power8 -mno-vsx:  "...requires -mvsx".
> - without "-mcpu=power8":  "...requires -mcpu=power8".

This seems like a YMMV situation...I don't see the confusion myself.

Bill

>
> PC


[committed] Fix two mips target tests compromised by recent IPA work

2021-11-17 Thread Jeff Law via Gcc-patches
Jan's recent IPA work compromised two mips tests.   This restores the 
tests by disabling IPA analysis on the key function in both tests.


Committed to the trunk,

Jeff

commit c70546482388951b5c9c19cff002ee6ab920b7f5
Author: Jeff Law 
Date:   Wed Nov 17 11:55:50 2021 -0500

Fix two mips target tests compromised by recent IPA work

gcc/testsuite
* gcc.target/mips/frame-header-1.c (bar): Add noipa attribute.
* gcc.target/mips/frame-header-2.c (bar): Likewise.

diff --git a/gcc/testsuite/gcc.target/mips/frame-header-1.c 
b/gcc/testsuite/gcc.target/mips/frame-header-1.c
index 971656ddaa3..55efc0b02f8 100644
--- a/gcc/testsuite/gcc.target/mips/frame-header-1.c
+++ b/gcc/testsuite/gcc.target/mips/frame-header-1.c
@@ -8,7 +8,7 @@
 /* { dg-skip-if "code quality test" { *-*-* } { "-O0" } { "" } } */
 /* { dg-final { scan-assembler "\taddiu\t\\\$sp,\\\$sp,-24" } } */
 
-NOMIPS16 void __attribute__((noinline))
+NOMIPS16 void __attribute__((noinline)) __attribute__((noipa))
 bar (int* a)
 {
   *a = 1;
diff --git a/gcc/testsuite/gcc.target/mips/frame-header-2.c 
b/gcc/testsuite/gcc.target/mips/frame-header-2.c
index 0e86bc91994..31aa27e990f 100644
--- a/gcc/testsuite/gcc.target/mips/frame-header-2.c
+++ b/gcc/testsuite/gcc.target/mips/frame-header-2.c
@@ -8,7 +8,7 @@
 /* { dg-skip-if "code quality test" { *-*-* } { "-O0" } { "" } } */
 /* { dg-final { scan-assembler "\taddiu\t\\\$sp,\\\$sp,-8" } } */
 
-NOMIPS16 void __attribute__((noinline))
+NOMIPS16 void __attribute__((noinline)) __attribute__((noipa))
 bar (int* a)
 {
   *a = 1;


Re: [PATCH] rs6000: Better error messages for power8/9-vector builtins

2021-11-17 Thread Paul A. Clarke via Gcc-patches
On Tue, Nov 16, 2021 at 11:12:35AM -0600, Bill Schmidt via Gcc-patches wrote:
> Hi!  During a previous patch review, Segher asked that I provide better
> messages when builtins are unavailable because they require both a minimum
> CPU and the enablement of VSX instructions.  This patch does just that.
...
> gcc/
>   * config/rs6000/rs6000-call.c (rs6000_invalid_new_builtin): Change
>   error messages for ENB_P8V and ENB_P9V.
> ---
>  gcc/config/rs6000/rs6000-call.c | 6 --
>  1 file changed, 4 insertions(+), 2 deletions(-)
> 
> diff --git a/gcc/config/rs6000/rs6000-call.c b/gcc/config/rs6000/rs6000-call.c
> index 85fec80c6d7..035266eb001 100644
> --- a/gcc/config/rs6000/rs6000-call.c
> +++ b/gcc/config/rs6000/rs6000-call.c
> @@ -11943,7 +11943,8 @@ rs6000_invalid_new_builtin (enum rs6000_gen_builtins 
> fncode)
>error ("%qs requires the %qs option", name, "-mcpu=power8");
>break;
>  case ENB_P8V:
> -  error ("%qs requires the %qs option", name, "-mpower8-vector");
> +  error ("%qs requires the %qs and %qs options", name, "-mcpu=power8",
> +  "-mvsx");

"-mcpu=power8" itself enables "-mvsx", doesn't it?

>break;
>  case ENB_P9:
>error ("%qs requires the %qs option", name, "-mcpu=power9");
> @@ -11953,7 +11954,8 @@ rs6000_invalid_new_builtin (enum rs6000_gen_builtins 
> fncode)
>name, "-mcpu=power9", "-m64", "-mpowerpc64");
>break;
>  case ENB_P9V:
> -  error ("%qs requires the %qs option", name, "-mpower9-vector");
> +  error ("%qs requires the %qs and %qs options", name, "-mcpu=power9",
> +  "-mvsx");

Similarly, "-mcpu=power9" itself enables "-mvsx", doesn't it?

Are you trying to also say "don't use -mno-vsx"?  If so, maybe s/and/with/
would be slightly less confusing? This is going to be awkward unless it can
be more precise, like two messages depending on actual context:
- with "-mcpu=power8 -mno-vsx":  "...requires -mvsx".
- without "-mcpu=power8":  "...requires -mcpu=power8".

PC


Re: [committed] analyzer: fix missing -Wanalyzer-write-to-const [PR102695]

2021-11-17 Thread Martin Sebor via Gcc-patches

On 11/16/21 7:05 PM, David Malcolm via Gcc-patches wrote:

This patch fixes -Wanalyzer-write-to-const so that it will complain
about attempts to write to functions and to labels.
It also "teaches" the analyzer about strchr, in that strchr can either
return a pointer into the input area (and thus -Wanalyzer-write-to-const
can now complain about writes into a string literal seen this way),
or return NULL (and thus the analyzer can complain about NULL
dereferences if the result is used without a check).


For what it's worth, I used strchr in the test case as an example.
There are a few other built-ins like it, including index, rindex,
memchr, strrchr, and strstr (just going through the switch
statements in my code).

At least some of these built-ins have an attribute "fn spec" that
describes some of their properties (like what argument they read
from; see builtin_fnspec in builtins.c).  But it doesn't look
like attr_fnspec has a way of encoding a function that returns
a pointer argument plus some offset.  That seems like a useful
enhancement both for our work and also for optimizers.  It would
let us avoid having to hardcode these properties in duplicate
case and switch statements in multiple places.

Martin



Successfully bootstrapped & regrtested on x86_64-pc-linux-gnu.
Pushed to trunk as r12-5330-g111fd515f2894d7cddf62f80c69765c43ae18577.

gcc/analyzer/ChangeLog:
PR analyzer/102695
* region-model-impl-calls.cc (region_model::impl_call_strchr): New.
* region-model-manager.cc
(region_model_manager::maybe_fold_unaryop): Simplify cast to
pointer type of an existing pointer to a region.
* region-model.cc (region_model::on_call_pre): Handle
BUILT_IN_STRCHR and "strchr".
(write_to_const_diagnostic::emit): Add auto_diagnostic_group.  Add
alternate wordings for functions and labels.
(write_to_const_diagnostic::describe_final_event): Add alternate
wordings for functions and labels.
(region_model::check_for_writable_region): Handle RK_FUNCTION and
RK_LABEL.
* region-model.h (region_model::impl_call_strchr): New decl.

gcc/testsuite/ChangeLog:
PR analyzer/102695
* gcc.dg/analyzer/pr102695.c: New test.
* gcc.dg/analyzer/strchr-1.c: New test.

Signed-off-by: David Malcolm 
---
  gcc/analyzer/region-model-impl-calls.cc  | 69 
  gcc/analyzer/region-model-manager.cc |  7 +++
  gcc/analyzer/region-model.cc | 52 --
  gcc/analyzer/region-model.h  |  1 +
  gcc/testsuite/gcc.dg/analyzer/pr102695.c | 44 +++
  gcc/testsuite/gcc.dg/analyzer/strchr-1.c | 26 +
  6 files changed, 196 insertions(+), 3 deletions(-)
  create mode 100644 gcc/testsuite/gcc.dg/analyzer/pr102695.c
  create mode 100644 gcc/testsuite/gcc.dg/analyzer/strchr-1.c

diff --git a/gcc/analyzer/region-model-impl-calls.cc 
b/gcc/analyzer/region-model-impl-calls.cc
index 90d4cf9c2db..ae50e69542e 100644
--- a/gcc/analyzer/region-model-impl-calls.cc
+++ b/gcc/analyzer/region-model-impl-calls.cc
@@ -678,6 +678,75 @@ region_model::impl_call_realloc (const call_details )
  }
  }
  
+/* Handle the on_call_pre part of "strchr" and "__builtin_strchr".  */

+
+void
+region_model::impl_call_strchr (const call_details )
+{
+  class strchr_call_info : public call_info
+  {
+  public:
+strchr_call_info (const call_details , bool found)
+: call_info (cd), m_found (found)
+{
+}
+
+label_text get_desc (bool can_colorize) const FINAL OVERRIDE
+{
+  if (m_found)
+   return make_label_text (can_colorize,
+   "when %qE returns non-NULL",
+   get_fndecl ());
+  else
+   return make_label_text (can_colorize,
+   "when %qE returns NULL",
+   get_fndecl ());
+}
+
+bool update_model (region_model *model,
+  const exploded_edge *,
+  region_model_context *ctxt) const FINAL OVERRIDE
+{
+  const call_details cd (get_call_details (model, ctxt));
+  if (tree lhs_type = cd.get_lhs_type ())
+   {
+ region_model_manager *mgr = model->get_manager ();
+ const svalue *result;
+ if (m_found)
+   {
+ const svalue *str_sval = cd.get_arg_svalue (0);
+ const region *str_reg
+   = model->deref_rvalue (str_sval, cd.get_arg_tree (0),
+  cd.get_ctxt ());
+ /* We want str_sval + OFFSET for some unknown OFFSET.
+Use a conjured_svalue to represent the offset,
+using the str_reg as the id of the conjured_svalue.  */
+ const svalue *offset
+   = mgr->get_or_create_conjured_svalue (size_type_node,
+ cd.get_call_stmt (),
+   

[OG11][committed][PATCH 21/22] graphite: Accept loops without data references

2021-11-17 Thread Frederik Harwath
It seems that the check that rejects loops without data references is
only included to avoid handling non-profitable loops.  Including those
loops in Graphite's analysis enables more consistent diagnostic
messages in OpenACC "kernels" code and does not introduce any
testsuite regressions.  If executing Graphite on loops without
data references leads to noticeable compile time slow-downs for
non-OpenACC users of Graphite, the check can be re-introduced but
restricted to non-OpenACC functions.

gcc/ChangeLog:

* graphite-scop-detection.c (scop_detection::harmful_loop_in_region):
Remove check for loops without data references.
---
 gcc/graphite-scop-detection.c | 13 -
 1 file changed, 13 deletions(-)

diff --git a/gcc/graphite-scop-detection.c b/gcc/graphite-scop-detection.c
index 99e906a5d120..9311a0e42a57 100644
--- a/gcc/graphite-scop-detection.c
+++ b/gcc/graphite-scop-detection.c
@@ -851,19 +851,6 @@ scop_detection::harmful_loop_in_region (sese_l scop) const
  return true;
}

-  /* Check if all loop nests have at least one data reference.
-???  This check is expensive and loops premature at this point.
-If important to retain we can pre-compute this for all innermost
-loops and reject those when we build a SESE region for a loop
-during SESE discovery.  */
-  if (! loop->inner
- && ! loop_nest_has_data_refs (loop))
-   {
- DEBUG_PRINT (dp << "[scop-detection-fail] loop_" << loop->num
-  << " does not have any data reference.\n");
- return true;
-   }
-
   DEBUG_PRINT (dp << "[scop-detection] loop_" << loop->num << " is 
harmless.\n");
 }

--
2.33.0

-
Siemens Electronic Design Automation GmbH; Anschrift: Arnulfstraße 201, 80634 
München; Gesellschaft mit beschränkter Haftung; Geschäftsführer: Thomas 
Heurung, Frank Thürauf; Sitz der Gesellschaft: München; Registergericht 
München, HRB 106955


[OG11][committed][PATCH 20/22] graphite: Adjust scop loop-nest choice

2021-11-17 Thread Frederik Harwath
The find_common_loop function is used in Graphite to obtain a common
super-loop of all loops inside a SCoP.  The function is applied to the
loop of the destination block of the edge that leads into the SESE
region and the loop of the source block of the edge that exits the
region.  The exit block is usually introduced by the canonicalization
of the loop structure that Graphite does to support its code
generation. If it is empty, it may happen that it belongs to the outer
fake loop.  This way, build_alias_set may end up analysing
data-references with respect to this loop although there may exist a
proper super-loop of the SCoP loops.  This does not seem to be correct
in general and it leads to problems with runtime alias check creation
which fails if executed on a loop without niter information.

gcc/ChangeLog:

* graphite-scop-detection.c (scop_context_loop): New function.
(build_alias_set): Use scop_context_loop instead of find_common_loop.
* graphite-isl-ast-to-gimple.c (graphite_regenerate_ast_isl): Likewise.
* graphite.h (scop_context_loop): New declaration.
---
 gcc/graphite-isl-ast-to-gimple.c |  4 +---
 gcc/graphite-scop-detection.c| 21 ++---
 gcc/graphite.h   |  1 +
 3 files changed, 20 insertions(+), 6 deletions(-)

diff --git a/gcc/graphite-isl-ast-to-gimple.c b/gcc/graphite-isl-ast-to-gimple.c
index bdabe588c3d8..ec055a358f39 100644
--- a/gcc/graphite-isl-ast-to-gimple.c
+++ b/gcc/graphite-isl-ast-to-gimple.c
@@ -1543,9 +1543,7 @@ graphite_regenerate_ast_isl (scop_p scop)
 conditional if aliasing can be ruled out at runtime and the original
 version of the SCoP, otherwise. */

-  loop_p loop
-  = find_common_loop (scop->scop_info->region.entry->dest->loop_father,
-  scop->scop_info->region.exit->src->loop_father);
+  loop_p loop = scop_context_loop (scop);
   tree cond = generate_alias_cond (scop->unhandled_alias_ddrs, loop);
   tree non_alias_cond = build1 (TRUTH_NOT_EXPR, boolean_type_node, cond);
   set_ifsese_condition (region->if_region, non_alias_cond);
diff --git a/gcc/graphite-scop-detection.c b/gcc/graphite-scop-detection.c
index afc955cc97eb..99e906a5d120 100644
--- a/gcc/graphite-scop-detection.c
+++ b/gcc/graphite-scop-detection.c
@@ -297,6 +297,23 @@ single_pred_cond_non_loop_exit (basic_block bb)
   return NULL;
 }

+
+/* Return the innermost loop that encloses all loops in SCOP. */
+
+loop_p
+scop_context_loop (scop_p scop)
+{
+  edge scop_entry = scop->scop_info->region.entry;
+  edge scop_exit = scop->scop_info->region.exit;
+  basic_block exit_bb = scop_exit->src;
+
+  while (sese_trivially_empty_bb_p (exit_bb) && single_pred_p (exit_bb))
+exit_bb = single_pred (exit_bb);
+
+  loop_p entry_loop = scop_entry->dest->loop_father;
+  return find_common_loop (entry_loop, exit_bb->loop_father);
+}
+
 namespace
 {

@@ -1776,9 +1793,7 @@ build_alias_set (scop_p scop)
   int i, j;
   int *all_vertices;

-  struct loop *nest
-= find_common_loop (scop->scop_info->region.entry->dest->loop_father,
-   scop->scop_info->region.exit->src->loop_father);
+  struct loop *nest = scop_context_loop (scop);

   gcc_checking_assert (nest);

diff --git a/gcc/graphite.h b/gcc/graphite.h
index 9c508f31109f..dacb27a9073c 100644
--- a/gcc/graphite.h
+++ b/gcc/graphite.h
@@ -480,4 +480,5 @@ extern tree cached_scalar_evolution_in_region (const sese_l 
&, loop_p, tree);
 extern void dot_all_sese (FILE *, vec &);
 extern void dot_sese (sese_l &);
 extern void dot_cfg ();
+extern loop_p scop_context_loop (scop_p);
 #endif
--
2.33.0

-
Siemens Electronic Design Automation GmbH; Anschrift: Arnulfstraße 201, 80634 
München; Gesellschaft mit beschränkter Haftung; Geschäftsführer: Thomas 
Heurung, Frank Thürauf; Sitz der Gesellschaft: München; Registergericht 
München, HRB 106955


[OG11][committed][PATCH 19/22] graphite: Tune parameters for OpenACC use

2021-11-17 Thread Frederik Harwath
The default values of some parameters that restrict Graphite's
resource usage are too low for many OpenACC codes.  Furthermore,
exceeding the limits does not always lead to user-visible diagnostic
messages.

This commit increases the parameter values on OpenACC functions.  The
values were chosen to allow for the analysis of all "kernels" regions
in the SPEC ACCEL v1.3 benchmark suite.  Warnings about exceeded
Graphite-related limits are added to the -fopt-info-missed
output. Those warnings are phrased in a uniform way that intentionally
refers to the "data-dependence analysis" of "OpenACC loops" instead of
"a failure in Graphite" to make them easier to understand for users.

gcc/ChangeLog:

* graphite-optimize-isl.c (optimize_isl): Adjust
param_max_isl_operations value for OpenACC functions and add
special warnings if value gets exceeded.

* graphite-scop-detection.c (build_scops): Likewise for
param_graphite_max_arrays_per_scop.

gcc/testsuite/ChangeLog:

* gcc.dg/goacc/graphite-parameter-1.c: New test.
* gcc.dg/goacc/graphite-parameter-2.c: New test.
---
 gcc/graphite-optimize-isl.c   | 35 ---
 gcc/graphite-scop-detection.c | 28 ++-
 .../gcc.dg/goacc/graphite-parameter-1.c   | 21 +++
 .../gcc.dg/goacc/graphite-parameter-2.c   | 23 
 4 files changed, 101 insertions(+), 6 deletions(-)
 create mode 100644 gcc/testsuite/gcc.dg/goacc/graphite-parameter-1.c
 create mode 100644 gcc/testsuite/gcc.dg/goacc/graphite-parameter-2.c

diff --git a/gcc/graphite-optimize-isl.c b/gcc/graphite-optimize-isl.c
index 019452700a49..4eecbd20b740 100644
--- a/gcc/graphite-optimize-isl.c
+++ b/gcc/graphite-optimize-isl.c
@@ -38,6 +38,7 @@ along with GCC; see the file COPYING3.  If not see
 #include "dumpfile.h"
 #include "tree-vectorizer.h"
 #include "graphite.h"
+#include "graphite-oacc.h"


 /* get_schedule_for_node_st - Improve schedule for the schedule node.
@@ -115,6 +116,14 @@ optimize_isl (scop_p scop, bool oacc_enabled_graphite)
   int old_err = isl_options_get_on_error (scop->isl_context);
   int old_max_operations = isl_ctx_get_max_operations (scop->isl_context);
   int max_operations = param_max_isl_operations;
+
+  /* The default value for param_max_isl_operations is easily exceeded
+ by "kernels" loops in existing OpenACC codes.  Raise the values
+ significantly since analyzing those loops is crucial. */
+  if (param_max_isl_operations == 35 /* default value */
+  && oacc_function_p (cfun))
+max_operations = 200;
+
   if (max_operations)
 isl_ctx_set_max_operations (scop->isl_context, max_operations);
   isl_options_set_on_error (scop->isl_context, ISL_ON_ERROR_CONTINUE);
@@ -164,11 +173,27 @@ optimize_isl (scop_p scop, bool oacc_enabled_graphite)
  dump_user_location_t loc = find_loop_location
(scop->scop_info->region.entry->dest->loop_father);
  if (isl_ctx_last_error (scop->isl_context) == isl_error_quota)
-   dump_printf_loc (MSG_MISSED_OPTIMIZATION, loc,
-"loop nest not optimized, optimization timed out "
-"after %d operations [--param 
max-isl-operations]\n",
-max_operations);
- else
+   {
+  if (oacc_function_p (cfun))
+   {
+ /* Special casing for OpenACC to unify diagnostic messages
+here and in graphite-scop-detection.c. */
+  dump_printf_loc (MSG_MISSED_OPTIMIZATION, loc,
+   "data-dependence analysis of OpenACC loop "
+   "nest "
+   "failed; try increasing the value of "
+   "--param="
+   "max-isl-operations=%d.\n",
+   max_operations);
+}
+  else
+dump_printf_loc (MSG_MISSED_OPTIMIZATION, loc,
+ "loop nest not optimized, optimization timed "
+ "out after %d operations [--param "
+ "max-isl-operations]\n",
+ max_operations);
+}
+  else
dump_printf_loc (MSG_MISSED_OPTIMIZATION, loc,
 "loop nest not optimized, ISL signalled an 
error\n");
}
diff --git a/gcc/graphite-scop-detection.c b/gcc/graphite-scop-detection.c
index 8b41044bce5e..afc955cc97eb 100644
--- a/gcc/graphite-scop-detection.c
+++ b/gcc/graphite-scop-detection.c
@@ -2056,6 +2056,9 @@ determine_openacc_reductions (scop_p scop)
   }
 }

+
+extern dump_user_location_t find_loop_location (class loop *);
+
 /* Find Static Control Parts (SCoP) in the current function and pushes
them to SCOPS.  */

@@ -2109,6 +2112,11 @@ build_scops (vec *scops)

[OG11][committed][PATCH 18/22] openacc: Disable pass_pre on outlined functions analyzed by Graphite

2021-11-17 Thread Frederik Harwath
The additional dependences introduced by partial redundancy
elimination proper and by the code hoisting step of the pass very
often cause Graphite to fail on OpenACC functions. On the other hand,
the pass can also enable the analysis of OpenACC loops (cf. e.g. the
loop-auto-transfer-4.f90 testcase), for instance, because full
redundancy elimination removes definitions that would otherwise
prevent the creation of runtime alias checks outside of the SCoP.

This commit disables the actual partial redundancy elimination step as
well as the code hoisting step of pass_pre on OpenACC functions that
might be handled by Graphite.

gcc/ChangeLog:

* tree-ssa-pre.c (insert): Skip any insertions in OpenACC
functions that might be processed by Graphite.
---
 gcc/tree-ssa-pre.c | 17 +
 1 file changed, 17 insertions(+)

diff --git a/gcc/tree-ssa-pre.c b/gcc/tree-ssa-pre.c
index 2aedc31e1d73..b904354e4c78 100644
--- a/gcc/tree-ssa-pre.c
+++ b/gcc/tree-ssa-pre.c
@@ -51,6 +51,7 @@ along with GCC; see the file COPYING3.  If not see
 #include "tree-ssa-dce.h"
 #include "tree-cfgcleanup.h"
 #include "alias.h"
+#include "graphite-oacc.h"

 /* Even though this file is called tree-ssa-pre.c, we actually
implement a bit more than just PRE here.  All of them piggy-back
@@ -3736,6 +3737,22 @@ do_hoist_insertion (basic_block block)
 static void
 insert (void)
 {
+
+/* The additional dependences introduced by the code insertions
+ can cause Graphite's dependence analysis to fail .  Without
+ special handling of those dependences in Graphite, it seems
+ better to skip this step if OpenACC loops that need to be handled
+ by Graphite are found.  Note that the full redundancy elimination
+ step of this pass is useful for the purpose of dependence
+ analysis, for instance, because it can remove definitions from
+ SCoPs that would otherwise prevent the creation of runtime alias
+ checks since those may only use definitions that are available
+ before the SCoP. */
+
+  if (oacc_function_p (cfun)
+  && ::graphite_analyze_oacc_function_p (cfun))
+return;
+
   basic_block bb;

   FOR_ALL_BB_FN (bb, cfun)
--
2.33.0

-
Siemens Electronic Design Automation GmbH; Anschrift: Arnulfstraße 201, 80634 
München; Gesellschaft mit beschränkter Haftung; Geschäftsführer: Thomas 
Heurung, Frank Thürauf; Sitz der Gesellschaft: München; Registergericht 
München, HRB 106955


[OG11][committed][PATCH 17/22] openacc: Handle internal function calls in pass_lim

2021-11-17 Thread Frederik Harwath
The loop invariant motion pass correctly refuses to move statements
out of a loop if any other statement in the loop is unanalyzable.  The
pass does not know how to handle the OpenACC internal function calls;
this was not necessary until recently, when the OpenACC device
lowering pass was moved to a later position in the pass pipeline.

This commit changes pass_lim to ignore the OpenACC internal function
calls which do not contain any memory references. The hoisting enabled
by this change can be useful for the data-dependence analysis in
Graphite; for instance, in the outlined functions for OpenACC regions,
all invariant accesses to the ".omp_data_i" struct should be hoisted
out of the OpenACC loop.  This is particularly important for variables
that were scalars in the original loop and which have been turned into
accesses to the struct by the outlining process.  Not hoisting those
can prevent scalar evolution analysis which is crucial for Graphite.
Since any hoisting that introduces intermediate names - and hence,
"fake" dependences - inside the analyzed nest can be harmful to
data-dependence analysis, a flag to restrict the hoisting in OpenACC
functions is added to the pass. The pass instance that executes before
Graphite now runs with this flag set to true and the pass instance
after Graphite runs unrestricted.

A more precise way of selecting the statements for which hoisting
should be enabled is left for a future improvement.

gcc/ChangeLog:
* passes.def: Set restrict_oacc_hoisting to true for the early
pass_lim instance.
* tree-ssa-loop-im.c (movement_possibility): Add
restrict_oacc_hoisting flag to function; restrict movement if set.
(compute_invariantness): Add restrict_oacc_hoisting flag and pass it on.
(gather_mem_refs_stmt): Skip IFN_GOACC_LOOP and IFN_UNIQUE
calls.
(loop_invariant_motion_in_fun): Add restrict_oacc_hoisting flag and
pass it on.
(pass_lim::execute): Pass on new flags.
* tree-ssa-loop-manip.h (loop_invariant_motion_in_fun): Adjust 
declaration.
* gimple-loop-interchange.cc (pass_linterchange::execute): Adjust call 
to
loop_invariant_motion_in_fun.
---
 gcc/gimple-loop-interchange.cc |  2 +-
 gcc/passes.def |  2 +-
 gcc/tree-ssa-loop-im.c | 58 --
 gcc/tree-ssa-loop-manip.h  |  2 +-
 4 files changed, 52 insertions(+), 12 deletions(-)

diff --git a/gcc/gimple-loop-interchange.cc b/gcc/gimple-loop-interchange.cc
index 7b799eca805c..d617438910fd 100644
--- a/gcc/gimple-loop-interchange.cc
+++ b/gcc/gimple-loop-interchange.cc
@@ -2096,7 +2096,7 @@ pass_linterchange::execute (function *fun)
   if (changed_p)
 {
   unsigned todo = TODO_update_ssa_only_virtuals;
-  todo |= loop_invariant_motion_in_fun (cfun, false);
+  todo |= loop_invariant_motion_in_fun (cfun, false, false);
   scev_reset ();
   return todo;
 }
diff --git a/gcc/passes.def b/gcc/passes.def
index 48c9821011f0..d1dedbc287e2 100644
--- a/gcc/passes.def
+++ b/gcc/passes.def
@@ -247,7 +247,7 @@ along with GCC; see the file COPYING3.  If not see
   NEXT_PASS (pass_cse_sincos);
   NEXT_PASS (pass_optimize_bswap);
   NEXT_PASS (pass_laddress);
-  NEXT_PASS (pass_lim);
+  NEXT_PASS (pass_lim, true /* restrict_oacc_hoisting */);
   NEXT_PASS (pass_walloca, false);
   NEXT_PASS (pass_pre);
   NEXT_PASS (pass_sink_code);
diff --git a/gcc/tree-ssa-loop-im.c b/gcc/tree-ssa-loop-im.c
index 7de47edbcb30..b392ae609aaf 100644
--- a/gcc/tree-ssa-loop-im.c
+++ b/gcc/tree-ssa-loop-im.c
@@ -47,6 +47,8 @@ along with GCC; see the file COPYING3.  If not see
 #include "builtins.h"
 #include "tree-dfa.h"
 #include "dbgcnt.h"
+#include "graphite-oacc.h"
+#include "internal-fn.h"

 /* TODO:  Support for predicated code motion.  I.e.

@@ -320,11 +322,23 @@ enum move_pos
Otherwise return MOVE_IMPOSSIBLE.  */

 enum move_pos
-movement_possibility (gimple *stmt)
+movement_possibility (gimple *stmt, bool restrict_oacc_hoisting)
 {
   tree lhs;
   enum move_pos ret = MOVE_POSSIBLE;

+  if (restrict_oacc_hoisting && oacc_get_fn_attrib (cfun->decl)
+  && gimple_code (stmt) == GIMPLE_ASSIGN)
+{
+  tree rhs = gimple_assign_rhs1 (stmt);
+
+  if (TREE_CODE (rhs) == VIEW_CONVERT_EXPR)
+   rhs = TREE_OPERAND (rhs, 0);
+
+  if (TREE_CODE (rhs) == ARRAY_REF)
+ return MOVE_IMPOSSIBLE;
+}
+
   if (flag_unswitch_loops
   && gimple_code (stmt) == GIMPLE_COND)
 {
@@ -974,7 +988,7 @@ rewrite_bittest (gimple_stmt_iterator *bsi)
statements.  */

 static void
-compute_invariantness (basic_block bb)
+compute_invariantness (basic_block bb, bool restrict_oacc_hoisting)
 {
   enum move_pos pos;
   gimple_stmt_iterator bsi;
@@ -1002,7 +1016,7 @@ compute_invariantness (basic_block bb)
   {
stmt = gsi_stmt (bsi);

-   pos = movement_possibility (stmt);
+   pos = movement_possibility (stmt, 

[OG11][committed][PATCH 16/22] openacc: Warn about "independent" "kernels" loops with data-dependences

2021-11-17 Thread Frederik Harwath
This commit concerns loops in OpenACC "kernels" regions that have been marked
up with an explicit "independent" clause by the user, but for which Graphite
found data dependences.  A discussion on the private internal OpenACC mailing
list suggested that warning the user about the dependences would be a more
acceptable solution than reverting the user's decision. This behavior is
implemented by the present commit.

gcc/ChangeLog:

* common.opt: Add flag Wopenacc-false-independent.
* omp-offload.c (oacc_loop_warn_if_false_independent): New function.
(oacc_loop_fixed_partitions): Call from here.
---
 gcc/common.opt|  5 +
 gcc/omp-offload.c | 49 +++
 2 files changed, 54 insertions(+)

diff --git a/gcc/common.opt b/gcc/common.opt
index aa695e56dc48..4c38ed5cf9ab 100644
--- a/gcc/common.opt
+++ b/gcc/common.opt
@@ -838,6 +838,11 @@ Wtsan
 Common Var(warn_tsan) Init(1) Warning
 Warn about unsupported features in ThreadSanitizer.

+Wopenacc-false-independent
+Common Var(warn_openacc_false_independent) Init(1) Warning
+Warn in case a loop in an OpenACC \"kernels\" region has an \"independent\"
+clause but analysis shows that it has loop-carried dependences.
+
 Xassembler
 Driver Separate

diff --git a/gcc/omp-offload.c b/gcc/omp-offload.c
index 94a975a88660..b806e36ef515 100644
--- a/gcc/omp-offload.c
+++ b/gcc/omp-offload.c
@@ -2043,6 +2043,51 @@ oacc_loop_transform_auto_into_independent (oacc_loop 
*loop)
   return true;
 }

+/* Emit a warning if LOOP has an "independent" clause but Graphite's
+   analysis shows that it has data dependences. Note that we respect
+   the user's explicit decision to parallelize the loop but we
+   nevertheless warn that this decision could be wrong. */
+
+static void
+oacc_loop_warn_if_false_independent (oacc_loop *loop)
+{
+  if (!optimize)
+return;
+
+  if (loop->routine)
+return;
+
+  /* TODO Warn about "auto" & "independent" in "parallel" regions? */
+  if (!oacc_parallel_kernels_graphite_fun_p ())
+return;
+
+  if (!(loop->flags & OLF_INDEPENDENT))
+return;
+
+  bool analyzed = false;
+  bool can_be_parallel = oacc_loop_can_be_parallel_p (loop, analyzed);
+  loop_p cfg_loop = oacc_loop_get_cfg_loop (loop);
+
+  if (cfg_loop && cfg_loop->inner && !analyzed)
+{
+  if (dump_enabled_p ())
+   {
+ const dump_user_location_t loc
+   = dump_user_location_t::from_location_t (loop->loc);
+ dump_printf_loc (MSG_MISSED_OPTIMIZATION, loc,
+  "'independent' loop in 'kernels' region has not been 
"
+  "analyzed (cf. 'graphite' "
+  "dumps for more information).\n");
+   }
+  return;
+}
+
+  if (!can_be_parallel)
+warning_at (loop->loc, 0,
+"loop has \"independent\" clause but data dependences were "
+"found.");
+}
+
 /* Walk the OpenACC loop hierarchy checking and assigning the
programmer-specified partitionings.  OUTER_MASK is the partitioning
this loop is contained within.  Return mask of partitioning
@@ -2094,6 +2139,10 @@ oacc_loop_fixed_partitions (oacc_loop *loop, unsigned 
outer_mask)
}
}

+  /* TODO Is this flag needed? Perhaps use -Wopenacc-parallelism? */
+  if (warn_openacc_false_independent)
+oacc_loop_warn_if_false_independent (loop);
+
   if (maybe_auto && (loop->flags & OLF_INDEPENDENT))
{
  loop->flags |= OLF_AUTO;
--
2.33.0

-
Siemens Electronic Design Automation GmbH; Anschrift: Arnulfstraße 201, 80634 
München; Gesellschaft mit beschränkter Haftung; Geschäftsführer: Thomas 
Heurung, Frank Thürauf; Sitz der Gesellschaft: München; Registergericht 
München, HRB 106955


[OG11][committed][PATCH 14/22] openacc: Add data optimization pass

2021-11-17 Thread Frederik Harwath
From: Andrew Stubbs 

Address PR90591 "Avoid unnecessary data transfer out of OMP
construct", for simple (but common) cases.

This commit adds a pass that optimizes data mapping clauses.
Currently, it can optimize copy/map(tofrom) clauses involving scalars
to copyin/map(to) and further to "private".  The pass is restricted to
"kernels" regions but could be extended to other types of regions.

gcc/ChangeLog:

* Makefile.in: Add pass.
* doc/gimple.texi: TODO.
* gimple-walk.c (walk_gimple_seq_mod): Adjust for backward walking.
* gimple-walk.h (struct walk_stmt_info): Add field.
* passes.def: Add new pass.
* tree-pass.h (make_pass_omp_data_optimize): New declaration.
* omp-data-optimize.cc: New file.

libgomp/ChangeLog:

* testsuite/libgomp.oacc-c-c++-common/kernels-decompose-1.c:
Expect optimization messages.
* testsuite/libgomp.oacc-fortran/pr94358-1.f90: Likewise.

gcc/testsuite/ChangeLog:

* c-c++-common/goacc/note-parallelism-1-kernels-loops.c: Likewise.
* c-c++-common/goacc/note-parallelism-1-kernels-straight-line.c:
Likewise.
* c-c++-common/goacc/note-parallelism-kernels-loops.c: Likewise.
* c-c++-common/goacc/uninit-copy-clause.c: Likewise.
* gfortran.dg/goacc/uninit-copy-clause.f95: Likewise.
* c-c++-common/goacc/omp_data_optimize-1.c: New test.
* g++.dg/goacc/omp_data_optimize-1.C: New test.
* gfortran.dg/goacc/omp_data_optimize-1.f90: New test.

Co-Authored-By: Thomas Schwinge 
---
 gcc/Makefile.in   |   1 +
 gcc/doc/gimple.texi   |   2 +
 gcc/gimple-walk.c |  15 +-
 gcc/gimple-walk.h |   6 +
 gcc/omp-data-optimize.cc  | 951 ++
 gcc/passes.def|   1 +
 .../goacc/note-parallelism-1-kernels-loops.c  |   7 +-
 ...note-parallelism-1-kernels-straight-line.c |   9 +-
 .../goacc/note-parallelism-kernels-loops.c|  10 +-
 .../c-c++-common/goacc/omp_data_optimize-1.c  | 677 +
 .../c-c++-common/goacc/uninit-copy-clause.c   |   6 +
 .../g++.dg/goacc/omp_data_optimize-1.C| 169 
 .../gfortran.dg/goacc/omp_data_optimize-1.f90 | 588 +++
 .../gfortran.dg/goacc/uninit-copy-clause.f95  |   2 +
 gcc/tree-pass.h   |   1 +
 .../kernels-decompose-1.c |   2 +
 .../libgomp.oacc-fortran/pr94358-1.f90|   4 +
 17 files changed, 2444 insertions(+), 7 deletions(-)
 create mode 100644 gcc/omp-data-optimize.cc
 create mode 100644 gcc/testsuite/c-c++-common/goacc/omp_data_optimize-1.c
 create mode 100644 gcc/testsuite/g++.dg/goacc/omp_data_optimize-1.C
 create mode 100644 gcc/testsuite/gfortran.dg/goacc/omp_data_optimize-1.f90

diff --git a/gcc/Makefile.in b/gcc/Makefile.in
index 4ebdcdbc5f8c..8c02b85d2a96 100644
--- a/gcc/Makefile.in
+++ b/gcc/Makefile.in
@@ -1507,6 +1507,7 @@ OBJS = \
omp-low.o \
omp-oacc-kernels-decompose.o \
omp-simd-clone.o \
+   omp-data-optimize.o \
opt-problem.o \
optabs.o \
optabs-libfuncs.o \
diff --git a/gcc/doc/gimple.texi b/gcc/doc/gimple.texi
index 4b3d7d7452e3..a83e17f71a40 100644
--- a/gcc/doc/gimple.texi
+++ b/gcc/doc/gimple.texi
@@ -2778,4 +2778,6 @@ calling @code{walk_gimple_stmt} on each one.  @code{WI} 
is as in
 @code{walk_gimple_stmt}.  If @code{walk_gimple_stmt} returns non-@code{NULL}, 
the walk
 is stopped and the value returned.  Otherwise, all the statements
 are walked and @code{NULL_TREE} returned.
+
+TODO update for forward vs. backward.
 @end deftypefn
diff --git a/gcc/gimple-walk.c b/gcc/gimple-walk.c
index cd287860994e..66fd491844d7 100644
--- a/gcc/gimple-walk.c
+++ b/gcc/gimple-walk.c
@@ -32,6 +32,8 @@ along with GCC; see the file COPYING3.  If not see
 /* Walk all the statements in the sequence *PSEQ calling walk_gimple_stmt
on each one.  WI is as in walk_gimple_stmt.

+   TODO update for forward vs. backward.
+
If walk_gimple_stmt returns non-NULL, the walk is stopped, and the
value is stored in WI->CALLBACK_RESULT.  Also, the statement that
produced the value is returned if this statement has not been
@@ -44,9 +46,10 @@ gimple *
 walk_gimple_seq_mod (gimple_seq *pseq, walk_stmt_fn callback_stmt,
 walk_tree_fn callback_op, struct walk_stmt_info *wi)
 {
-  gimple_stmt_iterator gsi;
+  bool forward = !(wi && wi->backward);

-  for (gsi = gsi_start (*pseq); !gsi_end_p (gsi); )
+  gimple_stmt_iterator gsi = forward ? gsi_start (*pseq) : gsi_last (*pseq);
+  for (; !gsi_end_p (gsi); )
 {
   tree ret = walk_gimple_stmt (, callback_stmt, callback_op, wi);
   if (ret)
@@ -60,7 +63,13 @@ walk_gimple_seq_mod (gimple_seq *pseq, walk_stmt_fn 
callback_stmt,
}

   if (!wi->removed_stmt)
-   gsi_next (&gsi);
+   {
+ if (forward)
+   gsi_next (&gsi);
+   

[OG11][committed][PATCH 15/22] openacc: Add runtime alias checking for OpenACC kernels

2021-11-17 Thread Frederik Harwath
From: Andrew Stubbs 

This commit adds the code generation for the runtime alias checks for
OpenACC loops that have been analyzed by Graphite.  The runtime alias
check condition gets generated in Graphite. It is evaluated by the
code generated for the IFN_GOACC_LOOP internal function calls.  If
aliasing is detected at runtime, the execution dimensions get adjusted
to execute the affected loops sequentially.

gcc/ChangeLog:

* graphite-isl-ast-to-gimple.c: Include internal-fn.h.
(graphite_oacc_analyze_scop): Implement runtime alias checks.
* omp-expand.c (expand_oacc_for): Add an additional "noalias" parameter
to GOACC_LOOP internal calls, and initialise it to integer_one_node.
* omp-offload.c (oacc_xform_loop): Integrate the runtime alias check
into the GOACC_LOOP expansion.

libgomp/ChangeLog:

* testsuite/libgomp.oacc-c-c++-common/runtime-alias-check-1.c: New test.
* testsuite/libgomp.oacc-c-c++-common/runtime-alias-check-2.c: New test.
---
 gcc/graphite-isl-ast-to-gimple.c  | 122 ++
 gcc/graphite-scop-detection.c |  18 +-
 gcc/omp-expand.c  |  37 +-
 gcc/omp-offload.c | 413 ++
 .../runtime-alias-check-1.c   |  79 
 .../runtime-alias-check-2.c   |  90 
 6 files changed, 550 insertions(+), 209 deletions(-)
 create mode 100644 
libgomp/testsuite/libgomp.oacc-c-c++-common/runtime-alias-check-1.c
 create mode 100644 
libgomp/testsuite/libgomp.oacc-c-c++-common/runtime-alias-check-2.c

diff --git a/gcc/graphite-isl-ast-to-gimple.c b/gcc/graphite-isl-ast-to-gimple.c
index c516170d9493..bdabe588c3d8 100644
--- a/gcc/graphite-isl-ast-to-gimple.c
+++ b/gcc/graphite-isl-ast-to-gimple.c
@@ -58,6 +58,7 @@ along with GCC; see the file COPYING3.  If not see
 #include "graphite.h"
 #include "graphite-oacc.h"
 #include "stdlib.h"
+#include "internal-fn.h"

 struct ast_build_info
 {
@@ -1698,6 +1699,127 @@ graphite_oacc_analyze_scop (scop_p scop)
   print_isl_schedule (dump_file, scop->original_schedule);
 }

+  if (flag_graphite_runtime_alias_checks
+  && scop->unhandled_alias_ddrs.length () > 0)
+{
+  sese_info_p region = scop->scop_info;
+
+  /* Usually there will be a chunking loop with the actual work loop
+inside it.  In some corner cases there may only be one loop.  */
+  loop_p top_loop = region->region.entry->dest->loop_father;
+  loop_p active_loop = top_loop->inner ? top_loop->inner : top_loop;
+  tree cond = generate_alias_cond (scop->unhandled_alias_ddrs, 
active_loop);
+
+  /* Walk back to GOACC_LOOP block.  */
+  basic_block goacc_loop_block = region->region.entry->src;
+
+  /* Find the GOACC_LOOP calls. If there aren't any then this is not an
+OpenACC kernels loop and will need different handling.  */
+  gimple_stmt_iterator gsitop = gsi_start_bb (goacc_loop_block);
+  while (!gsi_end_p (gsitop)
+&& (!is_gimple_call (gsi_stmt (gsitop))
+|| !gimple_call_internal_p (gsi_stmt (gsitop))
+|| (gimple_call_internal_fn (gsi_stmt (gsitop))
+!= IFN_GOACC_LOOP)))
+   gsi_next (&gsitop);
+
+  if (!gsi_end_p (gsitop))
+   {
+ /* Move the GOACC_LOOP CHUNK and STEP calls to after any hoisted
+statements.  There ought not be any problematic dependencies 
because
+the chunk size and step are only computed for very specific 
purposes.
+They may not be at the very top of the block, but they should be
+found together (the asserts test this assumption). */
+ gimple_stmt_iterator gsibottom = gsi_last_bb (goacc_loop_block);
+ gsi_move_after (&gsitop, &gsibottom);
+ gimple_stmt_iterator gsiinsert = gsibottom;
+ gcc_checking_assert (is_gimple_call (gsi_stmt (gsitop))
+  && gimple_call_internal_p (gsi_stmt (gsitop))
+  && (gimple_call_internal_fn (gsi_stmt (gsitop))
+  == IFN_GOACC_LOOP));
+ gsi_move_after (&gsitop, &gsibottom);
+
+ /* Insert "noalias_p = COND" before the GOACC_LOOP statements.
+Note that these likely depend on some of the hoisted statements.  
*/
+ tree cond_val = force_gimple_operand_gsi (&gsiinsert, cond, true, 
NULL,
+   true, GSI_NEW_STMT);
+
+ /* Insert the cond_val into each GOACC_LOOP call in the region.  */
+ for (int n = -1; n < (int)region->bbs.length (); n++)
+   {
+ /* Cover the region plus goacc_loop_block.  */
+ basic_block bb = n < 0 ? goacc_loop_block : region->bbs[n];
+
+ for (gimple_stmt_iterator gsi = gsi_start_bb (bb);
+  !gsi_end_p (gsi);
+  gsi_next (&gsi))
+   {
+ gimple *stmt = gsi_stmt (gsi);
+ if 

[OG11][committed][PATCH 13/22] Add function for printing a single OMP_CLAUSE

2021-11-17 Thread Frederik Harwath
Commit 89f4f339130c ("For 'OMP_CLAUSE' in 'dump_generic_node', dump
the whole OMP clause chain") changed the dumping behavior for
OMP_CLAUSEs.  The old behavior is required for a follow-up
commit ("openacc: Add data optimization pass") that optimizes single
OMP_CLAUSEs.

gcc/ChangeLog:

* tree-pretty-print.c (print_omp_clause_to_str): Add new function.
* tree-pretty-print.h (print_omp_clause_to_str): Add declaration.
---
 gcc/tree-pretty-print.c | 11 +++
 gcc/tree-pretty-print.h |  1 +
 2 files changed, 12 insertions(+)

diff --git a/gcc/tree-pretty-print.c b/gcc/tree-pretty-print.c
index d769cd8f07c5..2e0255176c76 100644
--- a/gcc/tree-pretty-print.c
+++ b/gcc/tree-pretty-print.c
@@ -1402,6 +1402,17 @@ dump_omp_clause (pretty_printer *pp, tree clause, int 
spc, dump_flags_t flags)
 }
 }

+/* Print the single clause at the top of the clause chain C to a string and
+   return it. Note that print_generic_expr_to_str prints the whole clause chain
+   instead. The caller must free the returned memory. */
+
+char *
+print_omp_clause_to_str (tree c)
+{
+  pretty_printer pp;
+  dump_omp_clause (&pp, c, 0, TDF_VOPS|TDF_MEMSYMS);
+  return xstrdup (pp_formatted_text (&pp));
+}

 /* Dump chain of OMP clauses.

diff --git a/gcc/tree-pretty-print.h b/gcc/tree-pretty-print.h
index cafe9aa95989..3368cb9f1544 100644
--- a/gcc/tree-pretty-print.h
+++ b/gcc/tree-pretty-print.h
@@ -41,6 +41,7 @@ extern void print_generic_expr (FILE *, tree, dump_flags_t = 
TDF_NONE);
 extern char *print_generic_expr_to_str (tree);
 extern void dump_omp_clauses (pretty_printer *, tree, int, dump_flags_t,
  bool = true);
+extern char *print_omp_clause_to_str (tree);
 extern void dump_omp_atomic_memory_order (pretty_printer *,
  enum omp_memory_order);
 extern void dump_omp_loop_non_rect_expr (pretty_printer *, tree, int,
--
2.33.0

-
Siemens Electronic Design Automation GmbH; Anschrift: Arnulfstraße 201, 80634 
München; Gesellschaft mit beschränkter Haftung; Geschäftsführer: Thomas 
Heurung, Frank Thürauf; Sitz der Gesellschaft: München; Registergericht 
München, HRB 106955


[OG11][committed][PATCH 11/22] openacc: Add further kernels tests

2021-11-17 Thread Frederik Harwath
Add some copies of tests to continue covering the old "parloops"-based
"kernels" implementation - until it gets removed from GCC - and
add further tests for the new Graphite-based implementation.

libgomp/ChangeLog:

* testsuite/libgomp.oacc-fortran/parallel-loop-auto-reduction-2.f90:
New test.

gcc/testsuite/ChangeLog:

* c-c++-common/goacc/classify-kernels-unparallelized-graphite.c:
New test.
* c-c++-common/goacc/classify-kernels-unparallelized-parloops.c:
New test.
* c-c++-common/goacc/kernels-decompose-1-parloops.c: New test.
* c-c++-common/goacc/kernels-reduction-parloops.c: New test.
* c-c++-common/goacc/loop-auto-reductions.c: New test.
* c-c++-common/goacc/note-parallelism-1-kernels-loop-auto-parloops.c:
New test.
* c-c++-common/goacc/note-parallelism-kernels-loops-1.c: New test.
* c-c++-common/goacc/note-parallelism-kernels-loops-parloops.c:
New test.
* gfortran.dg/goacc/classify-kernels-unparallelized-parloops.f95:
New test.
* gfortran.dg/goacc/kernels-conversion.f95: New test.
* gfortran.dg/goacc/kernels-decompose-1-parloops.f95: New test.
* gfortran.dg/goacc/kernels-decompose-parloops-2.f95: New test.
* gfortran.dg/goacc/kernels-loop-data-parloops-2.f95: New test.
* gfortran.dg/goacc/kernels-loop-parloops-2.f95: New test.
* gfortran.dg/goacc/kernels-loop-parloops.f95: New test.
* gfortran.dg/goacc/kernels-reductions.f90: New test.
---
 ...classify-kernels-unparallelized-graphite.c |  41 +
 ...classify-kernels-unparallelized-parloops.c |  47 ++
 .../goacc/kernels-decompose-1-parloops.c  | 125 ++
 .../goacc/kernels-reduction-parloops.c|  36 
 .../c-c++-common/goacc/loop-auto-reductions.c |  22 +++
 ...parallelism-1-kernels-loop-auto-parloops.c | 128 +++
 .../goacc/note-parallelism-kernels-loops-1.c  |  61 +++
 .../note-parallelism-kernels-loops-parloops.c |  53 ++
 ...assify-kernels-unparallelized-parloops.f95 |  44 +
 .../gfortran.dg/goacc/kernels-conversion.f95  |  52 ++
 .../goacc/kernels-decompose-1-parloops.f95| 121 ++
 .../goacc/kernels-decompose-parloops-2.f95| 154 ++
 .../goacc/kernels-loop-data-parloops-2.f95|  52 ++
 .../goacc/kernels-loop-parloops-2.f95 |  45 +
 .../goacc/kernels-loop-parloops.f95   |  39 +
 .../gfortran.dg/goacc/kernels-reductions.f90  |  37 +
 .../parallel-loop-auto-reduction-2.f90|  98 +++
 17 files changed, 1155 insertions(+)
 create mode 100644 
gcc/testsuite/c-c++-common/goacc/classify-kernels-unparallelized-graphite.c
 create mode 100644 
gcc/testsuite/c-c++-common/goacc/classify-kernels-unparallelized-parloops.c
 create mode 100644 
gcc/testsuite/c-c++-common/goacc/kernels-decompose-1-parloops.c
 create mode 100644 
gcc/testsuite/c-c++-common/goacc/kernels-reduction-parloops.c
 create mode 100644 gcc/testsuite/c-c++-common/goacc/loop-auto-reductions.c
 create mode 100644 
gcc/testsuite/c-c++-common/goacc/note-parallelism-1-kernels-loop-auto-parloops.c
 create mode 100644 
gcc/testsuite/c-c++-common/goacc/note-parallelism-kernels-loops-1.c
 create mode 100644 
gcc/testsuite/c-c++-common/goacc/note-parallelism-kernels-loops-parloops.c
 create mode 100644 
gcc/testsuite/gfortran.dg/goacc/classify-kernels-unparallelized-parloops.f95
 create mode 100644 gcc/testsuite/gfortran.dg/goacc/kernels-conversion.f95
 create mode 100644 
gcc/testsuite/gfortran.dg/goacc/kernels-decompose-1-parloops.f95
 create mode 100644 
gcc/testsuite/gfortran.dg/goacc/kernels-decompose-parloops-2.f95
 create mode 100644 
gcc/testsuite/gfortran.dg/goacc/kernels-loop-data-parloops-2.f95
 create mode 100644 gcc/testsuite/gfortran.dg/goacc/kernels-loop-parloops-2.f95
 create mode 100644 gcc/testsuite/gfortran.dg/goacc/kernels-loop-parloops.f95
 create mode 100644 gcc/testsuite/gfortran.dg/goacc/kernels-reductions.f90
 create mode 100644 
libgomp/testsuite/libgomp.oacc-fortran/parallel-loop-auto-reduction-2.f90

diff --git 
a/gcc/testsuite/c-c++-common/goacc/classify-kernels-unparallelized-graphite.c 
b/gcc/testsuite/c-c++-common/goacc/classify-kernels-unparallelized-graphite.c
new file mode 100644
index ..77f4524907a9
--- /dev/null
+++ 
b/gcc/testsuite/c-c++-common/goacc/classify-kernels-unparallelized-graphite.c
@@ -0,0 +1,41 @@
+/* Check offloaded function's attributes and classification for unparallelized
+   OpenACC 'kernels' with Graphite kernels handling (default).  */
+
+/* { dg-additional-options "-O2" }
+   { dg-additional-options "-fno-openacc-kernels-annotate-loops" }
+   { dg-additional-options "-fopt-info-optimized-omp" }
+   { dg-additional-options "-fopt-info-note-omp" }
+   { dg-additional-options "-fdump-tree-ompexp" }
+   { dg-additional-options "-fdump-tree-graphite-details" }
+   { dg-additional-options "-fdump-tree-oaccloops1" }
+  

[OG11][committed][PATCH 12/22] openacc: Remove unused partitioning in "kernels" regions

2021-11-17 Thread Frederik Harwath
With the old "kernels" handling, unparallelized regions would
get executed with 1x1x1 partitioning even if the user provided
explicit num_gangs, num_workers clauses etc.

This commit restores this behavior by removing unused partitioning
after assigning the parallelism dimensions to loops.

gcc/ChangeLog:

* omp-offload.c (oacc_remove_unused_partitioning): New function
for removing partitioning that is not used by any loop.
(oacc_validate_dims): Call oacc_remove_unused_partitioning and
enable warnings about unused partitioning.

libgomp/ChangeLog:

* testsuite/libgomp.oacc-c-c++-common/acc_prof-kernels-1.c: Adjust
expectations.
---
 gcc/omp-offload.c | 51 +--
 .../acc_prof-kernels-1.c  | 19 ---
 2 files changed, 59 insertions(+), 11 deletions(-)

diff --git a/gcc/omp-offload.c b/gcc/omp-offload.c
index f5cb222efd8c..68cc5a9d9e5d 100644
--- a/gcc/omp-offload.c
+++ b/gcc/omp-offload.c
@@ -1215,6 +1215,39 @@ oacc_parse_default_dims (const char *dims)
   targetm.goacc.validate_dims (NULL_TREE, oacc_min_dims, -2, 0);
 }

+/* Remove parallelism dimensions below LEVEL which are not set in USED
+   from DIMS and emit a warning pointing to the location of FN. */
+
+static void
+oacc_remove_unused_partitioning (tree fn, int *dims, int level, unsigned used)
+{
+
+  bool host_compiler = true;
+#ifdef ACCEL_COMPILER
+  host_compiler = false;
+#endif
+
+  static char const *const axes[] =
+  /* Must be kept in sync with GOMP_DIM enumeration.  */
+  { "gang", "worker", "vector" };
+
+  char removed_partitions[20] = "\0";
+  for (int ix = level >= 0 ? level : 0; ix != GOMP_DIM_MAX; ix++)
+if (!(used & GOMP_DIM_MASK (ix)) && dims[ix] >= 0)
+  {
+if (host_compiler)
+  {
+strcat (removed_partitions, axes[ix]);
+strcat (removed_partitions, " ");
+  }
+dims[ix] = -1;
+  }
+  if (removed_partitions[0] != '\0')
+warning_at (DECL_SOURCE_LOCATION (fn), OPT_Wopenacc_parallelism,
+"removed %spartitioning from %<kernels%> region",
+removed_partitions);
+}
+
 /* Validate and update the dimensions for offloaded FN.  ATTRS is the
raw attribute.  DIMS is an array of dimensions, which is filled in.
LEVEL is the partitioning level of a routine, or -1 for an offload
@@ -1235,6 +1268,7 @@ oacc_validate_dims (tree fn, tree attrs, int *dims, int 
level, unsigned used)
   for (ix = 0; ix != GOMP_DIM_MAX; ix++)
 {
   purpose[ix] = TREE_PURPOSE (pos);
+
   tree val = TREE_VALUE (pos);
   dims[ix] = val ? TREE_INT_CST_LOW (val) : -1;
   pos = TREE_CHAIN (pos);
@@ -1244,14 +1278,15 @@ oacc_validate_dims (tree fn, tree attrs, int *dims, int 
level, unsigned used)
 #ifdef ACCEL_COMPILER
   check = false;
 #endif
+
+  static char const *const axes[] =
+  /* Must be kept in sync with GOMP_DIM enumeration.  */
+  { "gang", "worker", "vector" };
+
   if (check
   && warn_openacc_parallelism
-  && !lookup_attribute ("oacc kernels", DECL_ATTRIBUTES (fn))
-  && !lookup_attribute ("oacc parallel_kernels_graphite", DECL_ATTRIBUTES 
(fn)))
+  && !lookup_attribute ("oacc kernels", DECL_ATTRIBUTES (fn)))
 {
-  static char const *const axes[] =
-  /* Must be kept in sync with GOMP_DIM enumeration.  */
-   { "gang", "worker", "vector" };
   for (ix = level >= 0 ? level : 0; ix != GOMP_DIM_MAX; ix++)
if (dims[ix] < 0)
  ; /* Defaulting axis.  */
@@ -1262,14 +1297,20 @@ oacc_validate_dims (tree fn, tree attrs, int *dims, int 
level, unsigned used)
  "region contains %s partitioned code but"
  " is not %s partitioned", axes[ix], axes[ix]);
else if (!(used & GOMP_DIM_MASK (ix)) && dims[ix] != 1)
+ {
  /* The dimension is explicitly partitioned to non-unity, but
 no use is made within the region.  */
  warning_at (DECL_SOURCE_LOCATION (fn), OPT_Wopenacc_parallelism,
  "region is %s partitioned but"
  " does not contain %s partitioned code",
  axes[ix], axes[ix]);
+  }
 }

+  if (lookup_attribute ("oacc parallel_kernels_graphite",
+ DECL_ATTRIBUTES (fn)))
+oacc_remove_unused_partitioning  (fn, dims, level, used);
+
   bool changed = targetm.goacc.validate_dims (fn, dims, level, used);

   /* Default anything left to 1 or a partitioned default.  */
diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/acc_prof-kernels-1.c 
b/libgomp/testsuite/libgomp.oacc-c-c++-common/acc_prof-kernels-1.c
index 4a9b11a3d3fe..d398b3463617 100644
--- a/libgomp/testsuite/libgomp.oacc-c-c++-common/acc_prof-kernels-1.c
+++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/acc_prof-kernels-1.c
@@ -7,6 +7,8 @@

 #include 

+/* { dg-skip-if "'kernels' not analyzed by Graphite at -O0" { *-*-* } { "-O0" 
} { "" } } 

[OG11][committed][PATCH 10/22] openacc: Add "can_be_parallel" flag info to "graph" dumps

2021-11-17 Thread Frederik Harwath
gcc/ChangeLog:

* graph.c (oacc_get_fn_attrib): New declaration.
(find_loop_location): New declaration.
(draw_cfg_nodes_for_loop): Print value of the
can_be_parallel flag at the top of loops in OpenACC
functions.
---
 gcc/graph.c | 35 ---
 1 file changed, 24 insertions(+), 11 deletions(-)

diff --git a/gcc/graph.c b/gcc/graph.c
index ce8de33ffe10..3ad07be3b309 100644
--- a/gcc/graph.c
+++ b/gcc/graph.c
@@ -191,6 +191,10 @@ draw_cfg_nodes_no_loops (pretty_printer *pp, struct 
function *fun)
 }
 }

+
+extern tree oacc_get_fn_attrib (tree);
+extern dump_user_location_t find_loop_location (class loop *);
+
 /* Draw all the basic blocks in LOOP.  Print the blocks in breath-first
order to get a good ranking of the nodes.  This function is recursive:
It first prints inner loops, then the body of LOOP itself.  */
@@ -205,17 +209,26 @@ draw_cfg_nodes_for_loop (pretty_printer *pp, int 
funcdef_no,

   if (loop->header != NULL
   && loop->latch != EXIT_BLOCK_PTR_FOR_FN (cfun))
-pp_printf (pp,
-  "\tsubgraph cluster_%d_%d {\n"
-  "\tstyle=\"filled\";\n"
-  "\tcolor=\"darkgreen\";\n"
-  "\tfillcolor=\"%s\";\n"
-  "\tlabel=\"loop %d\";\n"
-  "\tlabeljust=l;\n"
-  "\tpenwidth=2;\n",
-  funcdef_no, loop->num,
-  fillcolors[(loop_depth (loop) - 1) % 3],
-  loop->num);
+{
+  pp_printf (pp,
+ "\tsubgraph cluster_%d_%d {\n"
+ "\tstyle=\"filled\";\n"
+ "\tcolor=\"darkgreen\";\n"
+ "\tfillcolor=\"%s\";\n"
+ "\tlabel=\"loop %d %s\";\n"
+ "\tlabeljust=l;\n"
+ "\tpenwidth=2;\n",
+ funcdef_no, loop->num,
+ fillcolors[(loop_depth (loop) - 1) % 3], loop->num,
+ /* This is only meaningful for loops that have been processed
+by Graphite.
+
+TODO Use can_be_parallel_valid_p? */
+ !oacc_get_fn_attrib (cfun->decl)
+ ? ""
+ : loop->can_be_parallel ? "(can_be_parallel = true)"
+ : "(can_be_parallel = false)");
+}

   for (class loop *inner = loop->inner; inner; inner = inner->next)
 draw_cfg_nodes_for_loop (pp, funcdef_no, inner);
--
2.33.0

-
Siemens Electronic Design Automation GmbH; Anschrift: Arnulfstraße 201, 80634 
München; Gesellschaft mit beschränkter Haftung; Geschäftsführer: Thomas 
Heurung, Frank Thürauf; Sitz der Gesellschaft: München; Registergericht 
München, HRB 106955


[OG11][committed][PATCH 08/22] graphite: Add runtime alias checking

2021-11-17 Thread Frederik Harwath
Graphite rejects a SCoP if it contains a pair of data references for
which it cannot determine statically if they may alias. This happens
very often, for instance in C code which does not use explicit
"restrict".  This commit adds the possibility to analyze a SCoP
nevertheless and perform an alias check at runtime.  Then, if aliasing
is detected, the execution will fall back to the unoptimized SCoP.

TODO This needs more testing on non-OpenACC code.

gcc/ChangeLog:

* common.opt: Add fgraphite-runtime-alias-checks.
* graphite-isl-ast-to-gimple.c
(generate_alias_cond): New function.
(graphite_regenerate_ast_isl): Use from here.
* graphite-poly.c (new_scop): Create unhandled_alias_ddrs vec ...
(free_scop): and release here.
* graphite-scop-detection.c (dr_defs_outside_region): New function.
(dr_well_analyzed_for_runtime_alias_check_p): New function.
(graphite_runtime_alias_check_p): New function.
(build_alias_set): Record unhandled alias ddrs for later alias check
creation if flag_graphite_runtime_alias_checks is true instead
of failing.
* graphite.h (struct scop): Add field unhandled_alias_ddrs.
* sese.h (has_operands_from_region_p): New function.
gcc/testsuite/ChangeLog:

* gcc.dg/graphite/alias-1.c: New test.
---
 gcc/common.opt  |   4 +
 gcc/graphite-isl-ast-to-gimple.c|  60 ++
 gcc/graphite-poly.c |   2 +
 gcc/graphite-scop-detection.c   | 239 +---
 gcc/graphite.h  |   4 +
 gcc/sese.h  |  18 ++
 gcc/testsuite/gcc.dg/graphite/alias-1.c |  22 +++
 7 files changed, 326 insertions(+), 23 deletions(-)
 create mode 100644 gcc/testsuite/gcc.dg/graphite/alias-1.c

diff --git a/gcc/common.opt b/gcc/common.opt
index 771398bc03de..aa695e56dc48 100644
--- a/gcc/common.opt
+++ b/gcc/common.opt
@@ -1636,6 +1636,10 @@ fgraphite-identity
 Common Var(flag_graphite_identity) Optimization
 Enable Graphite Identity transformation.

+fgraphite-runtime-alias-checks
+Common Var(flag_graphite_runtime_alias_checks) Optimization Init(1)
+Allow Graphite to add runtime alias checks to loop-nests if aliasing cannot be 
resolved statically.
+
 fhoist-adjacent-loads
 Common Var(flag_hoist_adjacent_loads) Optimization
 Enable hoisting adjacent loads to encourage generating conditional move
diff --git a/gcc/graphite-isl-ast-to-gimple.c b/gcc/graphite-isl-ast-to-gimple.c
index 44c06016f1a2..caa0160b9bce 100644
--- a/gcc/graphite-isl-ast-to-gimple.c
+++ b/gcc/graphite-isl-ast-to-gimple.c
@@ -1456,6 +1456,34 @@ generate_entry_out_of_ssa_copies (edge false_entry,
 }
 }

+/* Create a condition that evaluates to TRUE if all ALIAS_DDRS are free of
+   aliasing. */
+
+static tree
+generate_alias_cond (vec<ddr_p> &alias_ddrs, loop_p context_loop)
+{
+  gcc_checking_assert (flag_graphite_runtime_alias_checks
+   && alias_ddrs.length () > 0);
+  gcc_checking_assert (context_loop);
+
+  auto_vec<dr_with_seg_len_pair_t> check_pairs;
+  compute_alias_check_pairs (context_loop, &alias_ddrs, &check_pairs);
+  gcc_checking_assert (check_pairs.length () > 0);
+
+  tree alias_cond = NULL_TREE;
+  create_runtime_alias_checks (context_loop, &check_pairs, &alias_cond);
+  gcc_checking_assert (alias_cond);
+
+  if (dump_file && (dump_flags & TDF_DETAILS))
+{
+  fprintf (dump_file, "Generated runtime alias check: ");
+  print_generic_expr (dump_file, alias_cond, dump_flags);
+  fprintf (dump_file, "\n");
+}
+
+  return alias_cond;
+}
+
 /* GIMPLE Loop Generator: generates loops in GIMPLE form for the given SCOP.
Return true if code generation succeeded.  */

@@ -1496,12 +1524,44 @@ graphite_regenerate_ast_isl (scop_p scop)
   region->if_region = if_region;

   loop_p context_loop = region->region.entry->src->loop_father;
+  gcc_checking_assert (context_loop);
   edge e = single_succ_edge (if_region->true_region->region.entry->dest);
   basic_block bb = split_edge (e);

   /* Update the true_region exit edge.  */
   region->if_region->true_region->region.exit = single_succ_edge (bb);

+  if (flag_graphite_runtime_alias_checks
+  && scop->unhandled_alias_ddrs.length () > 0)
+{
+  /* SCoP detection has failed to handle the aliasing between some data
+references of the SCoP statically. Generate an alias check that selects
+the newly generated version of the SCoP in the true-branch of the
+conditional if aliasing can be ruled out at runtime and the original
+version of the SCoP, otherwise. */
+
+  loop_p loop
+  = find_common_loop (scop->scop_info->region.entry->dest->loop_father,
+  scop->scop_info->region.exit->src->loop_father);
+  tree cond = generate_alias_cond (scop->unhandled_alias_ddrs, loop);
+  tree non_alias_cond = build1 (TRUTH_NOT_EXPR, boolean_type_node, cond);
+  set_ifsese_condition (region->if_region, 

[OG11][committed][PATCH 07/22] Move compute_alias_check_pairs to tree-data-ref.c

2021-11-17 Thread Frederik Harwath
Move this function from tree-loop-distribution.c to tree-data-ref.c
and make it non-static to enable its use from other parts of GCC.

gcc/ChangeLog:
* tree-loop-distribution.c (data_ref_segment_size): Remove function.
(latch_dominated_by_data_ref): Likewise.
(compute_alias_check_pairs): Likewise.

* tree-data-ref.c (data_ref_segment_size): New function,
copied from tree-loop-distribution.c
(compute_alias_check_pairs): Likewise.
(latch_dominated_by_data_ref): Likewise.

* tree-data-ref.h (compute_alias_check_pairs): New declaration.
---
 gcc/tree-data-ref.c  | 87 
 gcc/tree-data-ref.h  |  3 ++
 gcc/tree-loop-distribution.c | 87 
 3 files changed, 90 insertions(+), 87 deletions(-)

diff --git a/gcc/tree-data-ref.c b/gcc/tree-data-ref.c
index d04e95f7c285..71f8d790e618 100644
--- a/gcc/tree-data-ref.c
+++ b/gcc/tree-data-ref.c
@@ -2645,6 +2645,93 @@ create_intersect_range_checks (class loop *loop, tree 
*cond_expr,
 dump_printf (MSG_NOTE, "using an address-based overlap test\n");
 }

+/* Compute and return an expression whose value is the segment length which
+   will be accessed by DR in NITERS iterations.  */
+
+static tree
+data_ref_segment_size (struct data_reference *dr, tree niters)
+{
+  niters = size_binop (MINUS_EXPR,
+  fold_convert (sizetype, niters),
+  size_one_node);
+  return size_binop (MULT_EXPR,
+fold_convert (sizetype, DR_STEP (dr)),
+fold_convert (sizetype, niters));
+}
+
+/* Return true if LOOP's latch is dominated by statement for data reference
+   DR.  */
+
+static inline bool
+latch_dominated_by_data_ref (class loop *loop, data_reference *dr)
+{
+  return dominated_by_p (CDI_DOMINATORS, single_exit (loop)->src,
+gimple_bb (DR_STMT (dr)));
+}
+
+/* Compute alias check pairs and store them in COMP_ALIAS_PAIRS for LOOP's
+   data dependence relations ALIAS_DDRS.  */
+
+void
+compute_alias_check_pairs (class loop *loop, vec<ddr_p> *alias_ddrs,
+  vec<dr_with_seg_len_pair_t> *comp_alias_pairs)
+{
+  unsigned int i;
+  unsigned HOST_WIDE_INT factor = 1;
+  tree niters_plus_one, niters = number_of_latch_executions (loop);
+
+  gcc_assert (niters != NULL_TREE && niters != chrec_dont_know);
+  niters = fold_convert (sizetype, niters);
+  niters_plus_one = size_binop (PLUS_EXPR, niters, size_one_node);
+
+  if (dump_file && (dump_flags & TDF_DETAILS))
+fprintf (dump_file, "Creating alias check pairs:\n");
+
+  /* Iterate all data dependence relations and compute alias check pairs.  */
+  for (i = 0; i < alias_ddrs->length (); i++)
+{
+  ddr_p ddr = (*alias_ddrs)[i];
+  struct data_reference *dr_a = DDR_A (ddr);
+  struct data_reference *dr_b = DDR_B (ddr);
+  tree seg_length_a, seg_length_b;
+
+  if (latch_dominated_by_data_ref (loop, dr_a))
+   seg_length_a = data_ref_segment_size (dr_a, niters_plus_one);
+  else
+   seg_length_a = data_ref_segment_size (dr_a, niters);
+
+  if (latch_dominated_by_data_ref (loop, dr_b))
+   seg_length_b = data_ref_segment_size (dr_b, niters_plus_one);
+  else
+   seg_length_b = data_ref_segment_size (dr_b, niters);
+
+  unsigned HOST_WIDE_INT access_size_a
+   = tree_to_uhwi (TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (dr_a))));
+  unsigned HOST_WIDE_INT access_size_b
+   = tree_to_uhwi (TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (dr_b))));
+  unsigned int align_a = TYPE_ALIGN_UNIT (TREE_TYPE (DR_REF (dr_a)));
+  unsigned int align_b = TYPE_ALIGN_UNIT (TREE_TYPE (DR_REF (dr_b)));
+
+  dr_with_seg_len_pair_t dr_with_seg_len_pair
+   (dr_with_seg_len (dr_a, seg_length_a, access_size_a, align_a),
+dr_with_seg_len (dr_b, seg_length_b, access_size_b, align_b),
+/* ??? Would WELL_ORDERED be safe?  */
+dr_with_seg_len_pair_t::REORDERED);
+
+  comp_alias_pairs->safe_push (dr_with_seg_len_pair);
+}
+
+  if (tree_fits_uhwi_p (niters))
+factor = tree_to_uhwi (niters);
+
+  /* Prune alias check pairs.  */
+  prune_runtime_alias_test_list (comp_alias_pairs, factor);
+  if (dump_file && (dump_flags & TDF_DETAILS))
+fprintf (dump_file,
+"Improved number of alias checks from %d to %d\n",
+alias_ddrs->length (), comp_alias_pairs->length ());
+}
+
 /* Create a conditional expression that represents the run-time checks for
overlapping of address ranges represented by a list of data references
pairs passed in ALIAS_PAIRS.  Data references are in LOOP.  The returned
diff --git a/gcc/tree-data-ref.h b/gcc/tree-data-ref.h
index 8001cc54f518..5016ec926b1d 100644
--- a/gcc/tree-data-ref.h
+++ b/gcc/tree-data-ref.h
@@ -577,6 +577,9 @@ extern opt_result runtime_alias_check_p (ddr_p, class loop 
*, bool);
 extern int data_ref_compare_tree (tree, tree);
 extern void prune_runtime_alias_test_list (vec 

[OG11][committed][PATCH 05/22] graphite: Fix minor mistakes in comments

2021-11-17 Thread Frederik Harwath
gcc/ChangeLog:

* graphite-sese-to-poly.c (build_poly_sr_1): Fix a typo and
  a reference to a variable which does not exist.
* graphite-isl-ast-to-gimple.c (gsi_insert_earliest): Fix typo
  in comment.
---
 gcc/graphite-isl-ast-to-gimple.c | 2 +-
 gcc/graphite-sese-to-poly.c  | 4 ++--
 2 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/gcc/graphite-isl-ast-to-gimple.c b/gcc/graphite-isl-ast-to-gimple.c
index c202213f39b3..44c06016f1a2 100644
--- a/gcc/graphite-isl-ast-to-gimple.c
+++ b/gcc/graphite-isl-ast-to-gimple.c
@@ -1018,7 +1018,7 @@ gsi_insert_earliest (gimple_seq seq)
   basic_block begin_bb = get_entry_bb (codegen_region);

   /* Inserting the gimple statements in a vector because gimple_seq behave
- in strage ways when inserting the stmts from it into different basic
+ in strange ways when inserting the stmts from it into different basic
  blocks one at a time.  */
   auto_vec stmts;
   for (gimple_stmt_iterator gsi = gsi_start (seq); !gsi_end_p (gsi);
diff --git a/gcc/graphite-sese-to-poly.c b/gcc/graphite-sese-to-poly.c
index 195851cb540a..12fa2d669b3c 100644
--- a/gcc/graphite-sese-to-poly.c
+++ b/gcc/graphite-sese-to-poly.c
@@ -644,14 +644,14 @@ build_poly_sr_1 (poly_bb_p pbb, gimple *stmt, tree var, 
enum poly_dr_type kind,
 isl_map *acc, isl_set *subscript_sizes)
 {
   scop_p scop = PBB_SCOP (pbb);
-  /* Each scalar variables has a unique alias set number starting from
+  /* Each scalar variable has a unique alias set number starting from
  the maximum alias set assigned to a dr.  */
   int alias_set = scop->max_alias_set + SSA_NAME_VERSION (var);
   subscript_sizes = isl_set_fix_si (subscript_sizes, isl_dim_set, 0,
alias_set);

   /* Add a constrain to the ACCESSES polyhedron for the alias set of
- data reference DR.  */
+ the reference */
   isl_constraint *c
 = isl_equality_alloc (isl_local_space_from_space (isl_map_get_space 
(acc)));
   c = isl_constraint_set_constant_si (c, -alias_set);
--
2.33.0

-
Siemens Electronic Design Automation GmbH; Anschrift: Arnulfstraße 201, 80634 
München; Gesellschaft mit beschränkter Haftung; Geschäftsführer: Thomas 
Heurung, Frank Thürauf; Sitz der Gesellschaft: München; Registergericht 
München, HRB 106955


[OG11][committed][PATCH 04/22] graphite: Rename isl_id_for_ssa_name

2021-11-17 Thread Frederik Harwath
The SSA names for which this function gets used are always SCoP
parameters and hence "isl_id_for_parameter" is a better name.  It also
explains the prefix "P_" for those names in the ISL representation.

gcc/ChangeLog:

* graphite-sese-to-poly.c (isl_id_for_ssa_name): Rename to ...
  (isl_id_for_parameter): ... this new function name.
  (build_scop_context): Adjust function use.
---
 gcc/graphite-sese-to-poly.c | 21 +++--
 1 file changed, 11 insertions(+), 10 deletions(-)

diff --git a/gcc/graphite-sese-to-poly.c b/gcc/graphite-sese-to-poly.c
index eebf2e02cfca..195851cb540a 100644
--- a/gcc/graphite-sese-to-poly.c
+++ b/gcc/graphite-sese-to-poly.c
@@ -100,14 +100,15 @@ extract_affine_mul (scop_p s, tree e, __isl_take 
isl_space *space)
   return isl_pw_aff_mul (lhs, rhs);
 }

-/* Return an isl identifier from the name of the ssa_name E.  */
+/* Return an isl identifier for the parameter P.  */

 static isl_id *
-isl_id_for_ssa_name (scop_p s, tree e)
+isl_id_for_parameter (scop_p s, tree p)
 {
-  char name1[14];
-  snprintf (name1, sizeof (name1), "P_%d", SSA_NAME_VERSION (e));
-  return isl_id_alloc (s->isl_context, name1, e);
+  gcc_checking_assert (TREE_CODE (p) == SSA_NAME);
+  char name[14];
+  snprintf (name, sizeof (name), "P_%d", SSA_NAME_VERSION (p));
+  return isl_id_alloc (s->isl_context, name, p);
 }

 /* Return an isl identifier for the data reference DR.  Data references and
@@ -893,15 +894,15 @@ build_scop_context (scop_p scop)
   isl_space *space = isl_space_set_alloc (scop->isl_context, nbp, 0);

   unsigned i;
-  tree e;
-  FOR_EACH_VEC_ELT (region->params, i, e)
+  tree p;
+  FOR_EACH_VEC_ELT (region->params, i, p)
 space = isl_space_set_dim_id (space, isl_dim_param, i,
-  isl_id_for_ssa_name (scop, e));
+  isl_id_for_parameter (scop, p));

   scop->param_context = isl_set_universe (space);

-  FOR_EACH_VEC_ELT (region->params, i, e)
-add_param_constraints (scop, i, e);
+  FOR_EACH_VEC_ELT (region->params, i, p)
+add_param_constraints (scop, i, p);
 }

 /* Return true when loop A is nested in loop B.  */
--
2.33.0

-
Siemens Electronic Design Automation GmbH; Anschrift: Arnulfstraße 201, 80634 
München; Gesellschaft mit beschränkter Haftung; Geschäftsführer: Thomas 
Heurung, Frank Thürauf; Sitz der Gesellschaft: München; Registergericht 
München, HRB 106955


[OG11][committed][PATCH 02/22] openacc: Move pass_oacc_device_lower after pass_graphite

2021-11-17 Thread Frederik Harwath
The OpenACC device lowering pass must run after the Graphite pass to
allow for the use of Graphite for automatic parallelization of kernels
regions in the future. Experimentation has shown that it is best,
performance-wise, to run pass_oacc_device_lower together with the
related passes pass_oacc_loop_designation and pass_oacc_gimple_workers
early after pass_graphite in pass_tree_loop, at least if the other
tree loop passes are not adjusted. In particular, to enable
vectorization which is crucial for GCN offloading, device lowering
should happen before pass_vectorize. To bring the loops contained in
the offloading functions into the shape expected by the loop
vectorizer, we have to make sure that some passes that previously were
executed only once before pass_tree_loop are also executed on the
offloading functions.  To ensure the execution of
pass_oacc_device_lower if pass_tree_loop does not execute (no loops,
no optimizations), we introduce two further copies of the pass to the
pipeline that run if there are no loops or if no optimization is
performed.

gcc/ChangeLog:

* omp-general.c (oacc_get_fn_dim_size): Return 0 on
missing "dims".
* omp-offload.c (pass_oacc_loop_designation::clone): New
member function.
(pass_oacc_gimple_workers::clone): Likewise.
(pass_oacc_gimple_device_lower::clone): Likewise.
* passes.c (pass_data_no_loop_optimizations): New pass_data.
(class pass_no_loop_optimizations): New pass.
(make_pass_no_loop_optimizations): New function.
* passes.def: Move pass_oacc_{loop_designation,
gimple_workers, device_lower} into tree_loop, and add
copies to pass_tree_no_loop and to new
pass_no_loop_optimizations.  Add copies of passes pass_ccp,
pass_ipa_warn, pass_complete_unrolli, pass_backprop,
pass_phiprop, pass_fix_loops after the OpenACC passes
in pass_tree_loop.
* tree-ssa-loop-ivcanon.c (pass_complete_unroll::clone):
New member function.
(pass_complete_unrolli::clone): Likewise.
* tree-ssa-loop.c (pass_fix_loops::clone): Likewise.
(pass_tree_loop_init::clone): Likewise.
(pass_tree_loop_done::clone): Likewise.
* tree-ssa-phiprop.c (pass_phiprop::clone): Likewise.

libgomp/ChangeLog:

* testsuite/libgomp.oacc-c-c++-common/pr85486-2.c: Adjust
expected output to pass name changes due to the pass
reordering and cloning.
* testsuite/libgomp.oacc-c-c++-common/vector-length-128-1.c: Likewise.
* testsuite/libgomp.oacc-c-c++-common/vector-length-128-2.c: Likewise.
* testsuite/libgomp.oacc-c-c++-common/vector-length-128-3.c: Likewise.
* testsuite/libgomp.oacc-c-c++-common/vector-length-128-4.c: Likewise.
* testsuite/libgomp.oacc-c-c++-common/vector-length-128-5.c: Likewise.
* testsuite/libgomp.oacc-c-c++-common/vector-length-128-6.c: Likewise.
* testsuite/libgomp.oacc-c-c++-common/vector-length-128-7.c: Likewise.

gcc/testsuite/ChangeLog:

* gcc.dg/goacc/loop-processing-1.c: Adjust expected output
to pass name changes due to the pass reordering and cloning.
* c-c++-common/goacc/classify-kernels-unparallelized.c: Likewise.
* c-c++-common/goacc/classify-kernels.c: Likewise.
* c-c++-common/goacc/classify-parallel.c: Likewise.
* c-c++-common/goacc/classify-routine.c: Likewise.
* c-c++-common/goacc/routine-nohost-1.c: Likewise.
* c-c++-common/unroll-1.c: Likewise.
* c-c++-common/unroll-4.c: Likewise.
* gcc.dg/goacc/loop-processing-1.c: Likewise.
* gcc.dg/tree-ssa/backprop-1.c: Likewise.
* gcc.dg/tree-ssa/backprop-2.c: Likewise.
* gcc.dg/tree-ssa/backprop-3.c: Likewise.
* gcc.dg/tree-ssa/backprop-4.c: Likewise.
* gcc.dg/tree-ssa/backprop-5.c: Likewise.
* gcc.dg/tree-ssa/backprop-6.c: Likewise.
* gcc.dg/tree-ssa/cunroll-1.c: Likewise.
* gcc.dg/tree-ssa/cunroll-3.c: Likewise.
* gcc.dg/tree-ssa/cunroll-9.c: Likewise.
* gcc.dg/tree-ssa/ldist-17.c: Likewise.
* gcc.dg/tree-ssa/loop-38.c: Likewise.
* gcc.dg/tree-ssa/pr21463.c: Likewise.
* gcc.dg/tree-ssa/pr45427.c: Likewise.
* gcc.dg/tree-ssa/pr61743-1.c: Likewise.
* gcc.dg/unroll-2.c: Likewise.
* gcc.dg/unroll-3.c: Likewise.
* gcc.dg/unroll-4.c: Likewise.
* gcc.dg/unroll-5.c: Likewise.
* gcc.dg/vect/vect-profile-1.c: Likewise.
* c-c++-common/goacc/device-lowering-debug-optimization.c: New test.
* c-c++-common/goacc/device-lowering-no-loops.c: New test.
* c-c++-common/goacc/device-lowering-no-optimization.c: New test.

Co-Authored-By: Thomas Schwinge 
---
 gcc/omp-general.c |  8 +-
 gcc/omp-offload.c |  8 ++
 gcc/passes.c  | 42 
 gcc/passes.def  

[OG11][committed][PATCH 03/22] graphite: Extend SCoP detection dump output

2021-11-17 Thread Frederik Harwath
Extend the dump output to make it easier (for GCC developers) to
understand why Graphite refuses to include a loop in a SCoP.

ChangeLog:

* graphite-scop-detection.c (scop_detection::can_represent_loop):
Output reason for failure to dump file.
(scop_detection::harmful_loop_in_region): Likewise.
(scop_detection::graphite_can_represent_expr): Likewise.
(scop_detection::stmt_has_simple_data_refs_p): Likewise.
(scop_detection::stmt_simple_for_scop_p): Likewise.
(print_sese_loop_numbers): New function.
(scop_detection::add_scop): Use from here to print loops in
rejected SCoP.
---
 gcc/graphite-scop-detection.c | 188 +-
 1 file changed, 165 insertions(+), 23 deletions(-)

diff --git a/gcc/graphite-scop-detection.c b/gcc/graphite-scop-detection.c
index 3e729b159b09..46c470210d05 100644
--- a/gcc/graphite-scop-detection.c
+++ b/gcc/graphite-scop-detection.c
@@ -69,12 +69,27 @@ public:
 fprintf (output.dump_file, "%d", i);
 return output;
   }
+
   friend debug_printer &
   operator<< (debug_printer , const char *s)
   {
 fprintf (output.dump_file, "%s", s);
 return output;
   }
+
+  friend debug_printer &
+  operator<< (debug_printer , gimple* stmt)
+  {
+print_gimple_stmt (output.dump_file, stmt, 0, TDF_VOPS | TDF_MEMSYMS);
+return output;
+  }
+
+  friend debug_printer &
+  operator<< (debug_printer , tree t)
+  {
+print_generic_expr (output.dump_file, t, TDF_SLIM);
+return output;
+  }
 } dp;

 #define DEBUG_PRINT(args) do \
@@ -506,6 +521,24 @@ scop_detection::merge_sese (sese_l first, sese_l second) 
const
   return combined;
 }

+/* Print the loop numbers of the loops contained
+   in SESE to FILE. */
+
+static void
+print_sese_loop_numbers (FILE *file, sese_l sese)
+{
+  loop_p loop;
+  bool printed = false;
+  FOR_EACH_LOOP (loop, 0)
+  {
+if (loop_in_sese_p (loop, sese))
+  fprintf (file, "%d, ", loop->num);
+printed = true;
+  }
+  if (printed)
+fprintf (file, "\b\b");
+}
+
 /* Build scop outer->inner if possible.  */

 void
@@ -519,8 +552,13 @@ scop_detection::build_scop_depth (loop_p loop)
   if (! next
  || harmful_loop_in_region (next))
{
- if (s)
-   add_scop (s);
+  if (next)
+DEBUG_PRINT (
+dp << "[scop-detection] Discarding SCoP on loops ";
+print_sese_loop_numbers (dump_file, next);
+dp << " because of harmful loops\n";);
+  if (s)
+add_scop (s);
  build_scop_depth (loop);
  s = invalid_sese;
}
@@ -560,14 +598,62 @@ scop_detection::can_represent_loop (loop_p loop, sese_l 
scop)
   || !single_pred_p (loop->latch)
   || exit->src != single_pred (loop->latch)
   || !empty_block_p (loop->latch))
-return false;
+{
+  DEBUG_PRINT (dp << "[can_represent_loop-fail] Loop shape 
unsupported.\n");
+  return false;
+}
+
+  bool edge_irreducible
+  = loop_preheader_edge (loop)->flags & EDGE_IRREDUCIBLE_LOOP;
+  if (edge_irreducible)
+{
+  DEBUG_PRINT (
+  dp << "[can_represent_loop-fail] Loop is not a natural loop.\n");
+  return false;
+}
+
+  bool niter_is_unconditional = number_of_iterations_exit (loop,
+  single_exit (loop),
+  _desc, false);

-  return !(loop_preheader_edge (loop)->flags & EDGE_IRREDUCIBLE_LOOP)
-&& number_of_iterations_exit (loop, single_exit (loop), _desc, false)
-&& niter_desc.control.no_overflow
-&& (niter = number_of_latch_executions (loop))
-&& !chrec_contains_undetermined (niter)
-&& graphite_can_represent_expr (scop, loop, niter);
+  if (!niter_is_unconditional)
+{
+  DEBUG_PRINT (
+  dp << "[can_represent_loop-fail] Loop niter not unconditional.\n"
+ << "Condition: " << niter_desc.assumptions << "\n");
+  return false;
+}
+
+  niter = number_of_latch_executions (loop);
+  if (!niter)
+{
+  DEBUG_PRINT (dp << "[can_represent_loop-fail] Loop niter unknown.\n");
+  return false;
+}
+  if (!niter_desc.control.no_overflow)
+{
+  DEBUG_PRINT (dp << "[can_represent_loop-fail] Loop niter can 
overflow.\n");
+  return false;
+}
+
+  bool undetermined_coefficients = chrec_contains_undetermined (niter);
+  if (undetermined_coefficients)
+{
+  DEBUG_PRINT (dp << "[can_represent_loop-fail] "
+  << "Loop niter chrec contains undetermined coefficients.\n");
+  return false;
+}
+
+  bool can_represent_expr = graphite_can_represent_expr (scop, loop, niter);
+  if (!can_represent_expr)
+{
+  DEBUG_PRINT (dp << "[can_represent_loop-fail] "
+  << "Loop niter expression cannot be represented: "
+  << niter << "\n");
+  return false;
+}
+
+  return true;
 }

 /* Return true 

[OG11][committed][PATCH 01/22] Fortran: delinearize multi-dimensional array accesses

2021-11-17 Thread Frederik Harwath
From: Sandra Loosemore 

The Fortran front end presently linearizes accesses to
multi-dimensional arrays by combining the indices for the various
dimensions into a series of explicit multiplies and adds with
refactoring to allow CSE of invariant parts of the computation.
Unfortunately this representation interferes with Graphite-based loop
optimizations.  It is difficult to recover the original
multi-dimensional form of the access by the time loop optimizations
run because parts of it have already been optimized away or into a
form that is not easily recognizable, so it seems better to have the
Fortran front end produce delinearized accesses to begin with, a set
of nested ARRAY_REFs similar to the existing behavior of the C and C++
front ends.  This is a long-standing problem that has previously been
discussed, e.g., in PR 14741 and PR 61000.

This patch is an initial implementation for explicit array accesses
only; it doesn't handle the accesses generated during scalarization of
whole-array or array-section operations, which follow a different code
path.

gcc/
* expr.c (get_inner_reference): Handle NOP_EXPR like
VIEW_CONVERT_EXPR.

gcc/fortran/
* lang.opt (-param=delinearize=): New.
* trans-array.c (get_class_array_vptr): New, split from...
(build_array_ref): ...here.
(get_array_lbound, get_array_ubound): New, split from...
(gfc_conv_array_ref): ...here.  Additional code refactoring
plus support for delinearization of the array access.

gcc/testsuite/
* gfortran.dg/assumed_type_2.f90: Adjust patterns.
* gfortran.dg/goacc/kernels-loop-inner.f95: Likewise.
* gfortran.dg/graphite/block-3.f90: Remove xfails.
* gfortran.dg/graphite/block-4.f90: Likewise.
* gfortran.dg/inline_matmul_24.f90: Adjust patterns.
* gfortran.dg/no_arg_check_2.f90: Likewise.
* gfortran.dg/pr32921.f: Likewise.
* gfortran.dg/reassoc_4.f: Disable delinearization for this test.

Co-Authored-By: Tobias Burnus  
---
 gcc/expr.c|   1 +
 gcc/fortran/lang.opt  |   4 +
 gcc/fortran/trans-array.c | 321 +-
 gcc/testsuite/gfortran.dg/assumed_type_2.f90  |   6 +-
 .../gfortran.dg/goacc/kernels-loop-inner.f95  |   2 +-
 gcc/testsuite/gfortran.dg/graphite/block-2.f  |   9 +-
 .../gfortran.dg/graphite/block-3.f90  |   1 -
 .../gfortran.dg/graphite/block-4.f90  |   1 -
 gcc/testsuite/gfortran.dg/graphite/id-9.f |   2 +-
 .../gfortran.dg/inline_matmul_24.f90  |   2 +-
 gcc/testsuite/gfortran.dg/no_arg_check_2.f90  |   6 +-
 gcc/testsuite/gfortran.dg/pr32921.f   |   2 +-
 gcc/testsuite/gfortran.dg/reassoc_4.f |   2 +-
 13 files changed, 264 insertions(+), 95 deletions(-)

diff --git a/gcc/expr.c b/gcc/expr.c
index 21b7e96ed62e..c7ee800c4d4f 100644
--- a/gcc/expr.c
+++ b/gcc/expr.c
@@ -7539,6 +7539,7 @@ get_inner_reference (tree exp, poly_int64_pod *pbitsize,
  break;

case VIEW_CONVERT_EXPR:
+   case NOP_EXPR:
  break;

case MEM_REF:
diff --git a/gcc/fortran/lang.opt b/gcc/fortran/lang.opt
index dba333448c11..1548d56278a4 100644
--- a/gcc/fortran/lang.opt
+++ b/gcc/fortran/lang.opt
@@ -521,6 +521,10 @@ fdefault-real-16
 Fortran Var(flag_default_real_16)
 Set the default real kind to an 16 byte wide type.

+-param=delinearize=
+Common Joined UInteger Var(flag_delinearize_aref) Init(1) IntegerRange(0,1) 
Param Optimization
+Delinearize array references.
+
 fdollar-ok
 Fortran Var(flag_dollar_ok)
 Allow dollar signs in entity names.
diff --git a/gcc/fortran/trans-array.c b/gcc/fortran/trans-array.c
index b7d949929722..3eb9a1778173 100644
--- a/gcc/fortran/trans-array.c
+++ b/gcc/fortran/trans-array.c
@@ -3747,11 +3747,9 @@ add_to_offset (tree *cst_offset, tree *offset, tree t)
 }
 }

-
 static tree
-build_array_ref (tree desc, tree offset, tree decl, tree vptr)
+get_class_array_vptr (tree desc, tree vptr)
 {
-  tree tmp;
   tree type;
   tree cdesc;

@@ -3775,19 +3773,74 @@ build_array_ref (tree desc, tree offset, tree decl, 
tree vptr)
  && GFC_CLASS_TYPE_P (TYPE_CANONICAL (type)))
vptr = gfc_class_vptr_get (TREE_OPERAND (cdesc, 0));
 }
+  return vptr;
+}

+static tree
+build_array_ref (tree desc, tree offset, tree decl, tree vptr)
+{
+  tree tmp;
+  vptr = get_class_array_vptr (desc, vptr);
   tmp = gfc_conv_array_data (desc);
   tmp = build_fold_indirect_ref_loc (input_location, tmp);
   tmp = gfc_build_array_ref (tmp, offset, decl, vptr);
   return tmp;
 }

+/* Get the declared lower bound for rank N of array DECL which might
+   be either a bare array or a descriptor.  This differs from
+   gfc_conv_array_lbound because it gets information for temporary array
+   objects from AR instead of the descriptor (they can differ).  */
+
+static tree
+get_array_lbound (tree decl, int n, gfc_symbol *sym,
+   

[OG11][committed][PATCH 00/22] OpenACC "kernels" Improvements

2021-11-17 Thread Frederik Harwath
Hi,

this patch series implements the re-work of the OpenACC "kernels"
implementation that has been announced at the GNU Tools Track of this
year's Linux Plumbers Conference; see
https://linuxplumbersconf.org/event/11/contributions/998/.  The
central step is contained in the commit titled "openacc: Use Graphite
for dependence analysis in \"kernels\" regions" whose commit message
also contains further explanations.

Best regards,
Frederik

PS: The commit series also includes a backport from master
"00b98b6cac25 Add dg-final option-based target selectors" and two
trivial unrelated commits "fa558c2a6664 Fix gimple_debug_cfg
declaration" and "35cdc94463fe Fix branch prediction dump message".



Andrew Stubbs (2):
  openacc: Add data optimization pass
  openacc: Add runtime alias checking for OpenACC kernels

Frederik Harwath (19):
  openacc: Move pass_oacc_device_lower after pass_graphite
  graphite: Extend SCoP detection dump output
  graphite: Rename isl_id_for_ssa_name
  graphite: Fix minor mistakes in comments
  Fix branch prediction dump message
  Move compute_alias_check_pairs to tree-data-ref.c
  graphite: Add runtime alias checking
  openacc: Use Graphite for dependence analysis in "kernels" regions
  openacc: Add "can_be_parallel" flag info to "graph" dumps
  openacc: Add further kernels tests
  openacc: Remove unused partitioning in "kernels" regions
  Add function for printing a single OMP_CLAUSE
  openacc: Warn about "independent" "kernels" loops with
data-dependences
  openacc: Handle internal function calls in pass_lim
  openacc: Disable pass_pre on outlined functions analyzed by Graphite
  graphite: Tune parameters for OpenACC use
  graphite: Adjust scop loop-nest choice
  graphite: Accept loops without data references
  openacc: Adjust test expectations to new "kernels" handling

Sandra Loosemore (1):
  Fortran: delinearize multi-dimensional array accesses

 gcc/Makefile.in   |2 +
 gcc/cfgloop.c |1 +
 gcc/cfgloop.h |6 +
 gcc/cfgloopmanip.c|1 +
 gcc/common.opt|9 +
 gcc/config/nvptx/nvptx.c  |7 +
 gcc/doc/gimple.texi   |2 +
 gcc/doc/invoke.texi   |   20 +-
 gcc/doc/passes.texi   |6 +-
 gcc/expr.c|1 +
 gcc/flag-types.h  |1 +
 gcc/fortran/lang.opt  |4 +
 gcc/fortran/trans-array.c |  321 --
 gcc/gimple-loop-interchange.cc|2 +-
 gcc/gimple-pretty-print.c |3 +
 gcc/gimple-walk.c |   15 +-
 gcc/gimple-walk.h |6 +
 gcc/gimple.h  |7 +-
 gcc/gimplify.c|   13 +-
 gcc/graph.c   |   35 +-
 gcc/graphite-dependences.c|  220 +++-
 gcc/graphite-isl-ast-to-gimple.c  |  271 -
 gcc/graphite-oacc.c   |  689 
 gcc/graphite-oacc.h   |   55 +
 gcc/graphite-optimize-isl.c   |   42 +-
 gcc/graphite-poly.c   |   41 +-
 gcc/graphite-scop-detection.c |  654 +--
 gcc/graphite-sese-to-poly.c   |   90 +-
 gcc/graphite.c|  120 +-
 gcc/graphite.h|   40 +-
 gcc/internal-fn.c |2 +
 gcc/internal-fn.h |4 +-
 gcc/omp-data-optimize.cc  |  951 
 gcc/omp-expand.c  |  110 +-
 gcc/omp-general.c |   23 +-
 gcc/omp-general.h |1 +
 gcc/omp-low.c |  321 +-
 gcc/omp-oacc-kernels-decompose.cc |  145 ++-
 gcc/omp-offload.c | 1001 +
 gcc/omp-offload.h |2 +
 gcc/params.opt|5 +-
 gcc/passes.c  |   42 +
 gcc/passes.def|   47 +-
 gcc/predict.c |2 +-
 gcc/sese.c|   25 +-
 gcc/sese.h|   19 +
 gcc/testsuite/c-c++-common/goacc/acc-icf.c|4 +-
 gcc/testsuite/c-c++-common/goacc/cache-3-1.c  |2 +-
 ...classify-kernels-unparallelized-graphite.c |   41 +
 ...lassify-kernels-unparallelized-parloops.c} |   12 +-
 .../c-c++-common/goacc/classify-kernels.c |   27 +-
 .../c-c++-common/goacc/classify-parallel.c|8 +-
 .../c-c++-common/goacc/classify-routine.c |8 +-
 

Re: [PATCH] libcpp: Fix up handling of block comments in -fdirectives-only mode [PR103130]

2021-11-17 Thread Marek Polacek via Gcc-patches
On Wed, Nov 17, 2021 at 10:22:32AM +0100, Jakub Jelinek wrote:
> Hi!
> 
> Normal preprocessing, -fdirectives-only preprocessing before the Nathan's
> rewrite, and all other compilers I've tried on godbolt treat even \*/
> as end of a block comment, but the new -fdirectives-only handling doesn't.
> 
> Fixed thusly, bootstrapped/regtested on x86_64-linux and i686-linux, ok for
> trunk?

OK for trunk and 11, thanks.
 
> 2021-11-17  Jakub Jelinek  
> 
>   PR preprocessor/103130
>   * lex.c (cpp_directive_only_process): Treat even \*/ as end of block
>   comment.
> 
>   * c-c++-common/cpp/dir-only-9.c: New test.
> 
> --- libcpp/lex.c.jj   2021-11-01 14:37:06.706853026 +0100
> +++ libcpp/lex.c  2021-11-16 16:54:04.022644499 +0100
> @@ -4493,7 +4493,7 @@ cpp_directive_only_process (cpp_reader *
>   break;
>  
> case '*':
> - if (pos > peek && !esc)
> + if (pos > peek)
> star = is_block;
>   esc = false;
>   break;
> --- gcc/testsuite/c-c++-common/cpp/dir-only-9.c.jj2021-11-16 
> 16:56:57.121217975 +0100
> +++ gcc/testsuite/c-c++-common/cpp/dir-only-9.c   2021-11-16 
> 16:56:14.524815094 +0100
> @@ -0,0 +1,13 @@
> +/* PR preprocessor/103130 */
> +/* { dg-do preprocess } */
> +/* { dg-options -fdirectives-only } */
> +
> +/*\
> + * this is a comment
> +\*/
> +
> +int
> +main ()
> +{
> +  return 0;
> +}
> 
>   Jakub
> 

Marek



Re: [PATCH v2] x86: Add -mharden-sls=[none|all|return|indirect-branch]

2021-11-17 Thread Uros Bizjak via Gcc-patches
On Wed, Nov 17, 2021 at 4:35 PM H.J. Lu  wrote:
>
> Add -mharden-sls= to mitigate against straight line speculation (SLS)
> for function return and indirect branch by adding an INT3 instruction
> after function return and indirect branch.
>
> gcc/
>
> PR target/102952
> * config/i386/i386-opts.h (harden_sls): New enum.
> * config/i386/i386.c (output_indirect_thunk): Mitigate against
> SLS for function return.
> (ix86_output_function_return): Likewise.
> (ix86_output_jmp_thunk_or_indirect): Mitigate against indirect
> branch.
> (ix86_output_indirect_jmp): Likewise.
> (ix86_output_call_insn): Likewise.
> * config/i386/i386.opt: Add -mharden-sls=.
> * doc/invoke.texi: Document -mharden-sls=.
>
> gcc/testsuite/
>
> PR target/102952
> * gcc.target/i386/harden-sls-1.c: New test.
> * gcc.target/i386/harden-sls-2.c: Likewise.
> * gcc.target/i386/harden-sls-3.c: Likewise.
> * gcc.target/i386/harden-sls-4.c: Likewise.
> * gcc.target/i386/harden-sls-5.c: Likewise.
> ---
>  gcc/config/i386/i386-opts.h  |  7 ++
>  gcc/config/i386/i386.c   | 23 ++--
>  gcc/config/i386/i386.opt | 20 +
>  gcc/doc/invoke.texi  | 10 -
>  gcc/testsuite/gcc.target/i386/harden-sls-1.c | 14 
>  gcc/testsuite/gcc.target/i386/harden-sls-2.c | 14 
>  gcc/testsuite/gcc.target/i386/harden-sls-3.c | 14 
>  gcc/testsuite/gcc.target/i386/harden-sls-4.c | 16 ++
>  gcc/testsuite/gcc.target/i386/harden-sls-5.c | 17 +++
>  9 files changed, 127 insertions(+), 8 deletions(-)
>  create mode 100644 gcc/testsuite/gcc.target/i386/harden-sls-1.c
>  create mode 100644 gcc/testsuite/gcc.target/i386/harden-sls-2.c
>  create mode 100644 gcc/testsuite/gcc.target/i386/harden-sls-3.c
>  create mode 100644 gcc/testsuite/gcc.target/i386/harden-sls-4.c
>  create mode 100644 gcc/testsuite/gcc.target/i386/harden-sls-5.c
>
> diff --git a/gcc/config/i386/i386-opts.h b/gcc/config/i386/i386-opts.h
> index 04e4ad608fb..171d3106d0a 100644
> --- a/gcc/config/i386/i386-opts.h
> +++ b/gcc/config/i386/i386-opts.h
> @@ -121,4 +121,11 @@ enum instrument_return {
>instrument_return_nop5
>  };
>
> +enum harden_sls {
> +  harden_sls_none = 0,
> +  harden_sls_return = 1 << 0,
> +  harden_sls_indirect_branch = 1 << 1,
> +  harden_sls_all = harden_sls_return | harden_sls_indirect_branch
> +};
> +
>  #endif
> diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c
> index 73c4d5115bb..8bbf6ae9875 100644
> --- a/gcc/config/i386/i386.c
> +++ b/gcc/config/i386/i386.c
> @@ -5914,6 +5914,8 @@ output_indirect_thunk (unsigned int regno)
>  }
>
>fputs ("\tret\n", asm_out_file);
> +  if ((ix86_harden_sls & harden_sls_return))
> +fputs ("\tint3\n", asm_out_file);
>  }
>
>  /* Output a funtion with a call and return thunk for indirect branch.
> @@ -15984,6 +15986,8 @@ ix86_output_jmp_thunk_or_indirect (const char 
> *thunk_name, const int regno)
>fprintf (asm_out_file, "\tjmp\t");
>assemble_name (asm_out_file, thunk_name);
>putc ('\n', asm_out_file);
> +  if ((ix86_harden_sls & harden_sls_indirect_branch))
> +   fputs ("\tint3\n", asm_out_file);
>  }
>else
>  output_indirect_thunk (regno);
> @@ -16206,10 +16210,10 @@ ix86_output_indirect_jmp (rtx call_op)
> gcc_unreachable ();
>
>ix86_output_indirect_branch (call_op, "%0", true);
> -  return "";
>  }
>else
> -return "%!jmp\t%A0";
> +output_asm_insn ("%!jmp\t%A0", _op);
> +  return (ix86_harden_sls & harden_sls_indirect_branch) ? "int3" : "";
>  }
>
>  /* Output return instrumentation for current function if needed.  */
> @@ -16277,10 +16281,10 @@ ix86_output_function_return (bool long_p)
>return "";
>  }
>
> -  if (!long_p)
> -return "%!ret";
> -
> -  return "rep%; ret";
> +  if ((ix86_harden_sls & harden_sls_return))
> +long_p = false;

Is the above really needed? This will change "rep ret" to a "[notrack]
ret" when SLS hardening is in effect, with a conditional [notrack]
prefix, even when long ret was requested.

On a related note, "notrack ret" does not assemble for me, the
assembler reports:

notrack.s:1: Error: expecting indirect branch instruction after `notrack'

Can you please clarify the above change?

Uros.

> +  output_asm_insn (long_p ? "rep%; ret" : "%!ret", nullptr);
> +  return (ix86_harden_sls & harden_sls_return) ? "int3" : "";
>  }
>
>  /* Output indirect function return.  RET_OP is the function return
> @@ -16375,7 +16379,12 @@ ix86_output_call_insn (rtx_insn *insn, rtx call_op)
>if (output_indirect_p && !direct_p)
> ix86_output_indirect_branch (call_op, xasm, true);
>else
> -   output_asm_insn (xasm, _op);
> +   {
> + output_asm_insn (xasm, _op);
> + if 

Re: [PATCH] x86: Add -mharden-sls=[none|all|return|indirect-branch]

2021-11-17 Thread H.J. Lu via Gcc-patches
On Wed, Nov 17, 2021 at 6:08 AM Uros Bizjak  wrote:
>
> On Wed, Nov 17, 2021 at 2:46 PM H.J. Lu  wrote:
> >
> > On Wed, Nov 17, 2021 at 1:05 AM Uros Bizjak  wrote:
> > >
> > > On Tue, Nov 16, 2021 at 7:20 PM H.J. Lu via Gcc-patches
> > >  wrote:
> > > >
> > > > Add -mharden-sls= to mitigate against straight line speculation (SLS)
> > > > for function return and indirect branch by adding an INT3 instruction
> > > > after function return and indirect branch.
> > > >
> > > > gcc/
> > > >
> > > > PR target/102952
> > > > * config/i386/i386-opts.h (harden_sls): New enum.
> > > > * config/i386/i386.c (output_indirect_thunk): Mitigate against
> > > > SLS for function return.
> > > > (ix86_output_function_return): Likewise.
> > > > (ix86_output_jmp_thunk_or_indirect): Mitigate against indirect
> > > > branch.
> > > > (ix86_output_indirect_jmp): Likewise.
> > > > (ix86_output_call_insn): Likewise.
> > > > * config/i386/i386.opt: Add -mharden-sls=.
> > > > * doc/invoke.texi: Document -mharden-sls=.
> > > >
> > > > gcc/testsuite/
> > > >
> > > > PR target/102952
> > > > * gcc.target/i386/harden-sls-1.c: New test.
> > > > * gcc.target/i386/harden-sls-2.c: Likewise.
> > > > * gcc.target/i386/harden-sls-3.c: Likewise.
> > > > * gcc.target/i386/harden-sls-4.c: Likewise.
> > > > ---
> > > >  gcc/config/i386/i386-opts.h  |  7 +
> > > >  gcc/config/i386/i386.c   | 30 
> > > >  gcc/config/i386/i386.opt | 20 +
> > > >  gcc/doc/invoke.texi  | 10 ++-
> > > >  gcc/testsuite/gcc.target/i386/harden-sls-1.c | 14 +
> > > >  gcc/testsuite/gcc.target/i386/harden-sls-2.c | 14 +
> > > >  gcc/testsuite/gcc.target/i386/harden-sls-3.c | 14 +
> > > >  gcc/testsuite/gcc.target/i386/harden-sls-4.c | 14 +
> > > >  8 files changed, 116 insertions(+), 7 deletions(-)
> > > >  create mode 100644 gcc/testsuite/gcc.target/i386/harden-sls-1.c
> > > >  create mode 100644 gcc/testsuite/gcc.target/i386/harden-sls-2.c
> > > >  create mode 100644 gcc/testsuite/gcc.target/i386/harden-sls-3.c
> > > >  create mode 100644 gcc/testsuite/gcc.target/i386/harden-sls-4.c
> > > >
> > > > diff --git a/gcc/config/i386/i386-opts.h b/gcc/config/i386/i386-opts.h
> > > > index 04e4ad608fb..171d3106d0a 100644
> > > > --- a/gcc/config/i386/i386-opts.h
> > > > +++ b/gcc/config/i386/i386-opts.h
> > > > @@ -121,4 +121,11 @@ enum instrument_return {
> > > >instrument_return_nop5
> > > >  };
> > > >
> > > > +enum harden_sls {
> > > > +  harden_sls_none = 0,
> > > > +  harden_sls_return = 1 << 0,
> > > > +  harden_sls_indirect_branch = 1 << 1,
> > > > +  harden_sls_all = harden_sls_return | harden_sls_indirect_branch
> > > > +};
> > > > +
> > > >  #endif
> > > > diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c
> > > > index cc9f9322fad..0a902d66321 100644
> > > > --- a/gcc/config/i386/i386.c
> > > > +++ b/gcc/config/i386/i386.c
> > > > @@ -5914,6 +5914,8 @@ output_indirect_thunk (unsigned int regno)
> > > >  }
> > > >
> > > >fputs ("\tret\n", asm_out_file);
> > > > +  if ((ix86_harden_sls & harden_sls_return))
> > > > +fputs ("\tint3\n", asm_out_file);
> > > >  }
> > > >
> > > >  /* Output a funtion with a call and return thunk for indirect branch.
> > > > @@ -15987,6 +15989,8 @@ ix86_output_jmp_thunk_or_indirect (const char 
> > > > *thunk_name, const int regno)
> > > >fprintf (asm_out_file, "\tjmp\t");
> > > >assemble_name (asm_out_file, thunk_name);
> > > >putc ('\n', asm_out_file);
> > > > +  if ((ix86_harden_sls & harden_sls_indirect_branch))
> > > > +   fputs ("\tint3\n", asm_out_file);
> > > >  }
> > > >else
> > > >  output_indirect_thunk (regno);
> > > > @@ -16212,10 +16216,14 @@ ix86_output_indirect_jmp (rtx call_op)
> > > > gcc_unreachable ();
> > > >
> > > >ix86_output_indirect_branch (call_op, "%0", true);
> > > > -  return "";
> > > > +  if ((ix86_harden_sls & harden_sls_indirect_branch))
> > > > +   return "int3";
> > > > +  else
> > > > +   return "";
> > > >  }
> > > >else
> > > > -return "%!jmp\t%A0";
> > > > +return ((ix86_harden_sls & harden_sls_indirect_branch)
> > > > +   ? "%!jmp\t%A0\n\tint3" : "%!jmp\t%A0");
> > > >  }
> > >
> > > Just change existing returns to fputs and end function with:
> > >
> > > return (ix86_harden_sls & harden_sls_indirect_branch) ? "int3" : "";
> >
> > But fputs doesn't support %A0.
>
> Sorry for the thinko, output_asm_insn instead of fputs will do the trick.

Fixed in the v2 patch.

Thanks.

> Uros.
>
> >
> > > >  /* Output return instrumentation for current function if needed.  */
> > > > @@ -16283,10 +16291,15 @@ ix86_output_function_return (bool long_p)
> > > >return "";
> > > >  }
> > > >
> > > > -  if 

Re: [AArch64] Enable generation of FRINTNZ instructions

2021-11-17 Thread Richard Sandiford via Gcc-patches
> diff --git a/gcc/config/aarch64/aarch64.md b/gcc/config/aarch64/aarch64.md
> index 
> 4035e061706793849c68ae09bcb2e4b9580ab7b6..62adbc4cb6bbbe0c856f9fbe451aee08f2dea3b5
>  100644
> --- a/gcc/config/aarch64/aarch64.md
> +++ b/gcc/config/aarch64/aarch64.md
> @@ -7345,6 +7345,14 @@ (define_insn "despeculate_simpleti"
> (set_attr "speculation_barrier" "true")]
>  )
>  
> +(define_expand "ftrunc2"
> +  [(set (match_operand:VSFDF 0 "register_operand" "=w")
> +(unspec:VSFDF [(match_operand:VSFDF 1 "register_operand" "w")]
> +   FRINTNZ))]
> +  "TARGET_FRINT && TARGET_FLOAT
> +   && !(VECTOR_MODE_P (mode) && !TARGET_SIMD)"
> +)

Probably just me, but this condition seems quite hard to read.
I think it'd be better to add conditions to the VSFDF definition instead,
a bit like we do for the HF entries in VHSDF_HSDF and VHSDF_DF.  I.e.:

(define_mode_iterator VSFDF [(V2SF "TARGET_SIMD")
 (V4SF "TARGET_SIMD")
 (V2DF "TARGET_SIMD")
 (SF "TARGET_FLOAT")
 (DF "TARGET_FLOAT")])

Then the condition can be "TARGET_FRINT".

Same for the existing aarch64_.

> diff --git a/gcc/internal-fn.def b/gcc/internal-fn.def
> index 
> bb13c6cce1bf55633760bc14980402f1f0ac1689..fb97d37cecae17cdb6444e7f3391361b214f0712
>  100644
> --- a/gcc/internal-fn.def
> +++ b/gcc/internal-fn.def
> @@ -269,6 +269,7 @@ DEF_INTERNAL_FLT_FLOATN_FN (RINT, ECF_CONST, rint, unary)
>  DEF_INTERNAL_FLT_FLOATN_FN (ROUND, ECF_CONST, round, unary)
>  DEF_INTERNAL_FLT_FLOATN_FN (ROUNDEVEN, ECF_CONST, roundeven, unary)
>  DEF_INTERNAL_FLT_FLOATN_FN (TRUNC, ECF_CONST, btrunc, unary)
> +DEF_INTERNAL_OPTAB_FN (FTRUNC_INT, ECF_CONST, ftruncint, ftrunc_int)

ftrunc_int should be described in the comment at the top of the file.
E.g.:

  - ftrunc_int: a unary conversion optab that takes and returns values
of the same mode, but internally converts via another mode.  This
second mode is specified using a dummy final function argument.

> diff --git a/gcc/testsuite/gcc.target/aarch64/frintnz.c 
> b/gcc/testsuite/gcc.target/aarch64/frintnz.c
> new file mode 100644
> index 
> ..2e1971f8aa11d8b95f454d03a03e050a3bf96747
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/aarch64/frintnz.c
> @@ -0,0 +1,88 @@
> +/* { dg-do compile } */
> +/* { dg-options "-O2 -march=armv8.5-a" } */
> +/* { dg-require-effective-target arm_v8_5a_frintnzx_ok } */
> +/* { dg-final { check-function-bodies "**" "" } } */
> +
> +/*
> +** f1:
> +**   ...
> +**   frint32zs0, s0
> +**   ...

Are these functions ever more than just:

f1:
frint32zs0, s0
ret

?  If not, I think we should match that sequence and “defend” the
good codegen.  The problem with ... on both sides is that it's
then not clear why we can rely on register 0 being used.

> +*/
> +float
> +f1 (float x)
> +{
> +  int y = x;
> +  return (float) y;
> +}
> +
> +/*
> +** f2:
> +**   ...
> +**   frint64zs0, s0
> +**   ...
> +*/
> +float
> +f2 (float x)
> +{
> +  long long int y = x;
> +  return (float) y;
> +}
> +
> +/*
> +** f3:
> +**   ...
> +**   frint32zd0, d0
> +**   ...
> +*/
> +double
> +f3 (double x)
> +{
> +  int y = x;
> +  return (double) y;
> +}
> +
> +/*
> +** f4:
> +**   ...
> +**   frint64zd0, d0
> +**   ...
> +*/
> +double
> +f4 (double x)
> +{
> +  long long int y = x;
> +  return (double) y;
> +}
> +
> +float
> +f1_dont (float x)
> +{
> +  unsigned int y = x;
> +  return (float) y;
> +}
> +
> +float
> +f2_dont (float x)
> +{
> +  unsigned long long int y = x;
> +  return (float) y;
> +}
> +
> +double
> +f3_dont (double x)
> +{
> +  unsigned int y = x;
> +  return (double) y;
> +}
> +
> +double
> +f4_dont (double x)
> +{
> +  unsigned long long int y = x;
> +  return (double) y;
> +}
> +
> +/* Make sure the 'dont's don't generate any frintNz.  */
> +/* { dg-final { scan-assembler-times {frint32z} 2 } } */
> +/* { dg-final { scan-assembler-times {frint64z} 2 } } */
> diff --git a/gcc/testsuite/gcc.target/aarch64/merge_trunc1.c 
> b/gcc/testsuite/gcc.target/aarch64/merge_trunc1.c
> index 
> 07217064e2ba54fcf4f5edc440e6ec19ddae66e1..3b34dc3ad79f1406a41ec4c00db10347ba1ca2c4
>  100644
> --- a/gcc/testsuite/gcc.target/aarch64/merge_trunc1.c
> +++ b/gcc/testsuite/gcc.target/aarch64/merge_trunc1.c
> @@ -1,5 +1,6 @@
>  /* { dg-do compile } */
>  /* { dg-options "-O2 -ffast-math" } */
> +/* { dg-skip-if "" { arm_v8_5a_frintnzx_ok } } */
>  
>  float
>  f1 (float x)
> diff --git a/gcc/testsuite/lib/target-supports.exp 
> b/gcc/testsuite/lib/target-supports.exp
> index 
> 8cbda192fe0fae59ea208ee43696b4d22c43e61e..7fa1659ce734257f3cd96f1e2e50ace4d02dcf51
>  100644
> --- a/gcc/testsuite/lib/target-supports.exp
> +++ b/gcc/testsuite/lib/target-supports.exp
> @@ -11365,6 +11365,33 @@ proc check_effective_target_arm_v8_3a_bkey_directive 
> { } {
>   }]
>  }
>  
> +# Return 1 if the target supports 

[PATCH v2] x86: Add -mharden-sls=[none|all|return|indirect-branch]

2021-11-17 Thread H.J. Lu via Gcc-patches
Add -mharden-sls= to mitigate against straight line speculation (SLS)
for function return and indirect branch by adding an INT3 instruction
after function return and indirect branch.

gcc/

PR target/102952
* config/i386/i386-opts.h (harden_sls): New enum.
* config/i386/i386.c (output_indirect_thunk): Mitigate against
SLS for function return.
(ix86_output_function_return): Likewise.
(ix86_output_jmp_thunk_or_indirect): Mitigate against indirect
branch.
(ix86_output_indirect_jmp): Likewise.
(ix86_output_call_insn): Likewise.
* config/i386/i386.opt: Add -mharden-sls=.
* doc/invoke.texi: Document -mharden-sls=.

gcc/testsuite/

PR target/102952
* gcc.target/i386/harden-sls-1.c: New test.
* gcc.target/i386/harden-sls-2.c: Likewise.
* gcc.target/i386/harden-sls-3.c: Likewise.
* gcc.target/i386/harden-sls-4.c: Likewise.
* gcc.target/i386/harden-sls-5.c: Likewise.
---
 gcc/config/i386/i386-opts.h  |  7 ++
 gcc/config/i386/i386.c   | 23 ++--
 gcc/config/i386/i386.opt | 20 +
 gcc/doc/invoke.texi  | 10 -
 gcc/testsuite/gcc.target/i386/harden-sls-1.c | 14 
 gcc/testsuite/gcc.target/i386/harden-sls-2.c | 14 
 gcc/testsuite/gcc.target/i386/harden-sls-3.c | 14 
 gcc/testsuite/gcc.target/i386/harden-sls-4.c | 16 ++
 gcc/testsuite/gcc.target/i386/harden-sls-5.c | 17 +++
 9 files changed, 127 insertions(+), 8 deletions(-)
 create mode 100644 gcc/testsuite/gcc.target/i386/harden-sls-1.c
 create mode 100644 gcc/testsuite/gcc.target/i386/harden-sls-2.c
 create mode 100644 gcc/testsuite/gcc.target/i386/harden-sls-3.c
 create mode 100644 gcc/testsuite/gcc.target/i386/harden-sls-4.c
 create mode 100644 gcc/testsuite/gcc.target/i386/harden-sls-5.c

diff --git a/gcc/config/i386/i386-opts.h b/gcc/config/i386/i386-opts.h
index 04e4ad608fb..171d3106d0a 100644
--- a/gcc/config/i386/i386-opts.h
+++ b/gcc/config/i386/i386-opts.h
@@ -121,4 +121,11 @@ enum instrument_return {
   instrument_return_nop5
 };
 
+enum harden_sls {
+  harden_sls_none = 0,
+  harden_sls_return = 1 << 0,
+  harden_sls_indirect_branch = 1 << 1,
+  harden_sls_all = harden_sls_return | harden_sls_indirect_branch
+};
+
 #endif
diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c
index 73c4d5115bb..8bbf6ae9875 100644
--- a/gcc/config/i386/i386.c
+++ b/gcc/config/i386/i386.c
@@ -5914,6 +5914,8 @@ output_indirect_thunk (unsigned int regno)
 }
 
   fputs ("\tret\n", asm_out_file);
+  if ((ix86_harden_sls & harden_sls_return))
+fputs ("\tint3\n", asm_out_file);
 }
 
 /* Output a funtion with a call and return thunk for indirect branch.
@@ -15984,6 +15986,8 @@ ix86_output_jmp_thunk_or_indirect (const char 
*thunk_name, const int regno)
   fprintf (asm_out_file, "\tjmp\t");
   assemble_name (asm_out_file, thunk_name);
   putc ('\n', asm_out_file);
+  if ((ix86_harden_sls & harden_sls_indirect_branch))
+   fputs ("\tint3\n", asm_out_file);
 }
   else
 output_indirect_thunk (regno);
@@ -16206,10 +16210,10 @@ ix86_output_indirect_jmp (rtx call_op)
gcc_unreachable ();
 
   ix86_output_indirect_branch (call_op, "%0", true);
-  return "";
 }
   else
-return "%!jmp\t%A0";
+output_asm_insn ("%!jmp\t%A0", &call_op);
+  return (ix86_harden_sls & harden_sls_indirect_branch) ? "int3" : "";
 }
 
 /* Output return instrumentation for current function if needed.  */
@@ -16277,10 +16281,10 @@ ix86_output_function_return (bool long_p)
   return "";
 }
 
-  if (!long_p)
-return "%!ret";
-
-  return "rep%; ret";
+  if ((ix86_harden_sls & harden_sls_return))
+long_p = false;
+  output_asm_insn (long_p ? "rep%; ret" : "%!ret", nullptr);
+  return (ix86_harden_sls & harden_sls_return) ? "int3" : "";
 }
 
 /* Output indirect function return.  RET_OP is the function return
@@ -16375,7 +16379,12 @@ ix86_output_call_insn (rtx_insn *insn, rtx call_op)
   if (output_indirect_p && !direct_p)
ix86_output_indirect_branch (call_op, xasm, true);
   else
-   output_asm_insn (xasm, &call_op);
+   {
+ output_asm_insn (xasm, &call_op);
+ if (!direct_p
+ && (ix86_harden_sls & harden_sls_indirect_branch))
+   return "int3";
+   }
   return "";
 }
 
diff --git a/gcc/config/i386/i386.opt b/gcc/config/i386/i386.opt
index 46fad3cc038..8d499a5a4df 100644
--- a/gcc/config/i386/i386.opt
+++ b/gcc/config/i386/i386.opt
@@ -1117,6 +1117,26 @@ mrecord-return
 Target Var(ix86_flag_record_return) Init(0)
 Generate a __return_loc section pointing to all return instrumentation code.
 
+mharden-sls=
+Target RejectNegative Joined Enum(harden_sls) Var(ix86_harden_sls) 
Init(harden_sls_none)
+Generate code to mitigate against straight line speculation.
+
+Enum

Re: [PATCH] handle folded nonconstant array bounds [PR101702]

2021-11-17 Thread Marek Polacek via Gcc-patches
On Tue, Nov 16, 2021 at 05:32:00PM -0700, Martin Sebor via Gcc-patches wrote:
> -Warray-parameter and -Wvla-parameter assume that array bounds
> in function parameters are either constant integers or variable,
> but not something in between like a cast of a constant that's
> not recognized as an INTEGER_CST until we strip the cast from
> it.  This leads to an ICE as the internal checks fail.
> 
> The attached patch fixes the problem by stripping the casts
> earlier than before, preventing the inconsistency.  In addition,
> it also folds the array bound, avoiding a class of false
> positives and negatives that not doing so would lead to otherwise.
> 
> Tested on x86_64-linux.
> 
> Martin

> Handle folded nonconstant array bounds [PR101702]
> 
> PR c/101702 - ICE: in handle_argspec_attribute, at c-family/c-attribs.c:3623
> 
> gcc/c/ChangeLog:
> 
>   PR c/101702
>   * c-decl.c (get_parm_array_spec): Strip casts earlier and fold array
>   bounds before deciding if they're constant.
> 
> gcc/testsuite/ChangeLog:
> 
>   PR c/101702
>   * gcc.dg/Warray-parameter-11.c: New test.
> 
> diff --git a/gcc/c/c-decl.c b/gcc/c/c-decl.c
> index 186fa1692c1..63d806a84c9 100644
> --- a/gcc/c/c-decl.c
> +++ b/gcc/c/c-decl.c
> @@ -5866,6 +5866,12 @@ get_parm_array_spec (const struct c_parm *parm, tree 
> attrs)
>if (pd->u.array.static_p)
>   spec += 's';
>  
> +  if (!INTEGRAL_TYPE_P (TREE_TYPE (nelts)))
> + /* Avoid invalid NELTS.  */
> + return attrs;
> +
> +  STRIP_NOPS (nelts);
> +  nelts = c_fully_fold (nelts, false, nullptr);

STRIP_NOPS before a call to c_fully_fold looks sort of weird, but I see
it's needed to prevent bogus warnings in Wvla-parameter-12.c:

void f2ci_can (const int m, char a[m]);
void f2ci_can (int n,   char a[n])

OK for trunk then.

>if (TREE_CODE (nelts) == INTEGER_CST)
>   {
> /* Skip all constant bounds except the most significant one.
> @@ -5883,13 +5889,9 @@ get_parm_array_spec (const struct c_parm *parm, tree 
> attrs)
> spec += buf;
> break;
>   }
> -  else if (!INTEGRAL_TYPE_P (TREE_TYPE (nelts)))
> - /* Avoid invalid NELTS.  */
> - return attrs;
>  
>/* Each variable VLA bound is represented by a dollar sign.  */
>spec += "$";
> -  STRIP_NOPS (nelts);
>vbchain = tree_cons (NULL_TREE, nelts, vbchain);
>  }
>  
> diff --git a/gcc/testsuite/gcc.dg/Warray-parameter-11.c 
> b/gcc/testsuite/gcc.dg/Warray-parameter-11.c
> new file mode 100644
> index 000..8ca1b55bd28
> --- /dev/null
> +++ b/gcc/testsuite/gcc.dg/Warray-parameter-11.c
> @@ -0,0 +1,24 @@
> +/* PR c/101702 - ICE on invalid function redeclaration
> +   { dg-do compile }
> +   { dg-options "-Wall" } */
> +
> +typedef __INTPTR_TYPE__ intptr_t;
> +
> +#define copysign(x, y) __builtin_copysign (x, y)
> +
> +void f0 (double[!copysign (~2, 3)]);
> +
> +void f1 (double[!copysign (~2, 3)]);
> +void f1 (double[1]);// { dg-warning "-Warray-parameter" }
> +
> +void f2 (int[(int)+1.0]);
> +void f2 (int[(int)+1.1]);
> +
> +/* Also verify that equivalent expressions don't needlessly cause false
> +   positives or negatives.  */
> +struct S { int a[1]; };
> +extern struct S *sp;
> +
> +void f3 (int[(intptr_t)((char*)sp->a - (char*)sp)]);
> +void f3 (int[(intptr_t)((char*)&sp->a[0] - (char*)sp)]);
> +void f3 (int[(intptr_t)((char*)&sp->a[1] - (char*)sp)]);   // { dg-warning 
> "-Warray-parameter" }


Marek



Re: [PATCH v1 2/8] RISC-V: costs: handle BSWAP

2021-11-17 Thread Kito Cheng via Gcc-patches
> diff --git a/gcc/config/riscv/riscv.c b/gcc/config/riscv/riscv.c
> index c77b0322869..8480cf09294 100644
> --- a/gcc/config/riscv/riscv.c
> +++ b/gcc/config/riscv/riscv.c
> @@ -2131,6 +2131,14 @@ riscv_rtx_costs (rtx x, machine_mode mode, int 
> outer_code, int opno ATTRIBUTE_UN
>*total = riscv_extend_cost (XEXP (x, 0), GET_CODE (x) == ZERO_EXTEND);
>return false;
>
> +case BSWAP:
> +  if (TARGET_ZBB)
> +   {
> + *total = COSTS_N_INSNS (1);

Add a cost model for HImode? maybe `*total = COSTS_N_INSNS (mode ==
HImode ? 2 : 1);` ?


Re: [PATCH v1 1/8] bswap: synthesize HImode bswap from SImode or DImode

2021-11-17 Thread Kito Cheng via Gcc-patches
Hi Philipp:

I would suggest adding a define_expand pattern for bswaphi2 rather than
changing expand_unop with following reasons:

- There is a comment above this change, and it also tried widen_bswap
after this if-block,
  so I think this patch is kind of violating this comment.
 /* HImode is special because in this mode BSWAP is equivalent to ROTATE
or ROTATERT.  First try these directly; if this fails, then try the
obvious pair of shifts with allowed widening, as this will probably
be always more efficient than the other fallback methods.  */

- This change doesn't improve the code gen without bswapsi2 or bswapdi2,
  (e.g. rv64gc results in the same code) and this might also affect other targets,
  but we didn't have evidence it will always get better results, so I guess at
  least we should add a target hook for this.

- ...I didn't have permission to approve this change since it's not
part of RISC-V back-end :p

On Thu, Nov 11, 2021 at 10:10 PM Philipp Tomsich
 wrote:
>
> The RISC-V Zbb extension adds an XLEN (i.e. SImode for rv32, DImode
> for rv64) bswap instruction (rev8).  While, with the current master,
> SImode is synthesized correctly from DImode, HImode is not.
>
> This change adds an appropriate expansion for a HImode bswap, if a
> wider bswap is available.
>
> Without this change, the following rv64gc_zbb code is generated for
> __builtin_bswap16():
> slliw   a5,a0,8
> zext.h  a0,a0
> srliw   a0,a0,8
> or  a0,a5,a0
> sext.h  a0,a0  // this is a 16bit sign-extension following
>// the byteswap (e.g. on a 'short' function
>// return).
>
> After this change, a bswap (rev8) is used and any extensions are
> combined into the shift-right:
> rev8a0,a0
> sraia0,a0,48   // the sign-extension is combined into the
>// shift; a srli is emitted otherwise...
>
> gcc/ChangeLog:
>
> * optabs.c (expand_unop): support expanding a HImode bswap
>   using SImode or DImode, followed by a shift.
>
> gcc/testsuite/ChangeLog:
>
> * gcc.target/riscv/zbb-bswap.c: New test.
>
> Signed-off-by: Philipp Tomsich 
> ---
>
>  gcc/optabs.c   |  6 ++
>  gcc/testsuite/gcc.target/riscv/zbb-bswap.c | 22 ++
>  2 files changed, 28 insertions(+)
>  create mode 100644 gcc/testsuite/gcc.target/riscv/zbb-bswap.c
>
> diff --git a/gcc/optabs.c b/gcc/optabs.c
> index 019bbb62882..7a3ffbe4525 100644
> --- a/gcc/optabs.c
> +++ b/gcc/optabs.c
> @@ -3307,6 +3307,12 @@ expand_unop (machine_mode mode, optab unoptab, rtx 
> op0, rtx target,
> return temp;
> }
>
> + /* If we are missing a HImode BSWAP, but have one for SImode or
> +DImode, use a BSWAP followed by a SHIFT.  */
> + temp = widen_bswap (as_a <scalar_int_mode> (mode), op0, target);
> + if (temp)
> +   return temp;
> +
>   last = get_last_insn ();
>
>   temp1 = expand_binop (mode, ashl_optab, op0,
> diff --git a/gcc/testsuite/gcc.target/riscv/zbb-bswap.c 
> b/gcc/testsuite/gcc.target/riscv/zbb-bswap.c
> new file mode 100644
> index 000..6ee27d9f47a
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/riscv/zbb-bswap.c
> @@ -0,0 +1,22 @@
> +/* { dg-do compile } */
> +/* { dg-options "-march=rv64gc_zbb -mabi=lp64 -O2" } */
> +
> +unsigned long
> +func64 (unsigned long i)
> +{
> +  return __builtin_bswap64(i);
> +}
> +
> +unsigned int
> +func32 (unsigned int i)
> +{
> +  return __builtin_bswap32(i);
> +}
> +
> +unsigned short
> +func16 (unsigned short i)
> +{
> +  return __builtin_bswap16(i);
> +}
> +
> +/* { dg-final { scan-assembler-times "rev8" 3 } } */
> --
> 2.32.0
>


Re: [PATCH] x86: Add -mharden-sls=[none|all|return|indirect-branch]

2021-11-17 Thread Uros Bizjak via Gcc-patches
On Wed, Nov 17, 2021 at 2:46 PM H.J. Lu  wrote:
>
> On Wed, Nov 17, 2021 at 1:05 AM Uros Bizjak  wrote:
> >
> > On Tue, Nov 16, 2021 at 7:20 PM H.J. Lu via Gcc-patches
> >  wrote:
> > >
> > > Add -mharden-sls= to mitigate against straight line speculation (SLS)
> > > for function return and indirect branch by adding an INT3 instruction
> > > after function return and indirect branch.
> > >
> > > gcc/
> > >
> > > PR target/102952
> > > * config/i386/i386-opts.h (harden_sls): New enum.
> > > * config/i386/i386.c (output_indirect_thunk): Mitigate against
> > > SLS for function return.
> > > (ix86_output_function_return): Likewise.
> > > (ix86_output_jmp_thunk_or_indirect): Mitigate against indirect
> > > branch.
> > > (ix86_output_indirect_jmp): Likewise.
> > > (ix86_output_call_insn): Likewise.
> > > * config/i386/i386.opt: Add -mharden-sls=.
> > > * doc/invoke.texi: Document -mharden-sls=.
> > >
> > > gcc/testsuite/
> > >
> > > PR target/102952
> > > * gcc.target/i386/harden-sls-1.c: New test.
> > > * gcc.target/i386/harden-sls-2.c: Likewise.
> > > * gcc.target/i386/harden-sls-3.c: Likewise.
> > > * gcc.target/i386/harden-sls-4.c: Likewise.
> > > ---
> > >  gcc/config/i386/i386-opts.h  |  7 +
> > >  gcc/config/i386/i386.c   | 30 
> > >  gcc/config/i386/i386.opt | 20 +
> > >  gcc/doc/invoke.texi  | 10 ++-
> > >  gcc/testsuite/gcc.target/i386/harden-sls-1.c | 14 +
> > >  gcc/testsuite/gcc.target/i386/harden-sls-2.c | 14 +
> > >  gcc/testsuite/gcc.target/i386/harden-sls-3.c | 14 +
> > >  gcc/testsuite/gcc.target/i386/harden-sls-4.c | 14 +
> > >  8 files changed, 116 insertions(+), 7 deletions(-)
> > >  create mode 100644 gcc/testsuite/gcc.target/i386/harden-sls-1.c
> > >  create mode 100644 gcc/testsuite/gcc.target/i386/harden-sls-2.c
> > >  create mode 100644 gcc/testsuite/gcc.target/i386/harden-sls-3.c
> > >  create mode 100644 gcc/testsuite/gcc.target/i386/harden-sls-4.c
> > >
> > > diff --git a/gcc/config/i386/i386-opts.h b/gcc/config/i386/i386-opts.h
> > > index 04e4ad608fb..171d3106d0a 100644
> > > --- a/gcc/config/i386/i386-opts.h
> > > +++ b/gcc/config/i386/i386-opts.h
> > > @@ -121,4 +121,11 @@ enum instrument_return {
> > >instrument_return_nop5
> > >  };
> > >
> > > +enum harden_sls {
> > > +  harden_sls_none = 0,
> > > +  harden_sls_return = 1 << 0,
> > > +  harden_sls_indirect_branch = 1 << 1,
> > > +  harden_sls_all = harden_sls_return | harden_sls_indirect_branch
> > > +};
> > > +
> > >  #endif
> > > diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c
> > > index cc9f9322fad..0a902d66321 100644
> > > --- a/gcc/config/i386/i386.c
> > > +++ b/gcc/config/i386/i386.c
> > > @@ -5914,6 +5914,8 @@ output_indirect_thunk (unsigned int regno)
> > >  }
> > >
> > >fputs ("\tret\n", asm_out_file);
> > > +  if ((ix86_harden_sls & harden_sls_return))
> > > +fputs ("\tint3\n", asm_out_file);
> > >  }
> > >
> > >  /* Output a funtion with a call and return thunk for indirect branch.
> > > @@ -15987,6 +15989,8 @@ ix86_output_jmp_thunk_or_indirect (const char 
> > > *thunk_name, const int regno)
> > >fprintf (asm_out_file, "\tjmp\t");
> > >assemble_name (asm_out_file, thunk_name);
> > >putc ('\n', asm_out_file);
> > > +  if ((ix86_harden_sls & harden_sls_indirect_branch))
> > > +   fputs ("\tint3\n", asm_out_file);
> > >  }
> > >else
> > >  output_indirect_thunk (regno);
> > > @@ -16212,10 +16216,14 @@ ix86_output_indirect_jmp (rtx call_op)
> > > gcc_unreachable ();
> > >
> > >ix86_output_indirect_branch (call_op, "%0", true);
> > > -  return "";
> > > +  if ((ix86_harden_sls & harden_sls_indirect_branch))
> > > +   return "int3";
> > > +  else
> > > +   return "";
> > >  }
> > >else
> > > -return "%!jmp\t%A0";
> > > +return ((ix86_harden_sls & harden_sls_indirect_branch)
> > > +   ? "%!jmp\t%A0\n\tint3" : "%!jmp\t%A0");
> > >  }
> >
> > Just change existing returns to fputs and end function with:
> >
> > return (ix86_harden_sls & harden_sls_indirect_branch) ? "int3" : "";
>
> But fputs doesn't support %A0.

Sorry for the thinko, output_asm_insn instead of fputs will do the trick.

Uros.

>
> > >  /* Output return instrumentation for current function if needed.  */
> > > @@ -16283,10 +16291,15 @@ ix86_output_function_return (bool long_p)
> > >return "";
> > >  }
> > >
> > > -  if (!long_p)
> > > -return "%!ret";
> > > +  if ((ix86_harden_sls & harden_sls_return))
> > > +return "%!ret\n\tint3";
> > > +  else
> > > +{
> > > +  if (!long_p)
> > > +   return "%!ret";
> > >
> > > -  return "rep%; ret";
> > > +  return "rep%; ret";
> > > +}
> > >  }
> >
> > Also here.
>
> But 

Re: [PATCH v1 0/2] Basic support for the Ventana VT1 w/ instruction fusion

2021-11-17 Thread Kito Cheng via Gcc-patches
Hi Philipp:

This patch set LGTM, feel free to commit once addressed those issues.

On Mon, Nov 15, 2021 at 5:48 AM Philipp Tomsich
 wrote:
>
>
> This series provides support for the Ventana VT1 (a 4-way superscalar
> rv64gc_zba_zbb_zbc_zbs core) including support for the supported
> instruction fusion patterns.
>
> This includes the addition of the fusion-aware scheduling
> infrastructure for RISC-V and implements idiom recognition for the
> fusion patterns supported by VT1.
>
>
> Philipp Tomsich (2):
>   RISC-V: Add basic support for the Ventana-VT1 core
>   RISC-V: Add instruction fusion (for ventana-vt1)
>
>  gcc/config/riscv/riscv-cores.def |   2 +
>  gcc/config/riscv/riscv-opts.h|   3 +-
>  gcc/config/riscv/riscv.c | 210 +++
>  gcc/config/riscv/riscv.md|   2 +-
>  gcc/doc/invoke.texi  |   4 +-
>  5 files changed, 217 insertions(+), 4 deletions(-)
>
> --
> 2.32.0
>


Re: [PATCH v1 2/2] RISC-V: Add instruction fusion (for ventana-vt1)

2021-11-17 Thread Kito Cheng via Gcc-patches
Hi Philipp:

Thanks for the patch, I like this approach, that can easily configure
different capabilities for each core :)

So there are only a few minor comments for this patch.

On Mon, Nov 15, 2021 at 5:49 AM Philipp Tomsich
 wrote:
>
> From: Philipp Tomsich 
>
> The Ventana VT1 core supports quad-issue and instruction fusion.
> This implements TARGET_SCHED_MACRO_FUSION_P to keep fusible sequences
> together and adds idiom matching for the supported fusion cases.
>
> gcc/ChangeLog:
>
> * config/riscv/riscv.c (enum riscv_fusion_pairs): Add symbolic
> constants to identify supported fusion patterns.
> (struct riscv_tune_param): Add fusible_op field.
> (riscv_macro_fusion_p): Implement.
> (riscv_fusion_enabled_p): Implement.
> (riscv_macro_fusion_pair_p): Implement and recognize fusible
> idioms for Ventana VT1.
> (TARGET_SCHED_MACRO_FUSION_P): Point to riscv_macro_fusion_p.
> (TARGET_SCHED_MACRO_FUSION_PAIR_P): Point to 
> riscv_macro_fusion_pair_p.
>
> Signed-off-by: Philipp Tomsich 
> ---
>
>  gcc/config/riscv/riscv.c | 196 +++
>  1 file changed, 196 insertions(+)
>
> diff --git a/gcc/config/riscv/riscv.c b/gcc/config/riscv/riscv.c
> index 6b918db65e9..8eac52101a3 100644
> --- a/gcc/config/riscv/riscv.c
> +++ b/gcc/config/riscv/riscv.c
> @@ -211,6 +211,19 @@ struct riscv_integer_op {
> The worst case is LUI, ADDI, SLLI, ADDI, SLLI, ADDI, SLLI, ADDI.  */
>  #define RISCV_MAX_INTEGER_OPS 8
>
> +enum riscv_fusion_pairs
> +{
> +  RISCV_FUSE_NOTHING = 0,
> +  RISCV_FUSE_ZEXTW = (1 << 0),
> +  RISCV_FUSE_ZEXTH = (1 << 1),
> +  RISCV_FUSE_ZEXTWS = (1 << 2),
> +  RISCV_FUSE_LDINDEXED = (1 << 3),

RISCV_FUSE_LDINDEXED -> RISCV_FUSE_LD_INDEXED

Could you add some comment for above enums, like that:
/* slli rx, rx, 32 + srli rx, rx, 32 */
RISCV_FUSE_ZEXTW

So that we could know what kind of instructions will be fused for this enum.

> +  RISCV_FUSE_LUI_ADDI = (1 << 4),
> +  RISCV_FUSE_AUIPC_ADDI = (1 << 5),
> +  RISCV_FUSE_LUI_LD = (1 << 6),
> +  RISCV_FUSE_AUIPC_LD = (1 << 7),
> +};
> +
>  /* Costs of various operations on the different architectures.  */
>
>  struct riscv_tune_param
> @@ -224,6 +237,7 @@ struct riscv_tune_param
>unsigned short branch_cost;
>unsigned short memory_cost;
>bool slow_unaligned_access;
> +  unsigned int fusible_ops;
>  };
>
>  /* Information about one micro-arch we know about.  */
> @@ -289,6 +303,7 @@ static const struct riscv_tune_param rocket_tune_info = {
>3,   /* branch_cost */
>5,   /* memory_cost */
>true,/* 
> slow_unaligned_access */
> +  RISCV_FUSE_NOTHING,   /* fusible_ops */
>  };
>
>  /* Costs to use when optimizing for Sifive 7 Series.  */
> @@ -302,6 +317,7 @@ static const struct riscv_tune_param sifive_7_tune_info = 
> {
>4,   /* branch_cost */
>3,   /* memory_cost */
>true,/* 
> slow_unaligned_access */
> +  RISCV_FUSE_NOTHING,   /* fusible_ops */
>  };
>
>  /* Costs to use when optimizing for T-HEAD c906.  */
> @@ -328,6 +344,7 @@ static const struct riscv_tune_param 
> optimize_size_tune_info = {
>1,   /* branch_cost */
>2,   /* memory_cost */
>false,   /* slow_unaligned_access */
> +  RISCV_FUSE_NOTHING,   /* fusible_ops */
>  };
>
>  /* Costs to use when optimizing for Ventana Micro VT1.  */
> @@ -341,6 +358,10 @@ static const struct riscv_tune_param 
> ventana_vt1_tune_info = {
>4,   /* branch_cost */
>5,   /* memory_cost */
>false,   /* slow_unaligned_access */
> +  ( RISCV_FUSE_ZEXTW | RISCV_FUSE_ZEXTH |   /* fusible_ops */
> +RISCV_FUSE_ZEXTWS | RISCV_FUSE_LDINDEXED |
> +RISCV_FUSE_LUI_ADDI | RISCV_FUSE_AUIPC_ADDI |
> +RISCV_FUSE_LUI_LD | RISCV_FUSE_AUIPC_LD )
>  };
>
>  static tree riscv_handle_fndecl_attribute (tree *, tree, tree, int, bool *);
> @@ -4909,6 +4930,177 @@ riscv_issue_rate (void)
>return tune_param->issue_rate;
>  }
>
> +/* Implement TARGET_SCHED_MACRO_FUSION_P.  Return true if target supports
> +   instruction fusion of some sort.  */
> +
> +static bool
> +riscv_macro_fusion_p (void)
> +{
> +  return tune_param->fusible_ops != RISCV_FUSE_NOTHING;
> +}
> +
> +/* Return true iff the instruction fusion described by OP is enabled.  */
> +
> +static bool
> +riscv_fusion_enabled_p(enum riscv_fusion_pairs op)

space between function name and parentheses.

riscv_fusion_enabled_p (enum riscv_fusion_pairs op)

> 

RE: [PATCH][GCC] aarch64: Add new vector mode V8DI

2021-11-17 Thread Przemyslaw Wirkus via Gcc-patches



> -Original Message-
> From: Richard Sandiford 
> Sent: 17 November 2021 10:08
> To: Przemyslaw Wirkus 
> Cc: gcc-patches@gcc.gnu.org; Richard Earnshaw
> ; Kyrylo Tkachov ;
> Marcus Shawcroft 
> Subject: Re: [PATCH][GCC] aarch64: Add new vector mode V8DI
> 
> Oops, only just realised that I hadn't reviewed this.
> 
> Przemyslaw Wirkus  writes:
> > Hi,
> > This patch is adding new V8DI mode which will be used with new
> > Armv8.7-A
> > LS64 extension intrinsics.
> >
> > Regtested on aarch64-elf and no issues.
> >
> > OK for master?
> >
> > gcc/ChangeLog:
> >
> > 2021-11-10  Przemyslaw Wirkus  
> >
> > * config/aarch64/aarch64-modes.def (VECTOR_MODE): New V8DI
> mode.
> > * config/aarch64/aarch64.c (aarch64_hard_regno_mode_ok): Handle
> > V8DImode.
> > * config/aarch64/iterators.md (define_mode_attr nunits): Add entry
> > for V8DI.
> >
> > Kind regards,
> > Przemyslaw Wirkus
> >
> > ---
> >
> > diff --git a/gcc/config/aarch64/aarch64-modes.def
> > b/gcc/config/aarch64/aarch64-modes.def
> > index
> >
> ac97d222789c6701d858c014736f8c211512a4d9..62595b8af6e1eea8fc769885
> bba9
> > fe54f0a9ec05 100644
> > --- a/gcc/config/aarch64/aarch64-modes.def
> > +++ b/gcc/config/aarch64/aarch64-modes.def
> > @@ -81,6 +81,11 @@ INT_MODE (OI, 32);
> >  INT_MODE (CI, 48);
> >  INT_MODE (XI, 64);
> >
> > +/* V8DI mode.  */
> > +VECTOR_MODE_WITH_PREFIX (V, INT, DI, 8, 5); \
> > +  \
> > +  ADJUST_ALIGNMENT (V8DI, 8);
> 
> The backslashes aren't needed here, can just be:
> 
> VECTOR_MODE_WITH_PREFIX (V, INT, DI, 8, 5);
> 
> ADJUST_ALIGNMENT (V8DI, 8);
> 
> > +
> >  /* Define Advanced SIMD modes for structures of 2, 3 and 4
> > d-registers.  */  #define ADV_SIMD_D_REG_STRUCT_MODES(NVECS, VB,
> VH, VS, VD) \
> >VECTOR_MODES_WITH_PREFIX (V##NVECS##x, INT, 8, 3); \ diff --git
> > a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c index
> >
> 69f08052ce808c140ed2933ab6b2e2617ca6f669..0e102a83a8dc34e715fafb58
> 1698
> > 97b12c9b3a20 100644
> > --- a/gcc/config/aarch64/aarch64.c
> > +++ b/gcc/config/aarch64/aarch64.c
> > @@ -3376,6 +3376,9 @@ aarch64_hard_regno_nregs (unsigned regno,
> > machine_mode mode)  static bool  aarch64_hard_regno_mode_ok
> (unsigned
> > regno, machine_mode mode)  {
> > +  if (mode == V8DImode)
> > +return IN_RANGE (regno, R0_REGNUM, R23_REGNUM);
> 
> As you pointed out off-list, this should also check for even registers:
> 
> return (IN_RANGE (regno, R0_REGNUM, R23_REGNUM)
>   && multiple_p (regno - R0_REGNUM, 2));
> 
> OK with those changes, thanks.

Thank you.

Installed with changes:
commit dd159a4167ca19b5ff26e7156333c88e854943bf

/Przemek

> Richard
> 
> > +
> >if (GET_MODE_CLASS (mode) == MODE_CC)
> >  return regno == CC_REGNUM;
> >
> > diff --git a/gcc/config/aarch64/iterators.md
> > b/gcc/config/aarch64/iterators.md index
> >
> bdc8ba3576cf2c9b4ae96b45a382234e4e25b13f..cea277f3a03cfd20178e51e6
> abd7
> > e256e206299f 100644
> > --- a/gcc/config/aarch64/iterators.md
> > +++ b/gcc/config/aarch64/iterators.md
> > @@ -1053,7 +1053,7 @@ (define_mode_attr vas [(DI "") (SI ".2s")])
> > (define_mode_attr nunits [(V8QI "8") (V16QI "16")
> >   (V4HI "4") (V8HI "8")
> >   (V2SI "2") (V4SI "4")
> > -(V2DI "2")
> > + (V2DI "2") (V8DI "8")
> >   (V4HF "4") (V8HF "8")
> >   (V4BF "4") (V8BF "8")
> >   (V2SF "2") (V4SF "4")


Re: [PATCH] x86: Add -mindirect-branch-cs-prefix

2021-11-17 Thread H.J. Lu via Gcc-patches
On Wed, Nov 17, 2021 at 1:10 AM Uros Bizjak  wrote:
>
> On Tue, Nov 16, 2021 at 7:51 PM H.J. Lu via Gcc-patches
>  wrote:
> >
> > Add -mindirect-branch-cs-prefix to add CS prefix to call and jmp to thunk
> > via r8-r15 registers when converting indirect call and jump to increase
> > the instruction length to 6, allowing the non-thunk form to be inlined.
> >
> > gcc/
> >
> > PR target/102952
> > * config/i386/i386.c (ix86_output_jmp_thunk_or_indirect): Emit
> > CS prefix for -mindirect-branch-cs-prefix.
> > (ix86_output_indirect_branch_via_reg): Likewise.
> > * config/i386/i386.opt: Add -mindirect-branch-cs-prefix.
> > * doc/invoke.texi: Document -mindirect-branch-cs-prefix.
> >
> > gcc/testsuite/
> >
> > PR target/102952
> > * gcc.target/i386/indirect-thunk-cs-prefix-1.c: New test.
> > * gcc.target/i386/indirect-thunk-cs-prefix-2.c: Likewise.
> > ---
> >  gcc/config/i386/i386.c|  6 ++
> >  gcc/config/i386/i386.opt  |  4 
> >  gcc/doc/invoke.texi   |  8 +++-
> >  .../gcc.target/i386/indirect-thunk-cs-prefix-1.c  | 14 ++
> >  .../gcc.target/i386/indirect-thunk-cs-prefix-2.c  | 15 +++
> >  5 files changed, 46 insertions(+), 1 deletion(-)
> >  create mode 100644 
> > gcc/testsuite/gcc.target/i386/indirect-thunk-cs-prefix-1.c
> >  create mode 100644 
> > gcc/testsuite/gcc.target/i386/indirect-thunk-cs-prefix-2.c
> >
> > diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c
> > index 7e9b7bc347f..0a902d66321 100644
> > --- a/gcc/config/i386/i386.c
> > +++ b/gcc/config/i386/i386.c
> > @@ -15983,6 +15983,9 @@ ix86_output_jmp_thunk_or_indirect (const char 
> > *thunk_name, const int regno)
> >  {
> >if (thunk_name != NULL)
> >  {
> > +  if (regno >= FIRST_REX_INT_REG
>
>  REX_INT_REGNO_P

Fixed in the v2 patch.

> > + && ix86_indirect_branch_cs_prefix)
> > +   fprintf (asm_out_file, "\tcs\n");
> >fprintf (asm_out_file, "\tjmp\t");
> >assemble_name (asm_out_file, thunk_name);
> >putc ('\n', asm_out_file);
> > @@ -16036,6 +16039,9 @@ ix86_output_indirect_branch_via_reg (rtx call_op, 
> > bool sibcall_p)
> >  {
> >if (thunk_name != NULL)
> > {
> > + if (regno >= FIRST_REX_INT_REG
>
>  REX_INT_REGNO_P

Fixed in the v2 patch.

> > + && ix86_indirect_branch_cs_prefix)
> > +   fprintf (asm_out_file, "\tcs\n");
> >   fprintf (asm_out_file, "\tcall\t");
> >   assemble_name (asm_out_file, thunk_name);
> >   putc ('\n', asm_out_file);
> > diff --git a/gcc/config/i386/i386.opt b/gcc/config/i386/i386.opt
> > index 8d499a5a4df..c5452c49597 100644
> > --- a/gcc/config/i386/i386.opt
> > +++ b/gcc/config/i386/i386.opt
> > @@ -1076,6 +1076,10 @@ Enum(indirect_branch) String(thunk-inline) 
> > Value(indirect_branch_thunk_inline)
> >  EnumValue
> >  Enum(indirect_branch) String(thunk-extern) 
> > Value(indirect_branch_thunk_extern)
> >
> > +mindirect-branch-cs-prefix
> > +Target Var(ix86_indirect_branch_cs_prefix) Init(0)
> > +Add CS prefix to call and jmp to thunk when converting indirect call and 
> > jump.
>
> This is not what the function really does. It adds cs to REX prefixed regs.

Fixed in the v2 patch.

Thanks.

> > +
> >  mindirect-branch-register
> >  Target Var(ix86_indirect_branch_register) Init(0)
> >  Force indirect call and jump via register.
> > diff --git a/gcc/doc/invoke.texi b/gcc/doc/invoke.texi
> > index f3b4b467765..c992a7152f5 100644
> > --- a/gcc/doc/invoke.texi
> > +++ b/gcc/doc/invoke.texi
> > @@ -1425,7 +1425,8 @@ See RS/6000 and PowerPC Options.
> >  -mstack-protector-guard-symbol=@var{symbol} @gol
> >  -mgeneral-regs-only  -mcall-ms2sysv-xlogues -mrelax-cmpxchg-loop @gol
> >  -mindirect-branch=@var{choice}  -mfunction-return=@var{choice} @gol
> > --mindirect-branch-register -mharden-sls=@var{choice} -mneeded}
> > +-mindirect-branch-register -mharden-sls=@var{choice} @gol
> > +-mindirect-branch-cs-prefix -mneeded}
> >
> >  @emph{x86 Windows Options}
> >  @gccoptlist{-mconsole  -mcygwin  -mno-cygwin  -mdll @gol
> > @@ -32390,6 +32391,11 @@ hardening.  @samp{return} enables SLS hardening 
> > for function return.
> >  @samp{indirect-branch} enables SLS hardening for indirect branch.
> >  @samp{all} enables all SLS hardening.
> >
> > +@item -mindirect-branch-cs-prefix
> > +@opindex mindirect-branch-cs-prefix
> > +Add CS prefix to call and jmp to thunk via r8-r15 registers when
> > +converting indirect call and jump.
> > +
> >  @end table
> >
> >  These @samp{-m} switches are supported in addition to the above
> > diff --git a/gcc/testsuite/gcc.target/i386/indirect-thunk-cs-prefix-1.c 
> > b/gcc/testsuite/gcc.target/i386/indirect-thunk-cs-prefix-1.c
> > new file mode 100644
> > index 000..db2f3416823
> > --- /dev/null
> > +++ b/gcc/testsuite/gcc.target/i386/indirect-thunk-cs-prefix-1.c
> > @@ 

Re: [PATCH v2] rs6000: Test case adjustments for new builtins

2021-11-17 Thread Bill Schmidt via Gcc-patches


On 11/17/21 6:44 AM, Segher Boessenkool wrote:
> Hi!
>
> On Tue, Nov 16, 2021 at 02:26:22PM -0600, Bill Schmidt wrote:
>> Hi!  I recently submitted [1] to make adjustments to test cases for the new 
>> builtins
>> support, mostly due to error messages changing for consistency.  Thanks for 
>> the
>> previous review.  I've reviewed the reasons for the changes and removed 
>> unrelated
>> changes as requested.
> And the results are?  This is much easier to write up, and to review, if
> you split the patch into pieces with one theme each.  If you do that
> right then most reviews will be rubber-stamping, and some might require
> some thought (and some may even get objections).  The way things are it
> is a puzzle hunt to review this.

Sorry!  I thought I was addressing the issues that came up last time.  I didn't
intend for this to be difficult.  I will break the patch up going forward.

>
>>  - For fold-vec-splat-floatdouble.c and fold-vec-splat-longlong.c, the 
>> existing
>>test cases have some bad tests in them (checking two bits when only one 
>> bit
>>is meaningful).  The new builtin support catches this but the old support 
>> did
>>not.  Removing those bad cases changes some of the scan-assembler-times 
>> expected
>>values.
> Do this in a separate patch then, independent of the series?  With this
> explanation in the commit message.  This is pre-approved.
OK, will do.
>
>>  - For int_128bit-runnable.c, I chose not to do gimple folding on the 128-bit
>>comparison operations in the new implementation, because doing so results 
>> in
>>bad code that splits things into two 64-bit values.  That needs separate
>>attention; but the point here is, when I did that, I started generating
>>more of the vcmpequq, vcmpgtsq, and vcmpgtuq instructions.
> And you now get worse code (albeit in some cases no longer invalid)?

No, sorry that this wasn't more clear.  The "old" builtins code performs
gimple folding on 128-bit compares.  This results in correct but very
inefficient code.  The "new" builtins code has removed the gimple folding
for 128-bit compares.  This results in directly generating vcmpequq and
friends, which is the efficient code we're looking for.  This test case
then needs modification to show we're doing better.  I'll submit this
separately.

>
>
>> --- a/gcc/testsuite/gcc.target/powerpc/bfp/scalar-extract-exp-2.c
>> +++ b/gcc/testsuite/gcc.target/powerpc/bfp/scalar-extract-exp-2.c
>> @@ -14,7 +14,7 @@ get_exponent (double *p)
>>  {
>>double source = *p;
>>  
>> -  return scalar_extract_exp (source);   /* { dg-error 
>> "'__builtin_vec_scalar_extract_exp' is not supported in this compiler 
>> configuration" } */
>> +  return scalar_extract_exp (source);   /* { dg-error 
>> "'__builtin_vsx_scalar_extract_exp' requires the" } */
>>  }
> The testcase uses __builtin_vec_scalar_extract_exp, so this is not okay.

Sorry, this is a case of my bad eyesight not identifying this had changed.
As with the test case (cmpb-3.c) in the 32-bit patch, this error message
isn't all that the user sees.  There is also a "note" diagnostic that ties
the generic overload name to the specific underlying builtin name so that
confusion is avoided.  I'll just submit these separately with a full
explanation.

Same applies to the similar cases below.

>
>> --- a/gcc/testsuite/gcc.target/powerpc/bfp/scalar-extract-sig-2.c
>> +++ b/gcc/testsuite/gcc.target/powerpc/bfp/scalar-extract-sig-2.c
>> @@ -12,5 +12,5 @@ get_significand (double *p)
>>  {
>>double source = *p;
>>  
>> -  return __builtin_vec_scalar_extract_sig (source); /* { dg-error 
>> "'__builtin_vec_scalar_extract_sig' is not supported in this compiler 
>> configuration" } */
>> +  return __builtin_vec_scalar_extract_sig (source); /* { dg-error 
>> "'__builtin_vsx_scalar_extract_sig' requires the" } */
>>  }
> This not either.
>
>> --- a/gcc/testsuite/gcc.target/powerpc/bfp/scalar-insert-exp-2.c
>> +++ b/gcc/testsuite/gcc.target/powerpc/bfp/scalar-insert-exp-2.c
>> @@ -16,5 +16,5 @@ insert_exponent (unsigned long long int *significand_p,
>>unsigned long long int significand = *significand_p;
>>unsigned long long int exponent = *exponent_p;
>>  
>> -  return scalar_insert_exp (significand, exponent); /* { dg-error 
>> "'__builtin_vec_scalar_insert_exp' is not supported in this compiler 
>> configuration" } */
>> +  return scalar_insert_exp (significand, exponent); /* { dg-error 
>> "'__builtin_vsx_scalar_insert_exp' requires the" } */
> Or this.
>
>> --- a/gcc/testsuite/gcc.target/powerpc/bfp/scalar-insert-exp-5.c
>> +++ b/gcc/testsuite/gcc.target/powerpc/bfp/scalar-insert-exp-5.c
>> @@ -16,5 +16,5 @@ insert_exponent (double *significand_p,
>>double significand = *significand_p;
>>unsigned long long int exponent = *exponent_p;
>>  
>> -  return scalar_insert_exp (significand, exponent); /* { dg-error 
>> "'__builtin_vec_scalar_insert_exp' is not supported in this compiler 
>> configuration" } */
>> +  

[PATCH v2] x86: Add -mindirect-branch-cs-prefix

2021-11-17 Thread H.J. Lu via Gcc-patches
Add -mindirect-branch-cs-prefix to add CS prefix to call and jmp to thunk
via r8-r15 registers when converting indirect call and jump to increase
the instruction length to 6, allowing the non-thunk form to be inlined.

gcc/

PR target/102952
* config/i386/i386.c (ix86_output_jmp_thunk_or_indirect): Emit
CS prefix for -mindirect-branch-cs-prefix.
(ix86_output_indirect_branch_via_reg): Likewise.
* config/i386/i386.opt: Add -mindirect-branch-cs-prefix.
* doc/invoke.texi: Document -mindirect-branch-cs-prefix.

gcc/testsuite/

PR target/102952
* gcc.target/i386/indirect-thunk-cs-prefix-1.c: New test.
* gcc.target/i386/indirect-thunk-cs-prefix-2.c: Likewise.
---
 gcc/config/i386/i386.c|  6 ++
 gcc/config/i386/i386.opt  |  4 
 gcc/doc/invoke.texi   |  8 +++-
 .../gcc.target/i386/indirect-thunk-cs-prefix-1.c  | 14 ++
 .../gcc.target/i386/indirect-thunk-cs-prefix-2.c  | 15 +++
 5 files changed, 46 insertions(+), 1 deletion(-)
 create mode 100644 gcc/testsuite/gcc.target/i386/indirect-thunk-cs-prefix-1.c
 create mode 100644 gcc/testsuite/gcc.target/i386/indirect-thunk-cs-prefix-2.c

diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c
index 7e9b7bc347f..ae92df0be2f 100644
--- a/gcc/config/i386/i386.c
+++ b/gcc/config/i386/i386.c
@@ -15983,6 +15983,9 @@ ix86_output_jmp_thunk_or_indirect (const char *thunk_name, const int regno)
 {
   if (thunk_name != NULL)
 {
+  if (REX_INT_REGNO_P (regno)
+ && ix86_indirect_branch_cs_prefix)
+   fprintf (asm_out_file, "\tcs\n");
   fprintf (asm_out_file, "\tjmp\t");
   assemble_name (asm_out_file, thunk_name);
   putc ('\n', asm_out_file);
@@ -16036,6 +16039,9 @@ ix86_output_indirect_branch_via_reg (rtx call_op, bool sibcall_p)
 {
   if (thunk_name != NULL)
{
+ if (REX_INT_REGNO_P (regno)
+ && ix86_indirect_branch_cs_prefix)
+   fprintf (asm_out_file, "\tcs\n");
  fprintf (asm_out_file, "\tcall\t");
  assemble_name (asm_out_file, thunk_name);
  putc ('\n', asm_out_file);
diff --git a/gcc/config/i386/i386.opt b/gcc/config/i386/i386.opt
index 8d499a5a4df..806ffd7b0ac 100644
--- a/gcc/config/i386/i386.opt
+++ b/gcc/config/i386/i386.opt
@@ -1076,6 +1076,10 @@ Enum(indirect_branch) String(thunk-inline) 
Value(indirect_branch_thunk_inline)
 EnumValue
 Enum(indirect_branch) String(thunk-extern) Value(indirect_branch_thunk_extern)
 
+mindirect-branch-cs-prefix
+Target Var(ix86_indirect_branch_cs_prefix) Init(0)
+Add CS prefix to call and jmp to thunk via r8-r15 registers when converting indirect call and jump.
+
 mindirect-branch-register
 Target Var(ix86_indirect_branch_register) Init(0)
 Force indirect call and jump via register.
diff --git a/gcc/doc/invoke.texi b/gcc/doc/invoke.texi
index 0265c160e02..233f3b579d9 100644
--- a/gcc/doc/invoke.texi
+++ b/gcc/doc/invoke.texi
@@ -1427,7 +1427,8 @@ See RS/6000 and PowerPC Options.
 -mstack-protector-guard-symbol=@var{symbol} @gol
 -mgeneral-regs-only  -mcall-ms2sysv-xlogues -mrelax-cmpxchg-loop @gol
 -mindirect-branch=@var{choice}  -mfunction-return=@var{choice} @gol
--mindirect-branch-register -mharden-sls=@var{choice} -mneeded}
+-mindirect-branch-register -mharden-sls=@var{choice} @gol
+-mindirect-branch-cs-prefix -mneeded}
 
 @emph{x86 Windows Options}
 @gccoptlist{-mconsole  -mcygwin  -mno-cygwin  -mdll @gol
@@ -32409,6 +32410,11 @@ hardening.  @samp{return} enables SLS hardening for 
function return.
 @samp{indirect-branch} enables SLS hardening for indirect branch.
 @samp{all} enables all SLS hardening.
 
+@item -mindirect-branch-cs-prefix
+@opindex mindirect-branch-cs-prefix
+Add CS prefix to call and jmp to thunk via r8-r15 registers when
+converting indirect call and jump.
+
 @end table
 
 These @samp{-m} switches are supported in addition to the above
diff --git a/gcc/testsuite/gcc.target/i386/indirect-thunk-cs-prefix-1.c 
b/gcc/testsuite/gcc.target/i386/indirect-thunk-cs-prefix-1.c
new file mode 100644
index 00000000000..db2f3416823
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/indirect-thunk-cs-prefix-1.c
@@ -0,0 +1,14 @@
+/* { dg-do compile { target { ! ia32 } } } */
+/* { dg-options "-O2 -ffixed-rax -ffixed-rbx -ffixed-rcx -ffixed-rdx -ffixed-rdi -ffixed-rsi -mindirect-branch-cs-prefix -mindirect-branch=thunk-extern" } */
+/* { dg-additional-options "-fno-pic" { target { ! *-*-darwin* } } } */
+
+extern void (*fptr) (void);
+
+void
+foo (void)
+{
+  fptr ();
+}
+
+/* { dg-final { scan-assembler-times "jmp\[ \t\]+_?__x86_indirect_thunk_r\[0-9\]+" 1 } } */
+/* { dg-final { scan-assembler-times "\tcs" 1 } } */
diff --git a/gcc/testsuite/gcc.target/i386/indirect-thunk-cs-prefix-2.c 
b/gcc/testsuite/gcc.target/i386/indirect-thunk-cs-prefix-2.c
new file mode 100644
index 000..adfc39a49d4
--- /dev/null
+++ 

Re: [PATCH] x86: Add -mharden-sls=[none|all|return|indirect-branch]

2021-11-17 Thread H.J. Lu via Gcc-patches
On Wed, Nov 17, 2021 at 1:05 AM Uros Bizjak  wrote:
>
> On Tue, Nov 16, 2021 at 7:20 PM H.J. Lu via Gcc-patches
>  wrote:
> >
> > Add -mharden-sls= to mitigate against straight line speculation (SLS)
> > for function return and indirect branch by adding an INT3 instruction
> > after function return and indirect branch.
> >
> > gcc/
> >
> > PR target/102952
> > * config/i386/i386-opts.h (harden_sls): New enum.
> > * config/i386/i386.c (output_indirect_thunk): Mitigate against
> > SLS for function return.
> > (ix86_output_function_return): Likewise.
> > (ix86_output_jmp_thunk_or_indirect): Mitigate against indirect
> > branch.
> > (ix86_output_indirect_jmp): Likewise.
> > (ix86_output_call_insn): Likewise.
> > * config/i386/i386.opt: Add -mharden-sls=.
> > * doc/invoke.texi: Document -mharden-sls=.
> >
> > gcc/testsuite/
> >
> > PR target/102952
> > * gcc.target/i386/harden-sls-1.c: New test.
> > * gcc.target/i386/harden-sls-2.c: Likewise.
> > * gcc.target/i386/harden-sls-3.c: Likewise.
> > * gcc.target/i386/harden-sls-4.c: Likewise.
> > ---
> >  gcc/config/i386/i386-opts.h  |  7 +
> >  gcc/config/i386/i386.c   | 30 
> >  gcc/config/i386/i386.opt | 20 +
> >  gcc/doc/invoke.texi  | 10 ++-
> >  gcc/testsuite/gcc.target/i386/harden-sls-1.c | 14 +
> >  gcc/testsuite/gcc.target/i386/harden-sls-2.c | 14 +
> >  gcc/testsuite/gcc.target/i386/harden-sls-3.c | 14 +
> >  gcc/testsuite/gcc.target/i386/harden-sls-4.c | 14 +
> >  8 files changed, 116 insertions(+), 7 deletions(-)
> >  create mode 100644 gcc/testsuite/gcc.target/i386/harden-sls-1.c
> >  create mode 100644 gcc/testsuite/gcc.target/i386/harden-sls-2.c
> >  create mode 100644 gcc/testsuite/gcc.target/i386/harden-sls-3.c
> >  create mode 100644 gcc/testsuite/gcc.target/i386/harden-sls-4.c
> >
> > diff --git a/gcc/config/i386/i386-opts.h b/gcc/config/i386/i386-opts.h
> > index 04e4ad608fb..171d3106d0a 100644
> > --- a/gcc/config/i386/i386-opts.h
> > +++ b/gcc/config/i386/i386-opts.h
> > @@ -121,4 +121,11 @@ enum instrument_return {
> >instrument_return_nop5
> >  };
> >
> > +enum harden_sls {
> > +  harden_sls_none = 0,
> > +  harden_sls_return = 1 << 0,
> > +  harden_sls_indirect_branch = 1 << 1,
> > +  harden_sls_all = harden_sls_return | harden_sls_indirect_branch
> > +};
> > +
> >  #endif
> > diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c
> > index cc9f9322fad..0a902d66321 100644
> > --- a/gcc/config/i386/i386.c
> > +++ b/gcc/config/i386/i386.c
> > @@ -5914,6 +5914,8 @@ output_indirect_thunk (unsigned int regno)
> >  }
> >
> >fputs ("\tret\n", asm_out_file);
> > +  if ((ix86_harden_sls & harden_sls_return))
> > +fputs ("\tint3\n", asm_out_file);
> >  }
> >
> >  /* Output a funtion with a call and return thunk for indirect branch.
> > @@ -15987,6 +15989,8 @@ ix86_output_jmp_thunk_or_indirect (const char 
> > *thunk_name, const int regno)
> >fprintf (asm_out_file, "\tjmp\t");
> >assemble_name (asm_out_file, thunk_name);
> >putc ('\n', asm_out_file);
> > +  if ((ix86_harden_sls & harden_sls_indirect_branch))
> > +   fputs ("\tint3\n", asm_out_file);
> >  }
> >else
> >  output_indirect_thunk (regno);
> > @@ -16212,10 +16216,14 @@ ix86_output_indirect_jmp (rtx call_op)
> > gcc_unreachable ();
> >
> >ix86_output_indirect_branch (call_op, "%0", true);
> > -  return "";
> > +  if ((ix86_harden_sls & harden_sls_indirect_branch))
> > +   return "int3";
> > +  else
> > +   return "";
> >  }
> >else
> > -return "%!jmp\t%A0";
> > +return ((ix86_harden_sls & harden_sls_indirect_branch)
> > +   ? "%!jmp\t%A0\n\tint3" : "%!jmp\t%A0");
> >  }
>
> Just change existing returns to fputs and end function with:
>
> return (ix86_harden_sls & harden_sls_indirect_branch) ? "int3" : "";

But fputs doesn't support %A0.

> >  /* Output return instrumentation for current function if needed.  */
> > @@ -16283,10 +16291,15 @@ ix86_output_function_return (bool long_p)
> >return "";
> >  }
> >
> > -  if (!long_p)
> > -return "%!ret";
> > +  if ((ix86_harden_sls & harden_sls_return))
> > +return "%!ret\n\tint3";
> > +  else
> > +{
> > +  if (!long_p)
> > +   return "%!ret";
> >
> > -  return "rep%; ret";
> > +  return "rep%; ret";
> > +}
> >  }
>
> Also here.

But fputs doesn't know "%!".

>
> >
> >  /* Output indirect function return.  RET_OP is the function return
> > @@ -16381,7 +16394,12 @@ ix86_output_call_insn (rtx_insn *insn, rtx call_op)
> >if (output_indirect_p && !direct_p)
> > ix86_output_indirect_branch (call_op, xasm, true);
> >else
> > -   output_asm_insn (xasm, &call_op);
> > +   {
> > + 

  1   2   >