[PATCH] i386: Combine the FADD(A, FMA(B, C, 0)) to FMA(B, C, A) and combine FADD(A, FMUL(B, C)) to FMA(B, C, A).

2021-10-21 Thread Kong, Lingling via Gcc-patches
Hi,

This patch adds support for transforming, under fast-math, something like
_mm512_add_ph(x1, _mm512_fmadd_pch(a, b, _mm512_setzero_ph())) to  
_mm512_fmadd_pch(a, b, x1).

It also supports transforming _mm512_add_ph(x1, _mm512_fmul_pch(a, b)) to
_mm512_fmadd_pch(a, b, x1).
Ok for master?

gcc/ChangeLog:

* config/i386/sse.md (fma__fadd_fmul): Add new
define_insn_and_split.
(fma__fadd_fcmul): Likewise.
(fma___fma_zero): Likewise.

gcc/testsuite/ChangeLog:

* gcc.target/i386/avx512fp16-complex-fma.c: New test.
---
 gcc/config/i386/sse.md| 52 +++
 .../gcc.target/i386/avx512fp16-complex-fma.c  | 18 +++
 2 files changed, 70 insertions(+)
 create mode 100644 gcc/testsuite/gcc.target/i386/avx512fp16-complex-fma.c

diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md index 
fbf056bf9e6..36407ca4a59 100644
--- a/gcc/config/i386/sse.md
+++ b/gcc/config/i386/sse.md
@@ -5958,6 +5958,58 @@
   [(set_attr "type" "ssemuladd")
(set_attr "mode" "")])
 
+(define_insn_and_split "fma__fadd_fmul"
+  [(set (match_operand:VF_AVX512FP16VL 0 "register_operand")
+   (plus:VF_AVX512FP16VL
+ (unspec:VF_AVX512FP16VL
+   [(match_operand:VF_AVX512FP16VL 1 "vector_operand")
+(match_operand:VF_AVX512FP16VL 2 "vector_operand")]
+UNSPEC_COMPLEX_FMUL)
+ (match_operand:VF_AVX512FP16VL 3 "vector_operand")))]
+  "TARGET_AVX512FP16 && flag_unsafe_math_optimizations
+  && ix86_pre_reload_split()"
+  "#"
+  "&& 1"
+  [(set (match_dup 0)
+   (unspec:VF_AVX512FP16VL
+ [(match_dup 1) (match_dup 2) (match_dup 3)]
+  UNSPEC_COMPLEX_FMA))])
+
+(define_insn_and_split "fma__fadd_fcmul"
+  [(set (match_operand:VF_AVX512FP16VL 0 "register_operand")
+   (plus:VF_AVX512FP16VL
+ (unspec:VF_AVX512FP16VL
+   [(match_operand:VF_AVX512FP16VL 1 "vector_operand")
+(match_operand:VF_AVX512FP16VL 2 "vector_operand")]
+UNSPEC_COMPLEX_FCMUL)
+ (match_operand:VF_AVX512FP16VL 3 "vector_operand")))]
+  "TARGET_AVX512FP16 && flag_unsafe_math_optimizations
+  && ix86_pre_reload_split()"
+  "#"
+  "&& 1"
+  [(set (match_dup 0)
+   (unspec:VF_AVX512FP16VL
+ [(match_dup 1) (match_dup 2) (match_dup 3)]
+  UNSPEC_COMPLEX_FCMA))])
+
+(define_insn_and_split "fma___fma_zero"
+  [(set (match_operand:VF_AVX512FP16VL 0 "register_operand")
+   (plus:VF_AVX512FP16VL
+ (unspec:VF_AVX512FP16VL
+   [(match_operand:VF_AVX512FP16VL 1 "vector_operand")
+(match_operand:VF_AVX512FP16VL 2 "vector_operand")
+(match_operand:VF_AVX512FP16VL 3 "const0_operand")]
+UNSPEC_COMPLEX_F_C_MA)
+ (match_operand:VF_AVX512FP16VL 4 "vector_operand")))]
+  "TARGET_AVX512FP16 && flag_unsafe_math_optimizations
+  && ix86_pre_reload_split()"
+  "#"
+  "&& 1"
+  [(set (match_dup 0)
+   (unspec:VF_AVX512FP16VL
+ [(match_dup 1) (match_dup 2) (match_dup 4)]
+  UNSPEC_COMPLEX_F_C_MA))])
+
 (define_insn "___mask"
   [(set (match_operand:VF_AVX512FP16VL 0 "register_operand" "=")
(vec_merge:VF_AVX512FP16VL
diff --git a/gcc/testsuite/gcc.target/i386/avx512fp16-complex-fma.c 
b/gcc/testsuite/gcc.target/i386/avx512fp16-complex-fma.c
new file mode 100644
index 000..2dfd369e785
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512fp16-complex-fma.c
@@ -0,0 +1,18 @@
+/* { dg-do compile } */
+/* { dg-options "-mavx512fp16 -O2 -Ofast" } */
+/* { dg-final { scan-assembler-times "vfmaddcph\[ 
+\\t\]+\[^\{\n\]*%zmm\[0-9\]+\[^\n\r]*%zmm\[0-9\]+\[^\n\r]*%zmm\[0-9\]+(
+?:\n|\[ \\t\]+#)" 2 } } */
+/* { dg-final { scan-assembler-not "vaddph\[ 
+\\t\]+\[^\{\n\]*%zmm\[0-9\]+\[^\n\r]*%zmm\[0-9\]+\[^\n\r]*%zmm\[0-9\]+(
+?:\n|\[ \\t\]+#)"} } */
+/* { dg-final { scan-assembler-not "vfmulcph\[ 
+\\t\]+\[^\{\n\]*%zmm\[0-9\]+\[^\n\r]*%zmm\[0-9\]+\[^\n\r]*%zmm\[0-9\]+(
+?:\n|\[ \\t\]+#)"} } */
+/* { dg-final { scan-assembler-times "vfcmaddcph\[ 
+\\t\]+\[^\{\n\]*%zmm\[0-9\]+\[^\n\r]*%zmm\[0-9\]+\[^\n\r]*%zmm\[0-9\]+(
+?:\n|\[ \\t\]+#)" 2 } } */
+
+#include 
+volatile __m512h x1, x2, res, a, b;
+void extern
+avx512f_test (void)
+{
+  res = _mm512_add_ph (x1, _mm512_fmadd_pch (a, b, 
+_mm512_setzero_ph()));
+  res = _mm512_add_ph (x1, _mm512_fcmadd_pch (a, b, 
+_mm512_setzero_ph()));
+
+  res = _mm512_add_ph (x1, _mm512_fmul_pch (a, b));
+  res = _mm512_add_ph (x1, _mm512_fcmul_pch (a, b)); }
--
2.18.1



[PATCH] Canonicalize __atomic/sync_fetch_or/xor/and for constant mask.

2021-10-21 Thread liuhongt via Gcc-patches
Hi:
 This patch tries to canonicalize the bit_and and nop_convert order for
__atomic_fetch_or_*, __atomic_fetch_xor_*,
__atomic_xor_fetch_*,__sync_fetch_and_or_*,
__sync_fetch_and_xor_*,__sync_xor_and_fetch_*,
__atomic_fetch_and_*,__sync_fetch_and_and_* when mask is constant.

i.e.

+/* Canonicalize
+  _1 = __atomic_fetch_or_4 (, 1, 0);
+  _2 = (int) _1;
+  _5 = _2 & 1;
+
+to
+
+  _1 = __atomic_fetch_or_4 (, 1, 0);
+  _2 = _1 & 1;
+  _5 = (int) _2;

+/* Convert
+ _1 = __atomic_fetch_and_4 (a_6(D), 4294959103, 0);
+ _2 = (int) _1;
+ _3 = _2 & 8192;
+to
+  _1 = __atomic_fetch_and_4 (a_4(D), 4294959103, 0);
+  _7 = _1 & 8192;
+  _6 = (int) _7;
+ So it can be handled by  optimize_atomic_bit_test_and.  */

I'm trying to rewrite the match part in match.pd and find that the
canonicalization is ok when the mask is constant, but not when it is
variable, since it will be simplified back by
 /* In GIMPLE, getting rid of 2 conversions for one new results
in smaller IL.  */
 (simplify
  (convert (bitop:cs@2 (nop_convert:s @0) @1))
  (if (GIMPLE
   && TREE_CODE (@1) != INTEGER_CST
   && tree_nop_conversion_p (type, TREE_TYPE (@2))
   && types_match (type, @0))
   (bitop @0 (convert @1)

The canonicalization for a variable mask is like

convert
  _1 = ~mask_7;
  _2 = (unsigned int) _1;
  _3 = __atomic_fetch_and_4 (ptr_6, _2, 0);
 _4 = (int) _3;
 _5 = _4 & mask_7;

to
  _1 = ~mask_7;
  _2 = (unsigned int) _1;
  _3 = __atomic_fetch_and_4 (ptr_6, _2, 0);
  _4 = (unsigned int) mask_7
  _6 = _3 & _4
  _5 = (int) _6

and be simplified back.

I've also tried another way of simplification like

convert
  _1 = ~mask_7;
  _2 = (unsigned int) _1;
  _3 = __atomic_fetch_and_4 (ptr_6, _2, 0);
 _4 = (int) _3;
 _5 = _4 & mask_7;

to
  _1 = (unsigned int)mask_7;
  _2 = ~ _1;
  _3 = __atomic_fetch_and_4 (ptr_6, _2, 0);
   _6 = _3 & _1
  _5 = (int)

but it's prevented by the check below, since __atomic_fetch_and_4 is not
CONST, but we need to regenerate it with the updated parameter.

  /* We can't and should not emit calls to non-const functions.  */
  if (!(flags_from_decl_or_type (decl) & ECF_CONST))
return NULL;


  Bootstrapped and regtested on x86_64-pc-linux-gnu{-m32,}.
  Ok for trunk?

gcc/ChangeLog:

* match.pd: Canonicalize bit_and and nop_convert order for
__atomic/sync_fetch_or/xor/and when mask is constant.

gcc/testsuite/ChangeLog:

* gcc.target/i386/pr102566-1a.c: New test.
* gcc.target/i386/pr102566-2a.c: New test.
---
 gcc/match.pd| 118 
 gcc/testsuite/gcc.target/i386/pr102566-1a.c |  66 +++
 gcc/testsuite/gcc.target/i386/pr102566-2a.c |  65 +++
 3 files changed, 249 insertions(+)
 create mode 100644 gcc/testsuite/gcc.target/i386/pr102566-1a.c
 create mode 100644 gcc/testsuite/gcc.target/i386/pr102566-2a.c

diff --git a/gcc/match.pd b/gcc/match.pd
index 5bed2e12715..06b369d1ab1 100644
--- a/gcc/match.pd
+++ b/gcc/match.pd
@@ -104,6 +104,39 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
 (define_operator_list COND_TERNARY
   IFN_COND_FMA IFN_COND_FMS IFN_COND_FNMA IFN_COND_FNMS)
 
+/* __atomic_fetch_or_*, __atomic_fetch_xor_*, __atomic_xor_fetch_*  */
+(define_operator_list ATOMIC_FETCH_OR_XOR_N
+  BUILT_IN_ATOMIC_FETCH_OR_1 BUILT_IN_ATOMIC_FETCH_OR_2
+  BUILT_IN_ATOMIC_FETCH_OR_4 BUILT_IN_ATOMIC_FETCH_OR_8
+  BUILT_IN_ATOMIC_FETCH_OR_16
+  BUILT_IN_ATOMIC_FETCH_XOR_1 BUILT_IN_ATOMIC_FETCH_XOR_2
+  BUILT_IN_ATOMIC_FETCH_XOR_4 BUILT_IN_ATOMIC_FETCH_XOR_8
+  BUILT_IN_ATOMIC_FETCH_XOR_16
+  BUILT_IN_ATOMIC_XOR_FETCH_1 BUILT_IN_ATOMIC_XOR_FETCH_2
+  BUILT_IN_ATOMIC_XOR_FETCH_4 BUILT_IN_ATOMIC_XOR_FETCH_8
+  BUILT_IN_ATOMIC_XOR_FETCH_16)
+/* __sync_fetch_and_or_*, __sync_fetch_and_xor_*, __sync_xor_and_fetch_*  */
+(define_operator_list SYNC_FETCH_OR_XOR_N
+  BUILT_IN_SYNC_FETCH_AND_OR_1 BUILT_IN_SYNC_FETCH_AND_OR_2
+  BUILT_IN_SYNC_FETCH_AND_OR_4 BUILT_IN_SYNC_FETCH_AND_OR_8
+  BUILT_IN_SYNC_FETCH_AND_OR_16
+  BUILT_IN_SYNC_FETCH_AND_XOR_1 BUILT_IN_SYNC_FETCH_AND_XOR_2
+  BUILT_IN_SYNC_FETCH_AND_XOR_4 BUILT_IN_SYNC_FETCH_AND_XOR_8
+  BUILT_IN_SYNC_FETCH_AND_XOR_16
+  BUILT_IN_SYNC_XOR_AND_FETCH_1 BUILT_IN_SYNC_XOR_AND_FETCH_2
+  BUILT_IN_SYNC_XOR_AND_FETCH_4 BUILT_IN_SYNC_XOR_AND_FETCH_8
+  BUILT_IN_SYNC_XOR_AND_FETCH_16)
+/* __atomic_fetch_and_*.  */
+(define_operator_list ATOMIC_FETCH_AND_N
+  BUILT_IN_ATOMIC_FETCH_AND_1 BUILT_IN_ATOMIC_FETCH_AND_2
+  BUILT_IN_ATOMIC_FETCH_AND_4 BUILT_IN_ATOMIC_FETCH_AND_8
+  BUILT_IN_ATOMIC_FETCH_AND_16)
+/* __sync_fetch_and_and_*.  */
+(define_operator_list SYNC_FETCH_AND_AND_N
+  BUILT_IN_SYNC_FETCH_AND_AND_1 BUILT_IN_SYNC_FETCH_AND_AND_2
+  BUILT_IN_SYNC_FETCH_AND_AND_4 BUILT_IN_SYNC_FETCH_AND_AND_8
+  BUILT_IN_SYNC_FETCH_AND_AND_16)
+
 /* With nop_convert? combine convert? and view_convert? in one pattern
plus conditionalize on tree_nop_conversion_p conversions.  */
 (match (nop_convert @0)
@@ -3907,6 +3940,91 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
   (vec_cond @0 (op! @3 @1) (op! @3 @2
 #endif
 
+#if 

[Bug bootstrap/102681] [12 Regression] AArch64 bootstrap failure

2021-10-21 Thread pinskia at gcc dot gnu.org via Gcc-bugs
https://gcc.gnu.org/bugzilla/show_bug.cgi?id=102681

--- Comment #16 from Andrew Pinski  ---
(In reply to Andrew Pinski from comment #15)
> We totally missed the jump threading of 3->5->7 path and the 4->5->8 path.

  FAIL: path through PHI in bb8 (incoming bb:6) crosses loop

But it does not exactly cross the loop as 5 (6) is not part of the loop
but rather just 8.

[Bug bootstrap/102681] [12 Regression] AArch64 bootstrap failure

2021-10-21 Thread pinskia at gcc dot gnu.org via Gcc-bugs
https://gcc.gnu.org/bugzilla/show_bug.cgi?id=102681

--- Comment #15 from Andrew Pinski  ---
So the major difference comes from mark_stack_region_used.
We have a missing jump thread in ethread.

Before the patch, ethread was able to jump thread all the way through:
  if (_13 != 0)
goto ; [5.50%]
  else
goto ; [94.50%]

   :
  # _22 = PHI <0(2)>
  goto ; [INV]

   :
  # _18 = PHI <1(2)>
  _15 = upper_bound.coeffs[0];
  goto ; [100.00%]

   :

But after we get:

   :
  _13 = upper_bound.coeffs[1];
  if (_13 != 0)
goto ; [5.50%]
  else
goto ; [94.50%]

   :
  # _22 = PHI <0(2)>
  goto ; [100.00%]

   :
  # _9 = PHI <1(2)>
  _15 = upper_bound.coeffs[0];

   :
  # _16 = PHI <0(3), 1(4)>
  # const_upper_20 = PHI 
  if (_16 != 0)
goto ; [INV]
  else
goto ; [INV]

We totally missed the jump threading of 3->5->7 path and the 4->5->8 path.

Aldy,
  Can you look into why there is a missing jump threading there?

Re: [PATH][_GLIBCXX_DEBUG] Fix unordered container merge

2021-10-21 Thread François Dumont via Gcc-patches

On 21/10/21 6:55 pm, Jonathan Wakely wrote:



On Thu, 21 Oct 2021 at 17:52, François Dumont > wrote:


I eventually would like to propose a different approach.

I am adding a hook in normal implementation to let the
_GLIBCXX_DEBUG code know when a node is being extracted. This way
invalidation is only done by comparing nodes, no need to compute
hash code for this operation.


Ugh, this is horrible, I don't like the normal mode depending on the 
debug mode (even if it's just having to add hooks like this).


Yes, I was reluctant to do so but in this case I was not able to find 
another way to provide the same result as here.


Ok, I'll come back to the other patch and just invalidate all iterators 
in case of exception.




The previous patch seemed fine to me. Already an improvement on what 
is on trunk now.






Re: assembler errors when bootstrapping with #pragma optimize "0"

2021-10-21 Thread Andrew Pinski via Gcc
On Thu, Oct 21, 2021 at 5:07 PM Martin Sebor via Gcc  wrote:
>
> I put #pragma GCC optimize "0" at the top of gimplify.c to help
> me debug something in a bootstrapped compiler.  The file failed
> to compile with many assembler errors like this:
>
> /tmp/ccL9zcXD.s: Assembler messages:
> /tmp/ccL9zcXD.s:9: Error: CFI instruction used without previous
> .cfi_startproc
>
> I've done this before and had no problems.  Is this supposed to
> work or was I just lucky when it did before?

I see that dwarf2out_do_cfi_asm is sticky: once it has returned true or
false, it will always return that same value.
So there might be an issue there.

Thanks,
Andrew Pinski


>
> Thanks
> Martin
>
> PS The top of gimplify.s is below (this is with no other code
> changes to any files except the #pragma).
>
> .file   "gimplify.c"
> .text
> .local  _ZZ20gimplify_va_arg_exprPP9tree_nodePP6gimpleS4_E9gave_help
> .comm   
> _ZZ20gimplify_va_arg_exprPP9tree_nodePP6gimpleS4_E9gave_help,1,1
> .p2align 4
> .type   _ZL19handled_component_pPK9tree_node, @function
> _ZL19handled_component_pPK9tree_node:
> pushq   %rbp
> .cfi_def_cfa_offset 16
> .cfi_offset 6, -16
> movq%rsp, %rbp
> .cfi_def_cfa_register 6
> movq%rdi, -8(%rbp)
> movq-8(%rbp), %rax
> movzwl  (%rax), %eax
> movzwl  %ax, %eax
> subl$47, %eax
> cmpl$6, %eax
> ja  .L2
> movl$1, %eax
> jmp .L3
> .L2:
> movl$0, %eax
> .L3:
> popq%rbp
> .cfi_def_cfa 7, 8
> ret


Re: [PATCH] Try to resolve paths in threader without looking further back.

2021-10-21 Thread Aldy Hernandez via Gcc-patches
On Fri, Oct 22, 2021, 05:34 Jeff Law  wrote:

>
>
> On 10/21/2021 4:15 AM, Aldy Hernandez wrote:
> > On Wed, Oct 20, 2021 at 10:19 PM Jeff Law  wrote:
> >> So we want to keep some form of ssa-dom-thread-7.  That's the canonical
> >> testcase for the case for the FSM optimization.
> >>
> >> What we need to verify is that we thread jumps across the backedge of
> >> the loop through the switch statement to a particular case (thus
> >> bypassing the indirect jump for the switch statement).  How to do that
> >> in a way that's easier to manage?  I have no clue.  I guess a gimple-fe
> >> based test might help.
> > Ah, I see.
> >
> > After lots of pain, I was able to distill a tiny testcase that does
> > just that (tree-ssa/ssa-thread-backedge.c), and is easy to maintain.
> > I've added a "backedge" marker to the path dumping code for easy
> > matching in the test.  An alternative would be to convert it to a
> > gimple FE test examining the exact thread sequence through the
> > backedge, but I think that's overkill since the test is so small.
> Well, and the worry with a smaller testcase is reducing too far with the
> result not really being representative of the issue.  This actually
> happened during the development of the FSM bits.  I got a test from the
> ARM guys, evaluated it and concluded it could be addressed with the
> forward threader.  Then I did the implementation work.  Once done
> they said it didn't work and gave me a better testcase which had more
> "join" blocks we would have had to copy to realize the important jump
> threads.  At which point Steve E's FSM threader was the only viable choice.
>
>
> >
> > Phew, I think we're finally converging on a useful set of threading
> tests :).
> >
> > OK for trunk?
> Mostly, I just worry about losing the key test for the FSM optimization.


With the provided test, the forward threaders can't thread through the
backedge and into the switch. Disabling the other threaders was just a
precaution. I just wanted to make sure it happened late because of the loop
restrictions we have in place. I could enable the forward threaders to
prove they can't get it.

Also, we have an assert noting that we never thread through backedges in
the forward threaders. It was part of the refactor. So the forward
threaders can't even do it.

I could add more cases and check that we have N or more threads through the
back edges... and if it makes you feel safer, we could even convert the test
to gimple and test the specific thread sequence. It's just that the gimple
FE test is bound to get large and difficult to decipher if I start adding
many switch cases.

I'm just trying to avoid a huge test with 40 potential threads where no one
really knows how many we should get... as every threading pass opens up
possibilities for other passes.

Ugh... we could put the test back, check for some random large number,
and come up with a more satisfactory test later? ;-)

Aldy


Re: [PATCH] Try to resolve paths in threader without looking further back.

2021-10-21 Thread Jeff Law via Gcc-patches




On 10/21/2021 4:15 AM, Aldy Hernandez wrote:

On Wed, Oct 20, 2021 at 10:19 PM Jeff Law  wrote:

So we want to keep some form of ssa-dom-thread-7.  That's the canonical
testcase for the case for the FSM optimization.

What we need to verify is that we thread jumps across the backedge of
the loop through the switch statement to a particular case (thus
bypassing the indirect jump for the switch statement).  How to do that
in a way that's easier to manage?  I have no clue.  I guess a gimple-fe
based test might help.

Ah, I see.

After lots of pain, I was able to distill a tiny testcase that does
just that (tree-ssa/ssa-thread-backedge.c), and is easy to maintain.
I've added a "backedge" marker to the path dumping code for easy
matching in the test.  An alternative would be to convert it to a
gimple FE test examining the exact thread sequence through the
backedge, but I think that's overkill since the test is so small.
Well, and the worry with a smaller testcase is reducing too far with the 
result not really being representative of the issue.  This actually 
happened during the development of the FSM bits.  I got a test from the 
ARM guys, evaluated it and concluded it could be addressed with the 
forward threader.  Then I did the implementation work.  Once done 
they said it didn't work and gave me a better testcase which had more 
"join" blocks we would have had to copy to realize the important jump 
threads.  At which point Steve E's FSM threader was the only viable choice.





Phew, I think we're finally converging on a useful set of threading tests :).

OK for trunk?

Mostly, I just worry about losing the key test for the FSM optimization.

Jeff



[Bug middle-end/102566] [i386] GCC should emit LOCK BTS for simple bit-test-and-set operations with std::atomic

2021-10-21 Thread crazylht at gmail dot com via Gcc-bugs
https://gcc.gnu.org/bugzilla/show_bug.cgi?id=102566

--- Comment #28 from Hongtao.liu  ---
Can we optimize

int gomp_futex_wake = FUTEX_WAKE | FUTEX_PRIVATE_FLAG;
int gomp_futex_wait = FUTEX_WAIT | FUTEX_PRIVATE_FLAG;

void
gomp_mutex_lock_slow (gomp_mutex_t *mutex, int oldval)
{
  /* First loop spins a while.  */
  while (oldval == 1)
{
  if (do_spin (mutex, 1))
{
  /* Spin timeout, nothing changed.  Set waiting flag.  */
  oldval = __atomic_exchange_n (mutex, -1, MEMMODEL_ACQUIRE);
  if (oldval == 0)
return;
  futex_wait (mutex, -1);
  break;
}
  else
{
  /* Something changed.  If now unlocked, we're good to go.  */
  oldval = 0;
  if (__atomic_compare_exchange_n (mutex, &oldval, 1, false,
   MEMMODEL_ACQUIRE, MEMMODEL_RELAXED))
return;
}
}

  /* Second loop waits until mutex is unlocked.  We always exit this
 loop with wait flag set, so next unlock will awaken a thread.  */
  while ((oldval = __atomic_exchange_n (mutex, -1, MEMMODEL_ACQUIRE)))
do_wait (mutex, -1);
}

with __atomic_fetch_or/and/xor?

Re: Ping^3: [PATCH v2 0/2] Fix vec_sel code generation and merge xxsel to vsel

2021-10-21 Thread Xionghu Luo via Gcc-patches
Ping^3, thanks.

https://gcc.gnu.org/pipermail/gcc-patches/2021-September/579637.html


On 2021/10/15 14:28, Xionghu Luo via Gcc-patches wrote:
> Ping^2, thanks.
> 
> https://gcc.gnu.org/pipermail/gcc-patches/2021-September/579637.html
> 
> 
> On 2021/10/8 09:17, Xionghu Luo via Gcc-patches wrote:
>> Ping, thanks.
>>
>>
>> On 2021/9/17 13:25, Xionghu Luo wrote:
>>> These two patches are updated version from:
>>> https://gcc.gnu.org/pipermail/gcc-patches/2021-September/579490.html
>>>
>>> Changes:
>>> 1. Fix alignment error in md files.
>>> 2. Replace rtx_equal_p with match_dup.
>>> 3. Use register_operand instead of gpc_reg_operand to align with
>>>vperm/xxperm.
>>> 4. Regression tested pass on P8LE.
>>>
>>> Xionghu Luo (2):
>>>   rs6000: Fix wrong code generation for vec_sel [PR94613]
>>>   rs6000: Fold xxsel to vsel since they have same semantics
>>>
>>>  gcc/config/rs6000/altivec.md  | 84 ++-
>>>  gcc/config/rs6000/rs6000-call.c   | 62 ++
>>>  gcc/config/rs6000/rs6000.c| 19 ++---
>>>  gcc/config/rs6000/vector.md   | 26 +++---
>>>  gcc/config/rs6000/vsx.md  | 25 --
>>>  gcc/testsuite/gcc.target/powerpc/builtins-1.c |  2 +-
>>>  gcc/testsuite/gcc.target/powerpc/pr94613.c| 47 +++
>>>  7 files changed, 193 insertions(+), 72 deletions(-)
>>>  create mode 100644 gcc/testsuite/gcc.target/powerpc/pr94613.c
>>>
>>
> 

-- 
Thanks,
Xionghu


Sddm.net is for sale!

2021-10-21 Thread Mark James Bautista via Gcc
Hello, my name is James from TDS. We have a domain that is currently on
sale that you might be interested in - *Sddm.net*

Anytime someone types SDDM, or any other phrase with these keywords into
their browser, your site could be the first they see!

The internet is the most efficient way to acquire new customers

Avg Google Search Results for this domain is: 1,210,000
You can easily redirect all the traffic this domain gets to your current
site!

*GoDaddy.com* appraises this domain at $1,076.

Priced at only $998 for a limited time! If interested please go to *Sddm.net

*and select Buy Now, or purchase directly at GoDaddy.
Act Fast! First person to select Buy Now gets it!

Thank you very much for your time.
Top Domain Sellers (TDS)
Mark James Bautista
[image: beacon]


[Bug fortran/100910] Bind(c): errors handling long double complex

2021-10-21 Thread sandra at gcc dot gnu.org via Gcc-bugs
https://gcc.gnu.org/bugzilla/show_bug.cgi?id=100910

sandra at gcc dot gnu.org changed:

   What|Removed |Added

 Status|NEW |RESOLVED
 CC||sandra at gcc dot gnu.org
 Resolution|--- |FIXED

--- Comment #5 from sandra at gcc dot gnu.org ---
This is fixed now.  There is adequate test coverage in the c-interop testsuite;
typecodes-array-longdouble.f90 and typecodes-scalar-longdouble.f90.

[Bug fortran/100915] Bind(c): failure handling C_FUNPTR

2021-10-21 Thread sandra at gcc dot gnu.org via Gcc-bugs
https://gcc.gnu.org/bugzilla/show_bug.cgi?id=100915

sandra at gcc dot gnu.org changed:

   What|Removed |Added

 Resolution|--- |FIXED
 Status|UNCONFIRMED |RESOLVED
 CC||sandra at gcc dot gnu.org

--- Comment #5 from sandra at gcc dot gnu.org ---
Marking this as fixed now.

[Bug fortran/100911] Bind(c): failure handling C_PTR

2021-10-21 Thread sandra at gcc dot gnu.org via Gcc-bugs
https://gcc.gnu.org/bugzilla/show_bug.cgi?id=100911

sandra at gcc dot gnu.org changed:

   What|Removed |Added

 Status|UNCONFIRMED |RESOLVED
 Resolution|--- |FIXED
 CC||sandra at gcc dot gnu.org

--- Comment #5 from sandra at gcc dot gnu.org ---
Fixed now, closing.

[Bug fortran/100914] Bind(c): errors handling complex

2021-10-21 Thread sandra at gcc dot gnu.org via Gcc-bugs
https://gcc.gnu.org/bugzilla/show_bug.cgi?id=100914

sandra at gcc dot gnu.org changed:

   What|Removed |Added

 Resolution|--- |FIXED
 Status|ASSIGNED|RESOLVED

--- Comment #9 from sandra at gcc dot gnu.org ---
I think it's safe to close this issue now.  The testcase is in and still passes
after Tobias's changes.

[PATCH, Fortran] Add testcase for PR100906

2021-10-21 Thread Sandra Loosemore
PR100906 ("Bind(c): failure handling character with len/=1") has been 
fixed by Tobias's rewrite of the GFC <-> C descriptor conversions.  I'd 
like to add José's testcase for that issue before closing it.  OK?


-Sandra
commit 4c2fa9cf74162015710ccfd913c827779151aa52
Author: Sandra Loosemore 
Date:   Thu Oct 21 19:17:50 2021 -0700

Add testcase for PR fortran/100906

2021-10-21  José Rui Faustino de Sousa  
	Sandra Loosemore  

	gcc/testsuite/

	PR fortran/100906
	* gfortran.dg/PR100906.f90: New.
	* gfortran.dg/PR100906.c: New.

diff --git a/gcc/testsuite/gfortran.dg/PR100906.c b/gcc/testsuite/gfortran.dg/PR100906.c
new file mode 100644
index 000..f71d567
--- /dev/null
+++ b/gcc/testsuite/gfortran.dg/PR100906.c
@@ -0,0 +1,169 @@
+/* Test the fix for PR100906 */
+
+#include 
+#include 
+#include 
+#include 
+/* #include  */
+
+#include 
+
+#define _CFI_type_mask 0xFF
+#define _CFI_type_kind_shift 8
+
+#define _CFI_decode_type(NAME) (signed char)((NAME) & CFI_type_mask)
+#define _CFI_decode_kind(NAME) (signed char)(((NAME) >> CFI_type_kind_shift) & CFI_type_mask)
+
+#define _CFI_encode_type(TYPE, KIND) (int16_t)\
+KIND) & CFI_type_mask) << CFI_type_kind_shift)\
+ | ((TYPE) & CFI_type_mask))
+
+#define N 11
+#define M 7
+
+typedef char c_char;
+/* typedef char32_t c_ucs4_char; */
+typedef uint32_t char32_t;
+typedef uint32_t c_ucs4_char;
+ 
+bool charcmp (char *, char, size_t);
+
+bool ucharcmp (char32_t *, char32_t, size_t);
+
+bool c_vrfy_c_char (const CFI_cdesc_t *restrict, const size_t);
+
+bool c_vrfy_c_ucs4_char (const CFI_cdesc_t *restrict, const size_t);
+
+bool c_vrfy_character (const CFI_cdesc_t *restrict, const size_t);
+ 
+void check_tk (const CFI_cdesc_t*restrict, const CFI_type_t, const signed char, const size_t, const size_t);
+
+bool
+charcmp (char *c, char v, size_t n)
+{
+  bool res = true;
+  char b = (char)'A';
+  size_t i;
+
+  for (i=0; ((ibase_addr);
+  assert (auxp->elem_len>0);
+  lb = auxp->dim[0].lower_bound;
+  ex = auxp->dim[0].extent;
+  assert (ex==N);
+  sz = (size_t)auxp->elem_len / sizeof (c_char);
+  assert (sz==len);
+  ub = ex + lb - 1;
+  ip = (c_char*)auxp->base_addr;
+  for (i=0; ibase_addr);
+  assert (auxp->elem_len>0);
+  lb = auxp->dim[0].lower_bound;
+  ex = auxp->dim[0].extent;
+  assert (ex==N);
+  sz = (size_t)auxp->elem_len / sizeof (c_ucs4_char);
+  assert (sz==len);
+  ub = ex + lb - 1;
+  ip = (c_ucs4_char*)auxp->base_addr;
+  for (i=0; itype);
+  kind = _CFI_decode_kind(auxp->type);
+  assert (type == CFI_type_Character);
+  switch (kind)
+{
+case 1:
+  return c_vrfy_c_char (auxp, len);
+  break;
+case 4:
+  return c_vrfy_c_ucs4_char (auxp, len);
+  break;
+default:
+  assert (false);
+}
+  return true;
+}
+
+void
+check_tk (const CFI_cdesc_t *restrict auxp, const CFI_type_t type, const signed char kind, const size_t elem_len, const size_t nelem)
+{
+  signed char ityp, iknd;
+
+  assert (auxp);
+  assert (auxp->elem_len==elem_len*nelem);
+  assert (auxp->rank==1);
+  assert (auxp->dim[0].sm>0);
+  assert ((size_t)auxp->dim[0].sm==elem_len*nelem);
+  /*  */
+  assert (auxp->type==type);
+  ityp = _CFI_decode_type(auxp->type);
+  assert (ityp == CFI_type_Character);
+  iknd = _CFI_decode_kind(auxp->type);
+  assert (_CFI_decode_type(type)==ityp);
+  assert (kind==iknd);
+  assert (c_vrfy_character (auxp, nelem));
+  return;
+}
+
+// Local Variables:
+// mode: C
+// End:
diff --git a/gcc/testsuite/gfortran.dg/PR100906.f90 b/gcc/testsuite/gfortran.dg/PR100906.f90
new file mode 100644
index 000..f6cb3af
--- /dev/null
+++ b/gcc/testsuite/gfortran.dg/PR100906.f90
@@ -0,0 +1,1699 @@
+! { dg-do run }
+! { dg-additional-sources PR100906.c }
+!
+! Test the fix for PR100906
+! 
+
+module isof_m
+
+  use, intrinsic :: iso_c_binding, only: &
+c_signed_char, c_int16_t
+  
+  implicit none
+
+  private
+  
+  public :: &
+CFI_type_character
+
+  public :: &
+CFI_type_char,  &
+CFI_type_ucs4_char
+ 
+  public ::  &
+check_tk_as, &
+check_tk_ar
+  
+  
+  public ::  &
+cfi_encode_type
+  
+  integer, parameter :: CFI_type_t = c_int16_t
+  
+  integer(kind=c_int16_t), parameter :: CFI_type_mask = int(z"FF", kind=c_int16_t)
+  integer(kind=c_int16_t), parameter :: CFI_type_kind_shift = 8_c_int16_t
+
+  ! Intrinsic types. Their kind number defines their storage size. */
+  integer(kind=c_signed_char), parameter :: CFI_type_Character = 5
+
+  ! C-Fortran Interoperability types.
+  integer(kind=cfi_type_t), parameter :: CFI_type_char  = &
+ior(int(CFI_type_Character, kind=c_int16_t), shiftl(1_c_int16_t, CFI_type_kind_shift))
+  integer(kind=cfi_type_t), parameter :: CFI_type_ucs4_char = &
+ior(int(CFI_type_Character, kind=c_int16_t), shiftl(4_c_int16_t, CFI_type_kind_shift))
+
+  interface
+subroutine check_tk_as(a, t, k, e, n) &
+  bind(c, name="check_tk")
+  use, intrinsic :: iso_c_binding, 

Re: [PATCH] hardened conditionals

2021-10-21 Thread Alexandre Oliva via Gcc-patches
On Oct 20, 2021, Alexandre Oliva  wrote:

> I suppose it's a latent issue exposed by the patch,

I was mistaken.  Though I even had bisected the -fcompare-debug problem
back to a patch from back in May, that added a new sink_code pass before
store_merging, it was actually a bug in my patch, it was just a little
hard to hit with bootstrap-debug, but it came up with -fcompare-debug in
ways that misled me.

split_block remains slightly risky to use unless you know you have or
are going to insert nondebug stmts/insns in both blocks.  I've often
pondered warning in case split_block completes with only debug
stmts/insns in either block, but IIRC there are multiple passes that
split first and insert code afterwards, which have to be rearranged to
avoid the warning.

Anyway, here's the fixed patch.  Regstrapped on x86_64-linux-gnu, and
bootstrapped with an additional patch that enables both new passes.  Ok
to install?


hardened conditionals

From: Alexandre Oliva 

This patch introduces optional passes to harden conditionals used in
branches, and in computing boolean expressions, by adding redundant
tests of the reversed conditions, and trapping in case of unexpected
results.  Though in abstract machines the redundant tests should never
fail, CPUs may be led to misbehave under certain kinds of attacks,
such as power deprivation, and these tests reduce the likelihood of
going too far down an unexpected execution path.


for  gcc/ChangeLog

* common.opt (fharden-compares): New.
(fharden-conditional-branches): New.
* doc/invoke.texi: Document new options.
* gimple-harden-conditionals.cc: New.
* passes.def: Add new passes.
* tree-pass.h (make_pass_harden_compares): Declare.
(make_pass_harden_conditional_branches): Declare.

for  gcc/ada/ChangeLog

* doc/gnat_rm/security_hardening_features.rst
(Hardened Conditionals): New.

for  gcc/testsuite/ChangeLog

* c-c++-common/torture/harden-comp.c: New.
* c-c++-common/torture/harden-cond.c: New.
---
 gcc/Makefile.in|1 
 .../doc/gnat_rm/security_hardening_features.rst|   40 ++
 gcc/common.opt |8 
 gcc/doc/invoke.texi|   19 +
 gcc/gimple-harden-conditionals.cc  |  439 
 gcc/passes.def |2 
 gcc/testsuite/c-c++-common/torture/harden-comp.c   |   14 +
 gcc/testsuite/c-c++-common/torture/harden-cond.c   |   18 +
 gcc/tree-pass.h|3 
 9 files changed, 544 insertions(+)
 create mode 100644 gcc/gimple-harden-conditionals.cc
 create mode 100644 gcc/testsuite/c-c++-common/torture/harden-comp.c
 create mode 100644 gcc/testsuite/c-c++-common/torture/harden-cond.c

diff --git a/gcc/Makefile.in b/gcc/Makefile.in
index f36ffa4740b78..a79ff93dd5999 100644
--- a/gcc/Makefile.in
+++ b/gcc/Makefile.in
@@ -1389,6 +1389,7 @@ OBJS = \
gimple-if-to-switch.o \
gimple-iterator.o \
gimple-fold.o \
+   gimple-harden-conditionals.o \
gimple-laddress.o \
gimple-loop-interchange.o \
gimple-loop-jam.o \
diff --git a/gcc/ada/doc/gnat_rm/security_hardening_features.rst 
b/gcc/ada/doc/gnat_rm/security_hardening_features.rst
index 1c46e3a4c7b88..52240d7e3dd54 100644
--- a/gcc/ada/doc/gnat_rm/security_hardening_features.rst
+++ b/gcc/ada/doc/gnat_rm/security_hardening_features.rst
@@ -87,3 +87,43 @@ types and subtypes, may be silently ignored.  Specifically, 
it is not
 currently recommended to rely on any effects this pragma might be
 expected to have when calling subprograms through access-to-subprogram
 variables.
+
+
+.. Hardened Conditionals:
+
+Hardened Conditionals
+=
+
+GNAT can harden conditionals to protect against control flow attacks.
+
+This is accomplished by two complementary transformations, each
+activated by a separate command-line option.
+
+The option *-fharden-compares* enables hardening of compares that
+compute results stored in variables, adding verification that the
+reversed compare yields the opposite result.
+
+The option *-fharden-conditional-branches* enables hardening of
+compares that guard conditional branches, adding verification of the
+reversed compare to both execution paths.
+
+These transformations are introduced late in the compilation pipeline,
+long after boolean expressions are decomposed into separate compares,
+each one turned into either a conditional branch or a compare whose
+result is stored in a boolean variable or temporary.  Compiler
+optimizations, if enabled, may also turn conditional branches into
+stored compares, and vice-versa.  Conditionals may also be optimized
+out entirely, if their value can be determined at compile time, and
+occasionally multiple compares can be combined into one.
+
+It is thus difficult to predict which of these two options will affect
+a 

Re: assembler errors when bootstrapping with #pragma optimize "0"

2021-10-21 Thread Martin Sebor via Gcc

On 10/21/21 6:10 PM, Tom Kacvinsky via Gcc wrote:

On Thu, Oct 21, 2021 at 8:06 PM Martin Sebor via Gcc 
wrote:


I put #pragma GCC optimize "0" at the top of gimplify.c to help
me debug something in a bootstrapped compiler.  The file failed
to compile with many assembler errors like this:

/tmp/ccL9zcXD.s: Assembler messages:
/tmp/ccL9zcXD.s:9: Error: CFI instruction used without previous
.cfi_startproc

I've done this before and had no problems.  Is this supposed to
work or was I just lucky when it did before?



I know the binutils people have been doing work with CFI stuff, so perhaps
this is a binutils
issue?  Which version of binutils are you using?  A newer version
of binutils (or perhaps older)
might make a difference.


I don't think the problem is Binutils.  The assembly emitted
by GCC changes with the pragma: the functions are missing
.cfi_startproc and .cfi_endproc directives, but have other
.cfi directives.  This happens even when I remove the #pragma
from the file, and the only difference is that I'm compiling
the file with a compiler that includes the same file compiled
with #pragma optimize "0".  So GCC emits different assembly
depending on whether or not its gimplify.c was compiled
optimized.  That seems like a symptom of miscompilation
to me.  The GCC binary, by the way, is stage 2.

Martin



Regards,

Tom





[Bug libstdc++/102882] [AIX] 23_containers 96088 testsuite failures

2021-10-21 Thread dje at gcc dot gnu.org via Gcc-bugs
https://gcc.gnu.org/bugzilla/show_bug.cgi?id=102882

David Edelsohn  changed:

   What|Removed |Added

 Ever confirmed|0   |1
 Status|UNCONFIRMED |NEW
   Last reconfirmed||2021-10-22

--- Comment #3 from David Edelsohn  ---
The patch for unordered_set fixes that testcase.

Re: assembler errors when bootstrapping with #pragma optimize "0"

2021-10-21 Thread Tom Kacvinsky via Gcc
On Thu, Oct 21, 2021 at 8:06 PM Martin Sebor via Gcc 
wrote:

> I put #pragma GCC optimize "0" at the top of gimplify.c to help
> me debug something in a bootstrapped compiler.  The file failed
> to compile with many assembler errors like this:
>
> /tmp/ccL9zcXD.s: Assembler messages:
> /tmp/ccL9zcXD.s:9: Error: CFI instruction used without previous
> .cfi_startproc
>
> I've done this before and had no problems.  Is this supposed to
> work or was I just lucky when it did before?
>

I know the binutils people have been doing work with CFI stuff, so perhaps
this is a binutils
issue?  Which version of binutils are you using?  A newer version
of binutils (or perhaps older)
might make a difference.

Regards,

Tom


assembler errors when bootstrapping with #pragma optimize "0"

2021-10-21 Thread Martin Sebor via Gcc

I put #pragma GCC optimize "0" at the top of gimplify.c to help
me debug something in a bootstrapped compiler.  The file failed
to compile with many assembler errors like this:

/tmp/ccL9zcXD.s: Assembler messages:
/tmp/ccL9zcXD.s:9: Error: CFI instruction used without previous 
.cfi_startproc


I've done this before and had no problems.  Is this supposed to
work or was I just lucky when it did before?

Thanks
Martin

PS The top of gimplify.s is below (this is with no other code
changes to any files except the #pragma).

.file   "gimplify.c"
.text
.local  _ZZ20gimplify_va_arg_exprPP9tree_nodePP6gimpleS4_E9gave_help
.comm   _ZZ20gimplify_va_arg_exprPP9tree_nodePP6gimpleS4_E9gave_help,1,1
.p2align 4
.type   _ZL19handled_component_pPK9tree_node, @function
_ZL19handled_component_pPK9tree_node:
pushq   %rbp
.cfi_def_cfa_offset 16
.cfi_offset 6, -16
movq%rsp, %rbp
.cfi_def_cfa_register 6
movq%rdi, -8(%rbp)
movq-8(%rbp), %rax
movzwl  (%rax), %eax
movzwl  %ax, %eax
subl$47, %eax
cmpl$6, %eax
ja  .L2
movl$1, %eax
jmp .L3
.L2:
movl$0, %eax
.L3:
popq%rbp
.cfi_def_cfa 7, 8
ret


[Bug c++/102508] ICE on coroutine when awaiting inside a statement expression (in transform_local_var_uses, at cp/coroutines.cc:2102)

2021-10-21 Thread kacper.slominski72 at gmail dot com via Gcc-bugs
https://gcc.gnu.org/bugzilla/show_bug.cgi?id=102508

--- Comment #1 from Kacper Słomiński  ---
Was about to report this myself; here is a minimal test case that reproduces
this issue without using any external libraries. It causes a slightly different
ICE in gimplify_expr in gimplify.c:14879 for GCC 11.2, but with small tweaks
you can get the one in transform_local_var_uses as well:

#include <coroutine>

struct coro {
bool await_ready() { return true; }
void await_suspend(std::coroutine_handle<>) { }
int await_resume() { return 0; }

struct promise_type {
coro get_return_object() {
return {};
}

std::suspend_never initial_suspend() {
return {};
}

std::suspend_never final_suspend() noexcept {
return {};
}

template <typename T>
void return_value(T &&) {}

void unhandled_exception() {}
};
};

coro fn() { co_return 1; };

coro foo() {
({
auto ex = co_await fn();
co_return ex;
});
co_return 0;
}

[Bug fortran/100916] Bind(c): CFI_type_other unimplemented.

2021-10-21 Thread sandra at gcc dot gnu.org via Gcc-bugs
https://gcc.gnu.org/bugzilla/show_bug.cgi?id=100916

sandra at gcc dot gnu.org changed:

   What|Removed |Added

 CC||sandra at gcc dot gnu.org

--- Comment #5 from sandra at gcc dot gnu.org ---
Jose's test case for this issue is still failing.

[Bug fortran/100907] Bind(c): failure handling wide character

2021-10-21 Thread sandra at gcc dot gnu.org via Gcc-bugs
https://gcc.gnu.org/bugzilla/show_bug.cgi?id=100907

sandra at gcc dot gnu.org changed:

   What|Removed |Added

 CC||sandra at gcc dot gnu.org

--- Comment #9 from sandra at gcc dot gnu.org ---
Jose's test case for this issue is still failing.

[Bug bootstrap/102681] [12 Regression] AArch64 bootstrap failure

2021-10-21 Thread pinskia at gcc dot gnu.org via Gcc-bugs
https://gcc.gnu.org/bugzilla/show_bug.cgi?id=102681

Andrew Pinski  changed:

   What|Removed |Added

  Attachment #51649|0   |1
is obsolete||

--- Comment #14 from Andrew Pinski  ---
Created attachment 51650
  --> https://gcc.gnu.org/bugzilla/attachment.cgi?id=51650&action=edit
Little more reduced

So FRE is able to figure out for the following:
  # _20 = PHI <0(2), 1(3)>
  # const_upper_26 = PHI 

  # _30 = PHI <0(12), 1(13)>
  # const_upper_33 = PHI 

That _30 is the same as _20 but not _26 is the same as _33 even though it does
figure out that _19 and _29 are the same as _10. If it is able to figure that
out, then things would just work.

Richi,
  I assume FRE does not value-number default SSA names (non-parm) the same,
which is why this is happening; is that correct?

gcc-9-20211021 is now available

2021-10-21 Thread GCC Administrator via Gcc
Snapshot gcc-9-20211021 is now available on
  https://gcc.gnu.org/pub/gcc/snapshots/9-20211021/
and on various mirrors, see http://gcc.gnu.org/mirrors.html for details.

This snapshot has been generated from the GCC 9 git branch
with the following options: git://gcc.gnu.org/git/gcc.git branch releases/gcc-9 
revision 5ed78f8bd84eb696579d928c816bc840664829b2

You'll find:

 gcc-9-20211021.tar.xzComplete GCC

  SHA256=0935a8ba1f42b262cff3d18785bb0c392f1fd383bbebf56ad3ffe6a240c80161
  SHA1=ebd47f4d8eb612919272d1808487e036b62b4355

Diffs from 9-20211014 are available in the diffs/ subdirectory.

When a particular snapshot is ready for public consumption the LATEST-9
link is updated and a message is sent to the gcc list.  Please do not use
a snapshot before it has been announced that way.


[Bug bootstrap/102681] [12 Regression] AArch64 bootstrap failure

2021-10-21 Thread pinskia at gcc dot gnu.org via Gcc-bugs
https://gcc.gnu.org/bugzilla/show_bug.cgi?id=102681

Andrew Pinski  changed:

   What|Removed |Added

  Attachment #51648|0   |1
is obsolete||

--- Comment #13 from Andrew Pinski  ---
Created attachment 51649
  --> https://gcc.gnu.org/bugzilla/attachment.cgi?id=51649&action=edit
Reduced testcase

Reduced testcase attached, 68 lines, which should make it easier to figure out
what is going on.

Fwd: [committed] libstdc++: Improve generated man pages for libstdc++

2021-10-21 Thread Jonathan Wakely via Gcc-patches
I messed up the CC to gcc-patches for this.

And I forgot to mention in the commit msg that the reason I started looking
at stdheader.cc in the first place was to fix a use-after-free bug in the
old code:

-// come on, gdb, find `p' already...
-const char* p = longheader.substr(start).c_str();
-Map::iterator word = headers.find(p);


-- Forwarded message -
From: Jonathan Wakely via Libstdc++ 
Date: Thu, 21 Oct 2021 at 22:58
Subject: [committed] libstdc++: Improve generated man pages for libstdc++
To: , 


Tested x86_64-linux, committed to trunk.

The man pages generated by Doxygen show internal header files, not the
standard headers that users actually care about. The run_doxygen script
uses the doc/doxygen/stdheader.cc program to address that, but it
doesn't work. It only tries to fix headers with underscores in the
names, which doesn't work for  or .  It
isn't prepared for the strings like "bits/stl_set\&.h" that are produced
by Doxygen. It doesn't know about many headers that have been added
since it was written. And the run_doxygen script fails to use its output
correctly to modify the man pages. Additionally, run_doxygen doesn't
know about new nested namespaces like std::filesystem and std::ranges.

This change rewrites the stdheader.cc program to do a better job of
finding the right header. The run_doxygen script now uses the just-built
compiler to build stdheader.cc and actually uses its output. And the
script now knows about other nested namespaces.

The stdheader.cc program might be unnecessary if we consistently used
@headername tags in the Doxygen comments, but we don't (and probably
never will).

A problem that remains after this change is that all the free functions
defined in namespace std get dumped into a single man page for std(3),
without detailed descriptions. We don't even install that std(3) page,
but remove it before installation. That means only classes are
documented in man pages (including many internal ones that should not be
publicly documented such as _Deque_base and _Tuple_impl).

libstdc++-v3/ChangeLog:

* doc/doxygen/stdheader.cc: Refactor. Use C++23. Add new
headers.
* scripts/run_doxygen: Fix post-processing of #include
directives in man pages. Use new xg++ to compile helper program.
---
 libstdc++-v3/doc/doxygen/stdheader.cc | 279 +-
 libstdc++-v3/scripts/run_doxygen  |  34 +++-
 2 files changed, 207 insertions(+), 106 deletions(-)

diff --git a/libstdc++-v3/doc/doxygen/stdheader.cc
b/libstdc++-v3/doc/doxygen/stdheader.cc
index 8bcb1a059f9..67f54eef02b 100644
--- a/libstdc++-v3/doc/doxygen/stdheader.cc
+++ b/libstdc++-v3/doc/doxygen/stdheader.cc
@@ -1,171 +1,256 @@
-// This is a slow larval-stage kludge to help massage the generated man
-// pages.  It's used like this:
-const char* const usage =
-"\nTakes on stdin, whitespace-separated words of the form\n"
-"\n"
-"[bits/]stl_foo.h\n"
-"[bits/]std_foo.h\n"
-"\n"
-"and writes on stdout the nearest matching standard header name.\n"
-"\n"
-"Takes no command-line arguments.\n"
-"\n";
-
-#include 
 #include 
+#include 
 #include 
+#include 
+#include 
+#include 
 #include 

-typedef std::map   Map;
+// This is a slow larval-stage kludge to help massage the generated man
+// pages.  It's used like this:
+const std::string_view usage = R"(
+Takes on stdin, whitespace-separated words of the form
+
+[bits/]stl_foo.h
+[bits/]std_foo.h
+
+and writes on stdout the nearest matching standard header name.

-Map  headers;
+Takes no command-line arguments.
+)";
+
+// List of standard headers
+std::set std_headers;
+// Map of partial header filenames to standard headers.
+std::map  headers;

 void init_map()
 {
 // Enter the glamourous world of data entry!!  Maintain these!
+// Because the map_header function removes common prefixes and
suffixes,
+// a header "bits/st[dl]_foo.h" will automatically map to "foo" if that
+// is a standard header, so we don't need to list those cases here.
+headers["atomic_base.h"]= "atomic";
+headers["atomic_lockfree_defines.h"] = "atomic";
+headers["atomic_timed_wait.h"]  = "atomic";
+headers["atomic_wait.h"]= "atomic";
+headers["algorithmfwd.h"]   = "algorithm";
 headers["algo.h"]   = "algorithm";
 headers["algobase.h"]   = "algorithm";
-headers["algorithm.h"]  = "algorithm";
+headers["ranges_algo.h"]= "algorithm";
+headers["ranges_algobase.h"]= "algorithm";
 headers["heap.h"]   = "algorithm";
-headers["bitset.h"] = "bitset";
-headers["complex.h"]= "complex";
-//headers["construct.h"]  stl_construct.h entirely internal
-headers["deque.h"]  = "deque";
-headers["deque.tcc"]= "deque";
-headers["fstream.h"]= "fstream";
-

[PATCH v2] c++: P2360R0: Extend init-stmt to allow alias-decl [PR102617]

2021-10-21 Thread Marek Polacek via Gcc-patches
On Thu, Oct 21, 2021 at 04:56:57PM -0400, Jason Merrill wrote:
> On 10/21/21 16:26, Marek Polacek wrote:
> > The following patch implements C++23 P2360R0.  This proposal merely
> > extends init-statement to contain alias-declaration.  init-statement
> > is used in if/for/switch.  The unsightly duplication of the new code
> > seems to be necessary to handle
> > 
> >for ( init-statement condition[opt] ; expression[opt] ) statement
> > 
> > as well as
> > 
> >for ( init-statement[opt] for-range-declaration : for-range-initializer 
> > ) statement
> 
> It seems like the duplication of the new code is a consequence of the
> duplication of the old code.  I'd think we could remove the duplication by
> remembering the result of cp_parser_range_based_for_with_init_p and then
> recursing at the end if it was true.  Or check it in cp_parser_for and call
> cp_parser_init_statement twice.

That works well, just had to move the pedwarn too.  dg.exp passes, full testing
running, OK if it passes?

-- >8 --
The following patch implements C++23 P2360R0.  This proposal merely
extends init-statement to contain alias-declaration.  init-statement
is used in if/for/switch.  It also removes the unsightly duplication
of code by calling cp_parser_init_statement twice.

PR c++/102617

gcc/cp/ChangeLog:

* parser.c (cp_parser_for): Maybe call cp_parser_init_statement
twice.  Warn about range-based for loops with initializer here.
(cp_parser_init_statement): Don't duplicate code.  Allow
alias-declaration in init-statement.

gcc/testsuite/ChangeLog:

* g++.dg/cpp23/init-stmt1.C: New test.
* g++.dg/cpp23/init-stmt2.C: New test.
---
 gcc/cp/parser.c | 70 ++---
 gcc/testsuite/g++.dg/cpp23/init-stmt1.C | 31 +++
 gcc/testsuite/g++.dg/cpp23/init-stmt2.C | 25 +
 3 files changed, 95 insertions(+), 31 deletions(-)
 create mode 100644 gcc/testsuite/g++.dg/cpp23/init-stmt1.C
 create mode 100644 gcc/testsuite/g++.dg/cpp23/init-stmt2.C

diff --git a/gcc/cp/parser.c b/gcc/cp/parser.c
index 49d951cfb19..93335c817d7 100644
--- a/gcc/cp/parser.c
+++ b/gcc/cp/parser.c
@@ -12040,6 +12040,7 @@ cp_parser_handle_directive_omp_attributes (cp_parser 
*parser, tree *pattrs,
   init-statement:
 expression-statement
 simple-declaration
+alias-declaration
 
   TM Extension:
 
@@ -13327,6 +13328,23 @@ cp_parser_for (cp_parser *parser, bool ivdep, unsigned 
short unroll)
   /* Begin the for-statement.  */
   scope = begin_for_scope ();
 
+  /* Maybe parse the optional init-statement in a range-based for loop.  */
+  if (cp_parser_range_based_for_with_init_p (parser)
+  /* Checked for diagnostic purposes only.  */
+  && cp_lexer_next_token_is_not (parser->lexer, CPP_SEMICOLON))
+{
+  tree dummy;
+  cp_parser_init_statement (parser, );
+  if (cxx_dialect < cxx20)
+   {
+ pedwarn (cp_lexer_peek_token (parser->lexer)->location,
+  OPT_Wc__20_extensions,
+  "range-based % loops with initializer only "
+  "available with %<-std=c++20%> or %<-std=gnu++20%>");
+ decl = error_mark_node;
+   }
+}
+
   /* Parse the initialization.  */
   is_range_for = cp_parser_init_statement (parser, );
 
@@ -13987,12 +14005,13 @@ cp_parser_iteration_statement (cp_parser* parser, 
bool *if_p, bool ivdep,
   return statement;
 }
 
-/* Parse a init-statement or the declarator of a range-based-for.
+/* Parse an init-statement or the declarator of a range-based-for.
Returns true if a range-based-for declaration is seen.
 
init-statement:
  expression-statement
- simple-declaration  */
+ simple-declaration
+ alias-declaration  */
 
 static bool
 cp_parser_init_statement (cp_parser *parser, tree *decl)
@@ -14008,40 +14027,29 @@ cp_parser_init_statement (cp_parser *parser, tree 
*decl)
   bool is_range_for = false;
   bool saved_colon_corrects_to_scope_p = parser->colon_corrects_to_scope_p;
 
-  /* Try to parse the init-statement.  */
-  if (cp_parser_range_based_for_with_init_p (parser))
-   {
- tree dummy;
- cp_parser_parse_tentatively (parser);
- /* Parse the declaration.  */
- cp_parser_simple_declaration (parser,
-   /*function_definition_allowed_p=*/false,
-   );
- cp_parser_require (parser, CPP_SEMICOLON, RT_SEMICOLON);
- if (!cp_parser_parse_definitely (parser))
-   /* That didn't work, try to parse it as an expression-statement.  */
-   cp_parser_expression_statement (parser, NULL_TREE);
-
- if (cxx_dialect < cxx20)
-   {
- pedwarn (cp_lexer_peek_token (parser->lexer)->location,
-  OPT_Wc__20_extensions,
-  "range-based % loops with initializer only "
-  "available with %<-std=c++20%> or 

Fwd: [committed] libstdc++: Add Doxygen comments to contents of

2021-10-21 Thread Jonathan Wakely via Gcc-patches
I messed up the CC to gcc-patches for this ...

-- Forwarded message -
From: Jonathan Wakely via Libstdc++ 
Date: Thu, 21 Oct 2021 at 22:57
Subject: [committed] libstdc++: Add Doxygen comments to contents of

To: , 


Tested x86_64-linux, committed to trunk.

libstdc++-v3/ChangeLog:

* include/bits/mofunc_impl.h: Add doxygen comments.
* include/std/functional: Likewise.
---
 libstdc++-v3/include/bits/mofunc_impl.h | 37 ++
 libstdc++-v3/include/std/functional | 94 +
 2 files changed, 119 insertions(+), 12 deletions(-)

diff --git a/libstdc++-v3/include/bits/mofunc_impl.h
b/libstdc++-v3/include/bits/mofunc_impl.h
index 968d235f867..6cc9711a42b 100644
--- a/libstdc++-v3/include/bits/mofunc_impl.h
+++ b/libstdc++-v3/include/bits/mofunc_impl.h
@@ -44,6 +44,22 @@ namespace std _GLIBCXX_VISIBILITY(default)
 {
 _GLIBCXX_BEGIN_NAMESPACE_VERSION

+  /**
+   *  @brief Polymorphic function wrapper.
+   *  @ingroup functors
+   *  @since C++23
+   *  @headername functional
+   *
+   *  The `std::move_only_function` class template is a call wrapper
similar
+   *  to *  `std::function`, but does not require the stored target
function
+   *  to be copyable.
+   *
+   *  It also supports const-qualification, ref-qualification, and
+   *  no-throw guarantees. The qualifications and exception-specification
+   *  of the `move_only_function::operator()` member function are respected
+   *  when invoking the target function.
+   *
+   */
   template
 class move_only_function<_Res(_ArgTypes...) _GLIBCXX_MOF_CV
   _GLIBCXX_MOF_REF noexcept(_Noex)>
@@ -64,15 +80,19 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
 public:
   using result_type = _Res;

+  /// Creates an empty object.
   move_only_function() noexcept { }

+  /// Creates an empty object.
   move_only_function(nullptr_t) noexcept { }

+  /// Moves the target object, leaving the source empty.
   move_only_function(move_only_function&& __x) noexcept
   : _Mofunc_base(static_cast<_Mofunc_base&&>(__x)),
_M_invoke(std::__exchange(__x._M_invoke, nullptr))
   { }

+  /// Stores a target object initialized from the argument.
   template>
requires (!is_same_v<_Vt, move_only_function>)
  && (!__is_in_place_type_v<_Vt>) && __is_callable_from<_Vt>
@@ -89,6 +109,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
  _M_invoke = &_S_invoke<_Vt>;
}

+  /// Stores a target object initialized from the arguments.
   template
requires is_constructible_v<_Tp, _Args...>
  && __is_callable_from<_Tp>
@@ -101,6 +122,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
  _M_init<_Tp>(std::forward<_Args>(__args)...);
}

+  /// Stores a target object initialized from the arguments.
   template
requires is_constructible_v<_Tp, initializer_list<_Up>&, _Args...>
  && __is_callable_from<_Tp>
@@ -114,6 +136,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
  _M_init<_Tp>(__il, std::forward<_Args>(__args)...);
}

+  /// Stores a new target object, leaving `x` empty.
   move_only_function&
   operator=(move_only_function&& __x) noexcept
   {
@@ -122,6 +145,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
return *this;
   }

+  /// Destroys the target object (if any).
   move_only_function&
   operator=(nullptr_t) noexcept
   {
@@ -130,6 +154,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
return *this;
   }

+  /// Stores a new target object, initialized from the argument.
   template
requires is_constructible_v
move_only_function&
@@ -142,8 +167,17 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION

   ~move_only_function() = default;

+  /// True if a target object is present, false otherwise.
   explicit operator bool() const noexcept { return _M_invoke !=
nullptr; }

+  /** Invoke the target object.
+   *
+   * The target object will be invoked using the supplied arguments,
+   * and as an lvalue or rvalue, and as const or non-const, as dictated
+   * by the template arguments of the `move_only_function`
specialization.
+   *
+   * @pre Must not be empty.
+   */
   _Res
   operator()(_ArgTypes... __args) _GLIBCXX_MOF_CV_REF noexcept(_Noex)
   {
@@ -151,6 +185,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
return _M_invoke(this, std::forward<_ArgTypes>(__args)...);
   }

+  /// Exchange the target objects (if any).
   void
   swap(move_only_function& __x) noexcept
   {
@@ -158,10 +193,12 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
std::swap(_M_invoke, __x._M_invoke);
   }

+  /// Exchange the target objects (if any).
   friend void
   swap(move_only_function& __x, move_only_function& __y) noexcept
   { __x.swap(__y); }

+  /// Check for emptiness by comparing with `nullptr`.
   friend bool
   operator==(const move_only_function& __x, 

Fwd: [committed] libstdc++: Suppress Doxygen docs for more implementation details

2021-10-21 Thread Jonathan Wakely via Gcc-patches
I messed up the CC to gcc-patches for this ...

-- Forwarded message -
From: Jonathan Wakely via Libstdc++ 
Date: Thu, 21 Oct 2021 at 22:56
Subject: [committed] libstdc++: Suppress Doxygen docs for more
implementation details
To: , 


Tested x86_64-linux, committed to trunk.

libstdc++-v3/ChangeLog:

* include/bits/alloc_traits.h: Suppress doxygen documentation.
* include/bits/allocated_ptr.h: Likewise.
* include/bits/enable_special_members.h: Likewise.
* include/bits/hashtable.h: Likewise.
* include/bits/hashtable_policy.h: Likewise.
* include/bits/uses_allocator.h: Likewise.
* include/bits/node_handle.h: Document node handles and suppress
documentation for protected members.
* include/std/any: Suppress documentation for implementation
details.
---
 libstdc++-v3/include/bits/alloc_traits.h  |  4 
 libstdc++-v3/include/bits/allocated_ptr.h |  2 ++
 .../include/bits/enable_special_members.h |  2 ++
 libstdc++-v3/include/bits/hashtable.h |  2 ++
 libstdc++-v3/include/bits/hashtable_policy.h  |  2 ++
 libstdc++-v3/include/bits/node_handle.h   | 20 +++
 libstdc++-v3/include/bits/uses_allocator.h|  2 ++
 libstdc++-v3/include/std/any  |  8 +++-
 8 files changed, 41 insertions(+), 1 deletion(-)

diff --git a/libstdc++-v3/include/bits/alloc_traits.h
b/libstdc++-v3/include/bits/alloc_traits.h
index 05b584f742f..602a90d4d8a 100644
--- a/libstdc++-v3/include/bits/alloc_traits.h
+++ b/libstdc++-v3/include/bits/alloc_traits.h
@@ -45,6 +45,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
 #if __cplusplus >= 201103L
 #define __cpp_lib_allocator_traits_is_always_equal 201411

+  /// @cond undocumented
   struct __allocator_traits_base
   {
 template
@@ -77,10 +78,13 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
   template
 using __alloc_rebind
   = typename __allocator_traits_base::template __rebind<_Alloc,
_Up>::type;
+  /// @endcond

   /**
* @brief  Uniform interface to all allocator types.
+   * @headerfile memory
* @ingroup allocators
+   * @since C++11
   */
   template
 struct allocator_traits : __allocator_traits_base
diff --git a/libstdc++-v3/include/bits/allocated_ptr.h
b/libstdc++-v3/include/bits/allocated_ptr.h
index 340964eed15..695695f7d11 100644
--- a/libstdc++-v3/include/bits/allocated_ptr.h
+++ b/libstdc++-v3/include/bits/allocated_ptr.h
@@ -40,6 +40,7 @@
 namespace std _GLIBCXX_VISIBILITY(default)
 {
 _GLIBCXX_BEGIN_NAMESPACE_VERSION
+/// @cond undocumented

   /// Non-standard RAII type for managing pointers obtained from
allocators.
   template
@@ -97,6 +98,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
   return { __a, std::allocator_traits<_Alloc>::allocate(__a, 1) };
 }

+/// @endcond
 _GLIBCXX_END_NAMESPACE_VERSION
 } // namespace std

diff --git a/libstdc++-v3/include/bits/enable_special_members.h
b/libstdc++-v3/include/bits/enable_special_members.h
index 8361a063b4d..ac59f72e328 100644
--- a/libstdc++-v3/include/bits/enable_special_members.h
+++ b/libstdc++-v3/include/bits/enable_special_members.h
@@ -37,6 +37,7 @@
 namespace std _GLIBCXX_VISIBILITY(default)
 {
 _GLIBCXX_BEGIN_NAMESPACE_VERSION
+/// @cond undocumented

   struct _Enable_default_constructor_tag
   {
@@ -308,6 +309,7 @@ template
 operator=(_Enable_copy_move&&) noexcept =
delete;
   };

+/// @endcond
 _GLIBCXX_END_NAMESPACE_VERSION
 } // namespace std

diff --git a/libstdc++-v3/include/bits/hashtable.h
b/libstdc++-v3/include/bits/hashtable.h
index ff8af2201cd..25c45d3ba85 100644
--- a/libstdc++-v3/include/bits/hashtable.h
+++ b/libstdc++-v3/include/bits/hashtable.h
@@ -41,6 +41,7 @@
 namespace std _GLIBCXX_VISIBILITY(default)
 {
 _GLIBCXX_BEGIN_NAMESPACE_VERSION
+/// @cond undocumented

   template
 using __cache_default
@@ -2546,6 +2547,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
   = __enable_if_t,
__is_allocator<_Hash>>::value>;
 #endif

+/// @endcond
 _GLIBCXX_END_NAMESPACE_VERSION
 } // namespace std

diff --git a/libstdc++-v3/include/bits/hashtable_policy.h
b/libstdc++-v3/include/bits/hashtable_policy.h
index 994c7b61046..8c72043e368 100644
--- a/libstdc++-v3/include/bits/hashtable_policy.h
+++ b/libstdc++-v3/include/bits/hashtable_policy.h
@@ -38,6 +38,7 @@
 namespace std _GLIBCXX_VISIBILITY(default)
 {
 _GLIBCXX_BEGIN_NAMESPACE_VERSION
+/// @cond undocumented

   template` type. You should always use the container's
+   * `node_handle` type (e.g. `std::set::node_handle`) to refer to
+   * these types, not the non-standard internal `_Node_handle` names.
+   *
+   * @{
+   */
+
   /// Base class for node handle types of maps and sets.
   template
 class _Node_handle_common
@@ -64,6 +79,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION

   [[nodiscard]] bool empty() const noexcept { return _M_ptr ==
nullptr; }

+/// @cond undocumented
 protected:
   constexpr _Node_handle_common() noexcept : _M_ptr() { }

@@ -214,6 +230,8 @@ 

[Bug bootstrap/102681] [12 Regression] AArch64 bootstrap failure

2021-10-21 Thread pinskia at gcc dot gnu.org via Gcc-bugs
https://gcc.gnu.org/bugzilla/show_bug.cgi?id=102681

--- Comment #12 from Andrew Pinski  ---
So this is definitely a bad interaction between complete unrolling where we
had:
for (unsigned int i = 1; i < 2; i++)
  if (this->coeffs[1] != 0)
 return false;

And jump threading.

I am still reducing the testcase but at least I figured out this part of it.

[Bug libffi/102874] [12 regression] src/x86/win64.S doesn't assemble with Solaris as

2021-10-21 Thread hjl.tools at gmail dot com via Gcc-bugs
https://gcc.gnu.org/bugzilla/show_bug.cgi?id=102874

--- Comment #4 from H.J. Lu  ---
(In reply to r...@cebitec.uni-bielefeld.de from comment #3)
> > --- Comment #2 from H.J. Lu  ---
> > Does libffi 3.4.2 work on Solaris?  If yes, why doesn't it work in gcc?
> 
> It does when gcc is configured with gas, but doesn't when configured
> with /bin/as.
> 
> > If no, please fix the upstream and backport the fix.
> 
> Done: https://github.com/libffi/libffi/issues/665.
> 
> Depending on how this goes upstream, we may need a local fix: leaving
> gcc master broken on Solaris/x86 isn't an option!

The goal is to fix libffi in both upstream and GCC.  Please do

1. Create a pull request to fix libffi upstream.
2. Submit the proposed fix to GCC.
3. After the proposed fix is checked into GCC, add it to libffi/LOCAL_PATCHES
so that we keep it in GCC until it is fixed in upstream and synced.

If libffi upstream is fixed before the proposed fix is checked into GCC,
you can cherry-pick the fix from libffi upstream without touching
libffi/LOCAL_PATCHES.

[PATCH v2] c++tools: Fix memory leak

2021-10-21 Thread Jonathan Wakely via Gcc-patches
On Thu, 21 Oct 2021 at 20:38, Jason Merrill wrote:

> On 10/21/21 09:28, Jonathan Wakely wrote:
> >   #else
> > buffer = xmalloc (stat.st_size);
> > if (!buffer)
> >   return -errno;
> > +  struct Deleter { void operator()(void* p) const { free(p); } };
> > +  std::unique_ptr guard;
>
> Don't you need to initialize guard from buffer?
>

Oops, yes!  Updated patch attached.
commit b280f6b5b4339586446eec99e49074e091c27ea5
Author: Jonathan Wakely 
Date:   Thu Oct 21 22:32:23 2021

c++tools: Fix memory leak

The allocated memory is not freed when returning early due to an error.

c++tools/ChangeLog:

* resolver.cc (module_resolver::read_tuple_file): Use unique_ptr
to ensure memory is freed before returning.

diff --git a/c++tools/resolver.cc b/c++tools/resolver.cc
index 421fdaa55fe..a1837b3ee10 100644
--- a/c++tools/resolver.cc
+++ b/c++tools/resolver.cc
@@ -23,6 +23,7 @@ along with GCC; see the file COPYING3.  If not see
 #include "resolver.h"
 // C++
 #include 
+#include 
 // C
 #include 
 // OS
@@ -114,10 +115,17 @@ module_resolver::read_tuple_file (int fd, char const 
*prefix, bool force)
   buffer = mmap (nullptr, stat.st_size, PROT_READ, MAP_PRIVATE, fd, 0);
   if (buffer == MAP_FAILED)
 return -errno;
+  struct Deleter {
+void operator()(void* p) const { munmap(p, size); }
+size_t size;
+  };
+  std::unique_ptr guard(buffer, Deleter{(size_t)stat.st_size});
 #else
   buffer = xmalloc (stat.st_size);
   if (!buffer)
 return -errno;
+  struct Deleter { void operator()(void* p) const { free(p); } };
+  std::unique_ptr guard(buffer);
   if (read (fd, buffer, stat.st_size) != stat.st_size)
 return -errno;
 #endif
@@ -179,12 +187,6 @@ module_resolver::read_tuple_file (int fd, char const 
*prefix, bool force)
}
 }
 
-#if MAPPED_READING
-  munmap (buffer, stat.st_size);
-#else
-  free (buffer);
-#endif
-
   return 0;
 }
 


[Bug fortran/101337] gfortran doesn't diagnose all operands with constraint violations

2021-10-21 Thread sandra at gcc dot gnu.org via Gcc-bugs
https://gcc.gnu.org/bugzilla/show_bug.cgi?id=101337

--- Comment #1 from sandra at gcc dot gnu.org ---
This is likely a "won't fix" bug, but I'll leave it open for now.  The test
cases (now committed) are still XFAILed.

[Bug fortran/101334] gfortran fails to enforce C838 on disallowed uses of assumed-rank variable names + bogus errors

2021-10-21 Thread sandra at gcc dot gnu.org via Gcc-bugs
https://gcc.gnu.org/bugzilla/show_bug.cgi?id=101334

sandra at gcc dot gnu.org changed:

   What|Removed |Added

 Resolution|--- |FIXED
 Status|UNCONFIRMED |RESOLVED

--- Comment #3 from sandra at gcc dot gnu.org ---
This has been fixed.

[Bug fortran/54753] assumed-rank dummies: Reject assumed-size actuals in in some cases (C535c; in F2018: C839)

2021-10-21 Thread sandra at gcc dot gnu.org via Gcc-bugs
https://gcc.gnu.org/bugzilla/show_bug.cgi?id=54753

sandra at gcc dot gnu.org changed:

   What|Removed |Added

 Resolution|--- |FIXED
 Status|NEW |RESOLVED

--- Comment #7 from sandra at gcc dot gnu.org ---
This has been fixed except for the problem with deallocation/initialization of
assumed-rank arrays now being tracked separately in PR102641.

[Bug libffi/102874] [12 regression] src/x86/win64.S doesn't assemble with Solaris as

2021-10-21 Thread ro at CeBiTec dot Uni-Bielefeld.DE via Gcc-bugs
https://gcc.gnu.org/bugzilla/show_bug.cgi?id=102874

--- Comment #3 from ro at CeBiTec dot Uni-Bielefeld.DE  ---
> --- Comment #2 from H.J. Lu  ---
> Does libffi 3.4.2 work on Solaris?  If yes, why doesn't it work in gcc?

It does when gcc is configured with gas, but doesn't when configured
with /bin/as.

> If no, please fix the upstream and backport the fix.

Done: https://github.com/libffi/libffi/issues/665.

Depending on how this goes upstream, we may need a local fix: leaving
gcc master broken on Solaris/x86 isn't an option!

[Bug fortran/101333] gfortran fails to enforce C711 on assumed-type actual arguments

2021-10-21 Thread sandra at gcc dot gnu.org via Gcc-bugs
https://gcc.gnu.org/bugzilla/show_bug.cgi?id=101333

sandra at gcc dot gnu.org changed:

   What|Removed |Added

 Status|UNCONFIRMED |RESOLVED
 Resolution|--- |FIXED

--- Comment #2 from sandra at gcc dot gnu.org ---
This has been fixed.

[Bug fortran/101320] Bind(C): Missing diagnostic for constraint C1557 on allocatable/pointer arguments

2021-10-21 Thread sandra at gcc dot gnu.org via Gcc-bugs
https://gcc.gnu.org/bugzilla/show_bug.cgi?id=101320

sandra at gcc dot gnu.org changed:

   What|Removed |Added

 Status|UNCONFIRMED |RESOLVED
 Resolution|--- |FIXED

--- Comment #2 from sandra at gcc dot gnu.org ---
This has been fixed.

[Bug fortran/101319] Missing diagnostic for argument with type parameters for assumed-type dummy

2021-10-21 Thread sandra at gcc dot gnu.org via Gcc-bugs
https://gcc.gnu.org/bugzilla/show_bug.cgi?id=101319

sandra at gcc dot gnu.org changed:

   What|Removed |Added

 Status|UNCONFIRMED |RESOLVED
 Resolution|--- |FIXED

--- Comment #2 from sandra at gcc dot gnu.org ---
This has been fixed.

Re: [PATCH] c++: P2360R0: Extend init-stmt to allow alias-decl [PR102617]

2021-10-21 Thread Jason Merrill via Gcc-patches

On 10/21/21 16:26, Marek Polacek wrote:

The following patch implements C++23 P2360R0.  This proposal merely
extends init-statement to contain alias-declaration.  init-statement
is used in if/for/switch.  The unsightly duplication of the new code
seems to be necessary to handle

   for ( init-statement condition[opt] ; expression[opt] ) statement

as well as

   for ( init-statement[opt] for-range-declaration : for-range-initializer ) 
statement


It seems like the duplication of the new code is a consequence of the 
duplication of the old code.  I'd think we could remove the duplication 
by remembering the result of cp_parser_range_based_for_with_init_p and 
then recursing at the end if it was true.  Or check it in cp_parser_for 
and call cp_parser_init_statement twice.



Bootstrapped/regtested on x86_64-pc-linux-gnu, ok for trunk?

PR c++/102617

gcc/cp/ChangeLog:

* parser.c (cp_parser_init_statement): Allow alias-declaration in
init-statement.

gcc/testsuite/ChangeLog:

* g++.dg/cpp23/init-stmt1.C: New test.
* g++.dg/cpp23/init-stmt2.C: New test.
---
  gcc/cp/parser.c | 52 +++--
  gcc/testsuite/g++.dg/cpp23/init-stmt1.C | 31 +++
  gcc/testsuite/g++.dg/cpp23/init-stmt2.C | 25 
  3 files changed, 96 insertions(+), 12 deletions(-)
  create mode 100644 gcc/testsuite/g++.dg/cpp23/init-stmt1.C
  create mode 100644 gcc/testsuite/g++.dg/cpp23/init-stmt2.C

diff --git a/gcc/cp/parser.c b/gcc/cp/parser.c
index 49d951cfb19..8ba5370740e 100644
--- a/gcc/cp/parser.c
+++ b/gcc/cp/parser.c
@@ -12040,6 +12040,7 @@ cp_parser_handle_directive_omp_attributes (cp_parser 
*parser, tree *pattrs,
init-statement:
  expression-statement
  simple-declaration
+alias-declaration
  
TM Extension:
  
@@ -13987,12 +13988,13 @@ cp_parser_iteration_statement (cp_parser* parser, bool *if_p, bool ivdep,

return statement;
  }
  
-/* Parse a init-statement or the declarator of a range-based-for.

+/* Parse an init-statement or the declarator of a range-based-for.
 Returns true if a range-based-for declaration is seen.
  
 init-statement:

   expression-statement
- simple-declaration  */
+ simple-declaration
+ alias-declaration  */
  
  static bool

  cp_parser_init_statement (cp_parser *parser, tree *decl)
@@ -14013,11 +14015,24 @@ cp_parser_init_statement (cp_parser *parser, tree 
*decl)
{
  tree dummy;
  cp_parser_parse_tentatively (parser);
- /* Parse the declaration.  */
- cp_parser_simple_declaration (parser,
-   /*function_definition_allowed_p=*/false,
-   &dummy);
- cp_parser_require (parser, CPP_SEMICOLON, RT_SEMICOLON);
+ if (cp_lexer_next_token_is_keyword (parser->lexer, RID_USING))
+   {
+ cp_parser_alias_declaration (parser);
+ if (cxx_dialect < cxx23
+ && !cp_parser_uncommitted_to_tentative_parse_p (parser))
+   pedwarn (cp_lexer_peek_token (parser->lexer)->location,
+OPT_Wc__23_extensions,
+  "alias-declaration in init-statement only "
+  "available with %<-std=c++23%> or %<-std=gnu++23%>");
+   }
+ else
+   {
+ /* Parse the declaration.  */
+ cp_parser_simple_declaration (parser,
+   /*function_definition_allowed_p=*/
+   false, &dummy);
+ cp_parser_require (parser, CPP_SEMICOLON, RT_SEMICOLON);
+   }
  if (!cp_parser_parse_definitely (parser))
/* That didn't work, try to parse it as an expression-statement.  */
cp_parser_expression_statement (parser, NULL_TREE);
@@ -14038,10 +14053,23 @@ cp_parser_init_statement (cp_parser *parser, tree 
*decl)
/* We're going to speculatively look for a declaration, falling back
 to an expression, if necessary.  */
cp_parser_parse_tentatively (parser);
-  /* Parse the declaration.  */
-  cp_parser_simple_declaration (parser,
-   /*function_definition_allowed_p=*/false,
-   decl);
+  bool expect_semicolon_p = true;
+  if (cp_lexer_next_token_is_keyword (parser->lexer, RID_USING))
+   {
+ cp_parser_alias_declaration (parser);
+ expect_semicolon_p = false;
+ if (cxx_dialect < cxx23
+ && !cp_parser_uncommitted_to_tentative_parse_p (parser))
+   pedwarn (cp_lexer_peek_token (parser->lexer)->location,
+OPT_Wc__23_extensions,
+"alias-declaration in init-statement only "
+"available with %<-std=c++23%> or %<-std=gnu++23%>");
+   }
+  else
+   /* Parse the declaration.  */
+   cp_parser_simple_declaration 

Re: [PATCH] Possible use before def in fortran/trans-decl.c.

2021-10-21 Thread Martin Sebor via Gcc-patches

On 10/21/21 1:02 PM, Andrew MacLeod via Gcc-patches wrote:
As I'm tweaking installing ranger as the VRP2 pass, I am getting a stage 
2 bootstrap failure now:


In file included from 
/opt/notnfs/amacleod/master/gcc/gcc/fortran/trans-decl.c:28:
/opt/notnfs/amacleod/master/gcc/gcc/tree.h: In function ‘void 
gfc_conv_cfi_to_gfc(stmtblock_t*, stmtblock_t*, tree, tree, gfc_symbol*)’:
/opt/notnfs/amacleod/master/gcc/gcc/tree.h:244:56: error: ‘rank’ may be 
used uninitialized in this function [-Werror=maybe-uninitialized]

   244 | #define TREE_CODE(NODE) ((enum tree_code) (NODE)->base.code)
   | ^~~~
/opt/notnfs/amacleod/master/gcc/gcc/fortran/trans-decl.c:6671:8: note: 
‘rank’ was declared here

  6671 |   tree rank, idx, etype, tmp, tmp2, size_var = NULL_TREE;
   |    ^~~~
cc1plus: all warnings being treated as errors
make[3]: *** [Makefile:1136: fortran/trans-decl.o] Error 1


looking at that function, in the middle I see:

   if (sym->as->rank < 0)
     {
   /* Set gfc->dtype.rank, if assumed-rank.  */
   rank = gfc_get_cfi_desc_rank (cfi);
   gfc_add_modify (, gfc_conv_descriptor_rank (gfc_desc), rank);
     }
   else if (!GFC_DESCRIPTOR_TYPE_P (TREE_TYPE (gfc_desc)))
     /* In that case, the CFI rank and the declared rank can differ.  */
     rank = gfc_get_cfi_desc_rank (cfi);
   else
     rank = build_int_cst (signed_char_type_node, sym->as->rank);


so rank is set on all paths here.   However, stepping back a bit, 
earlier in the function I see:


   if (!sym->attr.dimension || !GFC_DESCRIPTOR_TYPE_P (TREE_TYPE 
(gfc_desc)))

     {
   tmp = gfc_get_cfi_desc_base_addr (cfi);
   gfc_add_modify (, gfc_desc,
   fold_convert (TREE_TYPE (gfc_desc), tmp));
   if (!sym->attr.dimension)
     goto done;
     }

The done: label occurs *after* that block of initialization code, and a
bit further down, I see this:


       gfc_add_modify (_body, tmpidx, idx);
   stmtblock_t inner_loop;
   gfc_init_block (_loop);
   tree dim = gfc_create_var (TREE_TYPE (rank), "dim");

I cannot convince myself by looking at the intervening code that this 
can not be executed along this path.  Perhaps someone more familiar with 
the code can check it out.   However, it seems worthwhile to at least 
initialize rank to NULL_TREE, thus we can be more likely to see a trap 
if that path ever gets followed.


And it makes the warning go away :-)

OK?


Initializing variables on declaration is commonly recommended
as a best C/C++ etc. programming practice.  If it silences
a warning and makes the code more readable, who could possibly
say no? ;)



Andrew

PS as a side note, it would be handy to have the def point *and* the use 
point that might be undefined.   It's a big function and it took me a 
while just to see where a possible use might be.


The use point should be what the warning points to.  In
the case above it's the result of macro expansion so it less
than helpfully points to the macro definition. I would expect
it to also point to its expansion like in other warnings.
Something must be interfering with it.  I've opened pr102887
to remind us to look into it.

Martin


[Bug tree-optimization/102888] New: missing case for combining / and % into one operation

2021-10-21 Thread vanyacpp at gmail dot com via Gcc-bugs
https://gcc.gnu.org/bugzilla/show_bug.cgi?id=102888

Bug ID: 102888
   Summary: missing case for combining / and % into one operation
   Product: gcc
   Version: 12.0
Status: UNCONFIRMED
  Severity: normal
  Priority: P3
 Component: tree-optimization
  Assignee: unassigned at gcc dot gnu.org
  Reporter: vanyacpp at gmail dot com
  Target Milestone: ---

Normally GCC combines a/b and a%b into one operation when they are computed in
the same basic-block. The example below has two functions. For one GCC is able
to combine the operations and for the other not (presumably because of complicated
control-flow). I believe the two functions are functionally equivalent. 

unsigned long long reduce(unsigned long long a, unsigned long long b)
{
while ((a % b) == 0)
a /= b;

return a;
}

unsigned long long reduce_opt(unsigned long long a, unsigned long long b)
{
for (;;)
{
unsigned long long quot = a / b;
unsigned long long rem = a % b;
if (rem != 0)
break;
a = quot;
}

return a;
}

reduce.L3:
mov rax, r8
xor edx, edx
div rsi
xor edx, edx
mov r8, rax
div rsi
test rdx, rdx
je  .L3

reduce_opt.L8:
xor edx, edx
mov r8, rax
div rsi
test rdx, rdx
je  .L8

https://godbolt.org/z/9dqs8avE5

It would be great if GCC generated the same code for both of these functions.

[Bug c++/96517] ICE in is_this_parameter when accessing constexpr method of a field inside coroutine lambda (with optimization)

2021-10-21 Thread iains at gcc dot gnu.org via Gcc-bugs
https://gcc.gnu.org/bugzilla/show_bug.cgi?id=96517

Iain Sandoe  changed:

   What|Removed |Added

 CC||lcw at fb dot com

--- Comment #1 from Iain Sandoe  ---
*** Bug 102878 has been marked as a duplicate of this bug. ***

[Bug c++/102878] Internal compiler error with coroutine calling constexpr function

2021-10-21 Thread iains at gcc dot gnu.org via Gcc-bugs
https://gcc.gnu.org/bugzilla/show_bug.cgi?id=102878

Iain Sandoe  changed:

   What|Removed |Added

 Resolution|--- |DUPLICATE
 Status|NEW |RESOLVED

--- Comment #2 from Iain Sandoe  ---
actually, I posted a patch for this today ...

*** This bug has been marked as a duplicate of bug 96517 ***

[Bug middle-end/102887] wrong warning location with macro expansion

2021-10-21 Thread msebor at gcc dot gnu.org via Gcc-bugs
https://gcc.gnu.org/bugzilla/show_bug.cgi?id=102887

Martin Sebor  changed:

   What|Removed |Added

   Keywords||diagnostic
   Last reconfirmed||2021-10-21
 Status|UNCONFIRMED |NEW
 Ever confirmed|0   |1
 Blocks||24639


Referenced Bugs:

https://gcc.gnu.org/bugzilla/show_bug.cgi?id=24639
[Bug 24639] [meta-bug] bug to track all Wuninitialized issues

[Bug middle-end/102887] New: wrong warning location with macro expansion

2021-10-21 Thread msebor at gcc dot gnu.org via Gcc-bugs
https://gcc.gnu.org/bugzilla/show_bug.cgi?id=102887

Bug ID: 102887
   Summary: wrong warning location with macro expansion
   Product: gcc
   Version: 12.0
Status: UNCONFIRMED
  Severity: normal
  Priority: P3
 Component: middle-end
  Assignee: unassigned at gcc dot gnu.org
  Reporter: msebor at gcc dot gnu.org
  Target Milestone: ---

As noted here:
https://gcc.gnu.org/pipermail/gcc-patches/2021-October/582316.html
in a -Wuninitialized warning for the use of a variable in the expansion of a
macro the warning points to the macro definition and not to the point of its
expansion as in other warnings such as -Warray-bounds.  The test case below
shows the difference.  Pointing also to the point of the macro's expansion is
important in large functions.

$ cat t.C && gcc -O2 -S -Wall -Wextra t.C
#define X(x) x ? x + 1 : 2   // warning here (not helpful)

int f (int i, int j)
{
  int x; // note here (good)
  if (i < 0)
x = i + 1;
  if (j < 0)
return X (x);
  return -1;
}

#define A(i) a[i]// note here (good)
extern int a[4]; // note here (good)

int g (int i)
{
  if (i < 0)
return A (7);// warning here (helpful)
  return -1;
}
t.C: In function ‘int f(int, int)’:
t.C:1:16: warning: ‘x’ may be used uninitialized in this function
[-Wmaybe-uninitialized]
1 | #define X(x) x ? x + 1 : 2   // warning here (not helpful)
  |^
t.C:5:7: note: ‘x’ was declared here
5 |   int x; // note here (good)
  |   ^
t.C: In function ‘int g(int)’:
t.C:13:17: warning: array subscript 7 is above array bounds of ‘int [4]’
[-Warray-bounds]
   13 | #define A(i) a[i]// note here (good)
  |  ~~~^
t.C:19:12: note: in expansion of macro ‘A’
   19 | return A (7);// warning here (helpful)
  |^
t.C:14:12: note: while referencing ‘a’
   14 | extern int a[4]; // note here (good)
  |^

[PATCH] c++: P2360R0: Extend init-stmt to allow alias-decl [PR102617]

2021-10-21 Thread Marek Polacek via Gcc-patches
The following patch implements C++23 P2360R0.  This proposal merely
extends init-statement to contain alias-declaration.  init-statement
is used in if/for/switch.  The unsightly duplication of the new code
seems to be necessary to handle

  for ( init-statement condition[opt] ; expression[opt] ) statement

as well as

  for ( init-statement[opt] for-range-declaration : for-range-initializer ) 
statement

Bootstrapped/regtested on x86_64-pc-linux-gnu, ok for trunk?

PR c++/102617

gcc/cp/ChangeLog:

* parser.c (cp_parser_init_statement): Allow alias-declaration in
init-statement.

gcc/testsuite/ChangeLog:

* g++.dg/cpp23/init-stmt1.C: New test.
* g++.dg/cpp23/init-stmt2.C: New test.
---
 gcc/cp/parser.c | 52 +++--
 gcc/testsuite/g++.dg/cpp23/init-stmt1.C | 31 +++
 gcc/testsuite/g++.dg/cpp23/init-stmt2.C | 25 
 3 files changed, 96 insertions(+), 12 deletions(-)
 create mode 100644 gcc/testsuite/g++.dg/cpp23/init-stmt1.C
 create mode 100644 gcc/testsuite/g++.dg/cpp23/init-stmt2.C

diff --git a/gcc/cp/parser.c b/gcc/cp/parser.c
index 49d951cfb19..8ba5370740e 100644
--- a/gcc/cp/parser.c
+++ b/gcc/cp/parser.c
@@ -12040,6 +12040,7 @@ cp_parser_handle_directive_omp_attributes (cp_parser 
*parser, tree *pattrs,
   init-statement:
 expression-statement
 simple-declaration
+alias-declaration
 
   TM Extension:
 
@@ -13987,12 +13988,13 @@ cp_parser_iteration_statement (cp_parser* parser, 
bool *if_p, bool ivdep,
   return statement;
 }
 
-/* Parse a init-statement or the declarator of a range-based-for.
+/* Parse an init-statement or the declarator of a range-based-for.
Returns true if a range-based-for declaration is seen.
 
init-statement:
  expression-statement
- simple-declaration  */
+ simple-declaration
+ alias-declaration  */
 
 static bool
 cp_parser_init_statement (cp_parser *parser, tree *decl)
@@ -14013,11 +14015,24 @@ cp_parser_init_statement (cp_parser *parser, tree 
*decl)
{
  tree dummy;
  cp_parser_parse_tentatively (parser);
- /* Parse the declaration.  */
- cp_parser_simple_declaration (parser,
-   /*function_definition_allowed_p=*/false,
-   &dummy);
- cp_parser_require (parser, CPP_SEMICOLON, RT_SEMICOLON);
+ if (cp_lexer_next_token_is_keyword (parser->lexer, RID_USING))
+   {
+ cp_parser_alias_declaration (parser);
+ if (cxx_dialect < cxx23
+ && !cp_parser_uncommitted_to_tentative_parse_p (parser))
+   pedwarn (cp_lexer_peek_token (parser->lexer)->location,
+OPT_Wc__23_extensions,
+  "alias-declaration in init-statement only "
+  "available with %<-std=c++23%> or %<-std=gnu++23%>");
+   }
+ else
+   {
+ /* Parse the declaration.  */
+ cp_parser_simple_declaration (parser,
+   /*function_definition_allowed_p=*/
+   false, &dummy);
+ cp_parser_require (parser, CPP_SEMICOLON, RT_SEMICOLON);
+   }
  if (!cp_parser_parse_definitely (parser))
/* That didn't work, try to parse it as an expression-statement.  */
cp_parser_expression_statement (parser, NULL_TREE);
@@ -14038,10 +14053,23 @@ cp_parser_init_statement (cp_parser *parser, tree 
*decl)
   /* We're going to speculatively look for a declaration, falling back
 to an expression, if necessary.  */
   cp_parser_parse_tentatively (parser);
-  /* Parse the declaration.  */
-  cp_parser_simple_declaration (parser,
-   /*function_definition_allowed_p=*/false,
-   decl);
+  bool expect_semicolon_p = true;
+  if (cp_lexer_next_token_is_keyword (parser->lexer, RID_USING))
+   {
+ cp_parser_alias_declaration (parser);
+ expect_semicolon_p = false;
+ if (cxx_dialect < cxx23
+ && !cp_parser_uncommitted_to_tentative_parse_p (parser))
+   pedwarn (cp_lexer_peek_token (parser->lexer)->location,
+OPT_Wc__23_extensions,
+"alias-declaration in init-statement only "
+"available with %<-std=c++23%> or %<-std=gnu++23%>");
+   }
+  else
+   /* Parse the declaration.  */
+   cp_parser_simple_declaration (parser,
+ /*function_definition_allowed_p=*/false,
+ decl);
   parser->colon_corrects_to_scope_p = saved_colon_corrects_to_scope_p;
   if (cp_lexer_next_token_is (parser->lexer, CPP_COLON))
{
@@ -14054,7 +14082,7 @@ cp_parser_init_statement (cp_parser *parser, tree *decl)
 "range-based % loops 

[Bug tree-optimization/102879] [12 Regression] Dead Code Elimination Regression at -O3

2021-10-21 Thread pinskia at gcc dot gnu.org via Gcc-bugs
https://gcc.gnu.org/bugzilla/show_bug.cgi?id=102879

Andrew Pinski  changed:

   What|Removed |Added

   Target Milestone|--- |12.0

Re: how does vrp2 rearrange this?

2021-10-21 Thread Andrew Pinski via Gcc-patches
On Thu, Oct 21, 2021 at 8:04 AM Andrew MacLeod  wrote:
>
> On 10/19/21 7:13 PM, Andrew Pinski wrote:
> > On Tue, Oct 19, 2021 at 3:32 PM Andrew MacLeod  wrote:
> >> On 10/19/21 5:13 PM, Andrew Pinski wrote:
> >>> On Tue, Oct 19, 2021 at 1:29 PM Andrew MacLeod via Gcc-patches
> >>>  wrote:
>  using testcase ifcvt-4.c:
> 
> 
>  typedef int word __attribute__((mode(word)));
> 
>  word
>  foo (word x, word y, word a)
>  {
>   word i = x;
>   word j = y;
>   /* Try to make taking the branch likely.  */
>   __builtin_expect (x > y, 1);
>   if (x > y)
> {
>   i = a;
>   j = i;
> }
>   return i * j;
> 
> >
> > The testcase is broken anyways.
> > The builtin_expect should be inside the if to have any effect.  Look
> > at the estimated values:
> > if (x_3(D) > y_4(D))
> >   goto ; [50.00%]<<-- has been reversed.
> > else
> >   goto ; [50.00%]
> > ;;succ:   4 [50.0% (guessed)]  count:536870912 (estimated
> > locally) (TRUE_VALUE,EXECUTABLE)
> > ;;3 [50.0% (guessed)]  count:536870912 (estimated
> > locally) (FALSE_VALUE,EXECUTABLE)
> >
> > See how it is 50/50?
> > The testcase is not even testing what it says it is testing.  Just
> > happened to work previously does not mean anything.  Move the
> > builtin_expect inside the if and try again. I am shocked it took this
> > long to find the testcase issue really.
> >
> > Thanks,
> > Andrew Pinski
> >
> Moving the expect around doesn't change anything, in fact, it makes it
> worse since fre and evrp immediately eliminate it as true if it is in
> the THEN block.

I think you misunderstood the change I was saying to do.
Try this:
typedef int word __attribute__((mode(word)));

word
foo (word x, word y, word a)
{
  word i = x;
  word j = y;
  /* Try to make taking the branch likely.  */
  if (__builtin_expect (x > y, 1))
{
  i = a;
  j = i;
}
  return i * j;
}
/* { dg-final { scan-rtl-dump "2 true changes made" "ce1" } } */

This should fix the "estimated values" to be more correct.

Thanks,
Andrew Pinski

>
> It looks like it is eliminated by the CDDCE pass:
>
> cddce1 sees:
>
>_1 = x_5(D) > y_7(D);
># RANGE [0, 1] NONZERO 1
>_2 = (long int) _1;
>__builtin_expect (_2, 1);
>if (x_5(D) > y_7(D))
>  goto ; [INV]
>else
>  goto ; [INV]
>
> and proceeds:
>
> Marking useful stmt: if (x_5(D) > y_7(D))
> processing: if (x_5(D) > y_7(D))
> processing: i_3 = PHI 
>
> Eliminating unnecessary statements:
> Deleting : __builtin_expect (_2, 1);
> Deleting : _2 = (long int) _1;
> Deleting : _1 = x_5(D) > y_7(D);
>
> If we are supposed to reverse the if, it is not obvious to me who is
> supposed to.  You seem to be right that it's a crapshoot that VRP2 does
> it because there isn't enough info to dictate it, unless somewhere it
> detects that a THEN that targets an empty block which falls through to
> the ELSE block should be swapped.  Or maybe you are right that it
> happens by fluke due to the ASSERTS being added and removed.
>
> If I turn off DCE, then this all works like it is supposed to, so maybe
> DCE isn't supposed to remove this?
>
> Andrew
>


PING [PATCH v4 0/2] Implement indirect external access

2021-10-21 Thread H.J. Lu via Gcc-patches
On Wed, Sep 22, 2021 at 7:02 PM H.J. Lu  wrote:
>
> Changes in the v4 patch.
>
> 1. Add nodirect_extern_access attribute.
>
> Changes in the v3 patch.
>
> 1. GNU_PROPERTY_1_NEEDED_INDIRECT_EXTERN_ACCESS support has been added to
> GNU binutils 2.38.  But the -z indirect-extern-access linker option is
> only available for Linux/x86.  However, the --max-cache-size=SIZE linker
> option was also added within a day.  --max-cache-size=SIZE is used to
> check for GNU_PROPERTY_1_NEEDED_INDIRECT_EXTERN_ACCESS support.
>
> Changes in the v2 patch.
>
> 1. Rename the option to -fdirect-extern-access.
>
> ---
> On systems with copy relocation:
> * A copy in executable is created for the definition in a shared library
> at run-time by ld.so.
> * The copy is referenced by executable and shared libraries.
> * Executable can access the copy directly.
>
> Issues are:
> * Overhead of a copy, time and space, may be visible at run-time.
> * Read-only data in the shared library becomes read-write copy in
> executable at run-time.
> * Local access to data with the STV_PROTECTED visibility in the shared
> library must use GOT.
>
> On systems without function descriptor, function pointers vary depending
> on where and how the functions are defined.
> * If the function is defined in executable, it can be the address of
> function body.
> * If the function, including the function with STV_PROTECTED visibility,
> is defined in the shared library, it can be the address of the PLT entry
> in executable or shared library.
>
> Issues are:
> * The address of function body may not be used as its function pointer.
> * ld.so needs to search loaded shared libraries for the function pointer
> of the function with STV_PROTECTED visibility.
>
> Here is a proposal to remove copy relocation and use canonical function
> pointer:
>
> 1. Accesses, including in PIE and non-PIE, to undefined symbols must
> use GOT.
>   a. Linker may optimize out GOT access if the data is defined in PIE or
>   non-PIE.
> 2. Read-only data in the shared library remain read-only at run-time
> 3. Address of global data with the STV_PROTECTED visibility in the shared
> library is the address of data body.
>   a. Can use IP-relative access.
>   b. May need GOT without IP-relative access.
> 4. For systems without function descriptor,
>   a. All global function pointers of undefined functions in PIE and
>   non-PIE must use GOT.  Linker may optimize out GOT access if the
>   function is defined in PIE or non-PIE.
>   b. Function pointer of functions with the STV_PROTECTED visibility in
>   executable and shared library is the address of function body.
>i. Can use IP-relative access.
>ii. May need GOT without IP-relative access.
>iii. Branches to undefined functions may use PLT.
> 5. Single global definition marker:
>
> Add GNU_PROPERTY_1_NEEDED:
>
> #define GNU_PROPERTY_1_NEEDED GNU_PROPERTY_UINT32_OR_LO
>
> to indicate the needed properties by the object file.
>
> Add GNU_PROPERTY_1_NEEDED_INDIRECT_EXTERN_ACCESS:
>
> #define GNU_PROPERTY_1_NEEDED_INDIRECT_EXTERN_ACCESS (1U << 0)
>
> to indicate that the object file requires canonical function pointers and
> cannot be used with copy relocation.  This bit should be cleared in
> executable when there are non-GOT or non-PLT relocations in relocatable
> input files without this bit set.
>
>   a. Protected symbol access within the shared library can be treated as
>   local.
>   b. Copy relocation should be disallowed at link-time and run-time.
>   c. GOT function pointer reference is required at link-time and run-time.
>
> The indirect external access marker can be used in the following ways:
>
> 1. Linker can decide the best way to resolve a relocation against a
> protected symbol before seeing all relocations against the symbol.
> 2. Dynamic linker can decide if it is an error to have a copy relocation
> in executable against the protected symbol in a shared library by checking
> if the shared library is built with -fno-direct-extern-access.
>
> Add a compiler option, -fdirect-extern-access. -fdirect-extern-access is
> the default.  With -fno-direct-extern-access:
>
> 1. Always to use GOT to access undefined symbols, including in PIE and
> non-PIE.  This is safe to do and does not break the ABI.
> 2. In executable and shared library, for symbols with the STV_PROTECTED
> visibility:
>   a. The address of data symbol is the address of data body.
>   b. For systems without function descriptor, the function pointer is
>   the address of function body.
> These break the ABI and resulting shared libraries may not be compatible
> with executables which are not compiled with -fno-direct-extern-access.
> 3. Generate an indirect external access marker in relocatable objects if
> supported by linker.
>
> H.J. Lu (2):
>   Add -f[no-]direct-extern-access
>   Add TARGET_ASM_EMIT_GNU_PROPERTY_NOTE
>

Hi,

This has been implemented in binutils 2.38 and glibc 2.35.
What do I need to do to get it into GCC 12?

Thanks.

-- 
H.J.


[Bug bootstrap/102681] [12 Regression] AArch64 bootstrap failure

2021-10-21 Thread pinskia at gcc dot gnu.org via Gcc-bugs
https://gcc.gnu.org/bugzilla/show_bug.cgi?id=102681

--- Comment #11 from Andrew Pinski  ---
Good news I can reproduce the warning with the preprocessed source on a native
x86_64-linux-gnu trunk GCC.

Re: [PATCH] c++tools: Fix memory leak

2021-10-21 Thread Jason Merrill via Gcc-patches

On 10/21/21 09:28, Jonathan Wakely wrote:

The allocated memory is not freed when returning early due to an error.

c++tools/ChangeLog:

* resolver.cc (module_resolver::read_tuple_file): Use unique_ptr
to ensure memory is freed before returning.
---
  c++tools/resolver.cc | 14 --
  1 file changed, 8 insertions(+), 6 deletions(-)

diff --git a/c++tools/resolver.cc b/c++tools/resolver.cc
index 421fdaa55fe..d1b73a47778 100644
--- a/c++tools/resolver.cc
+++ b/c++tools/resolver.cc
@@ -23,6 +23,7 @@ along with GCC; see the file COPYING3.  If not see
  #include "resolver.h"
  // C++
  #include 
+#include <memory>
  // C
  #include 
  // OS
@@ -114,10 +115,17 @@ module_resolver::read_tuple_file (int fd, char const 
*prefix, bool force)
buffer = mmap (nullptr, stat.st_size, PROT_READ, MAP_PRIVATE, fd, 0);
if (buffer == MAP_FAILED)
  return -errno;
+  struct Deleter {
+void operator()(void* p) const { munmap(p, size); }
+size_t size;
+  };
+  std::unique_ptr<void, Deleter> guard(buffer, Deleter{(size_t)stat.st_size});
  #else
buffer = xmalloc (stat.st_size);
if (!buffer)
  return -errno;
+  struct Deleter { void operator()(void* p) const { free(p); } };
+  std::unique_ptr<void, Deleter> guard;


Don't you need to initialize guard from buffer?


if (read (fd, buffer, stat.st_size) != stat.st_size)
  return -errno;
  #endif
@@ -179,12 +187,6 @@ module_resolver::read_tuple_file (int fd, char const 
*prefix, bool force)
}
  }
  
-#if MAPPED_READING

-  munmap (buffer, stat.st_size);
-#else
-  free (buffer);
-#endif
-
return 0;
  }
  





Re: [PATCH] libcody: Avoid double-free

2021-10-21 Thread Jason Merrill via Gcc-patches

On 10/21/21 09:27, Jonathan Wakely wrote:

If the listen call fails then 'goto fail' will jump to that label and
use freeaddrinfo again. Set the pointer to null to prevent that.


OK.


libcody/ChangeLog:

* netserver.cc (ListenInet6): Set pointer to null after
deallocation.
---
  libcody/netserver.cc | 1 +
  1 file changed, 1 insertion(+)

diff --git a/libcody/netserver.cc b/libcody/netserver.cc
index 30202c5106a..0499b5790b4 100644
--- a/libcody/netserver.cc
+++ b/libcody/netserver.cc
@@ -140,6 +140,7 @@ int ListenInet6 (char const **e, char const *name, int 
port, unsigned backlog)
  
   listen:;

freeaddrinfo (addrs);
+  addrs = nullptr;
  
if (listen (fd, backlog ? backlog : 17) < 0)

  {





[Bug c++/102876] GCC fails to use constant initialization even when it knows the value to initialize

2021-10-21 Thread jason at gcc dot gnu.org via Gcc-bugs
https://gcc.gnu.org/bugzilla/show_bug.cgi?id=102876

--- Comment #7 from Jason Merrill  ---
But yes, the implicit constexpr patch I've been working on would likely improve
this as well.

[Bug target/98667] gcc generates endbr32 invalid opcode on -march=i486

2021-10-21 Thread cvs-commit at gcc dot gnu.org via Gcc-bugs
https://gcc.gnu.org/bugzilla/show_bug.cgi?id=98667

--- Comment #19 from CVS Commits  ---
The releases/gcc-9 branch has been updated by H.J. Lu :

https://gcc.gnu.org/g:5ed78f8bd84eb696579d928c816bc840664829b2

commit r9-9792-g5ed78f8bd84eb696579d928c816bc840664829b2
Author: H.J. Lu 
Date:   Thu Oct 21 09:45:14 2021 -0700

x86: Document -fcf-protection requires i686 or newer

PR target/98667
* doc/invoke.texi: Document -fcf-protection requires i686 or
new.

(cherry picked from commit 1373066a46d8d47abd97e46a005aef3b3dbfe94a)

[Bug target/98667] gcc generates endbr32 invalid opcode on -march=i486

2021-10-21 Thread cvs-commit at gcc dot gnu.org via Gcc-bugs
https://gcc.gnu.org/bugzilla/show_bug.cgi?id=98667

--- Comment #18 from CVS Commits  ---
The releases/gcc-10 branch has been updated by H.J. Lu :

https://gcc.gnu.org/g:523dc71f5cb858da18e1f648269746dab519b445

commit r10-10228-g523dc71f5cb858da18e1f648269746dab519b445
Author: H.J. Lu 
Date:   Thu Oct 21 09:45:14 2021 -0700

x86: Document -fcf-protection requires i686 or newer

PR target/98667
* doc/invoke.texi: Document -fcf-protection requires i686 or
new.

(cherry picked from commit 1373066a46d8d47abd97e46a005aef3b3dbfe94a)

[Bug target/98667] gcc generates endbr32 invalid opcode on -march=i486

2021-10-21 Thread cvs-commit at gcc dot gnu.org via Gcc-bugs
https://gcc.gnu.org/bugzilla/show_bug.cgi?id=98667

--- Comment #17 from CVS Commits  ---
The releases/gcc-11 branch has been updated by H.J. Lu :

https://gcc.gnu.org/g:e74336df42fa36244d576dd155d7e2e2c42bc3a0

commit r11-9179-ge74336df42fa36244d576dd155d7e2e2c42bc3a0
Author: H.J. Lu 
Date:   Thu Oct 21 09:45:14 2021 -0700

x86: Document -fcf-protection requires i686 or newer

PR target/98667
* doc/invoke.texi: Document -fcf-protection requires i686 or
new.

(cherry picked from commit 1373066a46d8d47abd97e46a005aef3b3dbfe94a)

[Bug c/102875] __builtin_strncpy output may be truncated copying bytes from a string of length

2021-10-21 Thread johnnymarler at gmail dot com via Gcc-bugs
https://gcc.gnu.org/bugzilla/show_bug.cgi?id=102875

Jonathan Marler  changed:

   What|Removed |Added

 Resolution|--- |FIXED
 Status|NEW |RESOLVED

--- Comment #2 from Jonathan Marler  ---
I confirmed this is fixed in 10.3.0.

[Bug target/98667] gcc generates endbr32 invalid opcode on -march=i486

2021-10-21 Thread cvs-commit at gcc dot gnu.org via Gcc-bugs
https://gcc.gnu.org/bugzilla/show_bug.cgi?id=98667

--- Comment #16 from CVS Commits  ---
The master branch has been updated by H.J. Lu :

https://gcc.gnu.org/g:1373066a46d8d47abd97e46a005aef3b3dbfe94a

commit r12-4619-g1373066a46d8d47abd97e46a005aef3b3dbfe94a
Author: H.J. Lu 
Date:   Thu Oct 21 09:45:14 2021 -0700

x86: Document -fcf-protection requires i686 or newer

PR target/98667
* doc/invoke.texi: Document -fcf-protection requires i686 or
new.

Re: [PATCH] x86: Document -fcf-protection requires i686 or newer

2021-10-21 Thread H.J. Lu via Gcc-patches
On Thu, Oct 21, 2021 at 12:04 PM Uros Bizjak  wrote:
>
> On Thu, Oct 21, 2021 at 6:47 PM H.J. Lu  wrote:
> >
> > PR target/98667
> > * doc/invoke.texi: Document -fcf-protection requires i686 or
> > new.
>
> Obvious patch?

I am checking it in and backporting it to release branches.

Thanks.

> Uros.
>
> > ---
> >  gcc/doc/invoke.texi | 3 ++-
> >  1 file changed, 2 insertions(+), 1 deletion(-)
> >
> > diff --git a/gcc/doc/invoke.texi b/gcc/doc/invoke.texi
> > index c66a25fcd69..71992b8c597 100644
> > --- a/gcc/doc/invoke.texi
> > +++ b/gcc/doc/invoke.texi
> > @@ -15542,7 +15542,8 @@ which functions and calls should be skipped from 
> > instrumentation
> >  (@pxref{Function Attributes}).
> >
> >  Currently the x86 GNU/Linux target provides an implementation based
> > -on Intel Control-flow Enforcement Technology (CET).
> > +on Intel Control-flow Enforcement Technology (CET) which works for
> > +i686 processor or newer.
> >
> >  @item -fstack-protector
> >  @opindex fstack-protector
> > --
> > 2.32.0
> >



-- 
H.J.


[Bug fortran/101304] Bind(C): CONTIGUOUS attribute not handled correctly in Fortran routines called from C with discontiguous argument

2021-10-21 Thread sandra at gcc dot gnu.org via Gcc-bugs
https://gcc.gnu.org/bugzilla/show_bug.cgi?id=101304

sandra at gcc dot gnu.org changed:

   What|Removed |Added

 Status|UNCONFIRMED |RESOLVED
 Resolution|--- |FIXED

--- Comment #1 from sandra at gcc dot gnu.org ---
Although it wasn't explicitly tagged with this issue, Tobias's recent big patch
to re-implement CFI <-> C descriptor conversions (commit
64f9623765da3306b0ab6a47997dc5d62c2ea261) fixed the testcases mentioned here
and removed the XFAILs on them, so I am marking this fixed.

Re: [PATCH] x86: Document -fcf-protection requires i686 or newer

2021-10-21 Thread Uros Bizjak via Gcc-patches
On Thu, Oct 21, 2021 at 6:47 PM H.J. Lu  wrote:
>
> PR target/98667
> * doc/invoke.texi: Document -fcf-protection requires i686 or
> new.

Obvious patch?

Uros.

> ---
>  gcc/doc/invoke.texi | 3 ++-
>  1 file changed, 2 insertions(+), 1 deletion(-)
>
> diff --git a/gcc/doc/invoke.texi b/gcc/doc/invoke.texi
> index c66a25fcd69..71992b8c597 100644
> --- a/gcc/doc/invoke.texi
> +++ b/gcc/doc/invoke.texi
> @@ -15542,7 +15542,8 @@ which functions and calls should be skipped from 
> instrumentation
>  (@pxref{Function Attributes}).
>
>  Currently the x86 GNU/Linux target provides an implementation based
> -on Intel Control-flow Enforcement Technology (CET).
> +on Intel Control-flow Enforcement Technology (CET) which works for
> +i686 processor or newer.
>
>  @item -fstack-protector
>  @opindex fstack-protector
> --
> 2.32.0
>


[PATCH] Possible use before def in fortran/trans-decl.c.

2021-10-21 Thread Andrew MacLeod via Gcc-patches
As I'm tweaking installing ranger as the VRP2 pass, I am getting a stage 
2 bootstrap failure now:


In file included from 
/opt/notnfs/amacleod/master/gcc/gcc/fortran/trans-decl.c:28:
/opt/notnfs/amacleod/master/gcc/gcc/tree.h: In function ‘void 
gfc_conv_cfi_to_gfc(stmtblock_t*, stmtblock_t*, tree, tree, gfc_symbol*)’:
/opt/notnfs/amacleod/master/gcc/gcc/tree.h:244:56: error: ‘rank’ may be 
used uninitialized in this function [-Werror=maybe-uninitialized]

  244 | #define TREE_CODE(NODE) ((enum tree_code) (NODE)->base.code)
  | ^~~~
/opt/notnfs/amacleod/master/gcc/gcc/fortran/trans-decl.c:6671:8: note: 
‘rank’ was declared here

 6671 |   tree rank, idx, etype, tmp, tmp2, size_var = NULL_TREE;
  |    ^~~~
cc1plus: all warnings being treated as errors
make[3]: *** [Makefile:1136: fortran/trans-decl.o] Error 1


looking at that function, in the middle I see:

  if (sym->as->rank < 0)
    {
  /* Set gfc->dtype.rank, if assumed-rank.  */
  rank = gfc_get_cfi_desc_rank (cfi);
  gfc_add_modify (&block, gfc_conv_descriptor_rank (gfc_desc), rank);
    }
  else if (!GFC_DESCRIPTOR_TYPE_P (TREE_TYPE (gfc_desc)))
    /* In that case, the CFI rank and the declared rank can differ.  */
    rank = gfc_get_cfi_desc_rank (cfi);
  else
    rank = build_int_cst (signed_char_type_node, sym->as->rank);


so rank is set on all paths here.   However, stepping back a bit, 
earlier in the function I see:


  if (!sym->attr.dimension || !GFC_DESCRIPTOR_TYPE_P (TREE_TYPE 
(gfc_desc)))

    {
  tmp = gfc_get_cfi_desc_base_addr (cfi);
  gfc_add_modify (&block, gfc_desc,
  fold_convert (TREE_TYPE (gfc_desc), tmp));
  if (!sym->attr.dimension)
    goto done;
    }

The done: label occurs *after* that block of initialization code, and 
a bit further down, I see this:


      gfc_add_modify (&loop_body, tmpidx, idx);
  stmtblock_t inner_loop;
  gfc_init_block (&inner_loop);
  tree dim = gfc_create_var (TREE_TYPE (rank), "dim");

I cannot convince myself by looking at the intervening code that this 
can not be executed along this path.  Perhaps someone more familiar with 
the code can check it out.   However, it seems worthwhile to at least 
initialize rank to NULL_TREE, thus we can be more likely to see a trap 
if that path ever gets followed.


And it makes the warning go away :-)

OK?

Andrew

PS as a side note, it would be handy to have the def point *and* the use 
point that might be undefined.   It's a big function and it took me a 
while just to see where a possible use might be.





commit ed571a93c54e3967fbf445624e47817be5e333ed
Author: Andrew MacLeod 
Date:   Thu Oct 21 14:48:20 2021 -0400

Initialize variable.

gcc/fortran/
* trans-decl.c (gfc_conv_cfi_to_gfc): Initialize rank to NULL_TREE.

diff --git a/gcc/fortran/trans-decl.c b/gcc/fortran/trans-decl.c
index de624c82fcf..fe5511b5285 100644
--- a/gcc/fortran/trans-decl.c
+++ b/gcc/fortran/trans-decl.c
@@ -6668,7 +6668,7 @@ gfc_conv_cfi_to_gfc (stmtblock_t *init, stmtblock_t *finally,
   stmtblock_t block;
   gfc_init_block (&block);
   tree cfi = build_fold_indirect_ref_loc (input_location, cfi_desc);
-  tree rank, idx, etype, tmp, tmp2, size_var = NULL_TREE;
+  tree idx, etype, tmp, tmp2, size_var = NULL_TREE, rank = NULL_TREE;
   bool do_copy_inout = false;
 
   /* When allocatable + intent out, free the cfi descriptor.  */


Re: [PATCH] x86: Adjust gcc.target/i386/pr22076.c

2021-10-21 Thread Uros Bizjak via Gcc-patches
On Thu, Oct 21, 2021 at 6:50 PM H.J. Lu  wrote:
>
> On Tue, Oct 19, 2021 at 11:42 PM Uros Bizjak  wrote:
> >
> > On Tue, Oct 19, 2021 at 8:23 PM H.J. Lu  wrote:
> > >
> > > commit 247c407c83f0015f4b92d5f71e45b63192f6757e
> > > Author: Roger Sayle 
> > > Date:   Mon Oct 18 12:15:40 2021 +0100
> > >
> > > Try placing RTL folded constants in the constant pool.
> > >
> > > My recent attempts to come up with a testcase for my patch to evaluate
> > > ss_plus in simplify-rtx.c, identified a missed optimization 
> > > opportunity
> > > (that's potentially a long-time regression): The RTL optimizers no 
> > > longer
> > > place constants in the constant pool.
> > >
> > > changed -m32 codegen from
> > >
> > > movq.LC1, %mm0
> > > paddb   .LC0, %mm0
> > > movq%mm0, x
> > > ret
> > >
> > > to
> > >
> > > movl$807671820, %eax
> > > movl$1616136252, %edx
> > > movl%eax, x
> > > movl%edx, x+4
> > > ret
> > >
> > > and -m64 codegen from
> > >
> > > movq.LC1(%rip), %mm0
> > > paddb   .LC0(%rip), %mm0
> > > movq%xmm0, x(%rip)
> > > ret
> > >
> > > to
> > >
> > > movq.LC2(%rip), %rax
> > > movq%rax, x(%rip)
> > > ret
> > >
> > > Adjust pr22076.c to check that MMX register isn't used since avoiding
> > > MMX register isn't a bad thing.
> > >
> > > PR testsuite/102840
> > > * gcc.target/i386/pr22076.c: Updated to check that MMX register
> > > isn't used.
> >
> > The compiler is now able to evaluate the result at the compile time
> > and it optimizes the test accordingly. Let's provide some MMX
> > instruction that is implemented with UNSPEC, so the compiler won't be
> > able to outsmart us.
> >
> > Something like the attached patch.
> >
> > Uros.
>
> Works for me.

Committed with the following ChangeLog:

testsuite: Adjust pr22076.c to avoid compile-time optimization [PR102840]

2021-10-21  Uroš Bizjak  

PR testsuite/102840

gcc/testsuite/ChangeLog:

* gcc.target/i386/pr22076.c: Adjust to avoid compile time optimization.

Uros.


[Bug rtl-optimization/102840] [12 Regression] gcc.target/i386/pr22076.c by r12-4475

2021-10-21 Thread ubizjak at gmail dot com via Gcc-bugs
https://gcc.gnu.org/bugzilla/show_bug.cgi?id=102840

Uroš Bizjak  changed:

   What|Removed |Added

 Status|NEW |RESOLVED
 Resolution|--- |FIXED

--- Comment #6 from Uroš Bizjak  ---
Fixed.

[Bug rtl-optimization/102840] [12 Regression] gcc.target/i386/pr22076.c by r12-4475

2021-10-21 Thread cvs-commit at gcc dot gnu.org via Gcc-bugs
https://gcc.gnu.org/bugzilla/show_bug.cgi?id=102840

--- Comment #5 from CVS Commits  ---
The master branch has been updated by Uros Bizjak :

https://gcc.gnu.org/g:6aceeb3fb64b0e82fc3301026669062797ec01a5

commit r12-4618-g6aceeb3fb64b0e82fc3301026669062797ec01a5
Author: Uros Bizjak 
Date:   Thu Oct 21 20:57:38 2021 +0200

testsuite: Adjust pr22076.c to avoid compile-time optimization [PR102840]

2021-10-21  Uroš Bizjak  

PR testsuite/102840

gcc/testsuite/ChangeLog:

* gcc.target/i386/pr22076.c: Adjust to avoid compile time
optimization.

[Bug bootstrap/102681] [12 Regression] AArch64 bootstrap failure

2021-10-21 Thread pinskia at gcc dot gnu.org via Gcc-bugs
https://gcc.gnu.org/bugzilla/show_bug.cgi?id=102681

--- Comment #10 from Andrew Pinski  ---
Hmm, somehow unroll messes up the relationship ...

[Bug bootstrap/102681] [12 Regression] AArch64 bootstrap failure

2021-10-21 Thread pinskia at gcc dot gnu.org via Gcc-bugs
https://gcc.gnu.org/bugzilla/show_bug.cgi?id=102681

--- Comment #9 from Andrew Pinski  ---
So in uninit1 we have:
  if (_6691 != 0)
goto ; [5.50%]
  else
goto ; [94.50%]

   [local count: 17344687]:
  goto ; [100.00%]

   [local count: 298013267]:

   [local count: 315357954]:
  # const_upper_3854 = PHI <_6687(87), 18446744073709551615(287)>
  # _870 = PHI <1(87), 0(287)>
(lots of stuff)

  if (_6691 != 0)
goto ; [5.50%]
  else
goto ; [94.50%]

   [local count: 298013268]:
  goto ; [100.00%]

   [local count: 17344687]:

   [local count: 315357954]:
  # const_upper_3931 = PHI 
  if (_870 != 0)
goto ; [50.00%]
  else
goto ; [50.00%]

   [local count: 157678977]:
  if (const_upper_3931 > _6695)
goto ; [89.00%]
  else
goto ; [11.00%]

But _870 is _6691 == 0 but that relationship is totally missed and there is
full on jump threading miss in the above IR.

[Bug fortran/92621] Problems with memory handling with allocatable intent(out) arrays with bind(c)

2021-10-21 Thread sandra at gcc dot gnu.org via Gcc-bugs
https://gcc.gnu.org/bugzilla/show_bug.cgi?id=92621

--- Comment #21 from sandra at gcc dot gnu.org ---
Tobias, did your big patch fully fix this issue so that we can close it?

[Bug c++/102876] GCC fails to use constant initialization even when it knows the value to initialize

2021-10-21 Thread jason at gcc dot gnu.org via Gcc-bugs
https://gcc.gnu.org/bugzilla/show_bug.cgi?id=102876

--- Comment #6 from Jason Merrill  ---
It's not clear to me that this optimization should use the constexpr machinery;
as I commented on bug 4131.  If optimization turns the initialization of a
static variable into a simple matter of storing a constant value, it should go
one step further and turn that constant value into a constant initializer.

[Bug bootstrap/102681] [12 Regression] AArch64 bootstrap failure

2021-10-21 Thread pinskia at gcc dot gnu.org via Gcc-bugs
https://gcc.gnu.org/bugzilla/show_bug.cgi?id=102681

--- Comment #8 from Andrew Pinski  ---
Created attachment 51648
  --> https://gcc.gnu.org/bugzilla/attachment.cgi?id=51648&action=edit
preprocessed source

unreduced preprocessed source which fails still as of r12-4600.
 -fno-PIE -c   -g -O2 -fno-checking -gtoggle -DIN_GCC -fno-exceptions
-fno-rtti -fasynchronous-unwind-tables -W -Wall -Wno-narrowing -Wwrite-strings
-Wcast-qual -Wno-error=format-diag -Wmissing-format-attribute
-Woverloaded-virtual -pedantic -Wno-long-long -Wno-variadic-macros
-Wno-overlength-strings -Werror -fno-common

Re: [PATCH] Convert strlen pass from evrp to ranger.

2021-10-21 Thread Jeff Law via Gcc-patches




On 10/21/2021 1:42 AM, Aldy Hernandez wrote:

Massaging the IL should only take two forms IIRC.

First, if we have a simplification we can do.  That could be const/copy
propagation, replacing an expression with an SSA_NAME or constant and
the like.  It doesn't massage the IL just to massage the IL.

Second, we do temporarily copy propagate the current known values of an
SSA name into use points and then see if that allows us to determine if
a statement is already in the hash tables.  But we undo that so that
nobody should see that temporary change in state.

Finally, it does create some expressions & statements on the fly to
enter them into the tables.  For example, if it sees a store, it'll
create a load with the source & dest interchanged and enter that into
the expression table.  But none of this stuff ever shows up in the IL.
It's just to create entries in the expression tables.

So ITSM the only real concern would be if those temporary const/copy
propagations were still in the IL and we called back into Ranger and it
poked at that data somehow.

Hmmm, this is all very good insight.  Thanks.

One thing that would have to be adjusted then is remove the
enable_ranger() call in the patch.  This sets a global ranger, and
there are users of get_range_query() that will use it to get on-demand
ranges.  One such use that I added was ssa_name_has_boolean_range in
tree-ssa-dom.c.  This means that if the IL has been temporarily
changed, this function can and will use the global ranger.  The
alternative here would be to just create a new local ranger:

-  gimple_ranger *ranger = enable_ranger (fun);
+  gimple_ranger *ranger = new gimple_ranger;

and then obviously deallocate it at the disable_ranger call site.

This will cause any users of get_range_query() in the compiler to just
use global ranges.  Hopefully, none of these downstream users of
get_range_query() from DOM need context sensitive results.
ssa_name_has_boolean_range??

I think what you'd need to do is check that there are no calls to the
ranger from cprop_into_stmt (?? this is the place where IL changes??),
until wherever the undoing happens (I couldn't find it).  I see a call
to simplify_using_ranges in optimize_stmt that looks like it could be
called with the IL in mid-flight.  Maybe this call needs to happen
before the IL is altered?


So if we're referring to those temporary const/copy propagations
"escaping" into Ranger, then I would fully expect that to cause
problems.  Essentially they're path sensitive const/copy propagations
and may not be valid on all the paths through the CFG to the statement
where the propagation occurs

Yeah.  disabling the global ranger should help, plus making sure you
don't use the ranger in the midst of the path sensitive changes.
I think we should first try to remove those temporary const/copy 
propagations.  As I noted in a different follow-up, I can't remember if 
they were done as part of the original non-copying threader or if they 
enabled further optimizations in the copying threader.  If it's the 
former, then they can go away and that would be my preference. I'll try 
to poke at that over the weekend.


jeff


Re: [PATCH] Handle jobserver file descriptors in btest.

2021-10-21 Thread Ian Lance Taylor via Gcc-patches
On Thu, Oct 21, 2021 at 12:48 AM Martin Liška  wrote:
>
> The patch is about sensitive handling of file descriptors opened
> by make's jobserver.

Thanks.  I think a better approach would be, at the start of main,
fstat the descriptors up to 10 and record the ones for which fstat
succeeds.  Then at the end of main only check the descriptors for
which fstat failed earlier.

I can work on that at some point if you don't want to tackle it.

Ian


[r12-4601 Regression] FAIL: gcc.target/i386/avx512fp16-13.c scan-assembler-times vmovdqa64[ \\t]+[^{\n]*%ymm[0-9]+[^\n]*\\) 1 on Linux/x86_64

2021-10-21 Thread sunil.k.pandey via Gcc-patches
On Linux/x86_64,

c8a889fc0e115d40a2d02f32842655f3eadc8fa1 is the first bad commit
commit c8a889fc0e115d40a2d02f32842655f3eadc8fa1
Author: Hongyu Wang 
Date:   Wed Oct 20 13:13:39 2021 +0800

i386: Fix wrong codegen for V8HF move without TARGET_AVX512F

caused

FAIL: gcc.target/i386/avx512fp16-13.c scan-assembler-times vmovdqa64[ 
\\t]+[^{\n]*%xmm[0-9]+[^\n]*\\) 1
FAIL: gcc.target/i386/avx512fp16-13.c scan-assembler-times vmovdqa64[ 
\\t]+[^{\n]*%ymm[0-9]+[^\n]*\\) 1

with GCC configured with

../../gcc/configure 
--prefix=/local/skpandey/gccwork/toolwork/gcc-bisect-master/master/r12-4601/usr 
--enable-clocale=gnu --with-system-zlib --with-demangler-in-ld 
--with-fpmath=sse --enable-languages=c,c++,fortran --enable-cet --without-isl 
--enable-libmpx x86_64-linux --disable-bootstrap

To reproduce:

$ cd {build_dir}/gcc && make check 
RUNTESTFLAGS="i386.exp=gcc.target/i386/avx512fp16-13.c 
--target_board='unix{-m32}'"
$ cd {build_dir}/gcc && make check 
RUNTESTFLAGS="i386.exp=gcc.target/i386/avx512fp16-13.c 
--target_board='unix{-m32\ -march=cascadelake}'"
$ cd {build_dir}/gcc && make check 
RUNTESTFLAGS="i386.exp=gcc.target/i386/avx512fp16-13.c 
--target_board='unix{-m64}'"
$ cd {build_dir}/gcc && make check 
RUNTESTFLAGS="i386.exp=gcc.target/i386/avx512fp16-13.c 
--target_board='unix{-m64\ -march=cascadelake}'"

(Please do not reply to this email, for question about this report, contact me 
at skpgkp2 at gmail dot com)


[Bug fortran/94022] Array slices of assumed-size arrays

2021-10-21 Thread sandra at gcc dot gnu.org via Gcc-bugs
https://gcc.gnu.org/bugzilla/show_bug.cgi?id=94022
Bug 94022 depends on bug 94070, which changed state.

Bug 94070 Summary: Assumed-rank arrays – bounds mishandled, 
SIZE/SHAPE/UBOUND/LBOUND
https://gcc.gnu.org/bugzilla/show_bug.cgi?id=94070

   What|Removed |Added

 Status|NEW |RESOLVED
 Resolution|--- |FIXED

[Bug fortran/94070] Assumed-rank arrays – bounds mishandled, SIZE/SHAPE/UBOUND/LBOUND

2021-10-21 Thread sandra at gcc dot gnu.org via Gcc-bugs
https://gcc.gnu.org/bugzilla/show_bug.cgi?id=94070

sandra at gcc dot gnu.org changed:

   What|Removed |Added

 Status|NEW |RESOLVED
 Resolution|--- |FIXED

--- Comment #13 from sandra at gcc dot gnu.org ---
I believe this issue is fixed now.  The original test case at the top of the
issue has been committed (now named assumed_rank_22), and my last commit added
a set of tests for the thing that was still triggering the ICE plus some more
for bind(c).

Re: [PATCH] Try to resolve paths in threader without looking further back.

2021-10-21 Thread Jeff Law via Gcc-patches




On 10/21/2021 1:17 AM, Aldy Hernandez wrote:

On Wed, Oct 20, 2021 at 10:01 PM Jeff Law  wrote:



On 10/20/2021 9:15 AM, Aldy Hernandez wrote:

On Wed, Oct 20, 2021 at 4:35 PM Martin Sebor  wrote:


I appreciate the heads up.  I'm happy that the threader has
improved.  I'm obviously not pleased that it has led to regressions
in warnings but I understand that in some cases they might be due
to limitations in the warning code.  I think the test case you have
xfailed might be one such example.  The uninitialized warnings are
exquisitely sensitive to these types of changes.  If/when this patch
is applied please reopen PR 89230 and reference this commit.

Having said that, to maintain the quality of diagnostics,
the work that goes into these nice optimizer improvements needs
to be balanced by an effort to either update the warning code
to cope with the IL changes, or the optimizers need to take care
to avoid exposing undefined code that the warnings are designed
to detect.  I'm concerned not just that the quality of GCC 12
diagnostics has been eroding, but also that it seems to be not
just acceptable but expected.

You make a very good point.  It is certainly not my intention to make
life difficult for the warning maintainers, but I'm afraid I don't
have sufficient knowledge in the area to improve them.

There may be some low hanging fruit though.  At least in the warnings
that use the ranger, there's no reason to run these passes so late in
the pipeline.  You could run the warning code as early as you want,
insofar as SSA is available and the CFG has been built.  Heck, you may
even be able to run at -O0, though we may need some sort of value
numbering.  I believe Richi even suggested this a while back.

Running them later in the pipeline is to take advantage of the
optimizers removing dead and unreachable code as much as possible. In
fact, that's critical to -Wuninitialized.  Optimizing away unreachable
paths  to avoid Wuninitialized false positives has been the major driver
of jump threading improvements for the last 15 years.

Ughh, that's unfortunate.  We're gonna have to come up with
improvements to the Wuninitialized code, or a different paradigm
altogether.  I'm afraid this will only get worse.
Well, good luck with a different paradigm :-)  It's a tough little nut.  
As long as we want to reduce false positives, then we're going to be 
dependent on optimization and related analysis.


And as I've noted before, we're generally better off fixing the 
optimizers when we stumble over a false positive from Wuninitialized.  
When that's not possible, we should look to fix the predicate analysis 
code.




It is a bit ironic that jump threading helps reduce Wuninitialized
false positives, but yet too much of it causes even more false
positives.
I would expect the latter to be relatively rare for Wuninitialized. I 
think some of the other middle end warnings may be in a different boat 
though.




jeff


[Bug rtl-optimization/102842] [10 Regression] ICE in cselib_record_set at -O2 or greater

2021-10-21 Thread vmakarov at gcc dot gnu.org via Gcc-bugs
https://gcc.gnu.org/bugzilla/show_bug.cgi?id=102842

--- Comment #12 from Vladimir Makarov  ---
The patch just hid the bug.  I believe the bug is still present on the trunk
too.

The insn in question is

(insn 26 64 109 3 (parallel [
(set (reg:SI 134 [ _12 ])
(plus:SI (mult:SI (reg:SI 117 [ _8 ])
(reg:SI 128))
(reg:SI 138)))
(set (reg:SI 135 [ _12+4 ])
(plus:SI (truncate:SI (lshiftrt:DI (plus:DI (mult:DI
(zero_extend:DI (reg:SI 117 [ _8 ]))
(zero_extend:DI (reg:SI 128)))
(zero_extend:DI (reg:SI 138)))
(const_int 32 [0x20])))
(reg:SI 138)))
]) "a.cpp":15:32 70 {umlal}
 (expr_list:REG_DEAD (reg:SI 138)
(expr_list:REG_DEAD (reg:SI 128)
(nil

And its definition is 

(define_insn "mlal"
  [(set (match_operand:SI 0 "s_register_operand" "=r,")
(plus:SI
 (mult:SI
  (match_operand:SI 4 "s_register_operand" "%r,r")
  (match_operand:SI 5 "s_register_operand" "r,r"))
 (match_operand:SI 1 "s_register_operand" "0,0")))
   (set (match_operand:SI 2 "s_register_operand" "=r,")
(plus:SI
 (truncate:SI
  (lshiftrt:DI
   (plus:DI
(mult:DI (SE:DI (match_dup 4)) (SE:DI (match_dup 5)))
(zero_extend:DI (match_dup 1)))
   (const_int 32)))
 (match_operand:SI 3 "s_register_operand" "2,2")))]
  "TARGET_32BIT"
  "mlal%?\\t%0, %2, %4, %5"
  [(set_attr "type" "umlal")
   (set_attr "predicable" "yes")
   (set_attr "arch" "v6,nov6")]

After couple of LRA constraints and assignment sub-passes, the two output
operands get the same hard reg.  And this results in cse abort in post-reload
pass.

The issue is that reload pseudos for pseudos 134 and 135 get the same value as
they both are matched with different occurrences of pseudo 138 in the insn.

The bug is in a very sensitive LRA code area and fixing it will take some time.
 But I hope I'll have a fix at the end of next week.

[Bug testsuite/102886] New: [12 regression] gcc.dg/tree-ssa/sra-18.c fails starting with r12-4607

2021-10-21 Thread seurer at gcc dot gnu.org via Gcc-bugs
https://gcc.gnu.org/bugzilla/show_bug.cgi?id=102886

Bug ID: 102886
   Summary: [12 regression] gcc.dg/tree-ssa/sra-18.c fails
starting with r12-4607
   Product: gcc
   Version: 12.0
Status: UNCONFIRMED
  Severity: normal
  Priority: P3
 Component: testsuite
  Assignee: unassigned at gcc dot gnu.org
  Reporter: seurer at gcc dot gnu.org
  Target Milestone: ---

g:701ee067807b80957c65bd7ff94b6099a27181de, r12-4607

make  -k check-gcc RUNTESTFLAGS="tree-ssa.exp=gcc.dg/tree-ssa/sra-18.c"
FAIL: gcc.dg/tree-ssa/sra-18.c scan-tree-dump-times esra "Removing load: a =
\\*.?L.?C.?.?.?0;" 1
FAIL: gcc.dg/tree-ssa/sra-18.c scan-tree-dump-times esra "SR[.$][0-9_]+ =
\\*.?L.?C.?.?.?0\\.b\\[0\\]\\.f\\[0\\]\\.x" 1
FAIL: gcc.dg/tree-ssa/sra-18.c scan-tree-dump-times esra "SR[.$][0-9_]+ =
\\*.?L.?C.?.?.?0\\.b\\[0\\]\\.f\\[1\\]\\.x" 1
FAIL: gcc.dg/tree-ssa/sra-18.c scan-tree-dump-times esra "SR[.$][0-9_]+ =
\\*.?L.?C.?.?.?0\\.b\\[1\\]\\.f\\[0\\]\\.x" 1
FAIL: gcc.dg/tree-ssa/sra-18.c scan-tree-dump-times esra "SR[.$][0-9_]+ =
\\*.?L.?C.?.?.?0\\.b\\[1\\]\\.f\\[1\\]\\.x" 1
# of expected passes2
# of unexpected failures5


commit 701ee067807b80957c65bd7ff94b6099a27181de (HEAD, refs/bisect/bad)
Author: Martin Jambor 
Date:   Thu Oct 21 14:26:45 2021 +0200

sra: Fix corner case of total scalarization with virtual inheritance (PR
102505)

[PATCH] rs6000: Add Power10 optimization for most _mm_movemask*

2021-10-21 Thread Paul A. Clarke via Gcc-patches
Power10 ISA added `vextract*` instructions which are realized in the
`vec_extractm` instrinsic.

Use `vec_extractm` for `_mm_movemask_ps`, `_mm_movemask_pd`, and
`_mm_movemask_epi8` compatibility intrinsics, when `_ARCH_PWR10`.

2021-10-21  Paul A. Clarke  

gcc
* config/rs6000/xmmintrin.h (_mm_movemask_ps): Use vec_extractm
when _ARCH_PWR10.
* config/rs6000/emmintrin.h (_mm_movemask_pd): Likewise.
(_mm_movemask_epi8): Likewise.
---
Tested on Power10 powerpc64le-linux (compiled with and without
`-mcpu=power10`).

OK for trunk?

 gcc/config/rs6000/emmintrin.h | 8 
 gcc/config/rs6000/xmmintrin.h | 4 
 2 files changed, 12 insertions(+)

diff --git a/gcc/config/rs6000/emmintrin.h b/gcc/config/rs6000/emmintrin.h
index 32ad72b4cc35..ab16c13c379e 100644
--- a/gcc/config/rs6000/emmintrin.h
+++ b/gcc/config/rs6000/emmintrin.h
@@ -1233,6 +1233,9 @@ _mm_loadl_pd (__m128d __A, double const *__B)
 extern __inline int __attribute__((__gnu_inline__, __always_inline__, 
__artificial__))
 _mm_movemask_pd (__m128d  __A)
 {
+#ifdef _ARCH_PWR10
+  return vec_extractm ((__v2du) __A);
+#else
   __vector unsigned long long result;
   static const __vector unsigned int perm_mask =
 {
@@ -1252,6 +1255,7 @@ _mm_movemask_pd (__m128d  __A)
 #else
   return result[0];
 #endif
+#endif /* !_ARCH_PWR10 */
 }
 #endif /* _ARCH_PWR8 */
 
@@ -2030,6 +2034,9 @@ _mm_min_epu8 (__m128i __A, __m128i __B)
 extern __inline int __attribute__((__gnu_inline__, __always_inline__, 
__artificial__))
 _mm_movemask_epi8 (__m128i __A)
 {
+#ifdef _ARCH_PWR10
+  return vec_extractm ((__v16qu) __A);
+#else
   __vector unsigned long long result;
   static const __vector unsigned char perm_mask =
 {
@@ -2046,6 +2053,7 @@ _mm_movemask_epi8 (__m128i __A)
 #else
   return result[0];
 #endif
+#endif /* !_ARCH_PWR10 */
 }
 #endif /* _ARCH_PWR8 */
 
diff --git a/gcc/config/rs6000/xmmintrin.h b/gcc/config/rs6000/xmmintrin.h
index ae1a33e8d95b..4c093fd1d5ae 100644
--- a/gcc/config/rs6000/xmmintrin.h
+++ b/gcc/config/rs6000/xmmintrin.h
@@ -1352,6 +1352,9 @@ _mm_storel_pi (__m64 *__P, __m128 __A)
 extern __inline int __attribute__((__gnu_inline__, __always_inline__, 
__artificial__))
 _mm_movemask_ps (__m128  __A)
 {
+#ifdef _ARCH_PWR10
+  return vec_extractm ((vector unsigned int) __A);
+#else
   __vector unsigned long long result;
   static const __vector unsigned int perm_mask =
 {
@@ -1371,6 +1374,7 @@ _mm_movemask_ps (__m128  __A)
 #else
   return result[0];
 #endif
+#endif /* !_ARCH_PWR10 */
 }
 #endif /* _ARCH_PWR8 */
 
-- 
2.27.0



Re: [PATH][_GLIBCXX_DEBUG] Fix unordered container merge

2021-10-21 Thread Jonathan Wakely via Gcc-patches
On Thu, 21 Oct 2021 at 17:52, François Dumont  wrote:

> I eventually would like to propose a different approach.
>
> I am adding a hook in normal implementation to let the _GLIBCXX_DEBUG code
> know when a node is being extracted. This way invalidation is only done by
> comparing nodes, no need to compute hash code for this operation.
>

Ugh, this is horrible, I don't like the normal mode depending on the debug
mode (even if it's just having to add hooks like this).

The previous patch seemed fine to me. Already an improvement on what is on
trunk now.


Re: [PATH][_GLIBCXX_DEBUG] Fix unordered container merge

2021-10-21 Thread François Dumont via Gcc-patches

I eventually would like to propose a different approach.

I am adding a hook in normal implementation to let the _GLIBCXX_DEBUG 
code know when a node is being extracted. This way invalidation is only 
done by comparing nodes, no need to compute hash code for this operation.


The only drawback is that for each extraction we have a linear search 
over the iterators to invalidate the correct one. Next, I will implement an 
optimization for when hasher/equal_to are noexcept.


This patch also removes the invalid noexcept qualification on the 
_Hashtable merge methods and makes use of const_iterator, as that is what 
extract expects.


Tested under Linux x86_64.

Ok to commit ?

François


On 16/10/21 4:52 pm, Jonathan Wakely wrote:



On Sat, 16 Oct 2021, 14:49 François Dumont via Libstdc++, 
mailto:libstdc%2b...@gcc.gnu.org>> wrote:


Hi

 Here is the new proposal. My only concern is that we are also
using
hash or equal_to functors in the guard destructor.



Can we catch any exception there, invalidate all iterators, and not 
rethrow the exception?



 I am going to enhance merge normal implementation to make use of
the cached hash code when hash functors are the same between the
source
and destination of nodes. Maybe I'll be able to make use of it in
Debug
implementation too.

François


On 14/10/21 10:23 am, Jonathan Wakely wrote:
> On Wed, 13 Oct 2021 at 18:10, François Dumont via Libstdc++
> mailto:libstdc%2b...@gcc.gnu.org>> wrote:
>> Hi
>>
>>       libstdc++: [_GLIBCXX_DEBUG] Implement unordered container
merge
>>
>>       The _GLIBCXX_DEBUG unordered containers need a dedicated
merge
>> implementation
>>       so that any existing iterator on the transfered nodes is
properly
>> invalidated.
>>
>>       Add typedef/using declaration for everything used as-is
from normal
>> implementation.
>>
>>       libstdc++-v3/ChangeLog:
>>
>>               * include/debug/safe_container.h
(_Safe_container<>): Make
>> all methods
>>               protected.
>>               * include/debug/safe_unordered_container.h
>>  (_Safe_unordered_container<>::_M_invalide_all): Make public.
>>  (_Safe_unordered_container<>::_M_invalide_if): Likewise.
>> (_Safe_unordered_container<>::_M_invalide_local_if): Likewise.
>>               * include/debug/unordered_map
>>  (unordered_map<>::mapped_type, pointer, const_pointer): New
>> typedef.
>>               (unordered_map<>::reference, const_reference,
>> difference_type): New typedef.
>>  (unordered_map<>::get_allocator, empty, size, max_size):
>> Add usings.
>>  (unordered_map<>::bucket_count, max_bucket_count, bucket):
>> Add usings.
>>  (unordered_map<>::hash_function, key_equal, count,
>> contains): Add usings.
>>               (unordered_map<>::operator[], at, rehash,
reserve): Add usings.
>>               (unordered_map<>::merge): New.
>>  (unordered_multimap<>::mapped_type, pointer,
>> const_pointer): New typedef.
>>  (unordered_multimap<>::reference, const_reference,
>> difference_type): New typedef.
>>  (unordered_multimap<>::get_allocator, empty, size,
>> max_size): Add usings.
>>  (unordered_multimap<>::bucket_count, max_bucket_count,
>> bucket): Add usings.
>>  (unordered_multimap<>::hash_function, key_equal, count,
>> contains): Add usings.
>>  (unordered_multimap<>::rehash, reserve): Add usings.
>>  (unordered_multimap<>::merge): New.
>>               * include/debug/unordered_set
>>  (unordered_set<>::mapped_type, pointer, const_pointer): New
>> typedef.
>>               (unordered_set<>::reference, const_reference,
>> difference_type): New typedef.
>>  (unordered_set<>::get_allocator, empty, size, max_size):
>> Add usings.
>>  (unordered_set<>::bucket_count, max_bucket_count, bucket):
>> Add usings.
>>  (unordered_set<>::hash_function, key_equal, count,
>> contains): Add usings.
>>               (unordered_set<>::rehash, reserve): Add usings.
>>               (unordered_set<>::merge): New.
>>  (unordered_multiset<>::mapped_type, pointer,
>> const_pointer): New typedef.
>>  (unordered_multiset<>::reference, const_reference,
>> difference_type): New typedef.
>>  (unordered_multiset<>::get_allocator, empty, size,
>> max_size): Add usings.
>>  (unordered_multiset<>::bucket_count, max_bucket_count,
>> bucket): Add usings.
>>  (unordered_multiset<>::hash_function, key_equal, count,
>> contains): Add usings.
>>  (unordered_multiset<>::rehash, reserve): Add usings.
>>  (unordered_multiset<>::merge): New.
>>               *
>> testsuite/23_containers/unordered_map/debug/merge1_neg.cc: New
test.
>>               *
>> testsuite/23_containers/unordered_map/debug/merge2_neg.cc: New
test.
 

[RFC PATCH v2 1/1] [ARM] Add support for TLS register based stack protector canary access

2021-10-21 Thread Ard Biesheuvel via Gcc-patches
Add support for accessing the stack canary value via the TLS register,
so that multiple threads running in the same address space can use
distinct canary values. This is intended for the Linux kernel running in
SMP mode, where processes entering the kernel are essentially threads
running the same program concurrently: using a global variable for the
canary in that context is problematic because it can never be rotated,
and so the OS is forced to use the same value as long as it remains up.

Using the TLS register to index the stack canary helps with this, as it
allows each CPU to context switch the TLS register along with the rest
of the process, permitting each process to use its own value for the
stack canary.

2021-10-21 Ard Biesheuvel 

* config/arm/arm-opts.h (enum stack_protector_guard): New
* config/arm/arm-protos.h (arm_stack_protect_tls_canary_mem):
New
* config/arm/arm.c (TARGET_STACK_PROTECT_GUARD): Define
(arm_option_override_internal): Handle and put in error checks
for stack protector guard options.
(arm_option_reconfigure_globals): Likewise
(arm_stack_protect_tls_canary_mem): New
(arm_stack_protect_guard): New
* config/arm/arm.md (stack_protect_set): New
(stack_protect_set_tls): Likewise
(stack_protect_test): Likewise
(stack_protect_test_tls): Likewise
* config/arm/arm.opt (-mstack-protector-guard): New
(-mstack-protector-guard-offset): New.

Signed-off-by: Ard Biesheuvel 
---
 gcc/config/arm/arm-opts.h   |  6 ++
 gcc/config/arm/arm-protos.h |  2 +
 gcc/config/arm/arm.c| 52 
 gcc/config/arm/arm.md   | 62 +++-
 gcc/config/arm/arm.opt  | 22 +++
 gcc/doc/invoke.texi |  9 +++
 6 files changed, 151 insertions(+), 2 deletions(-)

diff --git a/gcc/config/arm/arm-opts.h b/gcc/config/arm/arm-opts.h
index 5c4b62f404f7..581ba3c4fbbb 100644
--- a/gcc/config/arm/arm-opts.h
+++ b/gcc/config/arm/arm-opts.h
@@ -69,4 +69,10 @@ enum arm_tls_type {
   TLS_GNU,
   TLS_GNU2
 };
+
+/* Where to get the canary for the stack protector.  */
+enum stack_protector_guard {
+  SSP_TLSREG,  /* per-thread canary in TLS register */
+  SSP_GLOBAL   /* global canary */
+};
 #endif
diff --git a/gcc/config/arm/arm-protos.h b/gcc/config/arm/arm-protos.h
index 9b1f61394ad7..37e80256a78d 100644
--- a/gcc/config/arm/arm-protos.h
+++ b/gcc/config/arm/arm-protos.h
@@ -195,6 +195,8 @@ extern void arm_split_atomic_op (enum rtx_code, rtx, rtx, 
rtx, rtx, rtx, rtx);
 extern rtx arm_load_tp (rtx);
 extern bool arm_coproc_builtin_available (enum unspecv);
 extern bool arm_coproc_ldc_stc_legitimate_address (rtx);
+extern rtx arm_stack_protect_tls_canary_mem (void);
+
 
 #if defined TREE_CODE
 extern void arm_init_cumulative_args (CUMULATIVE_ARGS *, tree, rtx, tree);
diff --git a/gcc/config/arm/arm.c b/gcc/config/arm/arm.c
index c4ff06b087eb..0bf06e764dbb 100644
--- a/gcc/config/arm/arm.c
+++ b/gcc/config/arm/arm.c
@@ -829,6 +829,9 @@ static const struct attribute_spec arm_attribute_table[] =
 
 #undef TARGET_MD_ASM_ADJUST
 #define TARGET_MD_ASM_ADJUST arm_md_asm_adjust
+
+#undef TARGET_STACK_PROTECT_GUARD
+#define TARGET_STACK_PROTECT_GUARD arm_stack_protect_guard
 
 /* Obstack for minipool constant handling.  */
 static struct obstack minipool_obstack;
@@ -3155,6 +3158,26 @@ arm_option_override_internal (struct gcc_options *opts,
   if (TARGET_THUMB2_P (opts->x_target_flags))
 opts->x_inline_asm_unified = true;
 
+  if (arm_stack_protector_guard == SSP_GLOBAL
+  && opts->x_arm_stack_protector_guard_offset_str)
+{
+  error ("incompatible options %'-mstack-protector-guard=global%' and"
+"%'-mstack-protector-guard-offset=%qs%'",
+arm_stack_protector_guard_offset_str);
+}
+
+  if (opts->x_arm_stack_protector_guard_offset_str)
+{
+  char *end;
+  const char *str = arm_stack_protector_guard_offset_str;
+  errno = 0;
+  long offs = strtol (arm_stack_protector_guard_offset_str, &end, 0);
+  if (!*str || *end || errno)
+   error ("%qs is not a valid offset in %qs", str,
+  "-mstack-protector-guard-offset=");
+  arm_stack_protector_guard_offset = offs;
+}
+
 #ifdef SUBTARGET_OVERRIDE_INTERNAL_OPTIONS
   SUBTARGET_OVERRIDE_INTERNAL_OPTIONS;
 #endif
@@ -3822,6 +3845,10 @@ arm_option_reconfigure_globals (void)
   else
target_thread_pointer = TP_SOFT;
 }
+
+  if (arm_stack_protector_guard == SSP_TLSREG
+  && target_thread_pointer != TP_CP15)
+error("%'-mstack-protector-guard=tls%' needs a hardware TLS register");
 }
 
 /* Perform some validation between the desired architecture and the rest of the
@@ -8087,6 +8114,19 @@ legitimize_pic_address (rtx orig, machine_mode mode, rtx 
reg, rtx pic_reg,
 }
 
 
+rtx
+arm_stack_protect_tls_canary_mem (void)
+{
+  rtx tp = gen_reg_rtx (SImode);
+  emit_insn (gen_load_tp_hard (tp));
+
+  rtx reg 

[RFC PATCH v2 0/1] implement TLS register based stack canary for ARM

2021-10-21 Thread Ard Biesheuvel via Gcc-patches
Bugzilla: https://gcc.gnu.org/bugzilla/show_bug.cgi?id=102352

In the Linux kernel, user processes calling into the kernel are
essentially threads running in the same address space, of a program that
never terminates. This means that using a global variable for the stack
protector canary value is problematic on SMP systems, as we can never
change it unless we reboot the system. (Processes that sleep for any
reason will do so on a call into the kernel, which means that there will
always be live kernel stack frames carrying copies of the canary taken
when the function was entered)

AArch64 implements -mstack-protector-guard=sysreg for this purpose, as
this permits the kernel to use different memory addresses for the stack
canary for each CPU, and context switch the chosen system register with
the rest of the process, allowing each process to use its own unique
value for the stack canary.

This patch implements something similar, but for the 32-bit ARM kernel,
which will start using the user space TLS register TPIDRURO to index
per-process metadata while running in the kernel. This means we can just
add an offset to TPIDRURO to obtain the address from which to load the
canary value.

As for the spilling issues that have been fixed in this code in the
past: I suppose a register carrying the TLS register value will never
get spilled to begin with?

Comments/suggestions welcome.

Cc: Keith Packard 
Cc: thomas.preudho...@celest.fr
Cc: adhemerval.zane...@linaro.org
Cc: Qing Zhao 
Cc: Richard Sandiford 
Cc: gcc-patches@gcc.gnu.org

Ard Biesheuvel (1):
  [ARM] Add support for TLS register based stack protector canary access

 gcc/config/arm/arm-opts.h   |  6 ++
 gcc/config/arm/arm-protos.h |  2 +
 gcc/config/arm/arm.c| 52 
 gcc/config/arm/arm.md   | 62 +++-
 gcc/config/arm/arm.opt  | 22 +++
 gcc/doc/invoke.texi |  9 +++
 6 files changed, 151 insertions(+), 2 deletions(-)

-- 
2.30.2

$ cat|arm-linux-gnueabihf-gcc -march=armv7-a -mstack-protector-guard=tls 
-mstack-protector-guard-offset=1296 -mtp=cp15 -S -o - -xc - 
-fstack-protector-all -O3

int foo(void *);
int bar(void)
{

return foo(__builtin_thread_pointer()) + 1;
}

.arch armv7-a
.fpu softvfp
.eabi_attribute 20, 1
.eabi_attribute 21, 1
.eabi_attribute 23, 3
.eabi_attribute 24, 1
.eabi_attribute 25, 1
.eabi_attribute 26, 2
.eabi_attribute 30, 2
.eabi_attribute 34, 1
.eabi_attribute 18, 4
.file   ""
.text
.align  2
.global bar
.syntax unified
.arm
.type   bar, %function
bar:
@ args = 0, pretend = 0, frame = 8
@ frame_needed = 0, uses_anonymous_args = 0
push    {r4, lr}
mrc p15, 0, r4, c13, c0, 3  @ load_tp_hard
mov r0, r4
sub sp, sp, #8
ldr r3, [r4, #1296]
str r3, [sp, #4]
mov r3, #0
bl  foo
ldr r2, [sp, #4]
ldr r3, [r4, #1296]
eors    r3, r2, r3
mov r2, #0
bne .L5
add r0, r0, #1
add sp, sp, #8
@ sp needed
pop {r4, pc}
.L5:
bl  __stack_chk_fail
.size   bar, .-bar
.ident  "GCC: (GNU) 12.0.0 20211021 (experimental)"
.section        .note.GNU-stack,"",%progbits



Re: [PATCH] x86: Adjust gcc.target/i386/pr22076.c

2021-10-21 Thread H.J. Lu via Gcc-patches
On Tue, Oct 19, 2021 at 11:42 PM Uros Bizjak  wrote:
>
> On Tue, Oct 19, 2021 at 8:23 PM H.J. Lu  wrote:
> >
> > commit 247c407c83f0015f4b92d5f71e45b63192f6757e
> > Author: Roger Sayle 
> > Date:   Mon Oct 18 12:15:40 2021 +0100
> >
> > Try placing RTL folded constants in the constant pool.
> >
> > My recent attempts to come up with a testcase for my patch to evaluate
> > ss_plus in simplify-rtx.c, identified a missed optimization opportunity
> > (that's potentially a long-time regression): The RTL optimizers no 
> > longer
> > place constants in the constant pool.
> >
> > changed -m32 codegen from
> >
> > movq    .LC1, %mm0
> > paddb   .LC0, %mm0
> > movq    %mm0, x
> > ret
> >
> > to
> >
> > movl    $807671820, %eax
> > movl    $1616136252, %edx
> > movl    %eax, x
> > movl    %edx, x+4
> > ret
> >
> > and -m64 codegen from
> >
> > movq    .LC1(%rip), %mm0
> > paddb   .LC0(%rip), %mm0
> > movq    %xmm0, x(%rip)
> > ret
> >
> > to
> >
> > movq    .LC2(%rip), %rax
> > movq    %rax, x(%rip)
> > ret
> >
> > Adjust pr22076.c to check that MMX register isn't used since avoiding
> > MMX register isn't a bad thing.
> >
> > PR testsuite/102840
> > * gcc.target/i386/pr22076.c: Updated to check that MMX register
> > isn't used.
>
> The compiler is now able to evaluate the result at the compile time
> and it optimizes the test accordingly. Let's provide some MMX
> instruction that is implemented with UNSPEC, so the compiler won't be
> able to outsmart us.
>
> Something like the attached patch.
>
> Uros.

Works for me.

Thanks.

-- 
H.J.


[Bug testsuite/102859] [OpenMP] Missing testsuite coverage for Fortran task reductions

2021-10-21 Thread dominiq at lps dot ens.fr via Gcc-bugs
https://gcc.gnu.org/bugzilla/show_bug.cgi?id=102859

Dominique d'Humieres  changed:

   What|Removed |Added

 Ever confirmed|0   |1
 Status|UNCONFIRMED |NEW
   Last reconfirmed||2021-10-21

--- Comment #1 from Dominique d'Humieres  ---
Please do not import PR88707 unless it is fixed.

[PATCH] x86: Document -fcf-protection requires i686 or newer

2021-10-21 Thread H.J. Lu via Gcc-patches
PR target/98667
* doc/invoke.texi: Document that -fcf-protection requires i686 or
newer.
---
 gcc/doc/invoke.texi | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/gcc/doc/invoke.texi b/gcc/doc/invoke.texi
index c66a25fcd69..71992b8c597 100644
--- a/gcc/doc/invoke.texi
+++ b/gcc/doc/invoke.texi
@@ -15542,7 +15542,8 @@ which functions and calls should be skipped from 
instrumentation
 (@pxref{Function Attributes}).
 
 Currently the x86 GNU/Linux target provides an implementation based
-on Intel Control-flow Enforcement Technology (CET).
+on Intel Control-flow Enforcement Technology (CET) which works for
+i686 processor or newer.
 
 @item -fstack-protector
 @opindex fstack-protector
-- 
2.32.0



Re: [wwwdocs, committed] GCC 12: Add release note for Fortran TS29113 improvements

2021-10-21 Thread Thomas Koenig via Gcc-patches

Hi Sandra,

I've checked in the attached patch to announce the cleanup project that 
Tobias and I have been working on over the last several months in the 
GCC 12 release notes.  I also updated the page for TS29113 on the GCC 
wiki to reflect that anything that still doesn't work ought to be 
considered a bug, not just incomplete work-in-progress.


Thanks for the work that both of you put into this, and also thanks
for putting this into the release notes.  I was about to suggest you do
so, so you beat me to it :-)

I know that the conformance sections of the Fortran manual are badly in 
need of updating too (not just for TS29113, but the various versions of 
the standard).  That's in my queue to fix up, but I'm likely going to 
need assistance from the gfortran maintainers to get the details right.  
:-S


Just ask :-)

Regards

Thomas



Re: [RFC PATCH 0/1] implement TLS register based stack canary for ARM

2021-10-21 Thread Kees Cook via Gcc-patches
On Thu, Oct 21, 2021 at 06:34:04PM +0200, Ard Biesheuvel wrote:
> On Thu, 21 Oct 2021 at 12:23, Ard Biesheuvel  wrote:
> >
> > Bugzilla: https://gcc.gnu.org/bugzilla/show_bug.cgi?id=102352
> >
> > In the Linux kernel, user processes calling into the kernel are
> > essentially threads running in the same address space, of a program that
> > never terminates. This means that using a global variable for the stack
> > protector canary value is problematic on SMP systems, as we can never
> > change it unless we reboot the system. (Processes that sleep for any
> > reason will do so on a call into the kernel, which means that there will
> > always be live kernel stack frames carrying copies of the canary taken
> > when the function was entered)
> >
> > AArch64 implements -mstack-protector-guard=sysreg for this purpose, as
> > this permits the kernel to use different memory addresses for the stack
> > canary for each CPU, and context switch the chosen system register with
> > the rest of the process, allowing each process to use its own unique
> > value for the stack canary.
> >
> > This patch implements something similar, but for the 32-bit ARM kernel,
> > which will start using the user space TLS register TPIDRURO to index
> > per-process metadata while running in the kernel. This means we can just
> > add an offset to TPIDRURO to obtain the address from which to load the
> > canary value.
> >
> > The patch is a bit rough around the edges, but produces the correct
> > results as far as I can tell.
> 
> This is a lie

LOL.

> 
> > However, I couldn't quite figure out how
> > to modify the patterns so that the offset will be moved into the
> > immediate offset field of the LDR instructions, so currently, the ADD of
> > the offset is always a distinct instruction.
> >
> 
> ... and this is no longer true now that I fixed the correctness
> problem. I will be sending out a v2 shortly, so please disregard this
> one for now.

Heh, I hadn't even had a chance to test it, so I'll hold off. :)

Thanks!

-Kees

-- 
Kees Cook


[Bug fortran/102885] New: [12 Regression] ICE when compiling gfortran.dg/bind_c_char_10.f90 with -flto

2021-10-21 Thread dominiq at lps dot ens.fr via Gcc-bugs
https://gcc.gnu.org/bugzilla/show_bug.cgi?id=102885

Bug ID: 102885
   Summary: [12 Regression] ICE when compiling
gfortran.dg/bind_c_char_10.f90 with -flto
   Product: gcc
   Version: 12.0
Status: UNCONFIRMED
  Severity: normal
  Priority: P3
 Component: fortran
  Assignee: unassigned at gcc dot gnu.org
  Reporter: dominiq at lps dot ens.fr
CC: burnus at gcc dot gnu.org, hubicka at gcc dot gnu.org,
iains at gcc dot gnu.org, sandra at gcc dot gnu.org
  Target Milestone: ---

The test gfortran.dg/bind_c_char_10.f90 ICEs when compiled with -flto:

lto1: internal compiler error: Segmentation fault: 11
Please submit a full bug report,
with preprocessed source if appropriate.
See <https://gcc.gnu.org/bugs/> for instructions.
lto-wrapper: fatal error: gfc returned 1 exit status
compilation terminated.
collect2: fatal error: lto-wrapper returned 1 exit status
compilation terminated.

Reduced test:

module m
  use iso_c_binding, only: c_char
  implicit none (type, external)

contains

! Assumed-shape array, nonallocatable/nonpointer

subroutine ar3 (xn, n) bind(C)
  integer :: n
  character(len=n) :: xn(..)
  if (size(xn) /= 6) stop
  if (len(xn) /= 5) stop  
  select rank(xn)
rank(1)
  xn = ['FDGhf', &
'hdrhg', &
'fDgFl', &
'DFHs3', &
'4a54G', &
'hSs6k']
  rank default
stop
  end select
end

end

program main
  use m
  implicit none (type, external)
  character(kind=c_char, len=5) :: str5a6(6)

  ! assumed rank - with array descriptor

  str5a6 = ['DDGhf', &
'hdrh$', &
'fDGSl', &
'DFHs3', &
'43grG', &
'hFG$k']
  call ar3 (str5a6, 5)

end

All the other tests compile with -flto.

[Bug c++/102854] [OpenMP] Bogus "initializer expression refers to iteration variable" when using templates

2021-10-21 Thread jakub at gcc dot gnu.org via Gcc-bugs
https://gcc.gnu.org/bugzilla/show_bug.cgi?id=102854

--- Comment #2 from Jakub Jelinek  ---
Created attachment 51647
  --> https://gcc.gnu.org/bugzilla/attachment.cgi?id=51647&action=edit
gcc12-pr102854-wip.patch

WIP patch.  Clearly still more work is needed, apparently pointer iterators
in non-rectangular loops are rejected, like:
void
foo ()
{
  int a[1024];
  int *p, *q;
  #pragma omp parallel for collapse(2)
  for (p = &a[0]; p < &a[512]; p++)
for (q = p + 64; q < p + 128; q++)
  ;
}
and enabling it results in ICEs in omp-expand.c.  Furthermore, for both
pointer and random access iterator non-rect loops, I should verify we only
allow the var-outer, var-outer + a2, a2 + var-outer and var-outer - a2 forms
and no others and test code generation.

Re: [RFC PATCH 0/1] implement TLS register based stack canary for ARM

2021-10-21 Thread Ard Biesheuvel via Gcc-patches
On Thu, 21 Oct 2021 at 12:23, Ard Biesheuvel  wrote:
>
> Bugzilla: https://gcc.gnu.org/bugzilla/show_bug.cgi?id=102352
>
> In the Linux kernel, user processes calling into the kernel are
> essentially threads running in the same address space, of a program that
> never terminates. This means that using a global variable for the stack
> protector canary value is problematic on SMP systems, as we can never
> change it unless we reboot the system. (Processes that sleep for any
> reason will do so on a call into the kernel, which means that there will
> always be live kernel stack frames carrying copies of the canary taken
> when the function was entered)
>
> AArch64 implements -mstack-protector-guard=sysreg for this purpose, as
> this permits the kernel to use different memory addresses for the stack
> canary for each CPU, and context switch the chosen system register with
> the rest of the process, allowing each process to use its own unique
> value for the stack canary.
>
> This patch implements something similar, but for the 32-bit ARM kernel,
> which will start using the user space TLS register TPIDRURO to index
> per-process metadata while running in the kernel. This means we can just
> add an offset to TPIDRURO to obtain the address from which to load the
> canary value.
>
> The patch is a bit rough around the edges, but produces the correct
> results as far as I can tell.

This is a lie

> However, I couldn't quite figure out how
> to modify the patterns so that the offset will be moved into the
> immediate offset field of the LDR instructions, so currently, the ADD of
> the offset is always a distinct instruction.
>

... and this is no longer true now that I fixed the correctness
problem. I will be sending out a v2 shortly, so please disregard this
one for now.


> As for the spilling issues that have been fixed in this code in the
> past: I suppose a register carrying the TLS register value will never
> get spilled to begin with? How about a register that carries TLS+<offset>?
>
> Comments/suggestions welcome.
>
> Cc: thomas.preudho...@celest.fr
> Cc: adhemerval.zane...@linaro.org
> Cc: Qing Zhao 
> Cc: Richard Sandiford 
> Cc: gcc-patches@gcc.gnu.org
>
> Ard Biesheuvel (1):
>   [ARM] Add support for TLS register based stack protector canary access
>
>  gcc/config/arm/arm-opts.h |  6 +++
>  gcc/config/arm/arm.c  | 39 +
>  gcc/config/arm/arm.md | 44 ++--
>  gcc/config/arm/arm.opt| 22 ++
>  gcc/doc/invoke.texi   |  9 
>  5 files changed, 116 insertions(+), 4 deletions(-)
>
> --
> 2.30.2
>
> $ cat |arm-linux-gnueabihf-gcc -march=armv7-a -mstack-protector-guard=tls 
> -mstack-protector-guard-offset=10 -mtp=cp15 -S -o - -xc - 
> -fstack-protector-all -O3
> int foo(void *);
> int bar(void)
> {
>
> return foo(__builtin_thread_pointer()) + 1;
> }
> .arch armv7-a
> .fpu softvfp
> .eabi_attribute 20, 1
> .eabi_attribute 21, 1
> .eabi_attribute 23, 3
> .eabi_attribute 24, 1
> .eabi_attribute 25, 1
> .eabi_attribute 26, 2
> .eabi_attribute 30, 2
> .eabi_attribute 34, 1
> .eabi_attribute 18, 4
> .file   ""
> .text
> .align  2
> .global bar
> .syntax unified
> .arm
> .type   bar, %function
> bar:
> @ args = 0, pretend = 0, frame = 8
> @ frame_needed = 0, uses_anonymous_args = 0
> push    {r4, lr}
> mrc p15, 0, r4, c13, c0, 3  @ load_tp_hard
> add r3, r4, #10
> sub sp, sp, #8
> mov r0, r4
> add r4, r4, #10
> ldr r3, [r3]
> str r3, [sp, #4]
> mov r3, #0
> bl  foo
> ldr r3, [r4]
> ldr r4, [sp, #4]
> eors    r3, r4, r3
> mov r4, #0
> bne .L5
> add r0, r0, #1
> add sp, sp, #8
> @ sp needed
> pop {r4, pc}
> .L5:
> bl  __stack_chk_fail
> .size   bar, .-bar
> .ident  "GCC: (GNU) 12.0.0 20211019 (experimental)"
> .section        .note.GNU-stack,"",%progbits
>


[Bug c++/102884] Incorrect compile error with id-expression in requires clause before member initializer

2021-10-21 Thread redi at gcc dot gnu.org via Gcc-bugs
https://gcc.gnu.org/bugzilla/show_bug.cgi?id=102884

Jonathan Wakely  changed:

   What|Removed |Added

 Status|UNCONFIRMED |NEW
 Ever confirmed|0   |1
   Keywords||rejects-valid
   Last reconfirmed||2021-10-21

[Bug libstdc++/102882] [AIX] 23_containers 96088 testsuite failures

2021-10-21 Thread redi at gcc dot gnu.org via Gcc-bugs
https://gcc.gnu.org/bugzilla/show_bug.cgi?id=102882

--- Comment #2 from Jonathan Wakely  ---
Created attachment 51646
  --> https://gcc.gnu.org/bugzilla/attachment.cgi?id=51646&action=edit
Change test to not use std::string

This patch changes the testcase to use a custom instantiation of
std::basic_string, so it doesn't use the explicit instantiation definitions in
the library. Does this make the patched test PASS?

But avoiding using std::string in our own testsuite is a major pain, we should
just make it so it works.

[wwwdocs, committed] GCC 12: Add release note for Fortran TS29113 improvements

2021-10-21 Thread Sandra Loosemore
I've checked in the attached patch to announce the cleanup project that 
Tobias and I have been working on over the last several months in the 
GCC 12 release notes.  I also updated the page for TS29113 on the GCC 
wiki to reflect that anything that still doesn't work ought to be 
considered a bug, not just incomplete work-in-progress.


I know that the conformance sections of the Fortran manual are badly in 
need of updating too (not just for TS29113, but the various versions of 
the standard).  That's in my queue to fix up, but I'm likely going to 
need assistance from the gfortran maintainers to get the details right.  :-S


-Sandra
commit f5971f451ae8834e928738bbfe465670aa481cea
Author: Sandra Loosemore 
Date:   Thu Oct 21 09:00:16 2021 -0700

GCC 12: Add release note for Fortran TS29113 improvements.

diff --git a/htdocs/gcc-12/changes.html b/htdocs/gcc-12/changes.html
index 5be1570..5974b9b 100644
--- a/htdocs/gcc-12/changes.html
+++ b/htdocs/gcc-12/changes.html
@@ -147,7 +147,17 @@ a work-in-progress.
 
 
 
-
+Fortran
+
+  WG5/N1942, "TS 29113 Further Interoperability of Fortran with C",
+is now fully supported.  In addition to implementing previously
+missing functionality, such as support for character arguments of
+length greater than one in functions marked bind(c)
+and gaps in the handling for assumed-rank arrays, numerous other bugs
+have been fixed, and an extensive set of new conformance test cases
+has been added.
+  
+
 
 
 


[Bug c/102867] [12 Regression] Waddress complaint in readelf.c

2021-10-21 Thread msebor at gcc dot gnu.org via Gcc-bugs
https://gcc.gnu.org/bugzilla/show_bug.cgi?id=102867

Martin Sebor  changed:

   What|Removed |Added

   Assignee|unassigned at gcc dot gnu.org  |msebor at gcc dot 
gnu.org
 Status|NEW |ASSIGNED

--- Comment #4 from Martin Sebor  ---
The warning for macros was most likely inadvertently enabled in the change for
pr102103.  In hindsight, I'm guessing it's what triggered the instance in Glibc
(since fixed):
https://sourceware.org/pipermail/libc-alpha/2021-September/131241.html
and I think it might have also been what prompted the change below (I meant to
follow up there but got busy with other things):
https://gcc.gnu.org/pipermail/gcc-patches/2021-October/580786.html

I have a follow-on patch out for review for pr33925.  I'll look into the macro
suppression at the same time, although I'm not too keen on that idea in general
if it can be easily avoided in user code (e.g., inlining).  I'd rather get away
from it if it's not too painful.

The poor format of the expression in the warning is an independent issue worth
addressing separately.

[Bug target/98667] gcc generates endbr32 invalid opcode on -march=i486

2021-10-21 Thread marxin at gcc dot gnu.org via Gcc-bugs
https://gcc.gnu.org/bugzilla/show_bug.cgi?id=98667

--- Comment #15 from Martin Liška  ---
(In reply to H.J. Lu from comment #14)
> (In reply to Martin Liška from comment #13)
> > @H.J. Can you please document that one needs at least i686 CPU for the
> > functionality?
> 
> Like this?
> 
> diff --git a/gcc/doc/invoke.texi b/gcc/doc/invoke.texi
> index c66a25fcd69..71992b8c597 100644
> --- a/gcc/doc/invoke.texi
> +++ b/gcc/doc/invoke.texi
> @@ -15542,7 +15542,8 @@ which functions and calls should be skipped from
> instrumentation
>  (@pxref{Function Attributes}).
>  
>  Currently the x86 GNU/Linux target provides an implementation based
> -on Intel Control-flow Enforcement Technology (CET).
> +on Intel Control-flow Enforcement Technology (CET) which works for
> +i686 processor or newer.
>  
>  @item -fstack-protector
>  @opindex fstack-protector

Yes, please.

[Bug c++/102884] New: Incorrect compile error with id-expression in requires clause before member initializer

2021-10-21 Thread galston at stellarscience dot com via Gcc-bugs
https://gcc.gnu.org/bugzilla/show_bug.cgi?id=102884

Bug ID: 102884
   Summary: Incorrect compile error with id-expression in requires
clause before member initializer
   Product: gcc
   Version: 11.2.1
Status: UNCONFIRMED
  Severity: normal
  Priority: P3
 Component: c++
  Assignee: unassigned at gcc dot gnu.org
  Reporter: galston at stellarscience dot com
  Target Milestone: ---

Created attachment 51645
  --> https://gcc.gnu.org/bugzilla/attachment.cgi?id=51645&action=edit
.ii file produced by g++ command

This compiles in MSVC and clang, but gives a compile error in gcc:

constexpr bool kExpression{ true };

struct Foo {

  template< typename T >
  Foo(T ) requires kExpression : x{0} {}

  int x;
};

int main() {
  [[maybe_unused]] Foo foo{1};
}

It compiles if parentheses are put around kExpression. Since kExpression is
an id-expression, and an id-expression is a primary expression, I think it
should be allowed as-is in a requires-clause (see the "Requires clauses" section
at https://en.cppreference.com/w/cpp/language/constraints). The parser appears
to choke on the member initializer list: the code also compiles if the
": x{0}" part is removed.

Thank you!

Output from gcc:

> g++ -v -save-temps -Wall -Werror -std=c++20 -c requires-clause-bug.cpp
Using built-in specs.
COLLECT_GCC=g++
OFFLOAD_TARGET_NAMES=nvptx-none
OFFLOAD_TARGET_DEFAULT=1
Target: x86_64-redhat-linux
Configured with: ../configure --enable-bootstrap
--enable-languages=c,c++,fortran,objc,obj-c++,ada,go,d,lto --prefix=/usr
--mandir=/usr/share/man --infodir=/usr/share/info
--with-bugurl=http://bugzilla.redhat.com/bugzilla --enable-shared
--enable-threads=posix --enable-checking=release --enable-multilib
--with-system-zlib --enable-__cxa_atexit --disable-libunwind-exceptions
--enable-gnu-unique-object --enable-linker-build-id
--with-gcc-major-version-only --with-linker-hash-style=gnu --enable-plugin
--enable-initfini-array
--with-isl=/builddir/build/BUILD/gcc-11.2.1-20210728/obj-x86_64-redhat-linux/isl-install
--enable-offload-targets=nvptx-none --without-cuda-driver
--enable-gnu-indirect-function --enable-cet --with-tune=generic
--with-arch_32=i686 --build=x86_64-redhat-linux
Thread model: posix
Supported LTO compression algorithms: zlib zstd
gcc version 11.2.1 20210728 (Red Hat 11.2.1-1) (GCC) 
COLLECT_GCC_OPTIONS='-v' '-save-temps' '-Wall' '-Werror' '-std=c++20' '-c'
'-shared-libgcc' '-mtune=generic' '-march=x86-64'
 /usr/libexec/gcc/x86_64-redhat-linux/11/cc1plus -E -quiet -v -D_GNU_SOURCE
requires-clause-bug.cpp -mtune=generic -march=x86-64 -std=c++20 -Wall -Werror
-fpch-preprocess -o requires-clause-bug.ii
ignoring nonexistent directory
"/usr/lib/gcc/x86_64-redhat-linux/11/include-fixed"
ignoring nonexistent directory
"/usr/lib/gcc/x86_64-redhat-linux/11/../../../../x86_64-redhat-linux/include"
#include "..." search starts here:
#include <...> search starts here:
 /usr/lib/gcc/x86_64-redhat-linux/11/../../../../include/c++/11

/usr/lib/gcc/x86_64-redhat-linux/11/../../../../include/c++/11/x86_64-redhat-linux
 /usr/lib/gcc/x86_64-redhat-linux/11/../../../../include/c++/11/backward
 /usr/lib/gcc/x86_64-redhat-linux/11/include
 /usr/local/include
 /usr/include
End of search list.
COLLECT_GCC_OPTIONS='-v' '-save-temps' '-Wall' '-Werror' '-std=c++20' '-c'
'-shared-libgcc' '-mtune=generic' '-march=x86-64'
 /usr/libexec/gcc/x86_64-redhat-linux/11/cc1plus -fpreprocessed
requires-clause-bug.ii -quiet -dumpbase requires-clause-bug.cpp -dumpbase-ext
.cpp -mtune=generic -march=x86-64 -Wall -Werror -std=c++20 -version -o
requires-clause-bug.s
GNU C++20 (GCC) version 11.2.1 20210728 (Red Hat 11.2.1-1)
(x86_64-redhat-linux)
compiled by GNU C version 11.2.1 20210728 (Red Hat 11.2.1-1), GMP
version 6.2.0, MPFR version 4.1.0-p13, MPC version 1.2.1, isl version
isl-0.18-GMP

GGC heuristics: --param ggc-min-expand=100 --param ggc-min-heapsize=131072
GNU C++20 (GCC) version 11.2.1 20210728 (Red Hat 11.2.1-1)
(x86_64-redhat-linux)
compiled by GNU C version 11.2.1 20210728 (Red Hat 11.2.1-1), GMP
version 6.2.0, MPFR version 4.1.0-p13, MPC version 1.2.1, isl version
isl-0.18-GMP

GGC heuristics: --param ggc-min-expand=100 --param ggc-min-heapsize=131072
Compiler executable checksum: ee1a2f208dd48c6a5d24bdcd31126e66
requires-clause-bug.cpp:7:32: error: found ‘:’ in nested-name-specifier,
expected ‘::’
7 |   Foo(T ) requires kExpression : x{0} {}
  |^
  |::
requires-clause-bug.cpp:7:20: error: ‘kExpression’ is not a class, namespace,
or enumeration
7 |   Foo(T ) requires kExpression : x{0} {}
  |^~~
requires-clause-bug.cpp:7:39: error: expected unqualified-id before ‘{’ token
7 |   Foo(T ) requires kExpression : x{0} {}
  |   ^
requires-clause-bug.cpp: In constructor ‘Foo::Foo(T) requires 

  1   2   3   >