[Bug tree-optimization/111595] New: detection of MIN/MAX with truncation and sign change for the result

2023-09-25 Thread pinskia at gcc dot gnu.org via Gcc-bugs
https://gcc.gnu.org/bugzilla/show_bug.cgi?id=111595

Bug ID: 111595
   Summary: detection of MIN/MAX with truncation and sign change
for the result
   Product: gcc
   Version: 14.0
Status: UNCONFIRMED
  Keywords: missed-optimization
  Severity: enhancement
  Priority: P3
 Component: tree-optimization
  Assignee: unassigned at gcc dot gnu.org
  Reporter: pinskia at gcc dot gnu.org
  Target Milestone: ---

Take:
```
unsigned short f(long a, long b)
{
short as = a;
short bs = b;
unsigned short asu = a;
unsigned short bsu = b;
if (as < bs) return asu;
return bsu;
}
unsigned short f0(long a, long b)
{
short as = a;
short bs = b;
unsigned short asu = a;
unsigned short bsu = b;
if (as < bs) return as;
return bs;
}

unsigned short f1(long a, long b)
{
short as = a;
short bs = b;
unsigned short asu = a;
unsigned short bsu = b;
signed short t;
if (as < bs) t = as;
else t = bs;
return t;
}
```

Currently only f1 detects MIN here. They all should produce the same IR in the
end.

[Bug middle-end/111594] RISC-V: Failed to fold VEC_COND_EXPR and COND_LEN_ADD

2023-09-25 Thread pinskia at gcc dot gnu.org via Gcc-bugs
https://gcc.gnu.org/bugzilla/show_bug.cgi?id=111594

--- Comment #4 from Andrew Pinski  ---
(In reply to JuzheZhong from comment #3)
> (In reply to Andrew Pinski from comment #1)
> > The SVE one was added with r12-4402-g62b505a4d5fc89:
> > ```
> > /* Detect simplication for a conditional reduction where
> > 
> >a = mask1 ? b : 0
> >c = mask2 ? d + a : d
> > 
> >is turned into
> > 
> >c = mask1 && mask2 ? d + b : d.  */
> > (simplify
> >   (IFN_COND_ADD @0 @1 (vec_cond @2 @3 integer_zerop) @1)
> >(IFN_COND_ADD (bit_and @0 @2) @1 @3 @1))
> > ```
> > Most likely should do the similar thing for IFN_COND_LEN_ADD too.
> 
> Hi, I saw ARM SVE failed to fold VEC_COND + COND_ADD into COND_ADD on
> float vector since it can't satisfy integer_zerop.
> 
> Is it reasonable that the same optimization should also work for float vectors?

I suspect it would only be valid if `!HONOR_NANS (type) && !HONOR_SIGNED_ZEROS
(type)` is true. So it could use (match on) zerop instead but would need to
check the above conditional too.

[Bug middle-end/111594] RISC-V: Failed to fold VEC_COND_EXPR and COND_LEN_ADD

2023-09-25 Thread juzhe.zhong at rivai dot ai via Gcc-bugs
https://gcc.gnu.org/bugzilla/show_bug.cgi?id=111594

--- Comment #3 from JuzheZhong  ---
(In reply to Andrew Pinski from comment #1)
> The SVE one was added with r12-4402-g62b505a4d5fc89:
> ```
> /* Detect simplication for a conditional reduction where
> 
>a = mask1 ? b : 0
>c = mask2 ? d + a : d
> 
>is turned into
> 
>c = mask1 && mask2 ? d + b : d.  */
> (simplify
>   (IFN_COND_ADD @0 @1 (vec_cond @2 @3 integer_zerop) @1)
>(IFN_COND_ADD (bit_and @0 @2) @1 @3 @1))
> ```
> Most likely should do the similar thing for IFN_COND_LEN_ADD too.

Hi, I saw ARM SVE failed to fold VEC_COND + COND_ADD into COND_ADD on
float vector since it can't satisfy integer_zerop.

Is it reasonable that the same optimization should also work for float vectors?

RE: [PATCH v1] RISC-V: Rename rounding const fp function for refactor

2023-09-25 Thread Li, Pan2
Committed, thanks Juzhe.

Pan

From: juzhe.zh...@rivai.ai 
Sent: Tuesday, September 26, 2023 11:18 AM
To: Li, Pan2 ; gcc-patches 
Cc: Li, Pan2 ; Wang, Yanzhang ; 
kito.cheng 
Subject: Re: [PATCH v1] RISC-V: Rename rounding const fp function for refactor

LGTM.


juzhe.zh...@rivai.ai

From: pan2.li
Date: 2023-09-26 11:12
To: gcc-patches
CC: juzhe.zhong; 
pan2.li; 
yanzhang.wang; 
kito.cheng
Subject: [PATCH v1] RISC-V: Rename rounding const fp function for refactor
From: Pan Li mailto:pan2...@intel.com>>

The rounding related API shared one const, rename it to avoid
unnecessary redundant code.

gcc/ChangeLog:

* config/riscv/riscv-v.cc (gen_ceil_const_fp): Remove.
(get_fp_rounding_coefficient): Rename.
(gen_floor_const_fp): Remove.
(expand_vec_ceil): Take renamed func.
(expand_vec_floor): Ditto.

Signed-off-by: Pan Li mailto:pan2...@intel.com>>
---
gcc/config/riscv/riscv-v.cc | 13 +++--
1 file changed, 3 insertions(+), 10 deletions(-)

diff --git a/gcc/config/riscv/riscv-v.cc b/gcc/config/riscv/riscv-v.cc
index a1ffefb23f3..9a1df950d58 100644
--- a/gcc/config/riscv/riscv-v.cc
+++ b/gcc/config/riscv/riscv-v.cc
@@ -3548,7 +3548,7 @@ cmp_lmul_gt_one (machine_mode mode)
   greater than and equal to 4503599627370496.
  */
static rtx
-gen_ceil_const_fp (machine_mode inner_mode)
+get_fp_rounding_coefficient (machine_mode inner_mode)
{
   REAL_VALUE_TYPE real;
@@ -3564,13 +3564,6 @@ gen_ceil_const_fp (machine_mode inner_mode)
   return const_double_from_real_value (real, inner_mode);
}
-static rtx
-gen_floor_const_fp (machine_mode inner_mode)
-{
-  /* The floor needs the same floating point const as ceil.  */
-  return gen_ceil_const_fp (inner_mode);
-}
-
static rtx
emit_vec_float_cmp_mask (rtx fp_vector, rtx_code code, rtx fp_scalar,
machine_mode vec_fp_mode)
@@ -3637,7 +3630,7 @@ expand_vec_ceil (rtx op_0, rtx op_1, machine_mode 
vec_fp_mode,
   emit_vec_abs (op_0, op_1, vec_fp_mode);
   /* Step-2: Generate the mask on const fp.  */
-  rtx const_fp = gen_ceil_const_fp (GET_MODE_INNER (vec_fp_mode));
+  rtx const_fp = get_fp_rounding_coefficient (GET_MODE_INNER (vec_fp_mode));
   rtx mask = emit_vec_float_cmp_mask (op_0, LT, const_fp, vec_fp_mode);
   /* Step-3: Convert to integer on mask, with rounding up (aka ceil).  */
@@ -3662,7 +3655,7 @@ expand_vec_floor (rtx op_0, rtx op_1, machine_mode 
vec_fp_mode,
   emit_vec_abs (op_0, op_1, vec_fp_mode);
   /* Step-2: Generate the mask on const fp.  */
-  rtx const_fp = gen_floor_const_fp (GET_MODE_INNER (vec_fp_mode));
+  rtx const_fp = get_fp_rounding_coefficient (GET_MODE_INNER (vec_fp_mode));
   rtx mask = emit_vec_float_cmp_mask (op_0, LT, const_fp, vec_fp_mode);
   /* Step-3: Convert to integer on mask, with rounding down (aka floor).  */
--
2.34.1




Re: [PATCH v1] RISC-V: Rename rounding const fp function for refactor

2023-09-25 Thread juzhe.zh...@rivai.ai
LGTM.



juzhe.zh...@rivai.ai
 
From: pan2.li
Date: 2023-09-26 11:12
To: gcc-patches
CC: juzhe.zhong; pan2.li; yanzhang.wang; kito.cheng
Subject: [PATCH v1] RISC-V: Rename rounding const fp function for refactor
From: Pan Li 
 
The rounding related API shared one const, rename it to avoid
unnecessary redundant code.
 
gcc/ChangeLog:
 
* config/riscv/riscv-v.cc (gen_ceil_const_fp): Remove.
(get_fp_rounding_coefficient): Rename.
(gen_floor_const_fp): Remove.
(expand_vec_ceil): Take renamed func.
(expand_vec_floor): Ditto.
 
Signed-off-by: Pan Li 
---
gcc/config/riscv/riscv-v.cc | 13 +++--
1 file changed, 3 insertions(+), 10 deletions(-)
 
diff --git a/gcc/config/riscv/riscv-v.cc b/gcc/config/riscv/riscv-v.cc
index a1ffefb23f3..9a1df950d58 100644
--- a/gcc/config/riscv/riscv-v.cc
+++ b/gcc/config/riscv/riscv-v.cc
@@ -3548,7 +3548,7 @@ cmp_lmul_gt_one (machine_mode mode)
   greater than and equal to 4503599627370496.
  */
static rtx
-gen_ceil_const_fp (machine_mode inner_mode)
+get_fp_rounding_coefficient (machine_mode inner_mode)
{
   REAL_VALUE_TYPE real;
@@ -3564,13 +3564,6 @@ gen_ceil_const_fp (machine_mode inner_mode)
   return const_double_from_real_value (real, inner_mode);
}
-static rtx
-gen_floor_const_fp (machine_mode inner_mode)
-{
-  /* The floor needs the same floating point const as ceil.  */
-  return gen_ceil_const_fp (inner_mode);
-}
-
static rtx
emit_vec_float_cmp_mask (rtx fp_vector, rtx_code code, rtx fp_scalar,
machine_mode vec_fp_mode)
@@ -3637,7 +3630,7 @@ expand_vec_ceil (rtx op_0, rtx op_1, machine_mode 
vec_fp_mode,
   emit_vec_abs (op_0, op_1, vec_fp_mode);
   /* Step-2: Generate the mask on const fp.  */
-  rtx const_fp = gen_ceil_const_fp (GET_MODE_INNER (vec_fp_mode));
+  rtx const_fp = get_fp_rounding_coefficient (GET_MODE_INNER (vec_fp_mode));
   rtx mask = emit_vec_float_cmp_mask (op_0, LT, const_fp, vec_fp_mode);
   /* Step-3: Convert to integer on mask, with rounding up (aka ceil).  */
@@ -3662,7 +3655,7 @@ expand_vec_floor (rtx op_0, rtx op_1, machine_mode 
vec_fp_mode,
   emit_vec_abs (op_0, op_1, vec_fp_mode);
   /* Step-2: Generate the mask on const fp.  */
-  rtx const_fp = gen_floor_const_fp (GET_MODE_INNER (vec_fp_mode));
+  rtx const_fp = get_fp_rounding_coefficient (GET_MODE_INNER (vec_fp_mode));
   rtx mask = emit_vec_float_cmp_mask (op_0, LT, const_fp, vec_fp_mode);
   /* Step-3: Convert to integer on mask, with rounding down (aka floor).  */
-- 
2.34.1
 
 


[PATCH v1] RISC-V: Rename rounding const fp function for refactor

2023-09-25 Thread pan2 . li
From: Pan Li 

The rounding related API shared one const, rename it to avoid
unnecessary redundant code.

gcc/ChangeLog:

* config/riscv/riscv-v.cc (gen_ceil_const_fp): Remove.
(get_fp_rounding_coefficient): Rename.
(gen_floor_const_fp): Remove.
(expand_vec_ceil): Take renamed func.
(expand_vec_floor): Ditto.

Signed-off-by: Pan Li 
---
 gcc/config/riscv/riscv-v.cc | 13 +++--
 1 file changed, 3 insertions(+), 10 deletions(-)

diff --git a/gcc/config/riscv/riscv-v.cc b/gcc/config/riscv/riscv-v.cc
index a1ffefb23f3..9a1df950d58 100644
--- a/gcc/config/riscv/riscv-v.cc
+++ b/gcc/config/riscv/riscv-v.cc
@@ -3548,7 +3548,7 @@ cmp_lmul_gt_one (machine_mode mode)
   greater than and equal to 4503599627370496.
  */
 static rtx
-gen_ceil_const_fp (machine_mode inner_mode)
+get_fp_rounding_coefficient (machine_mode inner_mode)
 {
   REAL_VALUE_TYPE real;
 
@@ -3564,13 +3564,6 @@ gen_ceil_const_fp (machine_mode inner_mode)
   return const_double_from_real_value (real, inner_mode);
 }
 
-static rtx
-gen_floor_const_fp (machine_mode inner_mode)
-{
-  /* The floor needs the same floating point const as ceil.  */
-  return gen_ceil_const_fp (inner_mode);
-}
-
 static rtx
 emit_vec_float_cmp_mask (rtx fp_vector, rtx_code code, rtx fp_scalar,
 machine_mode vec_fp_mode)
@@ -3637,7 +3630,7 @@ expand_vec_ceil (rtx op_0, rtx op_1, machine_mode 
vec_fp_mode,
   emit_vec_abs (op_0, op_1, vec_fp_mode);
 
   /* Step-2: Generate the mask on const fp.  */
-  rtx const_fp = gen_ceil_const_fp (GET_MODE_INNER (vec_fp_mode));
+  rtx const_fp = get_fp_rounding_coefficient (GET_MODE_INNER (vec_fp_mode));
   rtx mask = emit_vec_float_cmp_mask (op_0, LT, const_fp, vec_fp_mode);
 
   /* Step-3: Convert to integer on mask, with rounding up (aka ceil).  */
@@ -3662,7 +3655,7 @@ expand_vec_floor (rtx op_0, rtx op_1, machine_mode 
vec_fp_mode,
   emit_vec_abs (op_0, op_1, vec_fp_mode);
 
   /* Step-2: Generate the mask on const fp.  */
-  rtx const_fp = gen_floor_const_fp (GET_MODE_INNER (vec_fp_mode));
+  rtx const_fp = get_fp_rounding_coefficient (GET_MODE_INNER (vec_fp_mode));
   rtx mask = emit_vec_float_cmp_mask (op_0, LT, const_fp, vec_fp_mode);
 
   /* Step-3: Convert to integer on mask, with rounding down (aka floor).  */
-- 
2.34.1



Re: [PATCH] Checking undefined_p before using the vr

2023-09-25 Thread Jiufu Guo


Hi Andrew,

Thanks for your explanation! And sorry for the late reply.

Andrew MacLeod  writes:

> On 9/14/23 22:07, Jiufu Guo wrote:
>>>
>>> undefined is a perfectly acceptable range.  It can be used to
>>> represent either values which has not been initialized, or more
>>> frequently it identifies values that cannot occur due to
>>> conflicting/unreachable code.  VARYING means it can be any range,
>>> UNDEFINED means this is unusable, so treat it accordingly.  Its
>>> propagated like any other range.
>> "undefined" means the ranger is unusable. So, for this ranger, it
>> seems only "undefined_p ()" can be checked, and it seems no other
>> functions of this ranger can be called.
>
> not at all. It means ranger has determined that there is no valid
> range for the item you are asking about probably due to conflicting
> conditions, which imparts important information about the range.. or
> lack of range :-)
>
> Quite frequently it means you are looking at a block of code that
> ranger knows is unreachable, but a pass of the compiler which removes
> such blocks has not been called yet.. so the awareness imparted is
> that there isn't much point in doing optimizations on it because its
> probably going to get thrown away by a following pass.
>
>>
>> I'm thinking that it may be ok to let "range_of_expr" return false
>> if the "vr" is "undefined_p".  I know this may change the meaning
>> of "range_of_expr" slightly :)
>
> No.  That would be like saying NULL is not a valid value for a
> pointer.  undefined_p has very specific meaning that we use.. it just
> has no type.

Oh, got it. :)

BR,
Jeff (Jiufu Guo)
>
> Andrew


[Bug middle-end/111594] RISC-V: Failed to fold VEC_COND_EXPR and COND_LEN_ADD

2023-09-25 Thread juzhe.zhong at rivai dot ai via Gcc-bugs
https://gcc.gnu.org/bugzilla/show_bug.cgi?id=111594

--- Comment #2 from JuzheZhong  ---
Oh, I see. Thanks a lot! I will have a try.

Re: [PATCH v1] RISC-V: Support FP nearbyint auto-vectorization

2023-09-25 Thread juzhe.zh...@rivai.ai
+static rtx
+gen_nearbyint_const_fp (machine_mode inner_mode)
+{
+  /* The nearbyint needs the same floating point const as ceil.  */
+  return gen_ceil_const_fp (inner_mode);
+}
This is redundant.

Also, this is also redundant:
static rtx
gen_floor_const_fp (machine_mode inner_mode)
{
  /* The floor needs the same floating point const as ceil.  */
  return gen_ceil_const_fp (inner_mode);
}

So rename it :
gen_ceil_const_fp (machine_mode inner_mode)

into:
get_fp_rounding_coefficient



juzhe.zh...@rivai.ai
 
From: pan2.li
Date: 2023-09-26 10:39
To: gcc-patches
CC: juzhe.zhong; pan2.li; yanzhang.wang; kito.cheng
Subject: [PATCH v1] RISC-V: Support FP nearbyint auto-vectorization
From: Pan Li 
 
This patch would like to support auto-vectorization for the
nearbyint API in math.h. It depends on the -ffast-math option.
 
When we would like to call nearbyint/nearbyintf like v2 = nearbyint (v1),
we will convert it into below insns (reference the implementation of llvm).
 
* frflags a5
* vfcvt.x.f v3, v1, RDN
* vfcvt.f.x v2, v3
* fsflags a5
 
However, the floating point value may not need the cvt as above if
its mantissa is zero. Take single precision floating point as example:
 
Assume we have RTZ rounding mode
 
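  +------------+---------------+-----------------+
  | raw float  | binary layout | after nearbyint |
  +------------+---------------+-----------------+
  | 8388607.5  | 0x4affffff    | 8388607.0       |
  | 8388608.0  | 0x4b000000    | 8388608.0       |
  | 8388609.0  | 0x4b000001    | 8388609.0       |
  +------------+---------------+-----------------+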
 
All single-precision floating-point values >= 8388608.0 have no fractional part, so they are already integers.
We leverage vmflt and mask to filter them out in vector and only do the
cvt on mask.
 
Before this patch:
math-nearbyint-1.c:21:1: missed: couldn't vectorize loop
  ...
.L3:
  flw fa0,0(s0)
  addi s0,s0,4
  addi s1,s1,4
  call nearbyint
  fsw fa0,-4(s1)
  bne s0,s2,.L3
 
After this patch:
  vfabs.v v2,v1
  vmflt.vf v0,v2,fa5
  frflags a7
  vfcvt.x.f.v v4,v1,v0.t
  vfcvt.f.x.v v2,v4,v0.t
  fsflags a7
  vfsgnj.vv   v2,v2,v1
 
Please note VLS mode is also involved in this patch and covered by the
test cases.
 
gcc/ChangeLog:
 
* config/riscv/autovec.md (nearbyint2): New pattern.
* config/riscv/riscv-protos.h (enum insn_type): New enum.
(expand_vec_nearbyint): New function decl.
* config/riscv/riscv-v.cc (gen_nearbyint_const_fp): New function impl.
(expand_vec_nearbyint): Ditto.
 
gcc/testsuite/ChangeLog:
 
* gcc.target/riscv/rvv/autovec/unop/test-math.h: Add helper function.
* gcc.target/riscv/rvv/autovec/unop/math-nearbyint-0.c: New test.
* gcc.target/riscv/rvv/autovec/unop/math-nearbyint-1.c: New test.
* gcc.target/riscv/rvv/autovec/unop/math-nearbyint-2.c: New test.
* gcc.target/riscv/rvv/autovec/unop/math-nearbyint-3.c: New test.
* gcc.target/riscv/rvv/autovec/unop/math-nearbyint-run-1.c: New test.
* gcc.target/riscv/rvv/autovec/unop/math-nearbyint-run-2.c: New test.
* gcc.target/riscv/rvv/autovec/vls/math-nearbyint-1.c: New test.
 
Signed-off-by: Pan Li 
---
gcc/config/riscv/autovec.md   | 11 
gcc/config/riscv/riscv-protos.h   |  2 +
gcc/config/riscv/riscv-v.cc   | 36 
.../riscv/rvv/autovec/unop/math-nearbyint-0.c | 20 +++
.../riscv/rvv/autovec/unop/math-nearbyint-1.c | 20 +++
.../riscv/rvv/autovec/unop/math-nearbyint-2.c | 20 +++
.../riscv/rvv/autovec/unop/math-nearbyint-3.c | 22 +++
.../rvv/autovec/unop/math-nearbyint-run-1.c   | 48 +++
.../rvv/autovec/unop/math-nearbyint-run-2.c   | 48 +++
.../riscv/rvv/autovec/unop/test-math.h| 33 +++
.../riscv/rvv/autovec/vls/math-nearbyint-1.c  | 58 +++
11 files changed, 318 insertions(+)
create mode 100644 
gcc/testsuite/gcc.target/riscv/rvv/autovec/unop/math-nearbyint-0.c
create mode 100644 
gcc/testsuite/gcc.target/riscv/rvv/autovec/unop/math-nearbyint-1.c
create mode 100644 
gcc/testsuite/gcc.target/riscv/rvv/autovec/unop/math-nearbyint-2.c
create mode 100644 
gcc/testsuite/gcc.target/riscv/rvv/autovec/unop/math-nearbyint-3.c
create mode 100644 
gcc/testsuite/gcc.target/riscv/rvv/autovec/unop/math-nearbyint-run-1.c
create mode 100644 
gcc/testsuite/gcc.target/riscv/rvv/autovec/unop/math-nearbyint-run-2.c
create mode 100644 
gcc/testsuite/gcc.target/riscv/rvv/autovec/vls/math-nearbyint-1.c
 
diff --git a/gcc/config/riscv/autovec.md b/gcc/config/riscv/autovec.md
index a005e17457e..b47f086f5e6 100644
--- a/gcc/config/riscv/autovec.md
+++ b/gcc/config/riscv/autovec.md
@@ -2210,6 +2210,7 @@ (define_expand "avg3_ceil"
;; Includes:
;; - ceil/ceilf
;; - floor/floorf
+;; - nearbyint/nearbyintf
;; -
(define_expand "ceil2"
   [(match_operand:V_VLSF 0 "register_operand")
@@ -2230,3 +2231,13 @@ (define_expand "floor2"
 DONE;
   }
)
+
+(define_expand "nearbyint2"
+  [(match_operand:V_VLSF 0 "register_operand")
+   

Ping^2 [PATCH V5 1/4] rs6000: build constant via li;rotldi

2023-09-25 Thread Jiufu Guo
Hi,

Gentle ping...

BR,
Jeff (Jiufu Guo)

Jiufu Guo via Gcc-patches  writes:

> Hi,
>
> Gentle ping...
>
> BR,
> Jeff (Jiufu Guo)
>
> Jiufu Guo  writes:
>
>> Hi,
>>
>> If a constant can be rotated to/from a positive or negative value
>> which "li" can generate, then "li;rotldi" can be used to build the
>> constant.
>>
>> Compare with the previous version:
>> https://gcc.gnu.org/pipermail/gcc-patches/2023-July/623528.html
>> This patch just did minor changes to the comments according to previous
>> review.
>>
>> Bootstrap and regtest pass on ppc64{,le}.
>>
>> Is this ok for trunk?
>>
>>
>> BR,
>> Jeff (Jiufu)
>>
>> gcc/ChangeLog:
>>
>>  * config/rs6000/rs6000.cc (can_be_built_by_li_and_rotldi): New function.
>>  (rs6000_emit_set_long_const): Call can_be_built_by_li_and_rotldi.
>>
>> gcc/testsuite/ChangeLog:
>>
>>  * gcc.target/powerpc/const-build.c: New test.
>> ---
>>  gcc/config/rs6000/rs6000.cc   | 47 +--
>>  .../gcc.target/powerpc/const-build.c  | 57 +++
>>  2 files changed, 98 insertions(+), 6 deletions(-)
>>  create mode 100644 gcc/testsuite/gcc.target/powerpc/const-build.c
>>
>> diff --git a/gcc/config/rs6000/rs6000.cc b/gcc/config/rs6000/rs6000.cc
>> index 42f49e4a56b..acc332acc05 100644
>> --- a/gcc/config/rs6000/rs6000.cc
>> +++ b/gcc/config/rs6000/rs6000.cc
>> @@ -10258,6 +10258,31 @@ rs6000_emit_set_const (rtx dest, rtx source)
>>return true;
>>  }
>>  
>> +/* Check if value C can be built by 2 instructions: one is 'li', another is
>> +   'rotldi'.
>> +
>> +   If so, *SHIFT is set to the shift operand of rotldi(rldicl), and *MASK
>> +   is set to the mask operand of rotldi(rldicl), and return true.
>> +   Return false otherwise.  */
>> +
>> +static bool
>> +can_be_built_by_li_and_rotldi (HOST_WIDE_INT c, int *shift,
>> +   HOST_WIDE_INT *mask)
>> +{
>> +  /* If C or ~C contains at least 49 successive zeros, then C can be rotated
>> + to/from a positive or negative value that 'li' is able to load.  */
>> +  int n;
>> +  if (can_be_rotated_to_lowbits (c, 15, &n)
>> +  || can_be_rotated_to_lowbits (~c, 15, &n))
>> +{
>> +  *mask = HOST_WIDE_INT_M1;
>> +  *shift = HOST_BITS_PER_WIDE_INT - n;
>> +  return true;
>> +}
>> +
>> +  return false;
>> +}
>> +
>>  /* Subroutine of rs6000_emit_set_const, handling PowerPC64 DImode.
>> Output insns to set DEST equal to the constant C as a series of
>> lis, ori and shl instructions.  */
>> @@ -10266,15 +10291,14 @@ static void
>>  rs6000_emit_set_long_const (rtx dest, HOST_WIDE_INT c)
>>  {
>>rtx temp;
>> +  int shift;
>> +  HOST_WIDE_INT mask;
>>HOST_WIDE_INT ud1, ud2, ud3, ud4;
>>  
>>ud1 = c & 0xffff;
>> -  c = c >> 16;
>> -  ud2 = c & 0xffff;
>> -  c = c >> 16;
>> -  ud3 = c & 0xffff;
>> -  c = c >> 16;
>> -  ud4 = c & 0xffff;
>> +  ud2 = (c >> 16) & 0xffff;
>> +  ud3 = (c >> 32) & 0xffff;
>> +  ud4 = (c >> 48) & 0xffff;
>>  
>>if ((ud4 == 0xffff && ud3 == 0xffff && ud2 == 0xffff && (ud1 & 0x8000))
>>|| (ud4 == 0 && ud3 == 0 && ud2 == 0 && ! (ud1 & 0x8000)))
>> @@ -10305,6 +10329,17 @@ rs6000_emit_set_long_const (rtx dest, HOST_WIDE_INT 
>> c)
>>emit_move_insn (dest, gen_rtx_XOR (DImode, temp,
>>   GEN_INT ((ud2 ^ 0xffff) << 16)));
>>  }
>> +  else if (can_be_built_by_li_and_rotldi (c, &shift, &mask))
>> +{
>> +  temp = !can_create_pseudo_p () ? dest : gen_reg_rtx (DImode);
>> +  unsigned HOST_WIDE_INT imm = (c | ~mask);
>> +  imm = (imm >> shift) | (imm << (HOST_BITS_PER_WIDE_INT - shift));
>> +
>> +  emit_move_insn (temp, GEN_INT (imm));
>> +  if (shift != 0)
>> +temp = gen_rtx_ROTATE (DImode, temp, GEN_INT (shift));
>> +  emit_move_insn (dest, temp);
>> +}
>>else if (ud3 == 0 && ud4 == 0)
>>  {
>>temp = !can_create_pseudo_p () ? dest : gen_reg_rtx (DImode);
>> diff --git a/gcc/testsuite/gcc.target/powerpc/const-build.c 
>> b/gcc/testsuite/gcc.target/powerpc/const-build.c
>> new file mode 100644
>> index 000..69b37e2bb53
>> --- /dev/null
>> +++ b/gcc/testsuite/gcc.target/powerpc/const-build.c
>> @@ -0,1 +1,57 @@
>> +/* { dg-do run } */
>> +/* { dg-options "-O2 -save-temps" } */
>> +/* { dg-require-effective-target has_arch_ppc64 } */
>> +
>> +/* Verify that two instructions are successfully used to build constants.
>> +   One insn is li, another is rotate: rldicl.  */
>> +
>> +#define NOIPA __attribute__ ((noipa))
>> +
>> +struct fun
>> +{
>> +  long long (*f) (void);
>> +  long long val;
>> +};
>> +
>> +long long NOIPA
>> +li_rotldi_1 (void)
>> +{
>> +  return 0x75310LL;
>> +}
>> +
>> +long long NOIPA
>> +li_rotldi_2 (void)
>> +{
>> +  return 0x2164LL;
>> +}
>> +
>> +long long NOIPA
>> +li_rotldi_3 (void)
>> +{
>> +  return 0x8531LL;
>> +}
>> +
>> +long long NOIPA
>> +li_rotldi_4 (void)
>> +{
>> +  return 0x2194LL;
>> +}
>> +
>> +struct fun arr[] = {
>> 

[Bug middle-end/111594] RISC-V: Failed to fold VEC_COND_EXPR and COND_LEN_ADD

2023-09-25 Thread pinskia at gcc dot gnu.org via Gcc-bugs
https://gcc.gnu.org/bugzilla/show_bug.cgi?id=111594

Andrew Pinski  changed:

   What|Removed |Added

 Ever confirmed|0   |1
   Severity|normal  |enhancement
 Status|UNCONFIRMED |NEW
   Last reconfirmed||2023-09-26

--- Comment #1 from Andrew Pinski  ---
The SVE one was added with r12-4402-g62b505a4d5fc89:
```
/* Detect simplication for a conditional reduction where

   a = mask1 ? b : 0
   c = mask2 ? d + a : d

   is turned into

   c = mask1 && mask2 ? d + b : d.  */
(simplify
  (IFN_COND_ADD @0 @1 (vec_cond @2 @3 integer_zerop) @1)
   (IFN_COND_ADD (bit_and @0 @2) @1 @3 @1))
```
Most likely should do the similar thing for IFN_COND_LEN_ADD too.

[PATCH] RISC-V: Add opaque integer modes to fix ICE on DSE[PR111590]

2023-09-25 Thread Juzhe-Zhong
When doing fortran test with 'V' extension enabled on RISC-V port.
I saw multiple ICE: https://gcc.gnu.org/bugzilla/show_bug.cgi?id=111590

The root cause is on DSE:

internal compiler error: in smallest_mode_for_size, at stor-layout.cc:356
0x1918f70 smallest_mode_for_size(poly_int<2u, unsigned long>, mode_class)
../../../../gcc/gcc/stor-layout.cc:356
0x11f75bb smallest_int_mode_for_size(poly_int<2u, unsigned long>)
../../../../gcc/gcc/machmode.h:916
0x3304141 find_shift_sequence
../../../../gcc/gcc/dse.cc:1738
0x3304f1a get_stored_val
../../../../gcc/gcc/dse.cc:1906
0x3305377 replace_read
../../../../gcc/gcc/dse.cc:2010
0x3306226 check_mem_read_rtx
../../../../gcc/gcc/dse.cc:2310
0x330667b check_mem_read_use
../../../../gcc/gcc/dse.cc:2415

After investigations, DSE is trying to do optimization like this following 
codes:

(insn 86 85 87 9 (set (reg:V4DI 168)
(mem/u/c:V4DI (reg/f:DI 171) [0  S32 A128])) "bug.f90":6:18 discrim 6 
1167 {*movv4di}
 (expr_list:REG_EQUAL (const_vector:V4DI [
(const_int 4 [0x4])
(const_int 1 [0x1]) repeated x2
(const_int 3 [0x3])
])
(nil)))

(set (mem) (reg:V4DI 168))

Then it ICE on: auto new_mode = smallest_int_mode_for_size (access_size * 
BITS_PER_UNIT);

The access_size may be 24 or 32. We don't have integer modes of these
sizes, so it ICEs.

I saw that both aarch64 and ARM have EI/OI/CI/XI opaque modes.

So I added them to work around the ICE in DSE; it works, as all ICEs are resolved.

CC Richard to review to make sure I am doing the right thing to fix the bug.

Hi, Richard, could you help me with this issue ? Thanks.

gcc/ChangeLog:

* config/riscv/riscv-modes.def (INT_MODE): Add opaque modes

---
 gcc/config/riscv/riscv-modes.def | 6 ++
 1 file changed, 6 insertions(+)

diff --git a/gcc/config/riscv/riscv-modes.def b/gcc/config/riscv/riscv-modes.def
index e3c6ccb2809..ab86032c914 100644
--- a/gcc/config/riscv/riscv-modes.def
+++ b/gcc/config/riscv/riscv-modes.def
@@ -393,6 +393,12 @@ VLS_MODES (1024); /* V1024QI  V512HI  V256SI V128DI  
V512HF  V256SF V128DF */
 VLS_MODES (2048); /* V2048QI V1024HI  V512SI V256DI V1024HF  V512SF V256DF */
 VLS_MODES (4096); /* V4096QI V2048HI V1024SI V512DI V2048HF V1024SF V512DF */
 
+/* Opaque integer modes 3, 4, 6 or 8 general double registers.  */
+INT_MODE (EI, 24);
+INT_MODE (OI, 32);
+INT_MODE (CI, 48);
+INT_MODE (XI, 64);
+
 /* TODO: According to RISC-V 'V' ISA spec, the maximun vector length can
be 65536 for a single vector register which means the vector mode in
GCC can be maximum = 65536 * 8 bits (LMUL=8).
-- 
2.36.3



[PATCH v1] RISC-V: Support FP nearbyint auto-vectorization

2023-09-25 Thread pan2 . li
From: Pan Li 

This patch would like to support auto-vectorization for the
nearbyint API in math.h. It depends on the -ffast-math option.

When we would like to call nearbyint/nearbyintf like v2 = nearbyint (v1),
we will convert it into below insns (reference the implementation of llvm).

* frflags a5
* vfcvt.x.f v3, v1, RDN
* vfcvt.f.x v2, v3
* fsflags a5

However, the floating point value may not need the cvt as above if
its mantissa is zero. Take single precision floating point as example:

Assume we have RTZ rounding mode

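  +------------+---------------+-----------------+
  | raw float  | binary layout | after nearbyint |
  +------------+---------------+-----------------+
  | 8388607.5  | 0x4affffff    | 8388607.0       |
  | 8388608.0  | 0x4b000000    | 8388608.0       |
  | 8388609.0  | 0x4b000001    | 8388609.0       |
  +------------+---------------+-----------------+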

All single-precision floating-point values >= 8388608.0 have no fractional part, so they are already integers.
We leverage vmflt and mask to filter them out in vector and only do the
cvt on mask.

Before this patch:
math-nearbyint-1.c:21:1: missed: couldn't vectorize loop
  ...
.L3:
  flw fa0,0(s0)
  addi s0,s0,4
  addi s1,s1,4
  call nearbyint
  fsw fa0,-4(s1)
  bne s0,s2,.L3

After this patch:
  vfabs.v v2,v1
  vmflt.vf v0,v2,fa5
  frflags a7
  vfcvt.x.f.v v4,v1,v0.t
  vfcvt.f.x.v v2,v4,v0.t
  fsflags a7
  vfsgnj.vv   v2,v2,v1

Please note VLS mode is also involved in this patch and covered by the
test cases.

gcc/ChangeLog:

* config/riscv/autovec.md (nearbyint2): New pattern.
* config/riscv/riscv-protos.h (enum insn_type): New enum.
(expand_vec_nearbyint): New function decl.
* config/riscv/riscv-v.cc (gen_nearbyint_const_fp): New function impl.
(expand_vec_nearbyint): Ditto.

gcc/testsuite/ChangeLog:

* gcc.target/riscv/rvv/autovec/unop/test-math.h: Add helper function.
* gcc.target/riscv/rvv/autovec/unop/math-nearbyint-0.c: New test.
* gcc.target/riscv/rvv/autovec/unop/math-nearbyint-1.c: New test.
* gcc.target/riscv/rvv/autovec/unop/math-nearbyint-2.c: New test.
* gcc.target/riscv/rvv/autovec/unop/math-nearbyint-3.c: New test.
* gcc.target/riscv/rvv/autovec/unop/math-nearbyint-run-1.c: New test.
* gcc.target/riscv/rvv/autovec/unop/math-nearbyint-run-2.c: New test.
* gcc.target/riscv/rvv/autovec/vls/math-nearbyint-1.c: New test.

Signed-off-by: Pan Li 
---
 gcc/config/riscv/autovec.md   | 11 
 gcc/config/riscv/riscv-protos.h   |  2 +
 gcc/config/riscv/riscv-v.cc   | 36 
 .../riscv/rvv/autovec/unop/math-nearbyint-0.c | 20 +++
 .../riscv/rvv/autovec/unop/math-nearbyint-1.c | 20 +++
 .../riscv/rvv/autovec/unop/math-nearbyint-2.c | 20 +++
 .../riscv/rvv/autovec/unop/math-nearbyint-3.c | 22 +++
 .../rvv/autovec/unop/math-nearbyint-run-1.c   | 48 +++
 .../rvv/autovec/unop/math-nearbyint-run-2.c   | 48 +++
 .../riscv/rvv/autovec/unop/test-math.h| 33 +++
 .../riscv/rvv/autovec/vls/math-nearbyint-1.c  | 58 +++
 11 files changed, 318 insertions(+)
 create mode 100644 
gcc/testsuite/gcc.target/riscv/rvv/autovec/unop/math-nearbyint-0.c
 create mode 100644 
gcc/testsuite/gcc.target/riscv/rvv/autovec/unop/math-nearbyint-1.c
 create mode 100644 
gcc/testsuite/gcc.target/riscv/rvv/autovec/unop/math-nearbyint-2.c
 create mode 100644 
gcc/testsuite/gcc.target/riscv/rvv/autovec/unop/math-nearbyint-3.c
 create mode 100644 
gcc/testsuite/gcc.target/riscv/rvv/autovec/unop/math-nearbyint-run-1.c
 create mode 100644 
gcc/testsuite/gcc.target/riscv/rvv/autovec/unop/math-nearbyint-run-2.c
 create mode 100644 
gcc/testsuite/gcc.target/riscv/rvv/autovec/vls/math-nearbyint-1.c

diff --git a/gcc/config/riscv/autovec.md b/gcc/config/riscv/autovec.md
index a005e17457e..b47f086f5e6 100644
--- a/gcc/config/riscv/autovec.md
+++ b/gcc/config/riscv/autovec.md
@@ -2210,6 +2210,7 @@ (define_expand "avg3_ceil"
 ;; Includes:
 ;; - ceil/ceilf
 ;; - floor/floorf
+;; - nearbyint/nearbyintf
 ;; -
 (define_expand "ceil2"
   [(match_operand:V_VLSF 0 "register_operand")
@@ -2230,3 +2231,13 @@ (define_expand "floor2"
 DONE;
   }
 )
+
+(define_expand "nearbyint2"
+  [(match_operand:V_VLSF 0 "register_operand")
+   (match_operand:V_VLSF 1 "register_operand")]
+  "TARGET_VECTOR && !flag_trapping_math && !flag_rounding_math"
+  {
+riscv_vector::expand_vec_nearbyint (operands[0], operands[1], mode, 
mode);
+DONE;
+  }
+)
diff --git a/gcc/config/riscv/riscv-protos.h b/gcc/config/riscv/riscv-protos.h
index 63eb2475705..f87bdef0f71 100644
--- a/gcc/config/riscv/riscv-protos.h
+++ b/gcc/config/riscv/riscv-protos.h
@@ -296,6 +296,7 @@ enum insn_type : unsigned int
   UNARY_OP_TAMA = __MASK_OP_TAMA | UNARY_OP_P,
   UNARY_OP_TAMU = __MASK_OP_TAMU | UNARY_OP_P,
   UNARY_OP_FRM_DYN 

[Bug c/111594] New: RISC-V: Failed to fold VEC_COND_EXPR and COND_LEN_ADD

2023-09-25 Thread juzhe.zhong at rivai dot ai via Gcc-bugs
https://gcc.gnu.org/bugzilla/show_bug.cgi?id=111594

Bug ID: 111594
   Summary: RISC-V: Failed to fold VEC_COND_EXPR and COND_LEN_ADD
   Product: gcc
   Version: 14.0
Status: UNCONFIRMED
  Severity: normal
  Priority: P3
 Component: c
  Assignee: unassigned at gcc dot gnu.org
  Reporter: juzhe.zhong at rivai dot ai
  Target Milestone: ---

Consider this following case:


#include <stdint.h>

void single_loop_with_if_condition(uint64_t * restrict a, 
uint64_t * restrict b,
int loop_size) {
  uint64_t result = 0;

  for (int i = 0; i < loop_size; i++) {
if (b[i] <= a[i]) {
  result += a[i];
}
  }

  a[0] = result;
}

In ARM SVE:

vect__ifc__33.15_48 = VEC_COND_EXPR ;
vect__34.16_49 = .COND_ADD (loop_mask_41, vect_result_19.7_38,
vect__ifc__33.15_48, vect_result_19.7_38);

will be folded into:

vect__34.16_49 = .COND_ADD (_50, vect_result_19.7_38, vect__7.13_45,
vect_result_19.7_38);

However, for RVV, it failed to fold VEC_COND_EXPR + COND_LEN_ADD.

vect__ifc__44.30_96 = VEC_COND_EXPR ;
  vect__45.31_97 = .COND_LEN_ADD ({ -1, ... }, vect_result_35.22_78,
vect__ifc__44.30_96, vect_result_35.22_78, _104, 0);

I am not sure where to do this optimization?

Re: seek advice about GCC learning

2023-09-25 Thread weizhe wang via Gcc
Hi Guys,



  Can we build a riscv-32 gcc compiler from the official gcc repository?



Thanks,



Flint



Sent using https://www.zoho.com/mail/








 On Thu, 02 Feb 2023 05:21:36 -0800 Martin Jambor  wrote 
---



Hello Flint, 
 
On Sat, Jan 28 2023, hmsjwzb via Gcc wrote: 
> Hi GCC developers, 
> 
> I am learning GCC. But the GCC code is hard to understand. 
 
We are delighted you found looking into GCC interesting.  I definitely 
agree that GCC source can be hard to read, especially for newcomers but 
often even for seasoned contributors when they look at a part they are 
not familiar with.  But when you manage to overcome the 
difficulty, the project can be very rewarding.  So do not hesitate to 
ask us any specific question you may have here on the mailing list or on 
IRC. 
 
> I'm reading the c compiler of GCC. It seems the understanding of 
> AST/GENERIC representation is very important. Is there a tool 
> can visualize the AST/GENERIC representation? 
 
Intermediate representations like GENERIC, GIMPLE and RTL are indeed 
fundamental.  In order to see the representation of instructions, 
compile a simple program with option -fdump-tree-all and examine the 
many files that will appear in your working directory (representation of 
things like aggregate data types is unfortunately somewhat missing). 
 
> 
> Do you have some advice for GCC beginner? Is there some documentations 
> can help in the learning of GCC? 
 
David Malcolm wrote a very nice set of tutorials about various aspects 
of starting with GCC: 
https://gcc-newbies-guide.readthedocs.io/en/latest/ 
 
I believe that is the best generic resource there is. 
 
Good luck, 
 
Martin


[Bug target/111533] [14 Regression] ICE: RTL check: expected code 'reg', have 'const_int' in rhs_regno, at rtl.h:1934

2023-09-25 Thread xuli1 at eswincomputing dot com via Gcc-bugs
https://gcc.gnu.org/bugzilla/show_bug.cgi?id=111533

--- Comment #3 from xuli1 at eswincomputing dot com  ---
The problem has been reproduced, thank you.

[PATCH v3 1/2] c++: Initial support for P0847R7 (Deducing This) [PR102609]

2023-09-25 Thread waffl3x
> Yes, but I'll warn you that grokdeclarator has resisted refactoring for
> a long time...

That will certainly be what I work on after this is squared off then,
I've been up and down grokdeclarator so I'm confident I'll be able to
do it.

As for the patch, I sure took my sweet time with it, but here it is. I
hope to work on the diagnostics patch tomorrow, but as you've probably
figured out it's best not to take my word on timeframes :^).

On the plus side, I took my time to figure out how best to pass down
information about whether a param is an xobj param. My initial
impression on what you were suggesting was to push another node on the
front of the list, but I stared at it for a few hours and didn't think
it would work out. However, eventually I realized that the purpose
member is free for xobj params as it is illegal for them to have
default arguments. So I ended up passing it over the TREE_LIST after
all, maybe this is what you meant in the first place anyway too.

I am pretty confident that this version is all good, with only a few
possible issues.

An update on my copyright assignment, I sent an e-mail and haven't
gotten a response yet. From what I saw, I am confident that it's my
preferred option going forward though. Hopefully they get back to me
soon.

Also, just a quick update on my copyright assignment, I have sent an
e-mail to the FSF and haven't gotten a response yet. From what I was
reading, I am confident that it's my preferred option going forward
though. Hopefully they get back to me soon.

Bootstrapped and regtested on x86_64-pc-linux-gnu.

From bbfbcc72e8c0868559284352c71731394c98441e Mon Sep 17 00:00:00 2001
From: waffl3x 
Date: Mon, 25 Sep 2023 16:59:10 -0600
Subject: [PATCH] c++: Initial support for C++23 P0847R7 (Deducing This)
 [PR102609]

This patch implements initial support for P0847R7, without additions to
diagnostics.  Almost everything should work correctly, barring a few
limitations which are listed below.  I attempted to minimize changes to the
existing code, treating explicit object member functions as static functions,
while flagging them to give them extra powers seemed to be the best way of
achieving this.  For this patch, the flag is only utilized in call.cc for
resolving overloads and making the actual function call.

Internally, the difference between a static member function and an implicit
object member function appears to be whether the type node of the decl is a
FUNCTION_TYPE or a METHOD_TYPE.  So to get the desired behavior, it seems to be
sufficient to simply prevent conversion from FUNC_TYPE to METHOD_TYPE in
grokdeclarator when the first parameter is an explicit object parameter.  To
achieve this, explicit object parameters are flagged as such through the
TREE_LIST's purpose member in declarator->u.function.parameters.  Typically the
purpose member is used for default arguments; as those are not allowed for
explicit object parameters, we are able to repurpose purpose for our purposes.
The value used as a flag is the "this_identifier" global tree, as it seemed to
be the most fitting of the current global trees.  Even though it is obviously
illegal for any parameter except the first to be an explicit object parameter,
each parameter parsed as an explicit object parameter will be flagged in this
manner.  This will be used for diagnostics in the following patch.  When an
explicit object parameter is encountered in grokdeclarator, the purpose member
is nulled before the list is passed elsewhere to maintain compatibility with
any code that assumes that a non-null purpose member indicates a default
argument.  This patch only checks for and nulls the first parameter however.

As for the previously mentioned limitations, lambdas do not work correctly yet,
but I suspect that a few tweaks are all it will take to have them fully
functional.  User defined conversion functions are not called when an explicit
object member function with an explicit object parameter of an unrelated type
is called.  The following case does not behave correctly because of this.

struct S {
  operator size_t() {
return 42;
  }
  size_t f(this size_t n) {
return n;
  }
};

int main()
{
  S s{};
  size_t a = s.f();
}

Currently, it appears that the object argument is simply reinterpreted as
a size_t instead of properly calling the user defined conversion function.
The validity of such a conversion is still considered however, if there is no
way to convert S to a size_t an appropriate compile error will be emitted.
I have an idea of what changes need to be made to fix this, but I did not
pursue this for the initial implementation patch.
This bug can be observed in the explicit-object-param4.C test case, while
explicit-object-param3.C demonstrates the non functioning lambdas.

	PR c++/102609

gcc/cp/ChangeLog:
	PR c++/102609
	Initial support for C++23 P0847R7 - Deducing this.
	* call.cc (add_candidates): Check if fn is an xobj member function.
	(build_over_call): Ditto.
	* 

[Bug middle-end/110148] [14 Regression] TSVC s242 regression between g:c0df96b3cda5738afbba3a65bb054183c5cd5530 and g:e4c986fde56a6248f8fbe6cf0704e1da34b055d8

2023-09-25 Thread lili.cui at intel dot com via Gcc-bugs
https://gcc.gnu.org/bugzilla/show_bug.cgi?id=110148

--- Comment #7 from cuilili  ---
(In reply to Martin Jambor from comment #6)
> I believe this has been fixed?

Yes.

Re: [PATCH] AArch64: Fix __sync_val_compare_and_swap [PR111404]

2023-09-25 Thread Wilco Dijkstra
Hi Ramana,

>> __sync_val_compare_and_swap may be used on 128-bit types and either calls the
>> outline atomic code or uses an inline loop.  On AArch64 LDXP is only atomic 
>> if
>> the value is stored successfully using STXP, but the current implementations
>> do not perform the store if the comparison fails.  In this case the value 
>> returned
>> is not read atomically.
>
> IIRC, the previous discussions in this space revolved around the
> difficulty with the store writing to readonly memory which is why I
> think we went with LDXP in this form.

That's not related to this patch - this fixes a serious atomicity bug that may
affect the Linux kernel since it uses the older sync primitives. Given that LDXP
is not atomic on its own, you have to execute the STXP even in the failure case.
Note that you can't rely on compare not to write memory: load-exclusive
loops may either always write or avoid writes in the failure case if the load is
atomic. CAS instructions always write.

> Has something changed from then ?

Yes, we now know that using locking atomics was a bad decision. Developers
actually require efficient and lock-free atomics. Since we didn't support them,
many applications were forced to add their own atomic implementations using
hacky inline assembler. It also resulted in a nasty ABI incompatibility between
GCC and LLVM. Yes - atomics are part of the ABI!

All that is much worse than worrying about a theoretical corner case that
can't happen in real applications - atomics only work on writeable memory
since their purpose is to synchronize reads with writes.

Cheers,
Wilco


Exporting inline functions

2023-09-25 Thread Nima Hamidi via Gcc
Hello all,

Is there any flag that I can pass to gcc to make it generate dynamic symbols 
for inline functions too? Let’s say I need to lookup an inline function via 
dlopen and call it. Is there an easy way to achieve this?


Thanks!


[Bug target/111545] [14 Regression] RISC-V gfortran.dg/host_assoc_function_7.f09 Illegal instruction error

2023-09-25 Thread juzhe.zhong at rivai dot ai via Gcc-bugs
https://gcc.gnu.org/bugzilla/show_bug.cgi?id=111545

--- Comment #4 from JuzheZhong  ---
Confirmed: this is a latent bug in the VSETVL pass which has existed for a
long time.

Lehua is working on refactoring Phase 1 and Phase 2 of VSETVL PASS which will
fix all potential issues of VSETVL PASS.

Re: [wwwdocs, committed] gcc-14/changes.html (OpenMP): Tweak manual-update wording

2023-09-25 Thread Gerald Pfeifer
On Mon, 25 Sep 2023, Tobias Burnus wrote:
> The 'description' words looked a bit misplaced when reading the full 
> sentence. Likewise "the libnuma" - I changed that to simply "libnuma". 
> (Alternatives would be "the libnuma library" or "the numa library".)
> 
> Hence, I fixed my own wording :-)

Looks good (for the record).

Thanks,
Gerald


Re: [PATCH] RISC-V/testsuite: Fix ILP32 RVV failures from missing

2023-09-25 Thread Maciej W. Rozycki
On Mon, 25 Sep 2023, Maciej W. Rozycki wrote:

>  NB the use of this specific <assert.h> header, still in place elsewhere, 
> seems gratuitous to me.  We don't need or indeed want to print anything in 
> the test cases (unless verifying something specific to the print facility) 
> and if we want to avoid minor code duplication (i.e. not to have explicit:
> 
>   if (...)
> __builtin_abort ();
> 
> replicated across test cases), we can easily implement this via a local 
> header, there's no need to pull in a complex system facility.

 Overall we ought not to require any system headers in compile tests and 
then link and run tests need a functional target environment anyway.  So 
maybe the use of <assert.h> in run tests isn't as bad after all if not for 
the -DNDEBUG peculiarity.  However I still think the less we depend in 
verification on external components the better, that's one variable to 
exclude.

  Maciej


[Bug middle-end/94267] Missed folding of _MEM_REF

2023-09-25 Thread pinskia at gcc dot gnu.org via Gcc-bugs
https://gcc.gnu.org/bugzilla/show_bug.cgi?id=94267

--- Comment #4 from Andrew Pinski  ---
(In reply to Andrew Pinski from comment #3)
> Right now we depend on not doing the folding, PR 110702.

Well rather we depend on not folding *(_MEM_REF) ...

[Bug middle-end/94267] Missed folding of _MEM_REF

2023-09-25 Thread pinskia at gcc dot gnu.org via Gcc-bugs
https://gcc.gnu.org/bugzilla/show_bug.cgi?id=94267

Andrew Pinski  changed:

   What|Removed |Added

   See Also||https://gcc.gnu.org/bugzill
   ||a/show_bug.cgi?id=110702

--- Comment #3 from Andrew Pinski  ---
Right now we depend on not doing the folding, PR 110702.

Re: [PATCH v3] aarch64: Fine-grained policies to control ldp-stp formation.

2023-09-25 Thread Andrew Pinski
On Mon, Sep 25, 2023 at 1:04 PM Andrew Pinski  wrote:
>
> On Mon, Sep 25, 2023 at 12:59 PM Philipp Tomsich
>  wrote:
> >
> > On Mon, 25 Sept 2023 at 21:54, Andrew Pinski  wrote:
> > >
> > > On Mon, Sep 25, 2023 at 12:50 PM Manos Anagnostakis
> > >  wrote:
> > > >
> > > > This patch implements the following TODO in 
> > > > gcc/config/aarch64/aarch64.cc
> > > > to provide the requested behaviour for handling ldp and stp:
> > > >
> > > >   /* Allow the tuning structure to disable LDP instruction formation
> > > >  from combining instructions (e.g., in peephole2).
> > > >  TODO: Implement fine-grained tuning control for LDP and STP:
> > > >1. control policies for load and store separately;
> > > >2. support the following policies:
> > > >   - default (use what is in the tuning structure)
> > > >   - always
> > > >   - never
> > > >   - aligned (only if the compiler can prove that the
> > > > load will be aligned to 2 * element_size)  */
> > > >
> > > > It provides two new and concrete target-specific command-line parameters
> > > > -param=aarch64-ldp-policy= and -param=aarch64-stp-policy=
> > > > > to give the ability to control load and store policies separately as
> > > > stated in part 1 of the TODO.
> > > >
> > > > The accepted values for both parameters are:
> > > > - 0: Use the policy of the tuning structure (default).
> > > > - 1: Emit ldp/stp regardless of alignment.
> > > > - 2: Do not emit ldp/stp.
> > > > - 3: In order to emit ldp/stp, first check if the load/store will
> > > >   be aligned to 2 * element_size.
> > >
> > > Instead of a number, does it make sense to instead use a string
> > > (ENUM) for this param.
> > > Also I think using --param is a bad idea if it is going to be
> > > documented in the user manual.
> > > Maybe a -m option should be used instead.
> >
> > See https://gcc.gnu.org/pipermail/gcc-patches/2023-September/631283.html
> > for the discussion triggering the change from -m... to --param and the
> > change to using a number instead of a string.
>
> That is the opposite of the current GCC practice across all targets.
> Things like this should be consistent and if one target decides to do
> it different, then maybe it should NOT.
> Anyways we should document the correct coding style for options so we
> don't have these back and forths again.

Kyrylo:
>  It will have to take a number rather than a string but that should be okay, 
> as long as the right values are documented in invoke.texi.

No, it does not need to be a number. --param=ranger-debug= does not
take a number; it takes an enum.
One of the benefits of moving --param support over to .opt is that it allows
more than just numbers.

Thanks,
Andrew


>
>
> Thanks,
> Andrew
>
> >
> > Thanks,
> > Philipp.
> >
> > >
> > > Thanks,
> > > Andrew
> > >
> > > >
> > > > gcc/ChangeLog:
> > > > * config/aarch64/aarch64-protos.h (struct tune_params): Add
> > > > appropriate enums for the policies.
> > > > * config/aarch64/aarch64-tuning-flags.def
> > > > (AARCH64_EXTRA_TUNING_OPTION): Remove superseded tuning
> > > > options.
> > > > * config/aarch64/aarch64.cc (aarch64_parse_ldp_policy): New
> > > > function to parse ldp-policy parameter.
> > > > (aarch64_parse_stp_policy): New function to parse stp-policy 
> > > > parameter.
> > > > (aarch64_override_options_internal): Call parsing functions.
> > > > (aarch64_operands_ok_for_ldpstp): Add parameter-value check and
> > > > alignment check and remove superseded ones.
> > > > (aarch64_operands_adjust_ok_for_ldpstp): Add parameter-value 
> > > > check and
> > > > alignment check and remove superseded ones.
> > > > * config/aarch64/aarch64.opt: Add options.
> > > > * doc/invoke.texi: Document the parameters accordingly.
> > > >
> > > > gcc/testsuite/ChangeLog:
> > > > * gcc.target/aarch64/ampere1-no_ldp_combine.c: Removed.
> > > > * gcc.target/aarch64/ldp_aligned.c: New test.
> > > > * gcc.target/aarch64/ldp_always.c: New test.
> > > > * gcc.target/aarch64/ldp_never.c: New test.
> > > > * gcc.target/aarch64/stp_aligned.c: New test.
> > > > * gcc.target/aarch64/stp_always.c: New test.
> > > > * gcc.target/aarch64/stp_never.c: New test.
> > > >
> > > > Signed-off-by: Manos Anagnostakis 
> > > > ---
> > > > Changes in v3:
> > > > - Changed command-line options to target-specific parameters
> > > >   and documented them accordingly in doc/invoke.texi.
> > > > - Removed ampere1-no_ldp_combine.c test as superseded.
> > > >
> > > >  gcc/config/aarch64/aarch64-protos.h   |  24 ++
> > > >  gcc/config/aarch64/aarch64-tuning-flags.def   |   8 -
> > > >  gcc/config/aarch64/aarch64.cc | 215 +-
> > > >  gcc/config/aarch64/aarch64.opt|   8 +
> > > >  

Re: [PATCH] AArch64: Fix __sync_val_compare_and_swap [PR111404]

2023-09-25 Thread Ramana Radhakrishnan
On Wed, Sep 13, 2023 at 3:55 PM Wilco Dijkstra via Gcc-patches
 wrote:
>
>
> __sync_val_compare_and_swap may be used on 128-bit types and either calls the
> outline atomic code or uses an inline loop.  On AArch64 LDXP is only atomic if
> the value is stored successfully using STXP, but the current implementations
> do not perform the store if the comparison fails.  In this case the value 
> returned
> is not read atomically.

IIRC, the previous discussions in this space revolved around the
difficulty with the store writing to readonly memory which is why I
think we went with LDXP in this form.
Has something changed from then ?

Reviewed-by : Ramana Radhakrishnan  

regards
Ramana




>
> Passes regress/bootstrap, OK for commit?
>
> gcc/ChangeLog/
> PR target/111404
> * config/aarch64/aarch64.cc (aarch64_split_compare_and_swap):
> For 128-bit store the loaded value and loop if needed.
>
> libgcc/ChangeLog/
> PR target/111404
> * config/aarch64/lse.S (__aarch64_cas16_acq_rel): Execute STLXP using
> either new value or loaded value.
>
> ---
>
> diff --git a/gcc/config/aarch64/aarch64.cc b/gcc/config/aarch64/aarch64.cc
> index 
> 5e8d0a0c91bc7719de2a8c5627b354cf905a4db0..c44c0b979d0cc3755c61dcf566cfddedccebf1ea
>  100644
> --- a/gcc/config/aarch64/aarch64.cc
> +++ b/gcc/config/aarch64/aarch64.cc
> @@ -23413,11 +23413,11 @@ aarch64_split_compare_and_swap (rtx operands[])
>mem = operands[1];
>oldval = operands[2];
>newval = operands[3];
> -  is_weak = (operands[4] != const0_rtx);
>model_rtx = operands[5];
>scratch = operands[7];
>mode = GET_MODE (mem);
>model = memmodel_from_int (INTVAL (model_rtx));
> +  is_weak = operands[4] != const0_rtx && mode != TImode;
>
>/* When OLDVAL is zero and we want the strong version we can emit a tighter
>  loop:
> @@ -23478,6 +23478,33 @@ aarch64_split_compare_and_swap (rtx operands[])
>else
>  aarch64_gen_compare_reg (NE, scratch, const0_rtx);
>
> +  /* 128-bit LDAXP is not atomic unless STLXP succeeds.  So for a mismatch,
> + store the returned value and loop if the STLXP fails.  */
> +  if (mode == TImode)
> +{
> +  rtx_code_label *label3 = gen_label_rtx ();
> +  emit_jump_insn (gen_rtx_SET (pc_rtx, gen_rtx_LABEL_REF (Pmode, 
> label3)));
> +  emit_barrier ();
> +
> +  emit_label (label2);
> +  aarch64_emit_store_exclusive (mode, scratch, mem, rval, model_rtx);
> +
> +  if (aarch64_track_speculation)
> +   {
> + /* Emit an explicit compare instruction, so that we can correctly
> +track the condition codes.  */
> + rtx cc_reg = aarch64_gen_compare_reg (NE, scratch, const0_rtx);
> + x = gen_rtx_NE (GET_MODE (cc_reg), cc_reg, const0_rtx);
> +   }
> +  else
> +   x = gen_rtx_NE (VOIDmode, scratch, const0_rtx);
> +  x = gen_rtx_IF_THEN_ELSE (VOIDmode, x,
> +   gen_rtx_LABEL_REF (Pmode, label1), pc_rtx);
> +  aarch64_emit_unlikely_jump (gen_rtx_SET (pc_rtx, x));
> +
> +  label2 = label3;
> +}
> +
>emit_label (label2);
>
>/* If we used a CBNZ in the exchange loop emit an explicit compare with 
> RVAL
> diff --git a/libgcc/config/aarch64/lse.S b/libgcc/config/aarch64/lse.S
> index 
> dde3a28e07b13669533dfc5e8fac0a9a6ac33dbd..ba05047ff02b6fc5752235bffa924fc4a2f48c04
>  100644
> --- a/libgcc/config/aarch64/lse.S
> +++ b/libgcc/config/aarch64/lse.S
> @@ -160,6 +160,8 @@ see the files COPYING3 and COPYING.RUNTIME respectively.  
> If not, see
>  #define tmp0   16
>  #define tmp1   17
>  #define tmp2   15
> +#define tmp3   14
> +#define tmp4   13
>
>  #define BTI_C  hint34
>
> @@ -233,10 +235,11 @@ STARTFN   NAME(cas)
>  0: LDXPx0, x1, [x4]
> cmp x0, x(tmp0)
> ccmpx1, x(tmp1), #0, eq
> -   bne 1f
> -   STXPw(tmp2), x2, x3, [x4]
> -   cbnzw(tmp2), 0b
> -1: BARRIER
> +   cselx(tmp2), x2, x0, eq
> +   cselx(tmp3), x3, x1, eq
> +   STXPw(tmp4), x(tmp2), x(tmp3), [x4]
> +   cbnzw(tmp4), 0b
> +   BARRIER
> ret
>
>  #endif
>


Re: [PATCH] RISC-V/testsuite: Fix ILP32 RVV failures from missing

2023-09-25 Thread Maciej W. Rozycki
On Sun, 24 Sep 2023, Vineet Gupta wrote:

> This fix is great but is there a more general solution to the problem when the
> toolchain is built for say just rv64 (and thus only those headers) vs. test
> building for say rv32 (and failing to build due to lack of headers) or
> vice-versa.

 The MIPS port has logic in its target test script for combining test 
options and excluding ones that are mutually incompatible due to ABI or 
ISA restrictions.  It wasn't written by me and I have only minimally 
tweaked it (and then many years ago), so I can't remember all the details 
offhand.  See the top comment in gcc/testsuite/gcc.target/mips/mips.exp 
for further information including usage.

 I guess it would make sense to pinch that logic for our port, especially 
given our growing number of machine options.  I think it was mentioned at 
one of the patch review calls (Jeff?).

 NB the use of this specific <assert.h> header, still in place elsewhere, 
seems gratuitous to me.  We don't need or indeed want to print anything in 
the test cases (unless verifying something specific to the print facility) 
and if we want to avoid minor code duplication (i.e. not to have explicit:

  if (...)
__builtin_abort ();

replicated across test cases), we can easily implement this via a local 
header, there's no need to pull in a complex system facility.

 Also I find the use of this facility questionable in the first place: do 
we want these test cases to pass even in the case of an issue if run with 
-DNDEBUG as a target board option (which would cause some tests to be 
optimised away in their entirety)?

  Maciej


[PATCH 1/2] c++: remove NON_DEPENDENT_EXPR, part 1

2023-09-25 Thread Patrick Palka
Bootstrapped and regtested on x86_64-pc-linux-gnu, does this look OK
for trunk?

-- >8 --

This tree code dates all the way back to r69130[1] which implemented
typing of non-dependent expressions.  Its motivation was never clear (to
me at least) since the documentation for it in e.g. cp-tree.def doesn't
seem accurate anymore.  build_non_dependent_expr has since gained
a bunch of edge cases about whether (or how) to wrap certain templated
trees, making it hard to reason about in general.

So this patch removes this tree code, and temporarily turns
build_non_dependent_expr into the identity function.  The subsequent
patch will remove build_non_dependent_expr and adjust its callers
appropriately.

We now need to gracefully handle templated (sub)trees in a couple of
places, places which previously didn't see templated trees since they
didn't look through NON_DEPENDENT_EXPR.

[1]: https://gcc.gnu.org/pipermail/gcc-patches/2003-July/109355.html

gcc/c-family/ChangeLog:

* c-warn.cc (check_address_or_pointer_of_packed_member): Handle
templated CALL_EXPR naming a local extern function.

gcc/cp/ChangeLog:

* class.cc (instantiate_type): Remove NON_DEPENDENT_EXPR
handling.
* constexpr.cc (cxx_eval_constant_expression): Likewise.
(potential_constant_expression_1): Likewise.
* coroutines.cc (coro_validate_builtin_call): Don't
expect ALIGNOF_EXPR to be wrapped in NON_DEPENDENT_EXPR.
* cp-objcp-common.cc (cp_common_init_ts): Remove
NON_DEPENDENT_EXPR handling.
* cp-tree.def (NON_DEPENDENT_EXPR): Remove.
* cp-tree.h (build_non_dependent_expr): Temporarily redefine as
the identity function.
* cvt.cc (maybe_warn_nodiscard): Handle templated CALL_EXPR
naming a local extern function.
* cxx-pretty-print.cc (cxx_pretty_printer::expression): Remove
NON_DEPENDENT_EXPR handling.
* error.cc (dump_decl): Likewise.
(dump_expr): Likewise.
* expr.cc (mark_use): Likewise.
(mark_exp_read): Likewise.
* pt.cc (build_non_dependent_expr): Remove.
* tree.cc (lvalue_kind): Remove NON_DEPENDENT_EXPR handling.
(cp_stabilize_reference): Likewise.
* typeck.cc (warn_for_null_address): Likewise.
(cp_build_binary_op): Handle type-dependent SIZEOF_EXPR operands.
(cp_build_unary_op) : Don't fold inside a
template.

gcc/testsuite/ChangeLog:

* g++.dg/concepts/var-concept3.C: Adjust expected diagnostic
for attempting to call a variable concept.
---
 gcc/c-family/c-warn.cc   |  2 +-
 gcc/cp/class.cc  |  9 --
 gcc/cp/constexpr.cc  |  9 --
 gcc/cp/coroutines.cc |  3 +-
 gcc/cp/cp-objcp-common.cc|  1 -
 gcc/cp/cp-tree.def   | 11 ---
 gcc/cp/cp-tree.h |  2 +-
 gcc/cp/cvt.cc|  4 +-
 gcc/cp/cxx-pretty-print.cc   |  1 -
 gcc/cp/error.cc  |  8 --
 gcc/cp/expr.cc   |  2 -
 gcc/cp/pt.cc | 92 
 gcc/cp/tree.cc   |  5 --
 gcc/cp/typeck.cc | 13 +--
 gcc/testsuite/g++.dg/concepts/var-concept3.C |  2 +-
 15 files changed, 15 insertions(+), 149 deletions(-)

diff --git a/gcc/c-family/c-warn.cc b/gcc/c-family/c-warn.cc
index e67dd87a773..c07770394bf 100644
--- a/gcc/c-family/c-warn.cc
+++ b/gcc/c-family/c-warn.cc
@@ -3029,7 +3029,7 @@ check_address_or_pointer_of_packed_member (tree type, 
tree rhs)
   if (TREE_CODE (rhs) == CALL_EXPR)
{
  rhs = CALL_EXPR_FN (rhs); /* Pointer expression.  */
- if (rhs == NULL_TREE)
+ if (rhs == NULL_TREE || TREE_CODE (rhs) == IDENTIFIER_NODE)
return NULL_TREE;
  rhs = TREE_TYPE (rhs);/* Pointer type.  */
  /* We could be called while processing a template and RHS could be
diff --git a/gcc/cp/class.cc b/gcc/cp/class.cc
index b71333af1f8..10de0437242 100644
--- a/gcc/cp/class.cc
+++ b/gcc/cp/class.cc
@@ -8843,15 +8843,6 @@ instantiate_type (tree lhstype, tree rhs, tsubst_flags_t 
complain)
   rhs = BASELINK_FUNCTIONS (rhs);
 }
 
-  /* If we are in a template, and have a NON_DEPENDENT_EXPR, we cannot
- deduce any type information.  */
-  if (TREE_CODE (rhs) == NON_DEPENDENT_EXPR)
-{
-  if (complain & tf_error)
-   error ("not enough type information");
-  return error_mark_node;
-}
-
   /* There are only a few kinds of expressions that may have a type
  dependent on overload resolution.  */
   gcc_assert (TREE_CODE (rhs) == ADDR_EXPR
diff --git a/gcc/cp/constexpr.cc b/gcc/cp/constexpr.cc
index 2a6601c0cbc..8c9abeeec1b 100644
--- a/gcc/cp/constexpr.cc
+++ b/gcc/cp/constexpr.cc
@@ -8054,7 +8054,6 @@ 

[PATCH 2/2] c++: remove NON_DEPENDENT_EXPR, part 2

2023-09-25 Thread Patrick Palka
This much more mechanical patch removes build_non_dependent_expr
(and make_args_non_dependent) and adjusts callers accordingly,
no functional change.

gcc/cp/ChangeLog:

* call.cc (build_new_method_call): Remove calls to
build_non_dependent_expr and/or make_args_non_dependent.
* coroutines.cc (finish_co_return_stmt): Likewise.
* cp-tree.h (build_non_dependent_expr): Remove.
(make_args_non_dependent): Remove.
* decl2.cc (grok_array_decl): Remove calls to
build_non_dependent_expr and/or make_args_non_dependent.
(build_offset_ref_call_from_tree): Likewise.
* init.cc (build_new): Likewise.
* pt.cc (make_args_non_dependent): Remove.
(test_build_non_dependent_expr): Remove.
(cp_pt_cc_tests): Adjust.
* semantics.cc (finish_expr_stmt): Remove calls to
build_non_dependent_expr and/or make_args_non_dependent.
(finish_for_expr): Likewise.
(finish_call_expr): Likewise.
(finish_omp_atomic): Likewise.
* typeck.cc (finish_class_member_access_expr): Likewise.
(build_x_indirect_ref): Likewise.
(build_x_binary_op): Likewise.
(build_x_array_ref): Likewise.
(build_x_vec_perm_expr): Likewise.
(build_x_shufflevector): Likewise.
(build_x_unary_op): Likewise.
(cp_build_addressof): Likewise.
(build_x_conditional_expr):
(build_x_compound_expr): Likewise.
(build_static_cast): Likewise.
(build_x_modify_expr): Likewise.
(check_return_expr): Likewise.
* typeck2.cc (build_x_arrow): Likewise.
---
 gcc/cp/call.cc   |  7 +--
 gcc/cp/coroutines.cc |  3 ---
 gcc/cp/cp-tree.h |  2 --
 gcc/cp/decl2.cc  | 17 +++-
 gcc/cp/init.cc   |  5 -
 gcc/cp/pt.cc | 46 
 gcc/cp/semantics.cc  | 25 ++--
 gcc/cp/typeck.cc | 31 -
 gcc/cp/typeck2.cc|  1 -
 9 files changed, 6 insertions(+), 131 deletions(-)

diff --git a/gcc/cp/call.cc b/gcc/cp/call.cc
index e8dafbd8ba6..15079ddf6dc 100644
--- a/gcc/cp/call.cc
+++ b/gcc/cp/call.cc
@@ -11430,12 +11430,7 @@ build_new_method_call (tree instance, tree fns, 
vec **args,
 }
 
   if (processing_template_decl)
-{
-  orig_args = args == NULL ? NULL : make_tree_vector_copy (*args);
-  instance = build_non_dependent_expr (instance);
-  if (args != NULL)
-   make_args_non_dependent (*args);
-}
+orig_args = args == NULL ? NULL : make_tree_vector_copy (*args);
 
   /* Process the argument list.  */
   if (args != NULL && *args != NULL)
diff --git a/gcc/cp/coroutines.cc b/gcc/cp/coroutines.cc
index df3cc820797..a5464becf7f 100644
--- a/gcc/cp/coroutines.cc
+++ b/gcc/cp/coroutines.cc
@@ -1351,9 +1351,6 @@ finish_co_return_stmt (location_t kw, tree expr)
 to undo it so we can try to treat it as an rvalue below.  */
   expr = maybe_undo_parenthesized_ref (expr);
 
-  if (processing_template_decl)
-   expr = build_non_dependent_expr (expr);
-
   if (error_operand_p (expr))
return error_mark_node;
 }
diff --git a/gcc/cp/cp-tree.h b/gcc/cp/cp-tree.h
index 66b9a9c4b9a..8b9a7d58462 100644
--- a/gcc/cp/cp-tree.h
+++ b/gcc/cp/cp-tree.h
@@ -7488,8 +7488,6 @@ extern bool any_value_dependent_elements_p  
(const_tree);
 extern bool dependent_omp_for_p(tree, tree, tree, 
tree);
 extern tree resolve_typename_type  (tree, bool);
 extern tree template_for_substitution  (tree);
-inline tree build_non_dependent_expr   (tree t) { return t; } // XXX 
remove
-extern void make_args_non_dependent(vec *);
 extern bool reregister_specialization  (tree, tree, tree);
 extern tree instantiate_non_dependent_expr (tree, tsubst_flags_t = 
tf_error);
 extern tree instantiate_non_dependent_expr_internal (tree, tsubst_flags_t);
diff --git a/gcc/cp/decl2.cc b/gcc/cp/decl2.cc
index 344e19ec98b..0aa1e355972 100644
--- a/gcc/cp/decl2.cc
+++ b/gcc/cp/decl2.cc
@@ -427,14 +427,8 @@ grok_array_decl (location_t loc, tree array_expr, tree 
index_exp,
  return build_min_nt_loc (loc, ARRAY_REF, array_expr, index_exp,
   NULL_TREE, NULL_TREE);
}
-  array_expr = build_non_dependent_expr (array_expr);
-  if (index_exp)
-   index_exp = build_non_dependent_expr (index_exp);
-  else
-   {
- orig_index_exp_list = make_tree_vector_copy (*index_exp_list);
- make_args_non_dependent (*index_exp_list);
-   }
+  if (!index_exp)
+   orig_index_exp_list = make_tree_vector_copy (*index_exp_list);
 }
 
   type = TREE_TYPE (array_expr);
@@ -5435,18 +5429,13 @@ build_offset_ref_call_from_tree (tree fn, vec<tree, va_gc> **args,
   orig_args = make_tree_vector_copy (*args);
 
   /* Transform the arguments and add the implicit "this"
-parameter.  That must be done 

[pushed] [PR111497][LRA]: Copy substituted equivalence

2023-09-25 Thread Vladimir Makarov

The following patch solves

https://gcc.gnu.org/bugzilla/show_bug.cgi?id=111497

The patch was successfully tested and bootstrapped on x86-64 and aarch64.
commit 3c23defed384cf17518ad6c817d94463a445d21b
Author: Vladimir N. Makarov 
Date:   Mon Sep 25 16:19:50 2023 -0400

[PR111497][LRA]: Copy substituted equivalence

When we substitute the equivalence and it becomes shared, we can fail
to correctly update the reg info used by LRA.  This can result in wrong
code generation, e.g. because of incorrect live analysis.  It can also
result in a compiler crash as the pseudo survives RA.  This is exactly
what happened for the PR.  This patch solves this problem by
unsharing substituted equivalences.

gcc/ChangeLog:

PR middle-end/111497
* lra-constraints.cc (lra_constraints): Copy substituted
equivalence.
* lra.cc (lra): Change comment for calling unshare_all_rtl_again.

gcc/testsuite/ChangeLog:

PR middle-end/111497
* g++.target/i386/pr111497.C: new test.

diff --git a/gcc/lra-constraints.cc b/gcc/lra-constraints.cc
index 3aaa4906999..76a1393ab23 100644
--- a/gcc/lra-constraints.cc
+++ b/gcc/lra-constraints.cc
@@ -5424,6 +5424,11 @@ lra_constraints (bool first_p)
 	   loc_equivalence_callback, curr_insn);
 	  if (old != *curr_id->operand_loc[0])
 		{
+		  /* If we substitute pseudo by shared equivalence, we can fail
+		 to update LRA reg info and this can result in many
+		 unexpected consequences.  So keep rtl unshared:  */
+		  *curr_id->operand_loc[0]
+		= copy_rtx (*curr_id->operand_loc[0]);
 		  lra_update_insn_regno_info (curr_insn);
 		  changed_p = true;
 		}
diff --git a/gcc/lra.cc b/gcc/lra.cc
index 563aff10b96..361f84fdacb 100644
--- a/gcc/lra.cc
+++ b/gcc/lra.cc
@@ -2579,9 +2579,8 @@ lra (FILE *f)
   if (inserted_p)
 commit_edge_insertions ();
 
-  /* Replacing pseudos with their memory equivalents might have
- created shared rtx.  Subsequent passes would get confused
- by this, so unshare everything here.  */
+  /* Subsequent passes expect that rtl is unshared, so unshare everything
+ here.  */
   unshare_all_rtl_again (get_insns ());
 
   if (flag_checking)
diff --git a/gcc/testsuite/g++.target/i386/pr111497.C b/gcc/testsuite/g++.target/i386/pr111497.C
new file mode 100644
index 000..a645bb95907
--- /dev/null
+++ b/gcc/testsuite/g++.target/i386/pr111497.C
@@ -0,0 +1,22 @@
+// { dg-do compile { target ia32 } }
+// { dg-options "-march=i686 -mtune=generic -fPIC -O2 -g" }
+
+class A;
+struct B { const char *b1; int b2; };
+struct C : B { C (const char *x, int y) { b1 = x; b2 = y; } };
+struct D : C { D (B x) : C (x.b1, x.b2) {} };
+struct E { E (A *); };
+struct F : E { D f1, f2, f3, f4, f5, f6; F (A *, const B &, const B &, const B &); };
+struct G : F { G (A *, const B &, const B &, const B &); };
+struct H { int h; };
+struct I { H i; };
+struct J { I *j; };
+struct A : J {};
+inline F::F (A *x, const B &y, const B &z, const B &w)
+  : E(x), f1(y), f2(z), f3(w), f4(y), f5(z), f6(w) {}
+G::G (A *x, const B &y, const B &z, const B &w) : F(x, y, z, w)
+{
+  H *h = &x->j->i;
+  if (h)
+h->h++;
+}


[Bug middle-end/111497] [11/12/13/14 Regression] ICE building mariadb on i686 since r8-470

2023-09-25 Thread cvs-commit at gcc dot gnu.org via Gcc-bugs
https://gcc.gnu.org/bugzilla/show_bug.cgi?id=111497

--- Comment #5 from CVS Commits  ---
The master branch has been updated by Vladimir Makarov :

https://gcc.gnu.org/g:3c23defed384cf17518ad6c817d94463a445d21b

commit r14-4256-g3c23defed384cf17518ad6c817d94463a445d21b
Author: Vladimir N. Makarov 
Date:   Mon Sep 25 16:19:50 2023 -0400

[PR111497][LRA]: Copy substituted equivalence

When we substitute the equivalence and it becomes shared, we can fail
to correctly update the reg info used by LRA.  This can result in wrong
code generation, e.g. because of incorrect live analysis.  It can also
result in a compiler crash as the pseudo survives RA.  This is exactly
what happened for the PR.  This patch solves this problem by
unsharing substituted equivalences.

gcc/ChangeLog:

PR middle-end/111497
* lra-constraints.cc (lra_constraints): Copy substituted
equivalence.
* lra.cc (lra): Change comment for calling unshare_all_rtl_again.

gcc/testsuite/ChangeLog:

PR middle-end/111497
* g++.target/i386/pr111497.C: new test.

[Bug libstdc++/111588] Provide opt-out of shared_ptr single-threaded optimization

2023-09-25 Thread redi at gcc dot gnu.org via Gcc-bugs
https://gcc.gnu.org/bugzilla/show_bug.cgi?id=111588

--- Comment #2 from Jonathan Wakely  ---
This needs numbers, not opinions.

Re: [PATCH v3] aarch64: Fine-grained policies to control ldp-stp formation.

2023-09-25 Thread Andrew Pinski
On Mon, Sep 25, 2023 at 12:59 PM Philipp Tomsich
 wrote:
>
> On Mon, 25 Sept 2023 at 21:54, Andrew Pinski  wrote:
> >
> > On Mon, Sep 25, 2023 at 12:50 PM Manos Anagnostakis
> >  wrote:
> > >
> > > This patch implements the following TODO in gcc/config/aarch64/aarch64.cc
> > > to provide the requested behaviour for handling ldp and stp:
> > >
> > >   /* Allow the tuning structure to disable LDP instruction formation
> > >  from combining instructions (e.g., in peephole2).
> > >  TODO: Implement fine-grained tuning control for LDP and STP:
> > >1. control policies for load and store separately;
> > >2. support the following policies:
> > >   - default (use what is in the tuning structure)
> > >   - always
> > >   - never
> > >   - aligned (only if the compiler can prove that the
> > > load will be aligned to 2 * element_size)  */
> > >
> > > It provides two new and concrete target-specific command-line parameters
> > > -param=aarch64-ldp-policy= and -param=aarch64-stp-policy=
> > > to give the ability to control load and store policies separately as
> > > stated in part 1 of the TODO.
> > >
> > > The accepted values for both parameters are:
> > > - 0: Use the policy of the tuning structure (default).
> > > - 1: Emit ldp/stp regardless of alignment.
> > > - 2: Do not emit ldp/stp.
> > > - 3: In order to emit ldp/stp, first check if the load/store will
> > >   be aligned to 2 * element_size.
> >
> > Instead of a number, does it make sense to use a string
> > (ENUM) for this param?
> > Also I think using --param is a bad idea if it is going to be
> > documented in the user manual.
> > Maybe a -m option should be used instead.
>
> See https://gcc.gnu.org/pipermail/gcc-patches/2023-September/631283.html
> for the discussion triggering the change from -m... to --param and the
> change to using a number instead of a string.

That is the opposite of the current GCC practice across all targets.
Things like this should be consistent, and if one target decides to do
it differently, then maybe it should NOT.
Anyways we should document the correct coding style for options so we
don't have these back and forths again.


Thanks,
Andrew

>
> Thanks,
> Philipp.
>
> >
> > Thanks,
> > Andrew
> >
> > >
> > > gcc/ChangeLog:
> > > * config/aarch64/aarch64-protos.h (struct tune_params): Add
> > > appropriate enums for the policies.
> > > * config/aarch64/aarch64-tuning-flags.def
> > > (AARCH64_EXTRA_TUNING_OPTION): Remove superseded tuning
> > > options.
> > > * config/aarch64/aarch64.cc (aarch64_parse_ldp_policy): New
> > > function to parse ldp-policy parameter.
> > > (aarch64_parse_stp_policy): New function to parse stp-policy 
> > > parameter.
> > > (aarch64_override_options_internal): Call parsing functions.
> > > (aarch64_operands_ok_for_ldpstp): Add parameter-value check and
> > > alignment check and remove superseded ones.
> > > (aarch64_operands_adjust_ok_for_ldpstp): Add parameter-value 
> > > check and
> > > alignment check and remove superseded ones.
> > > * config/aarch64/aarch64.opt: Add options.
> > > * doc/invoke.texi: Document the parameters accordingly.
> > >
> > > gcc/testsuite/ChangeLog:
> > > * gcc.target/aarch64/ampere1-no_ldp_combine.c: Removed.
> > > * gcc.target/aarch64/ldp_aligned.c: New test.
> > > * gcc.target/aarch64/ldp_always.c: New test.
> > > * gcc.target/aarch64/ldp_never.c: New test.
> > > * gcc.target/aarch64/stp_aligned.c: New test.
> > > * gcc.target/aarch64/stp_always.c: New test.
> > > * gcc.target/aarch64/stp_never.c: New test.
> > >
> > > Signed-off-by: Manos Anagnostakis 
> > > ---
> > > Changes in v3:
> > > - Changed command-line options to target-specific parameters
> > >   and documented them accordingly in doc/invoke.texi.
> > > - Removed ampere1-no_ldp_combine.c test as superseded.
> > >
> > >  gcc/config/aarch64/aarch64-protos.h   |  24 ++
> > >  gcc/config/aarch64/aarch64-tuning-flags.def   |   8 -
> > >  gcc/config/aarch64/aarch64.cc | 215 +-
> > >  gcc/config/aarch64/aarch64.opt|   8 +
> > >  gcc/doc/invoke.texi   |  30 +++
> > >  .../aarch64/ampere1-no_ldp_combine.c  |  11 -
> > >  .../gcc.target/aarch64/ldp_aligned.c  |  66 ++
> > >  gcc/testsuite/gcc.target/aarch64/ldp_always.c |  66 ++
> > >  gcc/testsuite/gcc.target/aarch64/ldp_never.c  |  66 ++
> > >  .../gcc.target/aarch64/stp_aligned.c  |  60 +
> > >  gcc/testsuite/gcc.target/aarch64/stp_always.c |  60 +
> > >  gcc/testsuite/gcc.target/aarch64/stp_never.c  |  60 +
> > >  12 files changed, 600 insertions(+), 74 deletions(-)
> > >  delete mode 100644 
> > > 

Re: [PATCH v3] aarch64: Fine-grained policies to control ldp-stp formation.

2023-09-25 Thread Manos Anagnostakis
Hello Andrew,

what you describe was my previous version, but @Kyrylo Tkachov
 prompted me to use -param.

Thank you for taking a look anyway!

Manos Anagnostakis | Compiler Engineer
| E: manos.anagnosta...@vrull.eu

VRULL GmbH | Beatrixgasse 32 1030 Vienna | W: www.vrull.eu

Στις Δευ 25 Σεπ 2023, 22:54 ο χρήστης Andrew Pinski 
έγραψε:

> On Mon, Sep 25, 2023 at 12:50 PM Manos Anagnostakis
>  wrote:
> >
> > This patch implements the following TODO in gcc/config/aarch64/aarch64.cc
> > to provide the requested behaviour for handling ldp and stp:
> >
> >   /* Allow the tuning structure to disable LDP instruction formation
> >  from combining instructions (e.g., in peephole2).
> >  TODO: Implement fine-grained tuning control for LDP and STP:
> >1. control policies for load and store separately;
> >2. support the following policies:
> >   - default (use what is in the tuning structure)
> >   - always
> >   - never
> >   - aligned (only if the compiler can prove that the
> > load will be aligned to 2 * element_size)  */
> >
> > It provides two new and concrete target-specific command-line parameters
> > -param=aarch64-ldp-policy= and -param=aarch64-stp-policy=
> > to give the ability to control load and store policies separately as
> > stated in part 1 of the TODO.
> >
> > The accepted values for both parameters are:
> > - 0: Use the policy of the tuning structure (default).
> > - 1: Emit ldp/stp regardless of alignment.
> > - 2: Do not emit ldp/stp.
> > - 3: In order to emit ldp/stp, first check if the load/store will
> >   be aligned to 2 * element_size.
>
> Instead of a number, does it make sense to use a string
> (ENUM) for this param?
> Also I think using --param is a bad idea if it is going to be
> documented in the user manual.
> Maybe a -m option should be used instead.
>
> Thanks,
> Andrew
>
> >
> > gcc/ChangeLog:
> > * config/aarch64/aarch64-protos.h (struct tune_params): Add
> > appropriate enums for the policies.
> > * config/aarch64/aarch64-tuning-flags.def
> > (AARCH64_EXTRA_TUNING_OPTION): Remove superseded tuning
> > options.
> > * config/aarch64/aarch64.cc (aarch64_parse_ldp_policy): New
> > function to parse ldp-policy parameter.
> > (aarch64_parse_stp_policy): New function to parse stp-policy
> parameter.
> > (aarch64_override_options_internal): Call parsing functions.
> > (aarch64_operands_ok_for_ldpstp): Add parameter-value check and
> > alignment check and remove superseded ones.
> > (aarch64_operands_adjust_ok_for_ldpstp): Add parameter-value
> check and
> > alignment check and remove superseded ones.
> > * config/aarch64/aarch64.opt: Add options.
> > * doc/invoke.texi: Document the parameters accordingly.
> >
> > gcc/testsuite/ChangeLog:
> > * gcc.target/aarch64/ampere1-no_ldp_combine.c: Removed.
> > * gcc.target/aarch64/ldp_aligned.c: New test.
> > * gcc.target/aarch64/ldp_always.c: New test.
> > * gcc.target/aarch64/ldp_never.c: New test.
> > * gcc.target/aarch64/stp_aligned.c: New test.
> > * gcc.target/aarch64/stp_always.c: New test.
> > * gcc.target/aarch64/stp_never.c: New test.
> >
> > Signed-off-by: Manos Anagnostakis 
> > ---
> > Changes in v3:
> > - Changed command-line options to target-specific parameters
> >   and documented them accordingly in doc/invoke.texi.
> > - Removed ampere1-no_ldp_combine.c test as superseded.
> >
> >  gcc/config/aarch64/aarch64-protos.h   |  24 ++
> >  gcc/config/aarch64/aarch64-tuning-flags.def   |   8 -
> >  gcc/config/aarch64/aarch64.cc | 215 +-
> >  gcc/config/aarch64/aarch64.opt|   8 +
> >  gcc/doc/invoke.texi   |  30 +++
> >  .../aarch64/ampere1-no_ldp_combine.c  |  11 -
> >  .../gcc.target/aarch64/ldp_aligned.c  |  66 ++
> >  gcc/testsuite/gcc.target/aarch64/ldp_always.c |  66 ++
> >  gcc/testsuite/gcc.target/aarch64/ldp_never.c  |  66 ++
> >  .../gcc.target/aarch64/stp_aligned.c  |  60 +
> >  gcc/testsuite/gcc.target/aarch64/stp_always.c |  60 +
> >  gcc/testsuite/gcc.target/aarch64/stp_never.c  |  60 +
> >  12 files changed, 600 insertions(+), 74 deletions(-)
> >  delete mode 100644
> gcc/testsuite/gcc.target/aarch64/ampere1-no_ldp_combine.c
> >  create mode 100644 gcc/testsuite/gcc.target/aarch64/ldp_aligned.c
> >  create mode 100644 gcc/testsuite/gcc.target/aarch64/ldp_always.c
> >  create mode 100644 gcc/testsuite/gcc.target/aarch64/ldp_never.c
> >  create mode 100644 gcc/testsuite/gcc.target/aarch64/stp_aligned.c
> >  create mode 100644 gcc/testsuite/gcc.target/aarch64/stp_always.c
> >  create mode 100644 gcc/testsuite/gcc.target/aarch64/stp_never.c
> >
> > diff --git a/gcc/config/aarch64/aarch64-protos.h
> 

Re: [PATCH v3] aarch64: Fine-grained policies to control ldp-stp formation.

2023-09-25 Thread Philipp Tomsich
On Mon, 25 Sept 2023 at 21:54, Andrew Pinski  wrote:
>
> On Mon, Sep 25, 2023 at 12:50 PM Manos Anagnostakis
>  wrote:
> >
> > This patch implements the following TODO in gcc/config/aarch64/aarch64.cc
> > to provide the requested behaviour for handling ldp and stp:
> >
> >   /* Allow the tuning structure to disable LDP instruction formation
> >  from combining instructions (e.g., in peephole2).
> >  TODO: Implement fine-grained tuning control for LDP and STP:
> >1. control policies for load and store separately;
> >2. support the following policies:
> >   - default (use what is in the tuning structure)
> >   - always
> >   - never
> >   - aligned (only if the compiler can prove that the
> > load will be aligned to 2 * element_size)  */
> >
> > It provides two new and concrete target-specific command-line parameters
> > -param=aarch64-ldp-policy= and -param=aarch64-stp-policy=
> > to give the ability to control load and store policies separately as
> > stated in part 1 of the TODO.
> >
> > The accepted values for both parameters are:
> > - 0: Use the policy of the tuning structure (default).
> > - 1: Emit ldp/stp regardless of alignment.
> > - 2: Do not emit ldp/stp.
> > - 3: In order to emit ldp/stp, first check if the load/store will
> >   be aligned to 2 * element_size.
>
> Instead of a number, does it make sense to use a string
> (ENUM) for this param?
> Also I think using --param is a bad idea if it is going to be
> documented in the user manual.
> Maybe a -m option should be used instead.

See https://gcc.gnu.org/pipermail/gcc-patches/2023-September/631283.html
for the discussion triggering the change from -m... to --param and the
change to using a number instead of a string.

Thanks,
Philipp.

>
> Thanks,
> Andrew
>
> >
> > gcc/ChangeLog:
> > * config/aarch64/aarch64-protos.h (struct tune_params): Add
> > appropriate enums for the policies.
> > * config/aarch64/aarch64-tuning-flags.def
> > (AARCH64_EXTRA_TUNING_OPTION): Remove superseded tuning
> > options.
> > * config/aarch64/aarch64.cc (aarch64_parse_ldp_policy): New
> > function to parse ldp-policy parameter.
> > (aarch64_parse_stp_policy): New function to parse stp-policy 
> > parameter.
> > (aarch64_override_options_internal): Call parsing functions.
> > (aarch64_operands_ok_for_ldpstp): Add parameter-value check and
> > alignment check and remove superseded ones.
> > (aarch64_operands_adjust_ok_for_ldpstp): Add parameter-value check 
> > and
> > alignment check and remove superseded ones.
> > * config/aarch64/aarch64.opt: Add options.
> > * doc/invoke.texi: Document the parameters accordingly.
> >
> > gcc/testsuite/ChangeLog:
> > * gcc.target/aarch64/ampere1-no_ldp_combine.c: Removed.
> > * gcc.target/aarch64/ldp_aligned.c: New test.
> > * gcc.target/aarch64/ldp_always.c: New test.
> > * gcc.target/aarch64/ldp_never.c: New test.
> > * gcc.target/aarch64/stp_aligned.c: New test.
> > * gcc.target/aarch64/stp_always.c: New test.
> > * gcc.target/aarch64/stp_never.c: New test.
> >
> > Signed-off-by: Manos Anagnostakis 
> > ---
> > Changes in v3:
> > - Changed command-line options to target-specific parameters
> >   and documented them accordingly in doc/invoke.texi.
> > - Removed ampere1-no_ldp_combine.c test as superseded.
> >
> >  gcc/config/aarch64/aarch64-protos.h   |  24 ++
> >  gcc/config/aarch64/aarch64-tuning-flags.def   |   8 -
> >  gcc/config/aarch64/aarch64.cc | 215 +-
> >  gcc/config/aarch64/aarch64.opt|   8 +
> >  gcc/doc/invoke.texi   |  30 +++
> >  .../aarch64/ampere1-no_ldp_combine.c  |  11 -
> >  .../gcc.target/aarch64/ldp_aligned.c  |  66 ++
> >  gcc/testsuite/gcc.target/aarch64/ldp_always.c |  66 ++
> >  gcc/testsuite/gcc.target/aarch64/ldp_never.c  |  66 ++
> >  .../gcc.target/aarch64/stp_aligned.c  |  60 +
> >  gcc/testsuite/gcc.target/aarch64/stp_always.c |  60 +
> >  gcc/testsuite/gcc.target/aarch64/stp_never.c  |  60 +
> >  12 files changed, 600 insertions(+), 74 deletions(-)
> >  delete mode 100644 
> > gcc/testsuite/gcc.target/aarch64/ampere1-no_ldp_combine.c
> >  create mode 100644 gcc/testsuite/gcc.target/aarch64/ldp_aligned.c
> >  create mode 100644 gcc/testsuite/gcc.target/aarch64/ldp_always.c
> >  create mode 100644 gcc/testsuite/gcc.target/aarch64/ldp_never.c
> >  create mode 100644 gcc/testsuite/gcc.target/aarch64/stp_aligned.c
> >  create mode 100644 gcc/testsuite/gcc.target/aarch64/stp_always.c
> >  create mode 100644 gcc/testsuite/gcc.target/aarch64/stp_never.c
> >
> > diff --git a/gcc/config/aarch64/aarch64-protos.h 
> > b/gcc/config/aarch64/aarch64-protos.h
> > index 

Re: [PATCH v3] aarch64: Fine-grained policies to control ldp-stp formation.

2023-09-25 Thread Andrew Pinski
On Mon, Sep 25, 2023 at 12:50 PM Manos Anagnostakis
 wrote:
>
> This patch implements the following TODO in gcc/config/aarch64/aarch64.cc
> to provide the requested behaviour for handling ldp and stp:
>
>   /* Allow the tuning structure to disable LDP instruction formation
>  from combining instructions (e.g., in peephole2).
>  TODO: Implement fine-grained tuning control for LDP and STP:
>1. control policies for load and store separately;
>2. support the following policies:
>   - default (use what is in the tuning structure)
>   - always
>   - never
>   - aligned (only if the compiler can prove that the
> load will be aligned to 2 * element_size)  */
>
> It provides two new and concrete target-specific command-line parameters
> -param=aarch64-ldp-policy= and -param=aarch64-stp-policy=
> to give the ability to control load and store policies separately as
> stated in part 1 of the TODO.
>
> The accepted values for both parameters are:
> - 0: Use the policy of the tuning structure (default).
> - 1: Emit ldp/stp regardless of alignment.
> - 2: Do not emit ldp/stp.
> - 3: In order to emit ldp/stp, first check if the load/store will
>   be aligned to 2 * element_size.

Instead of a number, does it make sense to use a string
(ENUM) for this param?
Also I think using --param is a bad idea if it is going to be
documented in the user manual.
Maybe a -m option should be used instead.

Thanks,
Andrew

>
> gcc/ChangeLog:
> * config/aarch64/aarch64-protos.h (struct tune_params): Add
> appropriate enums for the policies.
> * config/aarch64/aarch64-tuning-flags.def
> (AARCH64_EXTRA_TUNING_OPTION): Remove superseded tuning
> options.
> * config/aarch64/aarch64.cc (aarch64_parse_ldp_policy): New
> function to parse ldp-policy parameter.
> (aarch64_parse_stp_policy): New function to parse stp-policy 
> parameter.
> (aarch64_override_options_internal): Call parsing functions.
> (aarch64_operands_ok_for_ldpstp): Add parameter-value check and
> alignment check and remove superseded ones.
> (aarch64_operands_adjust_ok_for_ldpstp): Add parameter-value check and
> alignment check and remove superseded ones.
> * config/aarch64/aarch64.opt: Add options.
> * doc/invoke.texi: Document the parameters accordingly.
>
> gcc/testsuite/ChangeLog:
> * gcc.target/aarch64/ampere1-no_ldp_combine.c: Removed.
> * gcc.target/aarch64/ldp_aligned.c: New test.
> * gcc.target/aarch64/ldp_always.c: New test.
> * gcc.target/aarch64/ldp_never.c: New test.
> * gcc.target/aarch64/stp_aligned.c: New test.
> * gcc.target/aarch64/stp_always.c: New test.
> * gcc.target/aarch64/stp_never.c: New test.
>
> Signed-off-by: Manos Anagnostakis 
> ---
> Changes in v3:
> - Changed command-line options to target-specific parameters
>   and documented them accordingly in doc/invoke.texi.
> - Removed ampere1-no_ldp_combine.c test as superseded.
>
>  gcc/config/aarch64/aarch64-protos.h   |  24 ++
>  gcc/config/aarch64/aarch64-tuning-flags.def   |   8 -
>  gcc/config/aarch64/aarch64.cc | 215 +-
>  gcc/config/aarch64/aarch64.opt|   8 +
>  gcc/doc/invoke.texi   |  30 +++
>  .../aarch64/ampere1-no_ldp_combine.c  |  11 -
>  .../gcc.target/aarch64/ldp_aligned.c  |  66 ++
>  gcc/testsuite/gcc.target/aarch64/ldp_always.c |  66 ++
>  gcc/testsuite/gcc.target/aarch64/ldp_never.c  |  66 ++
>  .../gcc.target/aarch64/stp_aligned.c  |  60 +
>  gcc/testsuite/gcc.target/aarch64/stp_always.c |  60 +
>  gcc/testsuite/gcc.target/aarch64/stp_never.c  |  60 +
>  12 files changed, 600 insertions(+), 74 deletions(-)
>  delete mode 100644 gcc/testsuite/gcc.target/aarch64/ampere1-no_ldp_combine.c
>  create mode 100644 gcc/testsuite/gcc.target/aarch64/ldp_aligned.c
>  create mode 100644 gcc/testsuite/gcc.target/aarch64/ldp_always.c
>  create mode 100644 gcc/testsuite/gcc.target/aarch64/ldp_never.c
>  create mode 100644 gcc/testsuite/gcc.target/aarch64/stp_aligned.c
>  create mode 100644 gcc/testsuite/gcc.target/aarch64/stp_always.c
>  create mode 100644 gcc/testsuite/gcc.target/aarch64/stp_never.c
>
> diff --git a/gcc/config/aarch64/aarch64-protos.h 
> b/gcc/config/aarch64/aarch64-protos.h
> index 70303d6fd95..be1d73490ed 100644
> --- a/gcc/config/aarch64/aarch64-protos.h
> +++ b/gcc/config/aarch64/aarch64-protos.h
> @@ -568,6 +568,30 @@ struct tune_params
>/* Place prefetch struct pointer at the end to enable type checking
>   errors when tune_params misses elements (e.g., from erroneous merges).  
> */
>const struct cpu_prefetch_tune *prefetch;
> +/* An enum specifying how to handle load pairs using a fine-grained policy:
> +   - LDP_POLICY_ALIGNED: Emit ldp 

[PATCH v3] aarch64: Fine-grained policies to control ldp-stp formation.

2023-09-25 Thread Manos Anagnostakis
This patch implements the following TODO in gcc/config/aarch64/aarch64.cc
to provide the requested behaviour for handling ldp and stp:

  /* Allow the tuning structure to disable LDP instruction formation
 from combining instructions (e.g., in peephole2).
 TODO: Implement fine-grained tuning control for LDP and STP:
   1. control policies for load and store separately;
   2. support the following policies:
  - default (use what is in the tuning structure)
  - always
  - never
  - aligned (only if the compiler can prove that the
load will be aligned to 2 * element_size)  */

It provides two new and concrete target-specific command-line parameters
-param=aarch64-ldp-policy= and -param=aarch64-stp-policy=
to give the ability to control load and store policies separately as
stated in part 1 of the TODO.

The accepted values for both parameters are:
- 0: Use the policy of the tuning structure (default).
- 1: Emit ldp/stp regardless of alignment.
- 2: Do not emit ldp/stp.
- 3: In order to emit ldp/stp, first check if the load/store will
  be aligned to 2 * element_size.

gcc/ChangeLog:
* config/aarch64/aarch64-protos.h (struct tune_params): Add
appropriate enums for the policies.
* config/aarch64/aarch64-tuning-flags.def
(AARCH64_EXTRA_TUNING_OPTION): Remove superseded tuning
options.
* config/aarch64/aarch64.cc (aarch64_parse_ldp_policy): New
function to parse ldp-policy parameter.
(aarch64_parse_stp_policy): New function to parse stp-policy parameter.
(aarch64_override_options_internal): Call parsing functions.
(aarch64_operands_ok_for_ldpstp): Add parameter-value check and
alignment check and remove superseded ones.
(aarch64_operands_adjust_ok_for_ldpstp): Add parameter-value check and
alignment check and remove superseded ones.
* config/aarch64/aarch64.opt: Add options.
* doc/invoke.texi: Document the parameters accordingly.

gcc/testsuite/ChangeLog:
* gcc.target/aarch64/ampere1-no_ldp_combine.c: Removed.
* gcc.target/aarch64/ldp_aligned.c: New test.
* gcc.target/aarch64/ldp_always.c: New test.
* gcc.target/aarch64/ldp_never.c: New test.
* gcc.target/aarch64/stp_aligned.c: New test.
* gcc.target/aarch64/stp_always.c: New test.
* gcc.target/aarch64/stp_never.c: New test.

Signed-off-by: Manos Anagnostakis 
---
Changes in v3:
- Changed command-line options to target-specific parameters
  and documented them accordingly in doc/invoke.texi.
- Removed ampere1-no_ldp_combine.c test as superseded.

 gcc/config/aarch64/aarch64-protos.h   |  24 ++
 gcc/config/aarch64/aarch64-tuning-flags.def   |   8 -
 gcc/config/aarch64/aarch64.cc | 215 +-
 gcc/config/aarch64/aarch64.opt|   8 +
 gcc/doc/invoke.texi   |  30 +++
 .../aarch64/ampere1-no_ldp_combine.c  |  11 -
 .../gcc.target/aarch64/ldp_aligned.c  |  66 ++
 gcc/testsuite/gcc.target/aarch64/ldp_always.c |  66 ++
 gcc/testsuite/gcc.target/aarch64/ldp_never.c  |  66 ++
 .../gcc.target/aarch64/stp_aligned.c  |  60 +
 gcc/testsuite/gcc.target/aarch64/stp_always.c |  60 +
 gcc/testsuite/gcc.target/aarch64/stp_never.c  |  60 +
 12 files changed, 600 insertions(+), 74 deletions(-)
 delete mode 100644 gcc/testsuite/gcc.target/aarch64/ampere1-no_ldp_combine.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/ldp_aligned.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/ldp_always.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/ldp_never.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/stp_aligned.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/stp_always.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/stp_never.c

diff --git a/gcc/config/aarch64/aarch64-protos.h 
b/gcc/config/aarch64/aarch64-protos.h
index 70303d6fd95..be1d73490ed 100644
--- a/gcc/config/aarch64/aarch64-protos.h
+++ b/gcc/config/aarch64/aarch64-protos.h
@@ -568,6 +568,30 @@ struct tune_params
   /* Place prefetch struct pointer at the end to enable type checking
  errors when tune_params misses elements (e.g., from erroneous merges).  */
   const struct cpu_prefetch_tune *prefetch;
+/* An enum specifying how to handle load pairs using a fine-grained policy:
+   - LDP_POLICY_ALIGNED: Emit ldp if the source pointer is aligned
+   to at least double the alignment of the type.
+   - LDP_POLICY_ALWAYS: Emit ldp regardless of alignment.
+   - LDP_POLICY_NEVER: Do not emit ldp.  */
+
+  enum aarch64_ldp_policy_model
+  {
+LDP_POLICY_ALIGNED,
+LDP_POLICY_ALWAYS,
+LDP_POLICY_NEVER
+  } ldp_policy_model;
+/* An enum specifying how to handle store pairs using a fine-grained policy:
+   - STP_POLICY_ALIGNED: Emit stp if the source pointer is aligned
+   to at least 

[Bug target/111593] New: wrong code for 128-bit multiplication on MIPS64R6

2023-09-25 Thread mikulas at artax dot karlin.mff.cuni.cz via Gcc-bugs
https://gcc.gnu.org/bugzilla/show_bug.cgi?id=111593

Bug ID: 111593
   Summary: wrong code for 128-bit multiplication on MIPS64R6
   Product: gcc
   Version: 13.1.0
Status: UNCONFIRMED
  Severity: normal
  Priority: P3
 Component: target
  Assignee: unassigned at gcc dot gnu.org
  Reporter: mikulas at artax dot karlin.mff.cuni.cz
  Target Milestone: ---

MIPS64R6 has new instructions for multiplication and division. GCC uses them,
however it miscompiles 128-bit multiplication.

When you compile and run this program with -O1 or -O2 on mips64r6, you get
the incorrect result 9F172AF9AEE4FDB2FD12E7537CC82A0F. The correct result is
60E3DC5DAC542B19FD12E7537CC82A0F.

#include <stdio.h>

/* Compute a raised to the power b by binary square-and-multiply.  All
   arithmetic is on unsigned __int128, so products wrap and the result is
   reduced modulo 2^128.  The noinline/noclone attributes ask GCC to keep
   this as a separate, uncloned function (presumably so the 128-bit
   multiplications are actually emitted instead of being folded at compile
   time -- NOTE(review): confirm against the reporter's intent).  */
__attribute__((noinline,noclone)) static unsigned __int128 power(unsigned
__int128 a, unsigned __int128 b)
{
unsigned __int128 c = 1;
/* Consume the exponent one bit at a time, least significant bit first.  */
while (b) {
/* Current exponent bit is set: multiply the current power of a into c.  */
if (b & 1)
c *= a;
/* Square the base so it matches the next exponent bit.  */
a *= a;
b >>= 1;
}
return c;
}

/* Reproducer driver: raise 0x1234567890abcdef to the power
   0x1234567890abcdef with power() and print the 128-bit result as 32
   upper-case hexadecimal digits, most significant digit first, followed by
   a newline.  */
int main(void)
{
int i;
unsigned __int128 a = 0x1234567890abcdefULL;
unsigned __int128 b = 0x1234567890abcdefULL;
unsigned __int128 c = power(a, b);
/* Print one 4-bit nibble per iteration, starting with bits 127..124;
   printf has no conversion for __int128, so each nibble is narrowed to
   unsigned first.  */
for (i = 124; i >= 0; i -= 4) {
printf("%X", (unsigned)(c >> i) & 0xf);
}
printf("\n");
return 0;
}

How to reproduce:

On Debian SID, install the packages gcc-13-mipsisa64r6-linux-gnuabi64,
libc6-dev-mips64r6-cross and qemu-user.

Run mipsisa64r6-linux-gnuabi64-gcc-13 -O2 power.c && /usr/bin/qemu-mips64 -L
/usr/mipsisa64r6-linux-gnuabi64/ a.out

The bug happens with gcc-10, gcc-11, gcc-12 and gcc-13 (I didn't try older
releases).

[wwwdocs, committed] gcc-14/changes.html (OpenMP): Tweak manual-update wording

2023-09-25 Thread Tobias Burnus

The 'description' words looked a bit misplaced when reading the full sentence.
Likewise "the libnuma" - I changed that to simply "libnuma". (Alternatives 
would be
"the libnuma library" or "the numa library".)

Hence, I fixed my own wording :-)

Committed as attached. See also https://gcc.gnu.org/gcc-14/changes.html

Tobias
-
Siemens Electronic Design Automation GmbH; Anschrift: Arnulfstraße 201, 80634 
München; Gesellschaft mit beschränkter Haftung; Geschäftsführer: Thomas 
Heurung, Frank Thürauf; Sitz der Gesellschaft: München; Registergericht 
München, HRB 106955
commit 50c5c9f94be7b26a2853f64909fa61ebf60086aa
Author: Tobias Burnus 
Date:   Mon Sep 25 19:36:31 2023 +0200

gcc-14/changes.html (OpenMP): Tweak manual-update wording
---
 htdocs/gcc-14/changes.html | 10 +-
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/htdocs/gcc-14/changes.html b/htdocs/gcc-14/changes.html
index 2ca05ad0..c817dde4 100644
--- a/htdocs/gcc-14/changes.html
+++ b/htdocs/gcc-14/changes.html
@@ -74,11 +74,11 @@ a work-in-progress.
 
   The https://gcc.gnu.org/onlinedocs/libgomp/;>GNU Offloading and
   Multi Processing Runtime Library Manual has been updated and extended,
-  improving especially the ICV description, memory allocation, and the
-  description of the environment variables and OpenMP routines. On Linux,
-  the https://github.com/numactl/numactl;>libnuma is now used
-  for allocators requesting the nearest-partition trait as detailed in the
-  manual.
+  improving especially the description of ICVs, memory allocation, environment variables and OpenMP
+  routines. On Linux, https://github.com/numactl/numactl;>libnuma
+  is now used for allocators requesting the nearest-partition trait as
+  detailed in the manual.
 
   
   


[patch] invoke.texi: Update -fopenmp and -fopenmp-simd for omp::decl and loop semantic

2023-09-25 Thread Tobias Burnus

I stumbled over this during the ARM64 talk at the cauldron as they
consider using -fopenmp-simd by default.

→ https://gcc.gnu.org/wiki/cauldron2023 (I put my talk/BoF slides up;
others aren't, yet)

I did stumble over 'omp loop' with SIMD. It turns out that -fopenmp-simd
just turns 'loop' into 'simd', ignoring whatever value the user has
specified for the bind value.

Additionally, [[omp::decl(...)]] was missing.

Any comment to that patch before I commit it?

Tobias

PS: the [[omp::...]] needs a 'C++' → 'C/C++' change once omp:: support
with C23's attributes is in.
-
Siemens Electronic Design Automation GmbH; Anschrift: Arnulfstraße 201, 80634 
München; Gesellschaft mit beschränkter Haftung; Geschäftsführer: Thomas 
Heurung, Frank Thürauf; Sitz der Gesellschaft: München; Registergericht 
München, HRB 106955
invoke.texi: Update -fopenmp and -fopenmp-simd for omp::decl and loop semantic

gcc/ChangeLog:

	PR middle-end/111547
	* doc/invoke.texi (-fopenmp): Mention C++11 [[omp::decl(...)]] syntax.
	(-fopenmp-simd): Likewise. Clarify 'loop' directive semantic.

 gcc/doc/invoke.texi | 16 ++--
 1 file changed, 10 insertions(+), 6 deletions(-)

diff --git a/gcc/doc/invoke.texi b/gcc/doc/invoke.texi
index 146b40414b0..89c539f06c2 100644
--- a/gcc/doc/invoke.texi
+++ b/gcc/doc/invoke.texi
@@ -2766,8 +2766,9 @@ can be omitted, to use a target-specific default value.
 @cindex OpenMP parallel
 @item -fopenmp
 Enable handling of OpenMP directives @code{#pragma omp} in C/C++,
-@code{[[omp::directive(...)]]} and @code{[[omp::sequence(...)]]} in C++ and
-@code{!$omp} in Fortran.  When @option{-fopenmp} is specified, the
+@code{[[omp::directive(...)]]}, @code{[[omp::sequence(...)]]} and
+@code{[[omp::decl(...)]]} in C++ and @code{!$omp} in Fortran.
+When @option{-fopenmp} is specified, the
 compiler generates parallel code according to the OpenMP Application
 Program Interface v4.5 @w{@uref{https://www.openmp.org}}.  This option
 implies @option{-pthread}, and thus is only supported on targets that
@@ -2779,11 +2780,14 @@ have support for @option{-pthread}. @option{-fopenmp} implies
 @cindex SIMD
 @item -fopenmp-simd
 Enable handling of OpenMP's @code{simd}, @code{declare simd},
-@code{declare reduction}, @code{assume}, @code{ordered}, @code{scan},
-@code{loop} directives and combined or composite directives with
+@code{declare reduction}, @code{assume}, @code{ordered}, @code{scan}
+and @code{loop} directive, and of combined or composite directives with
 @code{simd} as constituent with @code{#pragma omp} in C/C++,
-@code{[[omp::directive(...)]]} and @code{[[omp::sequence(...)]]} in C++
-and @code{!$omp} in Fortran.  Other OpenMP directives are ignored.
+@code{[[omp::directive(...)]]}, @code{[[omp::sequence(...)]]} and
+@code{[[omp::decl(...)]]} in C++ and @code{!$omp} in Fortran.
+Other OpenMP directives are ignored.  Unless @option{-fopenmp} is
+additionally specified, the @code{loop} region binds to the current
+task region, independent of the specified @code{bind} clause.
 
 @opindex fopenmp-target-simd-clone
 @cindex OpenMP target SIMD clone


Improve -Wflex-array-member-not-at-end changes.html wording |Plus: and warning bug? (was: [V2][PATCH] gcc-14/changes.html: Deprecate a GCC C extension on flexible array members.)

2023-09-25 Thread Tobias Burnus

Hi all,

I stumbled over this as I found the wording in the release notes rather 
unclear.


First, the following gives only a -pedantic warning and not a 
-Wflex-array-member-not-at-end:

  struct t { int b; int x[]; };
  struct q { int b; struct t a[2]; int c; };

warning: invalid use of structure with flexible array member [-Wpedantic]

If I remove the "[2]", it shows additionally:
  warning: structure containing a flexible array member is not at the end of 
another structure [-Wflex-array-member-not-at-end]

It seems as if it should print the latter warning also inside the struct.

Qing? Joseph? Thoughts?

* * *

Secondly, if this is deprecated, shouldn't the warning then be enabled by, e.g., 
-Wall or made
otherwise more prominent? (-std=?) - Currently, one either has to find the new 
flag or use
-pedantic.

Or is this not really regarded as deprecated? But then (IMHO) we should not 
really claim so and just
add the warning without deprecation.

BTW; clang-15 prints the -Wgnu-variable-sized-type-not-at-end warning by 
default.

Joseph, all: Thoughts?

* * *

Cross ref: The patch adding the new warning is r14-2197-g070a6bf0bdc6761
https://gcc.gnu.org/pipermail/gcc-cvs/2023-June/385730.html (cf. previously in 
this thread)


* * *

Regarding the changes.html wording:

On 07.08.23 16:22, Qing Zhao via Gcc-patches wrote:


Comparing to the 1st version, the only change is to address Richard's
comment on referring a warning option for diagnosing deprecated behavior.

...

+++ b/htdocs/gcc-14/changes.html
@@ -30,7 +30,18 @@ a work-in-progress.
  
  Caveats
  
-  ...
+  C:
+  Support for the GCC extension, a structure containing a C99 flexible 
array
+  member, or a union containing such a structure, is not the last field of
+  another structure, is deprecated. Refer to
+  https://gcc.gnu.org/onlinedocs/gcc/Zero-Length.html;>
+  Zero Length Arrays.


...

I find the first sentence difficult to read. What do you think of the following?
(It is hard to come up with some good wording.)

--- a/htdocs/gcc-14/changes.html
+++ b/htdocs/gcc-14/changes.html
@@ -31,9 +31,10 @@ a work-in-progress.
 Caveats
 
   C:
-  Support for the GCC extension, a structure containing a C99 flexible 
array
-  member, or a union containing such a structure, is not the last field of
-  another structure, is deprecated. Refer to
+  Support for the GCC extension that a structure containing a C99 flexible
+  array (and any union containing a member of such structure) can be a
+  member of a structure has been deprecated for the case that it is not
+  the last member. Refer to
   https://gcc.gnu.org/onlinedocs/gcc/Zero-Length.html;>
   Zero Length Arrays.
   Any code relying on this extension should be modifed to ensure that


Tobias

PS:  C17 has:
"A structure or union shall not contain a member with incomplete or function 
type (hence, a structure
 shall not contain an instance of itself, but may contain a pointer to an 
instance of itself), except that
 the last member of a structure with more than one named member may have 
incomplete array type;
 such a structure (and any union containing, possibly recursively, a member 
that is such a structure)
 shall not be a member of a structure or an element of an array."

-
Siemens Electronic Design Automation GmbH; Anschrift: Arnulfstraße 201, 80634 
München; Gesellschaft mit beschränkter Haftung; Geschäftsführer: Thomas 
Heurung, Frank Thürauf; Sitz der Gesellschaft: München; Registergericht 
München, HRB 106955


[Bug gcov-profile/110827] C++20 coroutines aren't being measured by gcov

2023-09-25 Thread mwd at md5i dot com via Gcc-bugs
https://gcc.gnu.org/bugzilla/show_bug.cgi?id=110827

--- Comment #10 from Michael Duggan  ---
To sum up what I have figured out, C++ transforms the coroutine "function" into
a trio of functions: a ramp function, an actor function, and a destruction
function.  The ramp function acts as the actual function (by name).  The actor
function contains the original body of the written function (with some
transformations), and thus contains the code associated with most of the lines
that need coverage information.

Since the actor function is generated artificially, it is marked as artificial.
 The gcov program explicitly ignores functions that are marked as artificial. 
Also, even if that were not the case, it looks to me like the line coverage
information for the actor function only includes the initial line of the
function.  This seems to be due to the way the artificial function gets
inserted into the list of functions of the program.

In order to solve this problem, we would need to do at least the following: 
  Find a way to not ignore the actor function.  This would involve either not
marking it as artificial or by marking it in some other way that would be
recognized by gcov.
  Ensure that the actor function properly includes the line number information
from the original coroutine body.

Most of this work would probably need to be done in the c++ code (where the
coroutine transformation happens) rather than in the coverage code.  Should
this be reassigned to the c++ component?

Re: [PATCH 0/2] Replace intl/ with out-of-tree GNU gettext

2023-09-25 Thread Arsen Arsenović

Xi Ruoyao  writes:

> On Mon, 2023-09-25 at 17:00 +0200, Arsen Arsenović wrote:
>> Afternoon,
>> 
>> This patch series replaces the old (early 2000s era, AFAICT) libintl
>> implementation in-tree, which relies on C constructs some compilers
>> (newer clang, hopefully GCC 14) refuse to compile by default with
>> out-of-tree gettext, in a manner similar to GMP et al, and adds gettext
>> to download_prerequisites.
>
> I think we need to update install.texi to mention the new dependency.

Ah, thanks.  I had forgotten to update it.  What do you think of the
following prose?

1:  2ac5c8240c0f ! 1:  2cc0029921fb *: add modern gettext
@@ Commit message
 * aclocal.m4: Regenerate.
 * Makefile.in (LIBDEPS): Remove (potential) ./ prefix from
 LIBINTL_DEP.
-* doc/install.texi: Document new (notable) flags added by the 
optional
-gettext tree and by AM_GNU_GETTEXT.
 
 libcpp/ChangeLog:
 
@@ gcc/configure: $as_echo "$as_me: executing $ac_file commands" >&6;}
  "collect-ld":F) chmod +x collect-ld ;;
  "nm":F) chmod +x nm ;;
 
- ## gcc/doc/install.texi ##
-@@ gcc/doc/install.texi: is shown below:
- @code{sys} and @code{time}.
- @end table
- 
-+@item GNU gettext
-+
-+Necessary to build GCC with internationalization support via
-+@option{--enable-nls}.  It can be downloaded from
-+@uref{https://gnu.org/s/gettext/}.  If a GNU gettext distribution is
-+found in a subdirectory of your GCC sources named @file{gettext}, it
-+will be built together with GCC, unless present in the system (either in
-+libc or as a stand-alone library).
-+
-+The in-tree configuration requires GNU gettext v0.22.
-+
- @end table
- 
- @heading Tools/packages necessary for modifying GCC
-@@ gcc/doc/install.texi: components of the binutils you intend to build 
alongside the compiler
- (@file{bfd}, @file{binutils}, @file{gas}, @file{gprof}, @file{ld},
- @file{opcodes}, @dots{}) to the directory containing the GCC sources.
- 
--Likewise the GMP, MPFR and MPC libraries can be automatically built
--together with GCC.  You may simply run the
-+Likewise the GMP, MPFR, MPC and Gettext libraries can be automatically
-+built together with GCC.  You may simply run the
- @command{contrib/download_prerequisites} script in the GCC source 
directory
- to set up everything.
--Otherwise unpack the GMP, MPFR and/or MPC source
-+Otherwise unpack the GMP, MPFR, MPC and/or Gettext source
- distributions in the directory containing the GCC sources and rename
--their directories to @file{gmp}, @file{mpfr} and @file{mpc},
--respectively (or use symbolic links with the same name).
-+their directories to @file{gmp}, @file{mpfr}, @file{mpc} and
-+@file{gettext}, respectively (or use symbolic links with the same name).
- 
- @html
- 
-@@ gcc/doc/install.texi: which lets GCC output diagnostics in languages 
other than American
- English.  Native Language Support is enabled by default if not doing a
- canadian cross build.  The @option{--disable-nls} option disables NLS@.
- 
-+@item --with-libintl-prefix=@var{dir}
-+@itemx --without-libintl-prefix
-+Searches for libintl in @file{@var{dir}/include} and
-+@file{@var{dir}/lib}, or disables manual searching for it, letting the
-+linker handle it.
-+
-+@item --with-libintl-type=@var{type}
-+Specifies the type of library to search for when looking for libintl.
-+@var{type} can be one of @code{auto}, @code{static} or @code{shared}.
-+
- @item --with-included-gettext
--If NLS is enabled, the @option{--with-included-gettext} option causes the 
build
--procedure to prefer its copy of GNU @command{gettext}.
-+Only available if @file{gettext} is present in the source tree.
-+
-+Forces the gettext tree to be configured to build and use a new static
-+libintl, overriding the system libintl.
- 
- @item --with-catgets
- If NLS is enabled, and if the host lacks @code{gettext} but has the
-
  ## libcpp/aclocal.m4 ##
 @@ libcpp/aclocal.m4: m4_include([../config/codeset.m4])
  m4_include([../config/depstand.m4])

Perhaps this is easier to read when not a range-diff..
https://git.sr.ht/~arsen/gcc/commit/2ac5c8240c0f1a670f100c8e38baf40b13cc50b2#gcc/doc/install.texi

> And IIUC if --disable-nls is used, we can still build GCC with neither
> system gettext nor in-tree gettext.  Or am I wrong?  (I'm asking because
> we'll need to adjust Linux From Scratch [1-3] for this change if it's
> applied.)

Yes, this doesn't change how --disable-nls works.

> [1]:https://www.linuxfromscratch.org/lfs/view/development/chapter05/gcc-pass1.html
> [2]:https://www.linuxfromscratch.org/lfs/view/development/chapter06/gcc-pass2.html
> 

Re: ipa-inline & what TARGET_CAN_INLINE_P can assume

2023-09-25 Thread Richard Sandiford via Gcc
Andrew Pinski  writes:
> On Mon, Sep 25, 2023 at 10:16 AM Richard Sandiford via Gcc
>  wrote:
>>
>> Hi,
>>
>> I have a couple of questions about what TARGET_CAN_INLINE_P is
>> allowed to assume when called from ipa-inline.  (Callers from the
>> front-end don't matter for the moment.)
>>
>> I'm working on an extension where a function F1 without attribute A
>> can't be inlined into a function F2 with attribute A.  That part is
>> easy and standard.
>>
>> But it's expected that many functions won't have attribute A,
>> even if they could.  So we'd like to detect automatically whether
>> F1's implementation is compatible with attribute A.  This is something
>> we can do by scanning the gimple code.
>>
>> However, even if we detect that F1's code is compatible with attribute A,
>> we don't want to add attribute A to F1 itself because (a) it would change
>> F1's ABI and (b) it would restrict the optimisation of any non-inlined
>> copy of F1.  So this is a test for inlining only.
>>
>> TARGET_CAN_INLINE_P (F2, F1) can check whether F1's current code
>> is compatible with attribute A.  But:
>>
>> (a) Is it safe to assume (going forward) that F1 won't change before
>> it is inlined into F2?  Specifically, is it safe to assume that
>> nothing will be inlined into F1 between the call to TARGET_CAN_INLINE_P
>> and the inlining of F1 into F2?
>>
>> (b) For compile-time reasons, I'd like to cache the result in
>> machine_function.  The cache would be a three-state:
>>
>> - not tested
>> - compatible with A
>> - incompatible with A
>>
>> The cache would be reset to "not tested" whenever TARGET_CAN_INLINE_P
>> is called with F1 as the *caller* rather than the callee.  The idea
>> is to handle cases where something is inlined into F1 after F1 has
>> been inlined into F2.  (This would include calls from the main
>> inlining pass, after the early pass has finished.)
>>
>> Is resetting the cache in this way sufficient?  Or should we have a
>> new interface for this?
>>
>> Sorry for the long question :)  I have something that seems to work,
>> but I'm not sure whether it's misusing the interface.
>
>
> The rs6000 backend has a similar issue and defined the following
> target hooks which seems exactly what you need in this case
> TARGET_NEED_IPA_FN_TARGET_INFO
> TARGET_UPDATE_IPA_FN_TARGET_INFO
>
> And then use that information in can_inline_p target hook to mask off
> the ISA bits:
>   unsigned int info = ipa_fn_summaries->get (callee_node)->target_info;
>   if ((info & RS6000_FN_TARGET_INFO_HTM) == 0)
> {
>   callee_isa &= ~OPTION_MASK_HTM;
>   explicit_isa &= ~OPTION_MASK_HTM;
> }

Thanks!  Like you say, it looks like a perfect fit.

The optimisation of having TARGET_UPDATE_IPA_FN_TARGET_INFO return false
to stop further analysis probably won't trigger for this use case.
I need to track two conditions and the second one is very rare.
But that's still going to be much better than potentially scanning
the same (inlined) stmts multiple times.

Richard


Re: ipa-inline & what TARGET_CAN_INLINE_P can assume

2023-09-25 Thread Andrew Pinski via Gcc
On Mon, Sep 25, 2023 at 10:16 AM Richard Sandiford via Gcc
 wrote:
>
> Hi,
>
> I have a couple of questions about what TARGET_CAN_INLINE_P is
> allowed to assume when called from ipa-inline.  (Callers from the
> front-end don't matter for the moment.)
>
> I'm working on an extension where a function F1 without attribute A
> can't be inlined into a function F2 with attribute A.  That part is
> easy and standard.
>
> But it's expected that many functions won't have attribute A,
> even if they could.  So we'd like to detect automatically whether
> F1's implementation is compatible with attribute A.  This is something
> we can do by scanning the gimple code.
>
> However, even if we detect that F1's code is compatible with attribute A,
> we don't want to add attribute A to F1 itself because (a) it would change
> F1's ABI and (b) it would restrict the optimisation of any non-inlined
> copy of F1.  So this is a test for inlining only.
>
> TARGET_CAN_INLINE_P (F2, F1) can check whether F1's current code
> is compatible with attribute A.  But:
>
> (a) Is it safe to assume (going forward) that F1 won't change before
> it is inlined into F2?  Specifically, is it safe to assume that
> nothing will be inlined into F1 between the call to TARGET_CAN_INLINE_P
> and the inlining of F1 into F2?
>
> (b) For compile-time reasons, I'd like to cache the result in
> machine_function.  The cache would be a three-state:
>
> - not tested
> - compatible with A
> - incompatible with A
>
> The cache would be reset to "not tested" whenever TARGET_CAN_INLINE_P
> is called with F1 as the *caller* rather than the callee.  The idea
> is to handle cases where something is inlined into F1 after F1 has
> been inlined into F2.  (This would include calls from the main
> inlining pass, after the early pass has finished.)
>
> Is resetting the cache in this way sufficient?  Or should we have a
> new interface for this?
>
> Sorry for the long question :)  I have something that seems to work,
> but I'm not sure whether it's misusing the interface.


The rs6000 backend has a similar issue and defined the following
target hooks which seems exactly what you need in this case
TARGET_NEED_IPA_FN_TARGET_INFO
TARGET_UPDATE_IPA_FN_TARGET_INFO

And then use that information in can_inline_p target hook to mask off
the ISA bits:
  unsigned int info = ipa_fn_summaries->get (callee_node)->target_info;
  if ((info & RS6000_FN_TARGET_INFO_HTM) == 0)
{
  callee_isa &= ~OPTION_MASK_HTM;
  explicit_isa &= ~OPTION_MASK_HTM;
}


Thanks,
Andrew Pinski


>
> Thanks,
> Richard


[Bug middle-end/109967] [11/12/13/14 Regression] Wrong code at -O2 on x86_64-linux-gnu

2023-09-25 Thread xry111 at gcc dot gnu.org via Gcc-bugs
https://gcc.gnu.org/bugzilla/show_bug.cgi?id=109967

Xi Ruoyao  changed:

   What|Removed |Added

   See Also||https://gcc.gnu.org/bugzill
   ||a/show_bug.cgi?id=111294
 CC||rguenther at suse dot de

--- Comment #9 from Xi Ruoyao  ---
Bisect shows r14-4089 (the fix for PR111294) either fixes or "covers up" the
issue.

Re: [PATCH 0/2] Replace intl/ with out-of-tree GNU gettext

2023-09-25 Thread Xi Ruoyao
On Mon, 2023-09-25 at 17:00 +0200, Arsen Arsenović wrote:
> Afternoon,
> 
> This patch series replaces the old (early 2000s era, AFAICT) libintl
> implementation in-tree, which relies on C constructs some compilers
> (newer clang, hopefully GCC 14) refuse to compile by default with
> out-of-tree gettext, in a manner similar to GMP et al, and adds gettext
> to download_prerequisites.

I think we need to update install.texi to mention the new dependency.

And IIUC if --disable-nls is used, we can still build GCC with neither
system gettext nor in-tree gettext.  Or am I wrong?  (I'm asking because
we'll need to adjust Linux From Scratch [1-3] for this change if it's
applied.)

[1]:https://www.linuxfromscratch.org/lfs/view/development/chapter05/gcc-pass1.html
[2]:https://www.linuxfromscratch.org/lfs/view/development/chapter06/gcc-pass2.html
[3]:https://www.linuxfromscratch.org/lfs/view/development/chapter08/gcc.html

-- 
Xi Ruoyao 
School of Aerospace Science and Technology, Xidian University


ipa-inline & what TARGET_CAN_INLINE_P can assume

2023-09-25 Thread Richard Sandiford via Gcc
Hi,

I have a couple of questions about what TARGET_CAN_INLINE_P is
allowed to assume when called from ipa-inline.  (Callers from the
front-end don't matter for the moment.)

I'm working on an extension where a function F1 without attribute A
can't be inlined into a function F2 with attribute A.  That part is
easy and standard.

But it's expected that many functions won't have attribute A,
even if they could.  So we'd like to detect automatically whether
F1's implementation is compatible with attribute A.  This is something
we can do by scanning the gimple code.

However, even if we detect that F1's code is compatible with attribute A,
we don't want to add attribute A to F1 itself because (a) it would change
F1's ABI and (b) it would restrict the optimisation of any non-inlined
copy of F1.  So this is a test for inlining only.

TARGET_CAN_INLINE_P (F2, F1) can check whether F1's current code
is compatible with attribute A.  But:

(a) Is it safe to assume (going forward) that F1 won't change before
it is inlined into F2?  Specifically, is it safe to assume that
nothing will be inlined into F1 between the call to TARGET_CAN_INLINE_P
and the inlining of F1 into F2?

(b) For compile-time reasons, I'd like to cache the result in
machine_function.  The cache would be a three-state:

- not tested
- compatible with A
- incompatible with A

The cache would be reset to "not tested" whenever TARGET_CAN_INLINE_P
is called with F1 as the *caller* rather than the callee.  The idea
is to handle cases where something is inlined into F1 after F1 has
been inlined into F2.  (This would include calls from the main
inlining pass, after the early pass has finished.)

Is resetting the cache in this way sufficient?  Or should we have a
new interface for this?

Sorry for the long question :)  I have something that seems to work,
but I'm not sure whether it's misusing the interface.

Thanks,
Richard


Re: [PATCH] Add missing return in gori_compute::logical_combine

2023-09-25 Thread Andrew MacLeod
OK for trunk at least.   Thanks.  I presume it'll be fine for the other 
releases.


Andrew

On 9/25/23 11:51, Eric Botcazou wrote:

Hi,

the varying case currently falls through to the 1/true case.

Tested on x86_64-suse-linux, OK for mainline, 13 and 12 branches?


2023-09-25  Eric Botcazou  

* gimple-range-gori.cc (gori_compute::logical_combine): Add missing
return statement in the varying case.


2023-09-25  Eric Botcazou  

* gnat.dg/opt102.adb: New test.
* gnat.dg/opt102_pkg.adb, gnat.dg/opt102_pkg.ads: New helper.





[Bug fortran/59298] ICE when initialising PARAMETER array of derived-type (containing an array) using array constructor

2023-09-25 Thread anlauf at gcc dot gnu.org via Gcc-bugs
https://gcc.gnu.org/bugzilla/show_bug.cgi?id=59298

anlauf at gcc dot gnu.org changed:

   What|Removed |Added

 Resolution|--- |FIXED
 Status|WAITING |RESOLVED
   Keywords||ice-on-valid-code
  Known to work||10.5.0
  Known to fail||7.5.0, 8.5.0, 9.5.0
   Target Milestone|--- |10.5

--- Comment #16 from anlauf at gcc dot gnu.org ---
Fixed in gcc-10.

[Bug fortran/84693] scalar DT not broadcast across an array in an initialization expression

2023-09-25 Thread anlauf at gcc dot gnu.org via Gcc-bugs
https://gcc.gnu.org/bugzilla/show_bug.cgi?id=84693
Bug 84693 depends on bug 59298, which changed state.

Bug 59298 Summary: ICE when initialising PARAMETER array of derived-type 
(containing an array) using array constructor
https://gcc.gnu.org/bugzilla/show_bug.cgi?id=59298

   What|Removed |Added

 Status|WAITING |RESOLVED
 Resolution|--- |FIXED

[Bug target/111570] -march=generic prints error

2023-09-25 Thread brjd_epdjq36 at kygur dot com via Gcc-bugs
https://gcc.gnu.org/bugzilla/show_bug.cgi?id=111570

--- Comment #2 from Brjd  ---
Thank you and I also read this guide. My point is that the generic arch might
be  possible in theory. If the gcc builds for the oldest CPU with x86_64, is it
possible that code will run on all modern CPU since their subset includes also
that of their predecessor. 

How about making it default to that generic or baseline build for that limited
CPU?

If I could ask you also more questions, let me ask you about this problem. The
guide doesn't mention anything about the specific arch. If -march=cpu what is
better -mtune=cpu where cpu is the same as in arch or -mtune=generic so that
the code tunes to all CPU kinds of this family. If the tune is empty, is it
default generic or native and the arch is not clear either.

One question more, I am not able to find a guide about the gcc build and no
information whether the gcc may be built in targets like LLVM and clang. For
example, is it possible to build first only the LLVM, then stop and resume with
clang etc. or first, gcc's only c module and its submodules, then stop and
resume with its g++  module and submodules, next with libgcc, libstdc++ etc.? 

It would be great, especially for long bootstraps and stage 2, but I find only
make all-gcc, target-libgcc which however build almost all of the compiler.

[Bug target/109166] Built-in __atomic_test_and_set does not seem to be atomic on ARMv4T

2023-09-25 Thread hp at gcc dot gnu.org via Gcc-bugs
https://gcc.gnu.org/bugzilla/show_bug.cgi?id=109166

--- Comment #9 from Hans-Peter Nilsson  ---
(In reply to Richard Earnshaw from comment #8)
> I'm going to close this as WONTFIX.

I guess I'll have to find another PR to lean on, for fixing the underlying
cause for the nonatomic code.

[Bug target/104831] RISCV libatomic LR.aq/SC.rl pair insufficient for SEQ_CST

2023-09-25 Thread patrick at rivosinc dot com via Gcc-bugs
https://gcc.gnu.org/bugzilla/show_bug.cgi?id=104831

Patrick O'Neill  changed:

   What|Removed |Added

 Resolution|--- |FIXED
 Status|ASSIGNED|RESOLVED

--- Comment #11 from Patrick O'Neill  ---
This has been resolved on trunk:
https://inbox.sourceware.org/gcc-patches/20230427162301.1151333-1-patr...@rivosinc.com/
The cover letter there contains a lot more context about why the mappings are
wrong and why we implemented a strengthened version of Table A.6.
These mappings are included in the RISC-V PSABI doc:
https://github.com/riscv-non-isa/riscv-elf-psabi-doc/pull/378

And this series has been backported to be included in GCC 13.3 (along with a
bugfix):
https://inbox.sourceware.org/gcc-patches/20230725180206.284777-1-patr...@rivosinc.com/

[Bug target/111533] [14 Regression] ICE: RTL check: expected code 'reg', have 'const_int' in rhs_regno, at rtl.h:1934

2023-09-25 Thread patrick at rivosinc dot com via Gcc-bugs
https://gcc.gnu.org/bugzilla/show_bug.cgi?id=111533

--- Comment #2 from Patrick O'Neill  ---
Hi,

I believe the issue is that you're using rv64gc, not rv64gcv.

I haven't tried building with multilib, so my commands are:

../configure --with-arch=rv64gcv --with-abi=lp64d --enable-gcc-checking=rtl

make linux -j32

[Bug target/111546] [14 Regression] ICE: gfortran.dg/overload_5.f90:53:2: internal compiler error: in emit_move_insn, at expr.cc:4219 since r14-4163-gbea89f78f2f

2023-09-25 Thread patrick at rivosinc dot com via Gcc-bugs
https://gcc.gnu.org/bugzilla/show_bug.cgi?id=111546

Patrick O'Neill  changed:

   What|Removed |Added

 Resolution|--- |FIXED
 Status|UNCONFIRMED |RESOLVED

--- Comment #3 from Patrick O'Neill  ---
gfortran.dg/overload_5.f90 failures have been resolved!

[PATCH] Add missing return in gori_compute::logical_combine

2023-09-25 Thread Eric Botcazou
Hi,

the varying case currently falls through to the 1/true case.

Tested on x86_64-suse-linux, OK for mainline, 13 and 12 branches?


2023-09-25  Eric Botcazou  

* gimple-range-gori.cc (gori_compute::logical_combine): Add missing
return statement in the varying case.


2023-09-25  Eric Botcazou  

* gnat.dg/opt102.adb: New test.
* gnat.dg/opt102_pkg.adb, gnat.dg/opt102_pkg.ads: New helper.

-- 
Eric Botcazou
diff --git a/gcc/gimple-range-gori.cc b/gcc/gimple-range-gori.cc
index 51fb542a19c..2694e551d73 100644
--- a/gcc/gimple-range-gori.cc
+++ b/gcc/gimple-range-gori.cc
@@ -876,6 +876,7 @@ gori_compute::logical_combine (vrange &r, enum tree_code code,
 	  r.dump (dump_file);
 	  fputc ('\n', dump_file);
 	}
+  return res;
 }
 
   switch (code)
--  Body of the helper package used by the Opt102 test.
package body Opt102_Pkg is

  --  Return 0 when E is One, the value designated by F when E is Two,
  --  and the value designated by M when E is Three.
  function Get (E : Enum; F, M : access Integer) return Integer is
  begin
case E is
  when One   => return 0;
  when Two   => return F.all;
  when Three => return M.all;
end case;
  end;

end Opt102_Pkg;
-- { dg-do run }
-- { dg-options "-O2 -gnata" }

with Opt102_Pkg; use Opt102_Pkg;

--  Test driver: call Get with E = Two, a non-null F and a null M, and
--  store the result in I.
procedure Opt102 is
  I, F : aliased Integer;
begin
  I := Get (Two, F'Access, null);
end;
--  Helper package for the Opt102 test: an enumeration and a selector
--  function guarded by a precondition.
package Opt102_Pkg is

  type Enum is (One, Two, Three);

  --  The precondition requires that E = One exactly when both F and M are
  --  null, E = Two exactly when F is non-null, and E = Three exactly when
  --  M is non-null.
  function Get (E : Enum; F, M : access Integer) return Integer
with Pre => (E = One) = (F = null and M = null) and
(E = Two) = (F /= null) and
(E = Three) = (M /= null);

end Opt102_Pkg;


[committed] hppa: Update baseline symbols for hppa-linux

2023-09-25 Thread John David Anglin
Committed to trunk.

Dave
---

Update baseline symbols for hppa-linux.

2023-09-25  John David Anglin  

libstdc++-v3/ChangeLog:

* config/abi/post/hppa-linux-gnu/baseline_symbols.txt: Update.

diff --git a/libstdc++-v3/config/abi/post/hppa-linux-gnu/baseline_symbols.txt 
b/libstdc++-v3/config/abi/post/hppa-linux-gnu/baseline_symbols.txt
index ff40f201eb8..b41e57125ef 100644
--- a/libstdc++-v3/config/abi/post/hppa-linux-gnu/baseline_symbols.txt
+++ b/libstdc++-v3/config/abi/post/hppa-linux-gnu/baseline_symbols.txt
@@ -983,8 +983,18 @@ FUNC:_ZNKSt11__timepunctIwE9_M_monthsEPPKw
 FUNC:_ZNKSt11__timepunctIwE9_M_monthsEPPKw@@GLIBCXX_3.4
 FUNC:_ZNKSt11logic_error4whatEv
 FUNC:_ZNKSt11logic_error4whatEv@@GLIBCXX_3.4
+FUNC:_ZNKSt12__basic_fileIcE13native_handleEv
+FUNC:_ZNKSt12__basic_fileIcE13native_handleEv@@GLIBCXX_3.4.32
 FUNC:_ZNKSt12__basic_fileIcE7is_openEv
 FUNC:_ZNKSt12__basic_fileIcE7is_openEv@@GLIBCXX_3.4
+FUNC:_ZNKSt12__shared_ptrINSt10filesystem28recursive_directory_iterator10_Dir_stackELN9__gnu_cxx12_Lock_policyE2EEcvbEv
+FUNC:_ZNKSt12__shared_ptrINSt10filesystem28recursive_directory_iterator10_Dir_stackELN9__gnu_cxx12_Lock_policyE2EEcvbEv@@GLIBCXX_3.4.31
+FUNC:_ZNKSt12__shared_ptrINSt10filesystem4_DirELN9__gnu_cxx12_Lock_policyE2EEcvbEv
+FUNC:_ZNKSt12__shared_ptrINSt10filesystem4_DirELN9__gnu_cxx12_Lock_policyE2EEcvbEv@@GLIBCXX_3.4.31
+FUNC:_ZNKSt12__shared_ptrINSt10filesystem7__cxx1128recursive_directory_iterator10_Dir_stackELN9__gnu_cxx12_Lock_policyE2EEcvbEv
+FUNC:_ZNKSt12__shared_ptrINSt10filesystem7__cxx1128recursive_directory_iterator10_Dir_stackELN9__gnu_cxx12_Lock_policyE2EEcvbEv@@GLIBCXX_3.4.31
+FUNC:_ZNKSt12__shared_ptrINSt10filesystem7__cxx114_DirELN9__gnu_cxx12_Lock_policyE2EEcvbEv
+FUNC:_ZNKSt12__shared_ptrINSt10filesystem7__cxx114_DirELN9__gnu_cxx12_Lock_policyE2EEcvbEv@@GLIBCXX_3.4.31
 FUNC:_ZNKSt12bad_weak_ptr4whatEv
 FUNC:_ZNKSt12bad_weak_ptr4whatEv@@GLIBCXX_3.4.15
 FUNC:_ZNKSt12future_error4whatEv
@@ -1313,6 +1323,20 @@ FUNC:_ZNKSt5ctypeIwE9do_narrowEPKwS2_cPc
 FUNC:_ZNKSt5ctypeIwE9do_narrowEPKwS2_cPc@@GLIBCXX_3.4
 FUNC:_ZNKSt5ctypeIwE9do_narrowEwc
 FUNC:_ZNKSt5ctypeIwE9do_narrowEwc@@GLIBCXX_3.4
+FUNC:_ZNKSt6chrono4tzdb11locate_zoneESt17basic_string_viewIcSt11char_traitsIcEE
+FUNC:_ZNKSt6chrono4tzdb11locate_zoneESt17basic_string_viewIcSt11char_traitsIcEE@@GLIBCXX_3.4.31
+FUNC:_ZNKSt6chrono4tzdb12current_zoneEv
+FUNC:_ZNKSt6chrono4tzdb12current_zoneEv@@GLIBCXX_3.4.31
+FUNC:_ZNKSt6chrono9time_zone15_M_get_sys_infoENS_10time_pointINS_3_V212system_clockENS_8durationIxSt5ratioILx1ELx1EE
+FUNC:_ZNKSt6chrono9time_zone15_M_get_sys_infoENS_10time_pointINS_3_V212system_clockENS_8durationIxSt5ratioILx1ELx1EE@@GLIBCXX_3.4.31
+FUNC:_ZNKSt6chrono9time_zone17_M_get_local_infoENS_10time_pointINS_7local_tENS_8durationIxSt5ratioILx1ELx1EE
+FUNC:_ZNKSt6chrono9time_zone17_M_get_local_infoENS_10time_pointINS_7local_tENS_8durationIxSt5ratioILx1ELx1EE@@GLIBCXX_3.4.31
+FUNC:_ZNKSt6chrono9tzdb_list14const_iteratordeEv
+FUNC:_ZNKSt6chrono9tzdb_list14const_iteratordeEv@@GLIBCXX_3.4.31
+FUNC:_ZNKSt6chrono9tzdb_list5beginEv
+FUNC:_ZNKSt6chrono9tzdb_list5beginEv@@GLIBCXX_3.4.31
+FUNC:_ZNKSt6chrono9tzdb_list5frontEv
+FUNC:_ZNKSt6chrono9tzdb_list5frontEv@@GLIBCXX_3.4.31
 FUNC:_ZNKSt6locale2id5_M_idEv
 FUNC:_ZNKSt6locale2id5_M_idEv@@GLIBCXX_3.4
 FUNC:_ZNKSt6locale4nameB5cxx11Ev
@@ -6134,12 +6158,30 @@ FUNC:_ZNSt6__norm15_List_node_base8transferEPS0_S1_
 FUNC:_ZNSt6__norm15_List_node_base8transferEPS0_S1_@@GLIBCXX_3.4.9
 FUNC:_ZNSt6__norm15_List_node_base9_M_unhookEv
 FUNC:_ZNSt6__norm15_List_node_base9_M_unhookEv@@GLIBCXX_3.4.14
+FUNC:_ZNSt6chrono11locate_zoneESt17basic_string_viewIcSt11char_traitsIcEE
+FUNC:_ZNSt6chrono11locate_zoneESt17basic_string_viewIcSt11char_traitsIcEE@@GLIBCXX_3.4.31
+FUNC:_ZNSt6chrono11reload_tzdbEv
+FUNC:_ZNSt6chrono11reload_tzdbEv@@GLIBCXX_3.4.31
+FUNC:_ZNSt6chrono12current_zoneEv
+FUNC:_ZNSt6chrono12current_zoneEv@@GLIBCXX_3.4.31
 FUNC:_ZNSt6chrono12system_clock3nowEv
 FUNC:_ZNSt6chrono12system_clock3nowEv@@GLIBCXX_3.4.11
+FUNC:_ZNSt6chrono13get_tzdb_listEv
+FUNC:_ZNSt6chrono13get_tzdb_listEv@@GLIBCXX_3.4.31
+FUNC:_ZNSt6chrono14remote_versionB5cxx11Ev
+FUNC:_ZNSt6chrono14remote_versionB5cxx11Ev@@GLIBCXX_3.4.31
 FUNC:_ZNSt6chrono3_V212steady_clock3nowEv
 FUNC:_ZNSt6chrono3_V212steady_clock3nowEv@@GLIBCXX_3.4.19
 FUNC:_ZNSt6chrono3_V212system_clock3nowEv
 FUNC:_ZNSt6chrono3_V212system_clock3nowEv@@GLIBCXX_3.4.19
+FUNC:_ZNSt6chrono8get_tzdbEv
+FUNC:_ZNSt6chrono8get_tzdbEv@@GLIBCXX_3.4.31
+FUNC:_ZNSt6chrono9tzdb_list11erase_afterENS0_14const_iteratorE
+FUNC:_ZNSt6chrono9tzdb_list11erase_afterENS0_14const_iteratorE@@GLIBCXX_3.4.31
+FUNC:_ZNSt6chrono9tzdb_list14const_iteratorppEi
+FUNC:_ZNSt6chrono9tzdb_list14const_iteratorppEi@@GLIBCXX_3.4.31
+FUNC:_ZNSt6chrono9tzdb_list14const_iteratorppEv
+FUNC:_ZNSt6chrono9tzdb_list14const_iteratorppEv@@GLIBCXX_3.4.31
 FUNC:_ZNSt6gslice8_IndexerC1EjRKSt8valarrayIjES4_
 

Complex numbers support: discussions summary

2023-09-25 Thread Sylvain Noiry via Gcc

Hi,

We had very interesting discussions during our presentation with Paul on 
the

support of complex numbers in gcc at the Cauldron.

Thank you all for your participation !

Here is a small summary from our viewpoint:

- Replace CONCAT with a backend defined internal representation in RTL
--> No particular problems

- Allow backend to write patterns for operation on complex modes
--> No particular problems

- Conditional lowering depending on whether a pattern exists or not
--> Concerns when the vectorization of split complex operations performs 
better

   than not vectorized unified complex operations

- Centralize complex lowering in cplxlower
--> No particular problems if it doesn't prevent IEEE compliance and
   optimizations (like const folding)

- Vectorization of complex operations
--> 2 representations (interleaved and separated real/imag): cannot 
impose one

   if some machines prefer the other
--> Complex are composite modes, the vectorizer assumes that the inner 
mode is

   scalar to do some optimizations (which ones ?)
--> Mixed split/unified complex operations cannot be vectorized easily
--> Assuming that the inner representation of complex vectors is left to 
target
   backends, the vectorizer doesn't know it, which prevents some 
optimizations

   (which ones ?)

- Explicit vectors of complex
--> Cplxlower cannot lower it, and moving veclower before cplxlower is a 
bad

   idea as it prevents some optimizations
--> Teaching cplxlower how to deal with vectors of complex seems to be a
   reasonable alternative
--> Concerns about ABI or indexing if the internal representation is left 
to the

   backend and differs from the representation in memory

- Impact of the current SLP pattern matching of complex operations
--> Only with -ffast-math
--> It can match user defined operations (not C99) that can be 
simplified with a

   complex instruction
--> Dedicated opcode and real vector type chosen VS standard opcode and 
complex

   mode in our implementation
--> Need to preserve SLP pattern matching as too many applications 
redefine

   complex and bypass C99 standard.
--> So need to harmonize with our implementation

- Support of the pure imaginary type (_Imaginary)
--> Still not supported by gcc (and llvm), neither in our implementation
--> Issues come from the fact that an imaginary is not a complex with 
real part

   set to 0
--> The same issue with complex multiplication by a real (which is split 
in the

   frontend, and our implementation hasn't changed it yet)
--> Idea: Add an attribute to the Tree complex type which specifies pure 
real / pure

   imaginary / full complex ?

- Fast pattern for IEEE compliant emulated operations
--> Not enough time to discuss it

Don't hesitate to add something or bring more precision if you want.

As I said at the end of the presentation, we have written a paper which 
explains
our implementation in detail. You can find it on the wiki page of the 
Cauldron 
(https://gcc.gnu.org/wiki/cauldron2023talks?action=AttachFile&do=view&target=Exposing+Complex+Numbers+to+Target+Back-ends+%28paper%29.pdf).


Sylvain







Re: [PATCH v2 0/1] Add LoongArch64 support for D frontend

2023-09-25 Thread liushuyu

Hi Yujie,

Sorry, I did not know Loongson Technologies is also working on this.

However, you can jump onto that GitHub pull request to review my changes 
so that they align with your implementation and nobody's effort would go 
to waste.


Thanks,

Zixing

On 2023/9/25 04:04, Yang Yujie wrote:

Hi Zixing,

We are also working on a patch series that could pass the libphobos regression 
tests.
Will post this later once all failed items are fixed.

Yujie

On Sun, Sep 24, 2023 at 03:40:32PM -0600, Zixing Liu wrote:

This patch adds the LoongArch64 support for GCC D frontend.

The runtime support is submitted as a separate patch here:
https://github.com/dlang/dmd/pull/15628.

You can find more information about the LoongArch architecture on this
website:
https://loongson.github.io/LoongArch-Documentation/README-EN.html.

--

Changes since the last revision of the patch:

* Corrected copyright years in loongarch-d.cc and loongarch-d.h.
* Removed changes to the tests, the changes have been rolled into the DMD
   changes in:
   
https://github.com/dlang/dmd/pull/15628/commits/eb84b8a2bc86aa751ad6f472422e8abad63ff500
   .
* Removed D_LP32 and D_LP64 bits. Since LoongArch ABIs are somewhat
   complicated, we may introduce the ABI information in the form of target
   traits in the future.

Zixing Liu (1):

  gcc/config.gcc |  1 +
  gcc/config/loongarch/loongarch-d.cc| 77 ++
  gcc/config/loongarch/loongarch-d.h | 26 
  gcc/config/loongarch/t-loongarch   |  4 ++
  libphobos/configure.tgt|  3 +
  libphobos/libdruntime/gcc/sections/elf.d   |  2 +
  libphobos/libdruntime/gcc/unwind/generic.d |  1 +
  7 files changed, 114 insertions(+)
  create mode 100644 gcc/config/loongarch/loongarch-d.cc
  create mode 100644 gcc/config/loongarch/loongarch-d.h

--
2.42.0


[PATCH 0/2] Replace intl/ with out-of-tree GNU gettext

2023-09-25 Thread Arsen Arsenović
Afternoon,

This patch series replaces the old (early 2000s era, AFAICT) libintl
implementation in-tree, which relies on C constructs some compilers
(newer clang, hopefully GCC 14) refuse to compile by default with
out-of-tree gettext, in a manner similar to GMP et al, and adds gettext
to download_prerequisites.

Regstrapped on x86_64-pc-linux-gnu --with-included-gettext and all
languages enabled.  Tested for localization on x86_64-pc-linux-gnu,
x86_64-unknown-freebsd13.2, x86_64-darwin21, i686-darwin9 (thanks,
Iain!).

Example from FreeBSD:

  [arsen@fbsd132 ~/gcc-bld/_pfx/bin]$ LANG=sr_RS.UTF-8 ./gcc
  gcc: кобна грешка: нема улазних датотека
  компиловање прекинуто.
  [arsen@fbsd132 ~/gcc-bld/_pfx/bin]$ ldd ./gcc
  ./gcc:
libiconv.so.2 => /usr/local/lib/libiconv.so.2 (0x258b24264000)
libm.so.5 => /lib/libm.so.5 (0x258b2314b000)
libc.so.7 => /lib/libc.so.7 (0x258b25acc000)
[vdso] (0x7fffe5d0)

OK for trunk (if passing review on the binutils and GDB sides)?

Thanks in advance, have a lovely day.

Arsen Arsenović (2):
  intl: remove, in favor of out-of-tree gettext
  *: add modern gettext

 .gitignore |1 +
 Makefile.def   |   72 +-
 Makefile.in| 1612 +++
 config/gettext-sister.m4   |   35 +-
 config/gettext.m4  |  357 +-
 config/iconv.m4|  313 +-
 config/intlmacosx.m4   |   65 +
 configure  |   44 +-
 configure.ac   |   44 +-
 contrib/download_prerequisites |2 +
 contrib/prerequisites.md5  |1 +
 contrib/prerequisites.sha512   |1 +
 gcc/Makefile.in|8 +-
 gcc/aclocal.m4 |4 +
 gcc/configure  | 2001 +++-
 intl/ChangeLog |  306 --
 intl/Makefile.in   |  264 -
 intl/README|   21 -
 intl/VERSION   |1 -
 intl/aclocal.m4|   33 -
 intl/bindtextdom.c |  374 --
 intl/config.h.in   |  280 --
 intl/config.intl.in|   12 -
 intl/configure | 8288 
 intl/configure.ac  |  108 -
 intl/dcgettext.c   |   59 -
 intl/dcigettext.c  | 1238 -
 intl/dcngettext.c  |   60 -
 intl/dgettext.c|   60 -
 intl/dngettext.c   |   62 -
 intl/eval-plural.h |  114 -
 intl/explodename.c |  192 -
 intl/finddomain.c  |  195 -
 intl/gettext.c |   64 -
 intl/gettextP.h|  224 -
 intl/gmo.h |  148 -
 intl/hash-string.h |   59 -
 intl/intl-compat.c |  151 -
 intl/l10nflist.c   |  453 --
 intl/libgnuintl.h  |  341 --
 intl/loadinfo.h|  156 -
 intl/loadmsgcat.c  | 1322 -
 intl/localcharset.c|  398 --
 intl/localcharset.h|   42 -
 intl/locale.alias  |   78 -
 intl/localealias.c |  419 --
 intl/localename.c  |  772 ---
 intl/log.c |  104 -
 intl/ngettext.c|   68 -
 intl/osdep.c   |   24 -
 intl/plural-config.h   |1 -
 intl/plural-exp.c  |  156 -
 intl/plural-exp.h  |  132 -
 intl/plural.c  | 1540 --
 intl/plural.y  |  434 --
 intl/relocatable.c |  439 --
 intl/relocatable.h |   67 -
 intl/textdomain.c  |  142 -
 libcpp/aclocal.m4  |5 +
 libcpp/configure   | 2139 -
 libstdc++-v3/configure |  727 +--
 61 files changed, 5398 insertions(+), 21434 deletions(-)
 create mode 100644 config/intlmacosx.m4
 delete mode 100644 intl/ChangeLog
 delete mode 100644 intl/Makefile.in
 delete mode 100644 intl/README
 delete mode 100644 intl/VERSION
 delete mode 100644 intl/aclocal.m4
 delete mode 100644 intl/bindtextdom.c
 delete mode 100644 intl/config.h.in
 delete mode 100644 intl/config.intl.in
 delete mode 100755 intl/configure
 delete mode 100644 intl/configure.ac
 delete mode 100644 intl/dcgettext.c
 delete mode 100644 intl/dcigettext.c
 delete mode 100644 intl/dcngettext.c
 delete mode 100644 intl/dgettext.c
 delete mode 100644 intl/dngettext.c
 delete mode 100644 intl/eval-plural.h
 delete mode 100644 intl/explodename.c
 delete mode 100644 intl/finddomain.c
 delete mode 100644 intl/gettext.c
 delete mode 100644 intl/gettextP.h
 delete mode 100644 intl/gmo.h
 delete mode 100644 intl/hash-string.h
 delete mode 100644 intl/intl-compat.c
 delete mode 100644 intl/l10nflist.c
 delete mode 100644 intl/libgnuintl.h
 delete mode 100644 intl/loadinfo.h
 delete mode 100644 intl/loadmsgcat.c
 delete mode 100644 intl/localcharset.c
 delete mode 100644 intl/localcharset.h
 delete mode 100644 intl/locale.alias
 delete mode 100644 intl/localealias.c
 delete mode 100644 

[Bug c++/111592] [11/12/13/14 Regression] ICE on expanding argument pack into variadic constructor

2023-09-25 Thread pinskia at gcc dot gnu.org via Gcc-bugs
https://gcc.gnu.org/bugzilla/show_bug.cgi?id=111592

Andrew Pinski  changed:

   What|Removed |Added

Summary|ICE on expanding argument   |[11/12/13/14 Regression]
   |pack into variadic  |ICE on expanding argument
   |constructor |pack into variadic
   ||constructor
   Last reconfirmed||2023-09-25
   Target Milestone|--- |11.5
  Known to work||5.1.0, 5.5.0
 Ever confirmed|0   |1
 Status|UNCONFIRMED |NEW
  Known to fail||6.1.0, 6.2.0
   Keywords||ice-on-valid-code

--- Comment #1 from Andrew Pinski  ---
Confirmed.

[Bug libstdc++/111588] Provide opt-out of shared_ptr single-threaded optimization

2023-09-25 Thread pinskia at gcc dot gnu.org via Gcc-bugs
https://gcc.gnu.org/bugzilla/show_bug.cgi?id=111588

--- Comment #1 from Andrew Pinski  ---
>for programs that know they are effectively always multithreaded they pay for 
>a runtime branch and .text segment bloat for an optimization that never 
>applies.

The bloat is not much and the overhead for a branch compared to atomics is
still not going to make a dent.


I suspect you are looking into the wrong place for optimizations really.

[Bug middle-end/109967] [11/12/13/14 Regression] Wrong code at -O2 on x86_64-linux-gnu

2023-09-25 Thread xry111 at gcc dot gnu.org via Gcc-bugs
https://gcc.gnu.org/bugzilla/show_bug.cgi?id=109967

Xi Ruoyao  changed:

   What|Removed |Added

 CC||xry111 at gcc dot gnu.org

--- Comment #8 from Xi Ruoyao  ---
(In reply to Shaohua Li from comment #7)
> This test case does not reproduce anymore on the current trunk. Maybe one of
> the recent fixes fixed the underlying issue as well.

But we still need to ensure the fix is backported to 11/12/13.  And there is
still a chance that the issue might be covered up by an unrelated change.

Re: [PATCH] [testsuite] Remove undefined behavior from gcc.dg/tree-ssa/pr44306.c

2023-09-25 Thread Richard Biener



> Am 25.09.2023 um 14:18 schrieb Aldy Hernandez :
> 
> In auditing the DOM code to see what the scoped tables catch that
> ranger doesn't, I've run across this test, which seems to
> have uninitialized reads from both j and present[].
> 
> From the original PR, it looks like this came from a reduction of a
> failing test in SPEC's 464.h264ref.  A google search of the
> CalculateQuant8Param() in the test yields:
> 
> https://github.com/microsoft/test-suite/blob/master/MultiSource/Applications/JM/lencod/q_matrix.c
> 
> Assuming the above source is similar to the original testcase, it looks
> like both "j" and "present" were initialized before use, so our testcase
> just got reduced a bit too far.
> 
> I tried to build the offending commit to see if my adjustments to the
> test still caused it to fail:
> 
> commit e1449456c0a88f5b3122db5452f7e91f5a9535f6 (HEAD -> master)
> Author: Sebastian Pop 
> Date:   Wed May 26 16:46:59 2010 +
> 
>Reorganize the analysis of basic block predication.
> 
> ...but alas it no longer builds with a recent compiler.  Perhaps
> someone has a ./cc1 of that revision around to verify?
> 
> OK?

Ok


> gcc/testsuite/ChangeLog:
> 
>* gcc.dg/tree-ssa/pr44306.c: Remove undefined behavior.
> ---
> gcc/testsuite/gcc.dg/tree-ssa/pr44306.c | 6 +++---
> 1 file changed, 3 insertions(+), 3 deletions(-)
> 
> diff --git a/gcc/testsuite/gcc.dg/tree-ssa/pr44306.c 
> b/gcc/testsuite/gcc.dg/tree-ssa/pr44306.c
> index 1ea04ce3a98..d322fe048b5 100644
> --- a/gcc/testsuite/gcc.dg/tree-ssa/pr44306.c
> +++ b/gcc/testsuite/gcc.dg/tree-ssa/pr44306.c
> @@ -8,10 +8,10 @@ int LevelScale8x8Luma_Inter[6][8][8];
> int InvLevelScale8x8Luma_Intra[6][8][8];
> int InvLevelScale8x8Luma_Inter[6][8][8];
> short UseDefaultScalingMatrix8x8Flag[2];
> -void CalculateQuant8Param()
> +int present[2];
> +void CalculateQuant8Param(int j)
> {
> - int i, j, k, temp;
> - int present[2];
> + int i, k, temp;
>  for(k=0; j<8; j++)
>for(i=0; i<8; i++)
>  {
> -- 
> 2.41.0
> 


[Bug target/111591] ppc64be: miscompilation with -mstrict-align / -O3

2023-09-25 Thread malat at debian dot org via Gcc-bugs
https://gcc.gnu.org/bugzilla/show_bug.cgi?id=111591

Mathieu Malaterre  changed:

   What|Removed |Added

  Known to work||11.4.0

--- Comment #5 from Mathieu Malaterre  ---
(In reply to Mathieu Malaterre from comment #3)
> I can make the upstream code fails using g++-11 / g++-12 version
> (Debian/sid).

Nevermind, it seems g++ 11.4.0 can handle the original test case.

[Bug target/111591] ppc64be: miscompilation with -mstrict-align / -O3

2023-09-25 Thread malat at debian dot org via Gcc-bugs
https://gcc.gnu.org/bugzilla/show_bug.cgi?id=111591

Mathieu Malaterre  changed:

   What|Removed |Added

  Known to work||10.5.0

--- Comment #4 from Mathieu Malaterre  ---
g++-10 seems to handle -O3/-mstrict-align

[Bug middle-end/109967] [11/12/13/14 Regression] Wrong code at -O2 on x86_64-linux-gnu

2023-09-25 Thread shaohua.li at inf dot ethz.ch via Gcc-bugs
https://gcc.gnu.org/bugzilla/show_bug.cgi?id=109967

--- Comment #7 from Shaohua Li  ---
This test case does not reproduce anymore on the current trunk. Maybe one of
the recent fixes fixed the underlying issue as well.

[Bug modula2/111530] Unable to build GM2 standard library on BSD due to a `getopt_long_only' GNU extension dependency

2023-09-25 Thread gaius at gcc dot gnu.org via Gcc-bugs
https://gcc.gnu.org/bugzilla/show_bug.cgi?id=111530

Gaius Mulley  changed:

   What|Removed |Added

 Status|UNCONFIRMED |ASSIGNED
 Ever confirmed|0   |1
   Last reconfirmed||2023-09-25

--- Comment #1 from Gaius Mulley  ---
Many thanks for the bug report and hints on how to fix it.

[Bug c++/111592] New: ICE on expanding argument pack into variadic constructor

2023-09-25 Thread yankel-pro at scialom dot org via Gcc-bugs
https://gcc.gnu.org/bugzilla/show_bug.cgi?id=111592

Bug ID: 111592
   Summary: ICE on expanding argument pack into variadic
constructor
   Product: gcc
   Version: 14.0
Status: UNCONFIRMED
  Severity: normal
  Priority: P3
 Component: c++
  Assignee: unassigned at gcc dot gnu.org
  Reporter: yankel-pro at scialom dot org
  Target Milestone: ---

GCC raises an Internal Compiler Error in c_common_parse_file() when
(indirectly, see source) expanding an argument pack into a variadic
constructor.

$ g++ --version
g++
(Compiler-Explorer-Build-gcc-1eb80f78f114f6582c349f75e08b361a0a582091-binutils-2.40)
14.0.0 20230925 (experimental)

$ cat source
struct ignore
{ ignore(...) {} };

template
void
InternalCompilerError(Args... args)
{ ignore{ ignore(args) ... }; }

int
main()
{ InternalCompilerError(0, 0); }

$ g++ -c source
: In instantiation of 'void InternalCompilerError(Args ...) [with Args
= {int, int}]':
:11:24:   required from here
:7:3: internal compiler error: in finish_expr_stmt, at
cp/semantics.cc:910
7 | { ignore{ ignore(args) ... }; }
  |   ^~
0x251c8ee internal_error(char const*, ...)
???:0
0xae8dda fancy_abort(char const*, int, char const*)
???:0
0xcfa8f8 instantiate_decl(tree_node*, bool, bool)
???:0
0xd2dcbb instantiate_pending_templates(int)
???:0
0xbded50 c_parse_final_cleanups()
???:0
0xe149d8 c_common_parse_file()
???:0

Found on Compiler Explorer <https://godbolt.org/z/M788xE44z>.

[Bug target/111591] ppc64be: miscompilation with -mstrict-align / -O3

2023-09-25 Thread malat at debian dot org via Gcc-bugs
https://gcc.gnu.org/bugzilla/show_bug.cgi?id=111591

--- Comment #3 from Mathieu Malaterre  ---
I can make the upstream code fail using g++-11 / g++-12 version (Debian/sid).

[PATCH] [testsuite] Remove undefined behavior from gcc.dg/tree-ssa/pr44306.c

2023-09-25 Thread Aldy Hernandez
In auditing the DOM code to see what the scoped tables catch that
ranger doesn't, I've run across this test, which seems to
have uninitialized reads from both j and present[].

From the original PR, it looks like this came from a reduction of a
failing test in SPEC's 464.h264ref.  A google search of the
CalculateQuant8Param() in the test yields:

https://github.com/microsoft/test-suite/blob/master/MultiSource/Applications/JM/lencod/q_matrix.c

Assuming the above source is similar to the original testcase, it looks
like both "j" and "present" were initialized before use, so our testcase
just got reduced a bit too far.

I tried to build the offending commit to see if my adjustments to the
test still caused it to fail:

commit e1449456c0a88f5b3122db5452f7e91f5a9535f6 (HEAD -> master)
Author: Sebastian Pop 
Date:   Wed May 26 16:46:59 2010 +

Reorganize the analysis of basic block predication.

...but alas it no longer builds with a recent compiler.  Perhaps
someone has a ./cc1 of that revision around to verify?

OK?

gcc/testsuite/ChangeLog:

* gcc.dg/tree-ssa/pr44306.c: Remove undefined behavior.
---
 gcc/testsuite/gcc.dg/tree-ssa/pr44306.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/gcc/testsuite/gcc.dg/tree-ssa/pr44306.c 
b/gcc/testsuite/gcc.dg/tree-ssa/pr44306.c
index 1ea04ce3a98..d322fe048b5 100644
--- a/gcc/testsuite/gcc.dg/tree-ssa/pr44306.c
+++ b/gcc/testsuite/gcc.dg/tree-ssa/pr44306.c
@@ -8,10 +8,10 @@ int LevelScale8x8Luma_Inter[6][8][8];
 int InvLevelScale8x8Luma_Intra[6][8][8];
 int InvLevelScale8x8Luma_Inter[6][8][8];
 short UseDefaultScalingMatrix8x8Flag[2];
-void CalculateQuant8Param()
+int present[2];
+void CalculateQuant8Param(int j)
 {
- int i, j, k, temp;
- int present[2];
+ int i, k, temp;
  for(k=0; j<8; j++)
for(i=0; i<8; i++)
  {
-- 
2.41.0



Re: [PATCH] Always generate else-block in gimplify

2023-09-25 Thread Jørgen Kvalsvik

On 25/09/2023 19:51, Richard Biener wrote:

On Sun, Sep 24, 2023 at 3:09 PM Jørgen Kvalsvik  wrote:


This is a request for feedback and a proof-of-concept, not something I
intend to merge as-is.  It would be nice if gcc, maybe just under some
circumstances, always generated an else-block for coverage purposes.

I have been working on the MC/DC support by CFG analysis for a while
https://gcc.gnu.org/pipermail/gcc-patches/2023-June/621449.html and have
ironed out a lot of problems. The last problem I know about, which is
impossible to actually fix right now, is the "fusing" of nested ifs.
Here is an example:

 if (a) if (b) if (c) { ... } // 3 conditions, 6 outcomes
 if (a && b && c) { ... } // 3 conditions, 6 outcomes

These form isomorphic CFGs which means there is no way for my algorithm
to distinguish them. This is sort-of acceptable since the coverage
measurements more accurately measure the semantics (and not the syntax),
but this also happens when there is code in-between the nesting:

 if (a) // measures to 2 conditions, 4 outcomes
 {
 a += b * 10;
 b -= a + 2;
 if (b)
 {
 ...
 }
 }

You would expect this to be measured as:

 if (a) // 1 condition, 2 outcomes
 {
 a += b * 10;
 b -= a + 2;
 if (b) // 1 condition, 2 outcomes
 {
 ...
 }
 }

The source of the problem is the missing (or empty) else block, as the
algorithm uses the outcome (then/else) edges to determine the limits of
expressions. If, however, the else blocks are generated, the conditions
are counted as you would expect.

So I have a few questions:

1. Is something like this even acceptable? The semantics of the program
should not change, assuming the else-block only exists but is without
significant behavior. It will only be generated if there is no
explicit else in source.
2. Should this only be generated when necessary (e.g. under condition
coverage? No optimization?)
3. I used a simple int-init { int __mcdc_barrier = 0; } but there might
be better contents for the block that does not add anything
operationally. I am not very familiar with this part of gcc and would
like to see something better. Any suggestions?


Can you in theory handle this by splitting the 'else' edge before
coverage instrumentation rather than using a stmt inserted during
gimplification?
I don't think so. By the time we get to the instrumentation we do not 
know if the false edge is to a proper else. The simplest example is 
really:


if (a) if (b) if (c) { ... }
if (a && b && c) { ... }

And the dot representation for both graphs:

digraph {
subgraph cluster_ifs {
label = "ifs";
A0 -> A2 [label="fallthru "];
A2 -> A3 [label="true "];
A2 -> A6 [label="false "];
A3 -> A4 [label="true "];
A3 -> A6 [label="false "];
A6 -> A7 [label="fallthru "];
A7 -> A1 [label=""];
A4 -> A5 [label="true "];
A4 -> A6 [label="false "];
A5 -> A7 [label="fallthru "];
}

subgraph cluster_and {
label = "and";
B0 -> B2 [label="fallthru "];
B2 -> B3 [label="true "];
B2 -> B6 [label="false "];
B3 -> B4 [label="true "];
B3 -> B6 [label="false "];
B6 -> B7 [label="fallthru "];
B7 -> B1 [label=""];
B4 -> B5 [label="true "];
B4 -> B6 [label="false "];
B5 -> B7 [label="fallthru "];
}
} 




The CFGs are identical, so there is no way to recover the else block at 
this stage.


Now, it might be possible to do or recover this in other phases than the 
gimplify, and I am very open for suggestions to where and how.


PS. my patch (maybe unsurprisingly) breaks a bunch of tests, so it is 
obviously not fit as-is.


Thanks,
Jørgen




---
  gcc/gimplify.cc | 8 
  1 file changed, 8 insertions(+)

diff --git a/gcc/gimplify.cc b/gcc/gimplify.cc
index ade6e335da7..43af38df742 100644
--- a/gcc/gimplify.cc
+++ b/gcc/gimplify.cc
@@ -4370,6 +4370,14 @@ gimplify_cond_expr (tree *expr_p, gimple_seq *pre_p, 
fallback_t fallback)
enum tree_code pred_code;
gimple_seq seq = NULL;

+  if (TREE_OPERAND (expr, 2) == NULL_TREE)
+  {
+  tree var = build_decl (UNKNOWN_LOCATION, VAR_DECL, get_identifier
+   ("__mcdc_barrier"), integer_type_node);
+  tree val = build_int_cst (integer_type_node, 0);
+  TREE_OPERAND (expr, 2) = build2 (INIT_EXPR, TREE_TYPE (var), var, val);
+  }
+
/* If this COND_EXPR has a value, copy the values into a temporary within
   the arms.  */
if (!VOID_TYPE_P (type))
--
2.30.2





[Bug target/111591] ppc64be: miscompilation with -mstrict-align / -O3

2023-09-25 Thread rguenth at gcc dot gnu.org via Gcc-bugs
https://gcc.gnu.org/bugzilla/show_bug.cgi?id=111591

Richard Biener  changed:

   What|Removed |Added

   Keywords||needs-bisection

--- Comment #2 from Richard Biener  ---
does it work with older GCC?

[Bug target/111591] ppc64be: miscompilation with -mstrict-align / -O3

2023-09-25 Thread malat at debian dot org via Gcc-bugs
https://gcc.gnu.org/bugzilla/show_bug.cgi?id=111591

--- Comment #1 from Mathieu Malaterre  ---
Created attachment 55989
  --> https://gcc.gnu.org/bugzilla/attachment.cgi?id=55989=edit
cvise reduced test case

% g++ -std=c++11 -o works -DHWY_COMPILE_ONLY_EMU128 -DHWY_BROKEN_EMU128=0
-maltivec -mcpu=power8  -g -O3 alt.cc  -Wall -Wextra -Werror -Wfatal-errors

% g++ -std=c++11 -o fails -DHWY_COMPILE_ONLY_EMU128 -DHWY_BROKEN_EMU128=0
-maltivec -mcpu=power8 -mstrict-align -g -O3 alt.cc  -Wall -Wextra -Werror
-Wfatal-errors

should give:

% ./works
-> success

but:

% ./fails 
fails: alt.cc:395: void hwy::detail::AssertArrayEqual(const TypeInfo&, const
void*, const void*, size_t, const char*, const char*, int): Assertion
`memcmp(a, b, c * ti.sizeof_t) == 0' failed.
zsh: abort  ./fails

[Bug target/111591] New: ppc64be: miscompilation with -mstrict-align / -O3

2023-09-25 Thread malat at debian dot org via Gcc-bugs
https://gcc.gnu.org/bugzilla/show_bug.cgi?id=111591

Bug ID: 111591
   Summary: ppc64be: miscompilation with -mstrict-align / -O3
   Product: gcc
   Version: 13.2.0
Status: UNCONFIRMED
  Severity: normal
  Priority: P3
 Component: target
  Assignee: unassigned at gcc dot gnu.org
  Reporter: malat at debian dot org
  Target Milestone: ---

I am seeing a regression in highway unit test on ppc64be when using
-mstrict-align / -O3

454/530 Test #454:
HwyWidenMulTestGroup/HwyWidenMulTest.TestAllSatWidenMulPairwiseAdd/EMU128  #
GetParam() = 2305843009213693952 .Subprocess aborted***Exception:  
0.00 sec
Running main() from ./googletest/src/gtest_main.cc
Note: Google Test filter =
HwyWidenMulTestGroup/HwyWidenMulTest.TestAllSatWidenMulPairwiseAdd/EMU128
[==] Running 1 test from 1 test suite.
[--] Global test environment set-up.
[--] 1 test from HwyWidenMulTestGroup/HwyWidenMulTest
[ RUN  ]
HwyWidenMulTestGroup/HwyWidenMulTest.TestAllSatWidenMulPairwiseAdd/EMU128


i16x4 expect [0+ ->]:
  0x7FFF,0x7FFF,0x7FFF,0x7FFF,
i16x4 actual [0+ ->]:
  0x7FFF,0x01A5,0x7FFF,0x7FFF,
Abort at ./hwy/tests/widen_mul_test.cc:205: EMU128, i16x4 lane 1 mismatch:
expected '0x7FFF', got '0x01A5'.



ref:
https://buildd.debian.org/status/fetch.php?pkg=highway=ppc64=1.0.8%7Egit20230918.1e3a3d7-4=1695113957=0

[Bug c/111590] New: RISC-V: Multiple ICE in gfortran regression with 'V' Extension enabled

2023-09-25 Thread juzhe.zhong at rivai dot ai via Gcc-bugs
https://gcc.gnu.org/bugzilla/show_bug.cgi?id=111590

Bug ID: 111590
   Summary: RISC-V: Multiple ICE in gfortran regression with 'V'
Extension enabled
   Product: gcc
   Version: 14.0
Status: UNCONFIRMED
  Severity: normal
  Priority: P3
 Component: c
  Assignee: unassigned at gcc dot gnu.org
  Reporter: juzhe.zhong at rivai dot ai
  Target Milestone: ---

FAIL: gfortran.dg/assumed_rank_24.f90   -O2  (internal compiler error: in
smallest_mode_for_size, at stor-layout.cc:356)
FAIL: gfortran.dg/assumed_rank_24.f90   -O2  (test for excess errors)
FAIL: gfortran.dg/assumed_rank_24.f90   -O3 -fomit-frame-pointer -funroll-loops
-fpeel-loops -ftracer -finline-functions  (internal compiler error: in
smallest_mode_for_size, at stor-layout.cc:356)
FAIL: gfortran.dg/assumed_rank_24.f90   -O3 -fomit-frame-pointer -funroll-loops
-fpeel-loops -ftracer -finline-functions  (test for excess errors)
FAIL: gfortran.dg/assumed_rank_24.f90   -O3 -g  (internal compiler error: in
smallest_mode_for_size, at stor-layout.cc:356)
FAIL: gfortran.dg/assumed_rank_24.f90   -O3 -g  (test for excess errors)
FAIL: gfortran.dg/class_to_type_1.f03   -O2  (internal compiler error: in
smallest_mode_for_size, at stor-layout.cc:356)
FAIL: gfortran.dg/class_to_type_1.f03   -O2  (test for excess errors)
FAIL: gfortran.dg/class_to_type_1.f03   -O3 -fomit-frame-pointer -funroll-loops
-fpeel-loops -ftracer -finline-functions  (internal compiler error: in
smallest_mode_for_size, at stor-layout.cc:356)
FAIL: gfortran.dg/class_to_type_1.f03   -O3 -fomit-frame-pointer -funroll-loops
-fpeel-loops -ftracer -finline-functions  (test for excess errors)
FAIL: gfortran.dg/class_to_type_1.f03   -O3 -g  (internal compiler error: in
smallest_mode_for_size, at stor-layout.cc:356)
FAIL: gfortran.dg/class_to_type_1.f03   -O3 -g  (test for excess errors)
FAIL: gfortran.dg/class_array_4.f03   -O3 -fomit-frame-pointer -funroll-loops
-fpeel-loops -ftracer -finline-functions  execution test
FAIL: gfortran.dg/cshift_bounds_4.f90   -O2  (internal compiler error: in
smallest_mode_for_size, at stor-layout.cc:356)
FAIL: gfortran.dg/cshift_bounds_4.f90   -O2  (test for excess errors)
FAIL: gfortran.dg/cshift_bounds_4.f90   -O3 -fomit-frame-pointer -funroll-loops
-fpeel-loops -ftracer -finline-functions  (internal compiler error: in
smallest_mode_for_size, at stor-layout.cc:356)
FAIL: gfortran.dg/cshift_bounds_4.f90   -O3 -fomit-frame-pointer -funroll-loops
-fpeel-loops -ftracer -finline-functions  (test for excess errors)
FAIL: gfortran.dg/cshift_bounds_4.f90   -O3 -g  (internal compiler error: in
smallest_mode_for_size, at stor-layout.cc:356)
FAIL: gfortran.dg/cshift_bounds_4.f90   -O3 -g  (test for excess errors)

One of the cases:

program main
  integer, dimension(:,:), allocatable :: a, b
  integer, dimension(:), allocatable :: sh
  allocate (a(2,2))
  allocate (b(2,2))
  allocate (sh(3))
  a = 1
  b = cshift(a,sh)
end program main

[Bug target/109166] Built-in __atomic_test_and_set does not seem to be atomic on ARMv4T

2023-09-25 Thread rearnsha at gcc dot gnu.org via Gcc-bugs
https://gcc.gnu.org/bugzilla/show_bug.cgi?id=109166

Richard Earnshaw  changed:

   What|Removed |Added

 Resolution|--- |WONTFIX
 Status|NEW |RESOLVED

--- Comment #8 from Richard Earnshaw  ---
I'm going to close this as WONTFIX.

There are several reasons for this.

There's no SWPH operation, so it's impossible to generalize atomic operations
for all basic data types.  It's not possible to synthesize a 16-bit atomic type
with either SWP or SWPB.

There's no support in Thumb state for SWP[B].

The instruction was removed in later versions of the architecture, which makes
code non-portable.

Finally, Armv4, which dates to around 1995, is essentially in maintenance only
mode and this is really a new feature request.  In fact, I don't think we'd
really want to add new features for anything before Armv7 these days (even that
is more than 10 years old).

Re: [PATCH] aarch64: Fine-grained ldp and stp policies with test-cases.

2023-09-25 Thread Manos Anagnostakis
Thanks for the feedback, Kyrill.

I'll resend it as a V3. I believe you have also checked V2 containing just
a small test adjustment.

Manos Anagnostakis | Compiler Engineer
| E: manos.anagnosta...@vrull.eu

VRULL GmbH | Beatrixgasse 32 1030 Vienna | W: www.vrull.eu

Στις Δευ 25 Σεπ 2023, 13:59 ο χρήστης Kyrylo Tkachov 
έγραψε:

> Hi Manos,
>
> Apologies for the long delay.
>
> > -Original Message-
> > From: Manos Anagnostakis 
> > Sent: Friday, August 18, 2023 8:50 AM
> > To: gcc-patches@gcc.gnu.org
> > Cc: Kyrylo Tkachov ; Philipp Tomsich
> > ; Manos Anagnostakis
> > 
> > Subject: [PATCH] aarch64: Fine-grained ldp and stp policies with
> test-cases.
> >
> > This patch implements the following TODO in gcc/config/aarch64/aarch64.cc
> > to provide the requested behaviour for handling ldp and stp:
> >
> >   /* Allow the tuning structure to disable LDP instruction formation
> >  from combining instructions (e.g., in peephole2).
> >  TODO: Implement fine-grained tuning control for LDP and STP:
> >1. control policies for load and store separately;
> >2. support the following policies:
> >   - default (use what is in the tuning structure)
> >   - always
> >   - never
> >   - aligned (only if the compiler can prove that the
> > load will be aligned to 2 * element_size)  */
> >
> > It provides two new and concrete command-line options -mldp-policy and -
> > mstp-policy
> > to give the ability to control load and store policies separately as
> > stated in part 1 of the TODO.
> >
> > The accepted values for both options are:
> > - default: Use the ldp/stp policy defined in the corresponding tuning
> >   structure.
> > - always: Emit ldp/stp regardless of alignment.
> > - never: Do not emit ldp/stp.
> > - aligned: In order to emit ldp/stp, first check if the load/store will
> >   be aligned to 2 * element_size.
> >
> > gcc/ChangeLog:
> > * config/aarch64/aarch64-protos.h (struct tune_params): Add
> >   appropriate enums for the policies.
> > * config/aarch64/aarch64-tuning-flags.def
> >   (AARCH64_EXTRA_TUNING_OPTION): Remove superseded tuning
> >   options.
> > * config/aarch64/aarch64.cc (aarch64_parse_ldp_policy): New
> >   function to parse ldp-policy option.
> > (aarch64_parse_stp_policy): New function to parse stp-policy
> option.
> > (aarch64_override_options_internal): Call parsing functions.
> > (aarch64_operands_ok_for_ldpstp): Add option-value check and
> >   alignment check and remove superseded ones
> > (aarch64_operands_adjust_ok_for_ldpstp): Add option-value check
> and
> >   alignment check and remove superseded ones.
> > * config/aarch64/aarch64.opt: Add options.
> >
> > gcc/testsuite/ChangeLog:
> > * gcc.target/aarch64/ldp_aligned.c: New test.
> > * gcc.target/aarch64/ldp_always.c: New test.
> > * gcc.target/aarch64/ldp_never.c: New test.
> > * gcc.target/aarch64/stp_aligned.c: New test.
> > * gcc.target/aarch64/stp_always.c: New test.
> > * gcc.target/aarch64/stp_never.c: New test.
> >
> > Signed-off-by: Manos Anagnostakis 
> > ---
> >
> >  gcc/config/aarch64/aarch64-protos.h   |  24 ++
> >  gcc/config/aarch64/aarch64-tuning-flags.def   |   8 -
> >  gcc/config/aarch64/aarch64.cc | 229 ++
> >  gcc/config/aarch64/aarch64.opt|   8 +
> >  .../gcc.target/aarch64/ldp_aligned.c  |  64 +
> >  gcc/testsuite/gcc.target/aarch64/ldp_always.c |  64 +
> >  gcc/testsuite/gcc.target/aarch64/ldp_never.c  |  64 +
> >  .../gcc.target/aarch64/stp_aligned.c  |  60 +
> >  gcc/testsuite/gcc.target/aarch64/stp_always.c |  60 +
> >  gcc/testsuite/gcc.target/aarch64/stp_never.c  |  60 +
> >  10 files changed, 580 insertions(+), 61 deletions(-)
> >  create mode 100644 gcc/testsuite/gcc.target/aarch64/ldp_aligned.c
> >  create mode 100644 gcc/testsuite/gcc.target/aarch64/ldp_always.c
> >  create mode 100644 gcc/testsuite/gcc.target/aarch64/ldp_never.c
> >  create mode 100644 gcc/testsuite/gcc.target/aarch64/stp_aligned.c
> >  create mode 100644 gcc/testsuite/gcc.target/aarch64/stp_always.c
> >  create mode 100644 gcc/testsuite/gcc.target/aarch64/stp_never.c
> >
> > diff --git a/gcc/config/aarch64/aarch64-protos.h
> > b/gcc/config/aarch64/aarch64-protos.h
> > index 70303d6fd95..be1d73490ed 100644
> > --- a/gcc/config/aarch64/aarch64-protos.h
> > +++ b/gcc/config/aarch64/aarch64-protos.h
> > @@ -568,6 +568,30 @@ struct tune_params
> >/* Place prefetch struct pointer at the end to enable type checking
> >   errors when tune_params misses elements (e.g., from erroneous
> merges).
> > */
> >const struct cpu_prefetch_tune *prefetch;
> > +/* An enum specifying how to handle load pairs using a fine-grained
> policy:
> > +   - LDP_POLICY_ALIGNED: Emit ldp if the source pointer is aligned
> > 

Re: On a Plane During Tomorrow's RISC-V GCC Patchwork Meeting

2023-09-25 Thread Jeff Law




On 9/25/23 04:18, Palmer Dabbelt wrote:

On Mon, 18 Sep 2023 15:13:04 PDT (-0700), Vineet Gupta wrote:

On 9/18/23 09:11, Jeff Law wrote:



On 9/18/23 09:24, Kito Cheng wrote:

I may miss that one too; I am not on a plane yet, but I need to go to bed
earlier because my flight is early the next morning...

I'm unavailable as well, though I don't get on a plane until Wednesday
evening.


This is one meeting I really look forward to :-)
I'll be on a plane Wednesday evening as  well - see you all soon.


Looks like I'll also be traveling for this week's meeting, so I'll have 
to skip again.
I ran into Vineet at Heathrow and we concluded that we were going to 
skip tomorrow :-)


jeff


RE: [PATCH] aarch64: Fine-grained ldp and stp policies with test-cases.

2023-09-25 Thread Kyrylo Tkachov
Hi Manos,

Apologies for the long delay.

> -Original Message-
> From: Manos Anagnostakis 
> Sent: Friday, August 18, 2023 8:50 AM
> To: gcc-patches@gcc.gnu.org
> Cc: Kyrylo Tkachov ; Philipp Tomsich
> ; Manos Anagnostakis
> 
> Subject: [PATCH] aarch64: Fine-grained ldp and stp policies with test-cases.
> 
> This patch implements the following TODO in gcc/config/aarch64/aarch64.cc
> to provide the requested behaviour for handling ldp and stp:
> 
>   /* Allow the tuning structure to disable LDP instruction formation
>  from combining instructions (e.g., in peephole2).
>  TODO: Implement fine-grained tuning control for LDP and STP:
>1. control policies for load and store separately;
>2. support the following policies:
>   - default (use what is in the tuning structure)
>   - always
>   - never
>   - aligned (only if the compiler can prove that the
> load will be aligned to 2 * element_size)  */
> 
> It provides two new and concrete command-line options -mldp-policy and -
> mstp-policy
> to give the ability to control load and store policies separately as
> stated in part 1 of the TODO.
> 
> The accepted values for both options are:
> - default: Use the ldp/stp policy defined in the corresponding tuning
>   structure.
> - always: Emit ldp/stp regardless of alignment.
> - never: Do not emit ldp/stp.
> - aligned: In order to emit ldp/stp, first check if the load/store will
>   be aligned to 2 * element_size.
> 
> gcc/ChangeLog:
> * config/aarch64/aarch64-protos.h (struct tune_params): Add
>   appropriate enums for the policies.
> * config/aarch64/aarch64-tuning-flags.def
>   (AARCH64_EXTRA_TUNING_OPTION): Remove superseded tuning
>   options.
> * config/aarch64/aarch64.cc (aarch64_parse_ldp_policy): New
>   function to parse ldp-policy option.
> (aarch64_parse_stp_policy): New function to parse stp-policy option.
> (aarch64_override_options_internal): Call parsing functions.
> (aarch64_operands_ok_for_ldpstp): Add option-value check and
>   alignment check and remove superseded ones
> (aarch64_operands_adjust_ok_for_ldpstp): Add option-value check and
>   alignment check and remove superseded ones.
> * config/aarch64/aarch64.opt: Add options.
> 
> gcc/testsuite/ChangeLog:
> * gcc.target/aarch64/ldp_aligned.c: New test.
> * gcc.target/aarch64/ldp_always.c: New test.
> * gcc.target/aarch64/ldp_never.c: New test.
> * gcc.target/aarch64/stp_aligned.c: New test.
> * gcc.target/aarch64/stp_always.c: New test.
> * gcc.target/aarch64/stp_never.c: New test.
> 
> Signed-off-by: Manos Anagnostakis 
> ---
> 
>  gcc/config/aarch64/aarch64-protos.h   |  24 ++
>  gcc/config/aarch64/aarch64-tuning-flags.def   |   8 -
>  gcc/config/aarch64/aarch64.cc | 229 ++
>  gcc/config/aarch64/aarch64.opt|   8 +
>  .../gcc.target/aarch64/ldp_aligned.c  |  64 +
>  gcc/testsuite/gcc.target/aarch64/ldp_always.c |  64 +
>  gcc/testsuite/gcc.target/aarch64/ldp_never.c  |  64 +
>  .../gcc.target/aarch64/stp_aligned.c  |  60 +
>  gcc/testsuite/gcc.target/aarch64/stp_always.c |  60 +
>  gcc/testsuite/gcc.target/aarch64/stp_never.c  |  60 +
>  10 files changed, 580 insertions(+), 61 deletions(-)
>  create mode 100644 gcc/testsuite/gcc.target/aarch64/ldp_aligned.c
>  create mode 100644 gcc/testsuite/gcc.target/aarch64/ldp_always.c
>  create mode 100644 gcc/testsuite/gcc.target/aarch64/ldp_never.c
>  create mode 100644 gcc/testsuite/gcc.target/aarch64/stp_aligned.c
>  create mode 100644 gcc/testsuite/gcc.target/aarch64/stp_always.c
>  create mode 100644 gcc/testsuite/gcc.target/aarch64/stp_never.c
> 
> diff --git a/gcc/config/aarch64/aarch64-protos.h
> b/gcc/config/aarch64/aarch64-protos.h
> index 70303d6fd95..be1d73490ed 100644
> --- a/gcc/config/aarch64/aarch64-protos.h
> +++ b/gcc/config/aarch64/aarch64-protos.h
> @@ -568,6 +568,30 @@ struct tune_params
>/* Place prefetch struct pointer at the end to enable type checking
>   errors when tune_params misses elements (e.g., from erroneous merges).
> */
>const struct cpu_prefetch_tune *prefetch;
> +/* An enum specifying how to handle load pairs using a fine-grained policy:
> +   - LDP_POLICY_ALIGNED: Emit ldp if the source pointer is aligned
> +   to at least double the alignment of the type.
> +   - LDP_POLICY_ALWAYS: Emit ldp regardless of alignment.
> +   - LDP_POLICY_NEVER: Do not emit ldp.  */
> +
> +  enum aarch64_ldp_policy_model
> +  {
> +LDP_POLICY_ALIGNED,
> +LDP_POLICY_ALWAYS,
> +LDP_POLICY_NEVER
> +  } ldp_policy_model;
> +/* An enum specifying how to handle store pairs using a fine-grained policy:
> +   - STP_POLICY_ALIGNED: Emit stp if the source pointer is aligned
> +   to at least double the alignment of the type.
> +   - 

Re: PING^5: [PATCH] rtl-optimization/110939 Really fix narrow comparison of memory and constant

2023-09-25 Thread Eric Botcazou
> This is why I got a bit uncertain and hoped to get some feedback whether
> my intuition is correct or not.  Meanwhile I also found a comment in
> the internals book at "14.7 Constant Expression Types" where we have:
> 
>"Constants generated for modes with fewer bits than in HOST_WIDE_INT
> must be sign extended to full width (e.g., with gen_int_mode).
> [...]
> Note however that values are neither inherently signed nor
> inherently unsigned; where necessary, signedness is determined by
> the rtl operation instead."
> 
> At least this and the assert statement document that the normal form of
> a CONST_INT is kind of special w.r.t. unsigned integers.  Is there
> anyone who can shed some light on _why_ such a normal form was chosen?

In RTL integral values have no sign, they just represent a given pattern of 
bits so, in order to have a 1-to-1 mapping, you need to choose a canonical 
form.  The signed form is probably more natural and, since CONST_INTs have no 
mode, the same objects are used for e.g. QImode and HImode, which means that 
you need to sign-extend the bit pattern.

-- 
Eric Botcazou




Re: [PATCH] Always generate else-block in gimplify

2023-09-25 Thread Richard Biener
On Sun, Sep 24, 2023 at 3:09 PM Jørgen Kvalsvik  wrote:
>
> This is a request for feedback and a proof-of-concept, not something I
> intend to merge as-is.  It would be nice if gcc, maybe just under some
> circumstances, always generated an else-block for coverage purposes.
>
> I have been working on MC/DC support by CFG analysis for a while
> https://gcc.gnu.org/pipermail/gcc-patches/2023-June/621449.html and have
> ironed out a lot of problems. The last problem I know about, which is
> impossible to actually fix right now, is the "fusing" of nested ifs.
> Here is an example:
>
> if (a) if (b) if (c) { ... } // 3 conditions, 6 outcomes
> if (a && b && c) { ... } // 3 conditions, 6 outcomes
>
> These form isomorphic CFGs which means there is no way for my algorithm
> to distinguish them. This is sort-of acceptable since the coverage
> measurements more accurately measure the semantics (and not the syntax),
> but this also happens when there is code in-between the nesting:
>
> if (a) // measures to 2 conditions, 4 outcomes
> {
> a += b * 10;
> b -= a + 2;
> if (b)
> {
> ...
> }
> }
>
> You would expect this to be measured as:
>
> if (a) // 1 condition, 2 outcomes
> {
> a += b * 10;
> b -= a + 2;
> if (b) // 1 condition, 2 outcomes
> {
> ...
> }
> }
>
> The source of the problem is the missing (or empty) else block, as the
> algorithm uses the outcome (then/else) edges to determine the limits of
> expressions. If, however, the else blocks are generated, the conditions
> are counted as you would expect.
>
> So I have a few questions:
>
> 1. Is something like this even acceptable? The semantics of the program
>should not change, assuming the else-block only exists but is without
>significant behavior. It will only be generated if there is no
>explicit else in source.
> 2. Should this only be generated when necessary (e.g. under condition
>coverage? No optimization?)
> 3. I used a simple int-init { int __mcdc_barrier = 0; } but there might
>be better contents for the block that does not add anything
>operationally. I am not very familiar with this part of gcc and would
>    like to see something better. Any suggestions?

Can you in theory handle this by splitting the 'else' edge before
coverage instrumentation rather than using a stmt inserted during
gimplification?

> ---
>  gcc/gimplify.cc | 8 
>  1 file changed, 8 insertions(+)
>
> diff --git a/gcc/gimplify.cc b/gcc/gimplify.cc
> index ade6e335da7..43af38df742 100644
> --- a/gcc/gimplify.cc
> +++ b/gcc/gimplify.cc
> @@ -4370,6 +4370,14 @@ gimplify_cond_expr (tree *expr_p, gimple_seq *pre_p, 
> fallback_t fallback)
>enum tree_code pred_code;
>gimple_seq seq = NULL;
>
> +  if (TREE_OPERAND (expr, 2) == NULL_TREE)
> +  {
> +  tree var = build_decl (UNKNOWN_LOCATION, VAR_DECL, get_identifier
> +   ("__mcdc_barrier"), integer_type_node);
> +  tree val = build_int_cst (integer_type_node, 0);
> +  TREE_OPERAND (expr, 2) = build2 (INIT_EXPR, TREE_TYPE (var), var, val);
> +  }
> +
>/* If this COND_EXPR has a value, copy the values into a temporary within
>   the arms.  */
>if (!VOID_TYPE_P (type))
> --
> 2.30.2
>


[Bug target/111500] [arm-none-eabi-gcc] / suboptimal optimization / subs followed by cmp (et alii)

2023-09-25 Thread cptarse-luke at yahoo dot com via Gcc-bugs
https://gcc.gnu.org/bugzilla/show_bug.cgi?id=111500

Luke  changed:

   What|Removed |Added

 Status|WAITING |RESOLVED
 Resolution|--- |DUPLICATE

--- Comment #8 from Luke  ---


*** This bug has been marked as a duplicate of bug 104773 ***

[Bug rtl-optimization/104773] compare with 1 not merged with subtract 1

2023-09-25 Thread cptarse-luke at yahoo dot com via Gcc-bugs
https://gcc.gnu.org/bugzilla/show_bug.cgi?id=104773

Luke  changed:

   What|Removed |Added

 CC||cptarse-luke at yahoo dot com

--- Comment #3 from Luke  ---
*** Bug 111500 has been marked as a duplicate of this bug. ***

[Bug target/111522] Different code path for static initialization with flto

2023-09-25 Thread malat at debian dot org via Gcc-bugs
https://gcc.gnu.org/bugzilla/show_bug.cgi?id=111522

--- Comment #10 from Mathieu Malaterre  ---
for reference:

% c++ --verbose  -O2 -flto   base2.cc  && ./a.out
Using built-in specs.
COLLECT_GCC=c++
COLLECT_LTO_WRAPPER=/usr/libexec/gcc/powerpc64le-linux-gnu/13/lto-wrapper
OFFLOAD_TARGET_NAMES=nvptx-none
OFFLOAD_TARGET_DEFAULT=1
Target: powerpc64le-linux-gnu
Configured with: ../src/configure -v --with-pkgversion='Debian 13.2.0-4'
--with-bugurl=file:///usr/share/doc/gcc-13/README.Bugs
--enable-languages=c,ada,c++,go,d,fortran,objc,obj-c++,m2 --prefix=/usr
--with-gcc-major-version-only --program-suffix=-13
--program-prefix=powerpc64le-linux-gnu- --enable-shared
--enable-linker-build-id --libexecdir=/usr/libexec --without-included-gettext
--enable-threads=posix --libdir=/usr/lib --enable-nls --enable-bootstrap
--enable-clocale=gnu --enable-libstdcxx-debug --enable-libstdcxx-time=yes
--with-default-libstdcxx-abi=new --enable-gnu-unique-object --enable-plugin
--enable-default-pie --with-system-zlib --enable-libphobos-checking=release
--with-target-system-zlib=auto --with-libphobos-druntime-only=yes
--enable-objc-gc=auto --enable-secureplt --enable-targets=powerpcle-linux
--disable-multilib --enable-multiarch --disable-werror --with-long-double-128
--enable-offload-targets=nvptx-none=/build/reproducible-path/gcc-13-13.2.0/debian/tmp-nvptx/usr
--enable-offload-defaulted --without-cuda-driver --enable-checking=release
--build=powerpc64le-linux-gnu --host=powerpc64le-linux-gnu
--target=powerpc64le-linux-gnu --with-build-config=bootstrap-lto-lean
--enable-link-serialization=4
Thread model: posix
Supported LTO compression algorithms: zlib zstd
gcc version 13.2.0 (Debian 13.2.0-4) 
COLLECT_GCC_OPTIONS='-v' '-O2' '-flto' '-shared-libgcc' '-dumpdir' 'a-'
 /usr/libexec/gcc/powerpc64le-linux-gnu/13/cc1plus -quiet -v -imultiarch
powerpc64le-linux-gnu -D_GNU_SOURCE base2.cc -msecure-plt -quiet -dumpdir a-
-dumpbase base2.cc -dumpbase-ext .cc -O2 -version -flto
-fasynchronous-unwind-tables -o /tmp/cc1cimSD.s
GNU C++17 (Debian 13.2.0-4) version 13.2.0 (powerpc64le-linux-gnu)
compiled by GNU C version 13.2.0, GMP version 6.3.0, MPFR version
4.2.1, MPC version 1.3.1, isl version isl-0.26-GMP

GGC heuristics: --param ggc-min-expand=100 --param ggc-min-heapsize=131072
ignoring nonexistent directory
"/usr/lib/gcc/powerpc64le-linux-gnu/13/../../../../include/powerpc64-linux-gnu/c++/13"
ignoring nonexistent directory "/usr/local/include/powerpc64le-linux-gnu"
ignoring nonexistent directory
"/usr/lib/gcc/powerpc64le-linux-gnu/13/include-fixed/powerpc64le-linux-gnu"
ignoring nonexistent directory
"/usr/lib/gcc/powerpc64le-linux-gnu/13/include-fixed"
ignoring nonexistent directory
"/usr/lib/gcc/powerpc64le-linux-gnu/13/../../../../powerpc64le-linux-gnu/include"
#include "..." search starts here:
#include <...> search starts here:
 /usr/include/c++/13
 /usr/include/powerpc64le-linux-gnu/c++/13
 /usr/include/c++/13/backward
 /usr/lib/gcc/powerpc64le-linux-gnu/13/include
 /usr/local/include
 /usr/include/powerpc64le-linux-gnu
 /usr/include
End of search list.
Compiler executable checksum: 403ce0768541423839c6b7d8fd9dfeff
COLLECT_GCC_OPTIONS='-v' '-O2' '-flto' '-shared-libgcc' '-dumpdir' 'a-'
 as -v -a64 -mpower8 -many -mlittle -o /tmp/ccFzBgtQ.o /tmp/cc1cimSD.s
GNU assembler version 2.41 (powerpc64le-linux-gnu) using BFD version (GNU
Binutils for Debian) 2.41
COMPILER_PATH=/usr/libexec/gcc/powerpc64le-linux-gnu/13/:/usr/libexec/gcc/powerpc64le-linux-gnu/13/:/usr/libexec/gcc/powerpc64le-linux-gnu/:/usr/lib/gcc/powerpc64le-linux-gnu/13/:/usr/lib/gcc/powerpc64le-linux-gnu/
LIBRARY_PATH=/usr/lib/gcc/powerpc64le-linux-gnu/13/:/usr/lib/gcc/powerpc64le-linux-gnu/13/../../../powerpc64le-linux-gnu/:/usr/lib/gcc/powerpc64le-linux-gnu/13/../../../../lib/:/lib/powerpc64le-linux-gnu/:/lib/../lib/:/usr/lib/powerpc64le-linux-gnu/:/usr/lib/../lib/:/usr/lib/gcc/powerpc64le-linux-gnu/13/../../../:/lib/:/usr/lib/
COLLECT_GCC_OPTIONS='-v' '-O2' '-flto' '-shared-libgcc' '-dumpdir' 'a.'
 /usr/libexec/gcc/powerpc64le-linux-gnu/13/collect2 -plugin
/usr/libexec/gcc/powerpc64le-linux-gnu/13/liblto_plugin.so
-plugin-opt=/usr/libexec/gcc/powerpc64le-linux-gnu/13/lto-wrapper
-plugin-opt=-fresolution=/tmp/ccSvdAAw.res -plugin-opt=-pass-through=-lgcc_s
-plugin-opt=-pass-through=-lgcc -plugin-opt=-pass-through=-lc
-plugin-opt=-pass-through=-lgcc_s -plugin-opt=-pass-through=-lgcc -flto
--build-id --eh-frame-hdr -V -m elf64lppc --hash-style=gnu --as-needed
-dynamic-linker /lib64/ld64.so.2 -pie
/usr/lib/gcc/powerpc64le-linux-gnu/13/../../../powerpc64le-linux-gnu/Scrt1.o
/usr/lib/gcc/powerpc64le-linux-gnu/13/../../../powerpc64le-linux-gnu/crti.o
/usr/lib/gcc/powerpc64le-linux-gnu/13/crtbeginS.o
-L/usr/lib/gcc/powerpc64le-linux-gnu/13
-L/usr/lib/gcc/powerpc64le-linux-gnu/13/../../../powerpc64le-linux-gnu
-L/usr/lib/gcc/powerpc64le-linux-gnu/13/../../../../lib
-L/lib/powerpc64le-linux-gnu -L/lib/../lib -L/usr/lib/powerpc64le-linux-gnu
-L/usr/lib/../lib 

[Bug target/111522] Different code path for static initialization with flto

2023-09-25 Thread malat at debian dot org via Gcc-bugs
https://gcc.gnu.org/bugzilla/show_bug.cgi?id=111522

--- Comment #9 from Mathieu Malaterre  ---
If you download pr111522.cc from comment #8, you should be able to reproduce
exactly the original upstream issue.

Steps:

% c++ -O2 -flto pr111522.cc  && ./a.out


vs

% c++ -O2 pr111522.cc && ./a.out

[Bug target/111522] Different code path for static initialization with flto

2023-09-25 Thread malat at debian dot org via Gcc-bugs
https://gcc.gnu.org/bugzilla/show_bug.cgi?id=111522

--- Comment #8 from Mathieu Malaterre  ---
Created attachment 55988
  --> https://gcc.gnu.org/bugzilla/attachment.cgi?id=55988=edit
gcc -E -P

[Bug target/111522] Different code path for static initialization with flto

2023-09-25 Thread malat at debian dot org via Gcc-bugs
https://gcc.gnu.org/bugzilla/show_bug.cgi?id=111522

--- Comment #7 from Mathieu Malaterre  ---
Created attachment 55987
  --> https://gcc.gnu.org/bugzilla/attachment.cgi?id=55987=edit
gcc -E -P

[Bug target/104611] memcmp/strcmp/strncmp can be optimized when the result is tested for [in]equality with 0 on aarch64

2023-09-25 Thread redbeard0531 at gmail dot com via Gcc-bugs
https://gcc.gnu.org/bugzilla/show_bug.cgi?id=104611

Mathias Stearn  changed:

   What|Removed |Added

 CC||redbeard0531 at gmail dot com

--- Comment #4 from Mathias Stearn  ---
clang has already been using the optimized memcmp code since v16, even at -O1:
https://www.godbolt.org/z/qEd768TKr. Older versions (at least since v9) were
still branch-free, but via a less optimal sequence of instructions.

GCC's code gets even more ridiculous at 32 bytes, because it does a branch
after every 8-byte compare, while the clang code is fully branch-free (not that
branch-free is always better, but it seems clearly so in this case).

Judging by the codegen, there seem to be three deficiencies in GCC: 1) an
inability to take advantage of the load-pair instructions to load 16 bytes at a
time, 2) an inability to use ccmp to combine comparisons, and 3) using
branching rather than cset to fill the output register. Ideally these could all
be done in the general case by the low level instruction optimizer, but even
getting them special cased for memcmp (and friends) would be an improvement.

[Bug tree-optimization/111563] Missed optimization of LICM

2023-09-25 Thread 652023330028 at smail dot nju.edu.cn via Gcc-bugs
https://gcc.gnu.org/bugzilla/show_bug.cgi?id=111563

--- Comment #5 from Yi <652023330028 at smail dot nju.edu.cn> ---
(In reply to Andrew Pinski from comment #3)

> So this is again reassociation with LIM, the same issue as PR 111560.

For this similar code, GCC works as expected:
https://godbolt.org/z/3TaqfeTqb

```c++
extern int var_24;
int t;
void test(int var_2, int var_3, int var_8, int var_10, int var_14) {

for (int i_2 = -3247424; i_2 < 19; i_2 += var_3 + 1056714155) 
{
var_24 += (-(200 / var_10)) + (-var_8);
var_24 += var_14 + var_2;

i_2+=i_2/3;
}
}
```
So it seems that this and PR 111560 may not be due to the same cause.

Because it doesn't seem to be relevant to the statement, "Our re-association
only produces a canonical order within a single expression."



Meanwhile, in Example 2, 'if(var_3)' is actually optimized out of the Loop by
Loop Unswitch. So maybe the rest of the loop should be optimized as expected
like this similar code?

Re: On a Plane During Tomorrow's RISC-V GCC Patchwork Meeting

2023-09-25 Thread Palmer Dabbelt

On Mon, 18 Sep 2023 15:13:04 PDT (-0700), Vineet Gupta wrote:

On 9/18/23 09:11, Jeff Law wrote:



On 9/18/23 09:24, Kito Cheng wrote:

I may miss that one too; I'm not on a plane yet, but I need to go to bed
earlier because my flight is early the next morning...

I'm unavailable as well, though I don't get on a plane until Wednesday
evening.


This is one meeting I really look forward to :-)
I'll be on a plane Wednesday evening as  well - see you all soon.


Looks like I'll also be traveling for this week's meeting, so I'll have 
to skip again.




-Vineet


[Bug libstdc++/111589] New: Use relaxed atomic increment (but not decrement!) in shared_ptr

2023-09-25 Thread redbeard0531 at gmail dot com via Gcc-bugs
https://gcc.gnu.org/bugzilla/show_bug.cgi?id=111589

Bug ID: 111589
   Summary: Use relaxed atomic increment (but not decrement!) in
shared_ptr
   Product: gcc
   Version: unknown
Status: UNCONFIRMED
  Severity: normal
  Priority: P3
 Component: libstdc++
  Assignee: unassigned at gcc dot gnu.org
  Reporter: redbeard0531 at gmail dot com
  Target Milestone: ---

The atomic increment when copying a shared_ptr can be relaxed because it is
never actually used as a synchronization operation. The current thread must
already have sufficient synchronization to access the memory because it can
already deref the pointer. All synchronization is done either via whatever
program-provided code makes the shared_ptr object available to the thread, or
in the atomic decrement (where the decrements to non-zero are releases that
ensure all uses of the object happen before the final decrement to zero
acquires and destroys the object).

As an argument-from-authority, libc++ already is using relaxed for increments
and acq/rel for decrements:
https://github.com/llvm/llvm-project/blob/c649fd34e928ad01951cbff298c5c44853dd41dd/libcxx/include/__memory/shared_ptr.h#L101-L121

This will have no impact on x86 where all atomic RMWs are effectively
sequentially consistent, but it will enable the use of ldadd rather than
ldaddal on aarch64, and similar optimizations on other weaker architectures.

Re: [PATCH v2 0/1] Add LoongArch64 support for D frontend

2023-09-25 Thread Yang Yujie
Hi Zixing,

We are also working on a patch series that could pass the libphobos regression 
tests.
Will post this later once all failed items are fixed.

Yujie

On Sun, Sep 24, 2023 at 03:40:32PM -0600, Zixing Liu wrote:
> This patch adds the LoongArch64 support for GCC D frontend.
> 
> The runtime support is submitted as a separate patch here:
> https://github.com/dlang/dmd/pull/15628.
> 
> You can find more information about the LoongArch architecture on this
> website:
> https://loongson.github.io/LoongArch-Documentation/README-EN.html.
> 
> --
> 
> Changes since the last revision of the patch:
> 
> * Corrected copyright years in loongarch-d.cc and loongarch-d.h.
> * Removed changes to the tests, the changes have been rolled into the DMD
>   changes in:
>   
> https://github.com/dlang/dmd/pull/15628/commits/eb84b8a2bc86aa751ad6f472422e8abad63ff500
>   .
> * Removed D_LP32 and D_LP64 bits. Since LoongArch ABIs are somewhat
>   complicated, we may introduce the ABI information in the form of target
>   traits in the future.
> 
> Zixing Liu (1):
> 
>  gcc/config.gcc |  1 +
>  gcc/config/loongarch/loongarch-d.cc| 77 ++
>  gcc/config/loongarch/loongarch-d.h | 26 
>  gcc/config/loongarch/t-loongarch   |  4 ++
>  libphobos/configure.tgt|  3 +
>  libphobos/libdruntime/gcc/sections/elf.d   |  2 +
>  libphobos/libdruntime/gcc/unwind/generic.d |  1 +
>  7 files changed, 114 insertions(+)
>  create mode 100644 gcc/config/loongarch/loongarch-d.cc
>  create mode 100644 gcc/config/loongarch/loongarch-d.h
> 
> -- 
> 2.42.0



[PING] [PATCH] Harmonize headers between both dg-extract-results scripts

2023-09-25 Thread Paul Iannetta
On Mon, Sep 18, 2023 at 08:39:34AM +0200, Paul Iannetta wrote:
> On Thu, Sep 14, 2023 at 04:24:33PM +0200, Paul Iannetta wrote:
> > Hi,
> > 
> > This is a small patch so that both dg-extract-results.py and
> > dg-extract-results.sh share the same header.  In particular, it fixes
> > the fact that the regexp r'^Test Run By (\S+) on (.*)$' was never
> > matched in the python file.
> 
> By the way, the bash script dg-extract-results.sh checks whether
> python is available by invoking python.  However, it seems that the
> policy on newer machines is to not provide python as a symlink (at
> least on Ubuntu 22.04 and above; and RHEL 8).  Therefore, we might
> want to also check against python3 so that the bash script does not
> fail to find python even though it is available.
> 
> Thanks,
> Paul
> 
> 
> > Author: Paul Iannetta 
> > Date:   Thu Sep 14 15:43:58 2023 +0200
> > 
> > Harmonize headers between both dg-extract-results scripts
> > 
> > The header of the python version looked like:
> > Target is ...
> > Host   is ...
> > The header of the bash version looked like:
> > Test run by ... on ...
> > Target is ...
> > 
> > After this change both headers look like:
> > Test run by ... on ...
> > Target is ...
> > Host   is ...
> > 
> > The order of the tests is not the same but since dg-cmp-results.sh it
> > does not matter much.
> > 
> > contrib/ChangeLog:
> > 
> > 2023-09-14  Paul Iannetta  
> > 
> > * dg-extract-results.py: Print the "Test run" line.
> > * dg-extract-results.sh: Print the "Host" line.
> > 
> > diff --git a/contrib/dg-extract-results.py b/contrib/dg-extract-results.py
> > index 30aa68771d4..34da1808c5f 100644
> > --- a/contrib/dg-extract-results.py
> > +++ b/contrib/dg-extract-results.py
> > @@ -113,7 +113,7 @@ class Prog:
> >  # Whether to create .sum rather than .log output.
> >  self.do_sum = True
> >  # Regexps used while parsing.
> > -self.test_run_re = re.compile (r'^Test Run By (\S+) on (.*)$')
> > +self.test_run_re = re.compile (r'^Test run by (\S+) on (.*)$')
> >  self.tool_re = re.compile (r'^\t\t=== (.*) tests ===$')
> >  self.result_re = re.compile (r'^(PASS|XPASS|FAIL|XFAIL|UNRESOLVED'
> >   r'|WARNING|ERROR|UNSUPPORTED|UNTESTED'
> > diff --git a/contrib/dg-extract-results.sh b/contrib/dg-extract-results.sh
> > index ff6c50d029c..57f6fe0e997 100755
> > --- a/contrib/dg-extract-results.sh
> > +++ b/contrib/dg-extract-results.sh
> > @@ -271,7 +271,7 @@ cat $SUM_FILES \
> >  
> >  # Write the begining of the combined summary file.
> >  
> > -head -n 2 $FIRST_SUM
> > +head -n 3 $FIRST_SUM
> >  echo
> >  echo " === $TOOL tests ==="
> >  echo






[Bug libstdc++/111588] New: Provide opt-out of shared_ptr single-threaded optimization

2023-09-25 Thread redbeard0531 at gmail dot com via Gcc-bugs
https://gcc.gnu.org/bugzilla/show_bug.cgi?id=111588

Bug ID: 111588
   Summary: Provide opt-out of shared_ptr single-threaded
optimization
   Product: gcc
   Version: unknown
Status: UNCONFIRMED
  Severity: normal
  Priority: P3
 Component: libstdc++
  Assignee: unassigned at gcc dot gnu.org
  Reporter: redbeard0531 at gmail dot com
  Target Milestone: ---

Right now there is a fast-path for single-threaded programs to avoid the
overhead of atomics in shared_ptr, but there is no equivalent for a program that
knows it is multi-threaded to remove the check and branch. If __GTHREADS is not
defined then no atomic code is emitted.

There are two issues with this: 1) for programs that know they are effectively
always multithreaded they pay for a runtime branch and .text segment bloat for
an optimization that never applies. This may have knock-on effects of making
functions that use shared_ptr less likely to be inlined by pushing them
slightly over the complexity threshold. 2) It invalidates single-threaded
microbenchmarks of code that uses shared_ptr because the performance of the
code may be very different from when run in the real multithreaded program.

I understand the value of making a fast mode for single-threaded code, and I
can even accept having the runtime branch by default, rather than as an opt-in,
when it is unknown if the program will be run with multiple threads. But an
opt-out would be nice to have. If it had to be a gcc-build time option rather
than a #define, that would be acceptable for us since we always use our own
build of gcc, but it seems like a worse option for other users.

FWIW, neither llvm libc++
(https://github.com/llvm/llvm-project/blob/0bfaed8c612705cfa8c5382d26d8089a0a26386b/libcxx/include/__memory/shared_ptr.h#L103-L110)
nor MS-STL
(https://github.com/microsoft/STL/blob/main/stl/inc/memory#L1171-L1173) ever
use runtime branching to detect multithreading.

Re: [PATCH v1] Update check_effective_target_vect_int_mod according to LoongArch SX/ASX capabilities.

2023-09-25 Thread Chenghui Pan
Thanks! I will try to improve it.

On Mon, 2023-09-25 at 17:44 +0800, Xi Ruoyao wrote:
> On Mon, 2023-09-25 at 17:38 +0800, Chenghui Pan wrote:
> > Hi!
> > 
> > After some attempts, I think we still need to check
> > "check_effective_target_loongarch_sx" in vect_int_mod. I wrote some
> > temporary logic in gcc/testsuite/lib/target-supports.exp like this:
> > 
> > diff --git a/gcc/testsuite/lib/target-supports.exp
> > b/gcc/testsuite/lib/target-supports.exp
> > index 2de41cef2f6..91e1c22a6e1 100644
> > --- a/gcc/testsuite/lib/target-supports.exp
> > +++ b/gcc/testsuite/lib/target-supports.exp
> > @@ -8586,7 +8586,8 @@ proc check_effective_target_vect_int_mod { }
> > {
> >  return [check_cached_effective_target_indexed vect_int_mod {
> >    expr { ([istarget powerpc*-*-*]
> >   && [check_effective_target_has_arch_pwr10])
> > - || [istarget amdgcn-*-*] }}]
> > + || [istarget loongarch*-*-*]
> > + || [istarget amdgcn-*-*] }}]
> >  }
> >  
> >  # Return 1 if the target supports vector even/odd elements
> > extraction,
> > 0 otherwise.
> > @@ -11174,6 +11175,12 @@ proc check_vect_support_and_set_flags { }
> > {
> >     lappend DEFAULT_VECTCFLAGS "--param" "riscv-vector-abi"
> >     set dg-do-what-default compile
> >     }
> > +    } elseif [istarget loongarch*-*-*] {
> > +  if [check_effective_target_loongarch_asx_hw] {
> > + lappend DEFAULT_VECTCFLAGS "-mdouble-float" "-mlasx"
> > +  } elseif [check_effective_target_loongarch_sx_hw] {
> > + lappend DEFAULT_VECTCFLAGS "-mdouble-float" "-mlsx"
> > +  }
> 
> I think we can always enable LASX in DEFAULT_VECTCFLAGS, but set dg-
> do-
> what-default to "run" only if both the hardware and the kernel
> > support
> LASX.  If the kernel or the hardware is not capable we set dg-do-
> what-
> default to "compile".
> 
> >  } else {
> >  return 0
> >  }
> > \* temp impl of sx/asx hw proc *\
> > 
> > And then in make check without --target_board=unix/-mlasx, vect.exp
> > is
> > invoked with expected vector isa options, but pr104992.c failed
> > because
> > it expected result with "vect_int_mod returns 1" but it was
> > compiled
> > without -mlsx/-mlasx. Seems pr104992.c is invoked by gcc.dg/dg.exp,
> > pr104992.c is not affected by DEFAULT_CFLAGS, so we still need to
> > check
> > if LSX/LASX is available in vect_int_mod. 
> > 
> > Other parts of the new patch are still WIP.
> 



Re: [PATCH v1] Update check_effective_target_vect_int_mod according to LoongArch SX/ASX capabilities.

2023-09-25 Thread Xi Ruoyao
On Mon, 2023-09-25 at 17:38 +0800, Chenghui Pan wrote:
> Hi!
> 
> After some attempts, I think we still need to check
> "check_effective_target_loongarch_sx" in vect_int_mod. I wrote some
> temporary logic in gcc/testsuite/lib/target-supports.exp like this:
> 
> diff --git a/gcc/testsuite/lib/target-supports.exp
> b/gcc/testsuite/lib/target-supports.exp
> index 2de41cef2f6..91e1c22a6e1 100644
> --- a/gcc/testsuite/lib/target-supports.exp
> +++ b/gcc/testsuite/lib/target-supports.exp
> @@ -8586,7 +8586,8 @@ proc check_effective_target_vect_int_mod { } {
>  return [check_cached_effective_target_indexed vect_int_mod {
>    expr { ([istarget powerpc*-*-*]
>   && [check_effective_target_has_arch_pwr10])
> - || [istarget amdgcn-*-*] }}]
> + || [istarget loongarch*-*-*]
> + || [istarget amdgcn-*-*] }}]
>  }
>  
>  # Return 1 if the target supports vector even/odd elements extraction,
> 0 otherwise.
> @@ -11174,6 +11175,12 @@ proc check_vect_support_and_set_flags { } {
>     lappend DEFAULT_VECTCFLAGS "--param" "riscv-vector-abi"
>     set dg-do-what-default compile
>     }
> +    } elseif [istarget loongarch*-*-*] {
> +  if [check_effective_target_loongarch_asx_hw] {
> + lappend DEFAULT_VECTCFLAGS "-mdouble-float" "-mlasx"
> +  } elseif [check_effective_target_loongarch_sx_hw] {
> + lappend DEFAULT_VECTCFLAGS "-mdouble-float" "-mlsx"
> +  }

I think we can always enable LASX in DEFAULT_VECTCFLAGS, but set dg-do-
what-default to "run" only if both the hardware and the kernel support
LASX.  If the kernel or the hardware is not capable we set dg-do-what-
default to "compile".

>  } else {
>  return 0
>  }
> \* temp impl of sx/asx hw proc *\
> 
> And then in make check without --target_board=unix/-mlasx, vect.exp is
> invoked with expected vector isa options, but pr104992.c failed because
> it expected result with "vect_int_mod returns 1" but it was compiled
> without -mlsx/-mlasx. Seems pr104992.c is invoked by gcc.dg/dg.exp,
> pr104992.c is not affected by DEFAULT_CFLAGS, so we still need to check
> if LSX/LASX is available in vect_int_mod. 
> 
> Other parts of the new patch are still WIP.

-- 
Xi Ruoyao 
School of Aerospace Science and Technology, Xidian University


Re: [PATCH v1] Update check_effective_target_vect_int_mod according to LoongArch SX/ASX capabilities.

2023-09-25 Thread Chenghui Pan
Hi!

After some attempts, I think we still need to check
"check_effective_target_loongarch_sx" in vect_int_mod. I wrote some
temporary logic in gcc/testsuite/lib/target-supports.exp like this:

diff --git a/gcc/testsuite/lib/target-supports.exp
b/gcc/testsuite/lib/target-supports.exp
index 2de41cef2f6..91e1c22a6e1 100644
--- a/gcc/testsuite/lib/target-supports.exp
+++ b/gcc/testsuite/lib/target-supports.exp
@@ -8586,7 +8586,8 @@ proc check_effective_target_vect_int_mod { } {
 return [check_cached_effective_target_indexed vect_int_mod {
   expr { ([istarget powerpc*-*-*]
  && [check_effective_target_has_arch_pwr10])
- || [istarget amdgcn-*-*] }}]
+ || [istarget loongarch*-*-*]
+ || [istarget amdgcn-*-*] }}]
 }
 
 # Return 1 if the target supports vector even/odd elements extraction,
0 otherwise.
@@ -11174,6 +11175,12 @@ proc check_vect_support_and_set_flags { } {
lappend DEFAULT_VECTCFLAGS "--param" "riscv-vector-abi"
set dg-do-what-default compile
}
+} elseif [istarget loongarch*-*-*] {
+  if [check_effective_target_loongarch_asx_hw] {
+ lappend DEFAULT_VECTCFLAGS "-mdouble-float" "-mlasx"
+  } elseif [check_effective_target_loongarch_sx_hw] {
+ lappend DEFAULT_VECTCFLAGS "-mdouble-float" "-mlsx"
+  }
 } else {
 return 0
 }
\* temp impl of sx/asx hw proc *\

And then in make check without --target_board=unix/-mlasx, vect.exp is
invoked with expected vector isa options, but pr104992.c failed because
it expected result with "vect_int_mod returns 1" but it was compiled
without -mlsx/-mlasx. Seems pr104992.c is invoked by gcc.dg/dg.exp,
pr104992.c is not affected by DEFAULT_CFLAGS, so we still need to check
if LSX/LASX is available in vect_int_mod. 

Other parts of the new patch are still WIP.

On Sun, 2023-09-24 at 18:05 +0800, Xi Ruoyao wrote:
> On Wed, 2023-09-20 at 09:15 +0800, Chenghui Pan wrote:
> > LoongArch failed to pass gcc.dg/pr104992.c with -mlsx and -mlasx.
> > This test uses
> > different dg-final directives depending on the vect_int_mod result,
> > LoongArch
> > > SX/ASX supports this operation, but the corresponding description is
> > not defined in
> > target-supports.exp. This patch solves the problem above with some
> > modification in proc check_effective_target_vect_int_mod.
> 
> I think we can just add -mdouble-float -mlasx into DEFAULT_VECTCFLAGS
> and always enable vect_int_mod for LoongArch.  This will make
> vect.exp
> tests automatically run for every "make check" on LoongArch.
> 
> > gcc/testsuite/ChangeLog:
> > 
> > * lib/target-supports.exp: Update
> > check_effective_target_vect_int_mod according to
> > LoongArch SX/ASX capabilities.
> > ---
> >  gcc/testsuite/lib/target-supports.exp | 18 ++
> >  1 file changed, 18 insertions(+)
> > 
> > diff --git a/gcc/testsuite/lib/target-supports.exp
> > b/gcc/testsuite/lib/target-supports.exp
> > index 2de41cef2f6..b253dc578d2 100644
> > --- a/gcc/testsuite/lib/target-supports.exp
> > +++ b/gcc/testsuite/lib/target-supports.exp
> > @@ -8586,6 +8586,8 @@ proc check_effective_target_vect_int_mod { }
> > {
> >  return [check_cached_effective_target_indexed vect_int_mod {
> >    expr { ([istarget powerpc*-*-*]
> >   && [check_effective_target_has_arch_pwr10])
> > +    || ([istarget loongarch*-*-*]
> > +    && [check_effective_target_loongarch_sx])
> >   || [istarget amdgcn-*-*] }}]
> >  }
> >  
> > @@ -12656,6 +12658,22 @@ proc
> > check_effective_target_const_volatile_readonly_section { } {
> >    return 1
> >  }
> >  
> > +proc check_effective_target_loongarch_sx { } {
> > +    return [check_no_compiler_messages loongarch_lsx assembly {
> > +   #if !defined(__loongarch_sx)
> > +   #error "LSX not defined"
> > +   #endif
> > +    }]
> > +}
> > +
> > +proc check_effective_target_loongarch_asx { } {
> > +    return [check_no_compiler_messages loongarch_asx assembly {
> > +   #if !defined(__loongarch_asx)
> > +   #error "LASX not defined"
> > +   #endif
> > +    }]
> > +}
> > +
> >  # Appends necessary Python flags to extra-tool-flags if Python.h
> > is supported.
> >  # Otherwise, modifies dg-do-what.
> >  proc dg-require-python-h { args } {
> 



  1   2   >