date:20230925

RE: [PATCH v1] RISC-V: Rename rounding const fp function for refactor

2023-09-25 Thread Li, Pan2

Committed, thanks Juzhe.

Pan

From: juzhe.zh...@rivai.ai 
Sent: Tuesday, September 26, 2023 11:18 AM
To: Li, Pan2 ; gcc-patches 
Cc: Li, Pan2 ; Wang, Yanzhang ; 
kito.cheng 
Subject: Re: [PATCH v1] RISC-V: Rename rounding const fp function for refactor

LGTM.


juzhe.zh...@rivai.ai

From: pan2.li
Date: 2023-09-26 11:12
To: gcc-patches
CC: juzhe.zhong; 
pan2.li; 
yanzhang.wang; 
kito.cheng
Subject: [PATCH v1] RISC-V: Rename rounding const fp function for refactor
From: Pan Li mailto:pan2...@intel.com>>

The rounding related API shared one const, rename it to avoid
unnecessary redundant code.

gcc/ChangeLog:

* config/riscv/riscv-v.cc (gen_ceil_const_fp): Remove.
(get_fp_rounding_coefficient): Rename.
(gen_floor_const_fp): Remove.
(expand_vec_ceil): Take renamed func.
(expand_vec_floor): Ditto.

Signed-off-by: Pan Li mailto:pan2...@intel.com>>
---
gcc/config/riscv/riscv-v.cc | 13 +++--
1 file changed, 3 insertions(+), 10 deletions(-)

diff --git a/gcc/config/riscv/riscv-v.cc b/gcc/config/riscv/riscv-v.cc
index a1ffefb23f3..9a1df950d58 100644
--- a/gcc/config/riscv/riscv-v.cc
+++ b/gcc/config/riscv/riscv-v.cc
@@ -3548,7 +3548,7 @@ cmp_lmul_gt_one (machine_mode mode)
   greater than and equal to 4503599627370496.
  */
static rtx
-gen_ceil_const_fp (machine_mode inner_mode)
+get_fp_rounding_coefficient (machine_mode inner_mode)
{
   REAL_VALUE_TYPE real;
@@ -3564,13 +3564,6 @@ gen_ceil_const_fp (machine_mode inner_mode)
   return const_double_from_real_value (real, inner_mode);
}
-static rtx
-gen_floor_const_fp (machine_mode inner_mode)
-{
-  /* The floor needs the same floating point const as ceil.  */
-  return gen_ceil_const_fp (inner_mode);
-}
-
static rtx
emit_vec_float_cmp_mask (rtx fp_vector, rtx_code code, rtx fp_scalar,
machine_mode vec_fp_mode)
@@ -3637,7 +3630,7 @@ expand_vec_ceil (rtx op_0, rtx op_1, machine_mode 
vec_fp_mode,
   emit_vec_abs (op_0, op_1, vec_fp_mode);
   /* Step-2: Generate the mask on const fp.  */
-  rtx const_fp = gen_ceil_const_fp (GET_MODE_INNER (vec_fp_mode));
+  rtx const_fp = get_fp_rounding_coefficient (GET_MODE_INNER (vec_fp_mode));
   rtx mask = emit_vec_float_cmp_mask (op_0, LT, const_fp, vec_fp_mode);
   /* Step-3: Convert to integer on mask, with rounding up (aka ceil).  */
@@ -3662,7 +3655,7 @@ expand_vec_floor (rtx op_0, rtx op_1, machine_mode 
vec_fp_mode,
   emit_vec_abs (op_0, op_1, vec_fp_mode);
   /* Step-2: Generate the mask on const fp.  */
-  rtx const_fp = gen_floor_const_fp (GET_MODE_INNER (vec_fp_mode));
+  rtx const_fp = get_fp_rounding_coefficient (GET_MODE_INNER (vec_fp_mode));
   rtx mask = emit_vec_float_cmp_mask (op_0, LT, const_fp, vec_fp_mode);
   /* Step-3: Convert to integer on mask, with rounding down (aka floor).  */
--
2.34.1

Re: [PATCH v1] RISC-V: Rename rounding const fp function for refactor

2023-09-25 Thread juzhe.zh...@rivai.ai

LGTM.



juzhe.zh...@rivai.ai
 
From: pan2.li
Date: 2023-09-26 11:12
To: gcc-patches
CC: juzhe.zhong; pan2.li; yanzhang.wang; kito.cheng
Subject: [PATCH v1] RISC-V: Rename rounding const fp function for refactor
From: Pan Li 
 
The rounding related API shared one const, rename it to avoid
unnecessary redundant code.
 
gcc/ChangeLog:
 
* config/riscv/riscv-v.cc (gen_ceil_const_fp): Remove.
(get_fp_rounding_coefficient): Rename.
(gen_floor_const_fp): Remove.
(expand_vec_ceil): Take renamed func.
(expand_vec_floor): Ditto.
 
Signed-off-by: Pan Li 
---
gcc/config/riscv/riscv-v.cc | 13 +++--
1 file changed, 3 insertions(+), 10 deletions(-)
 
diff --git a/gcc/config/riscv/riscv-v.cc b/gcc/config/riscv/riscv-v.cc
index a1ffefb23f3..9a1df950d58 100644
--- a/gcc/config/riscv/riscv-v.cc
+++ b/gcc/config/riscv/riscv-v.cc
@@ -3548,7 +3548,7 @@ cmp_lmul_gt_one (machine_mode mode)
   greater than and equal to 4503599627370496.
  */
static rtx
-gen_ceil_const_fp (machine_mode inner_mode)
+get_fp_rounding_coefficient (machine_mode inner_mode)
{
   REAL_VALUE_TYPE real;
@@ -3564,13 +3564,6 @@ gen_ceil_const_fp (machine_mode inner_mode)
   return const_double_from_real_value (real, inner_mode);
}
-static rtx
-gen_floor_const_fp (machine_mode inner_mode)
-{
-  /* The floor needs the same floating point const as ceil.  */
-  return gen_ceil_const_fp (inner_mode);
-}
-
static rtx
emit_vec_float_cmp_mask (rtx fp_vector, rtx_code code, rtx fp_scalar,
machine_mode vec_fp_mode)
@@ -3637,7 +3630,7 @@ expand_vec_ceil (rtx op_0, rtx op_1, machine_mode 
vec_fp_mode,
   emit_vec_abs (op_0, op_1, vec_fp_mode);
   /* Step-2: Generate the mask on const fp.  */
-  rtx const_fp = gen_ceil_const_fp (GET_MODE_INNER (vec_fp_mode));
+  rtx const_fp = get_fp_rounding_coefficient (GET_MODE_INNER (vec_fp_mode));
   rtx mask = emit_vec_float_cmp_mask (op_0, LT, const_fp, vec_fp_mode);
   /* Step-3: Convert to integer on mask, with rounding up (aka ceil).  */
@@ -3662,7 +3655,7 @@ expand_vec_floor (rtx op_0, rtx op_1, machine_mode 
vec_fp_mode,
   emit_vec_abs (op_0, op_1, vec_fp_mode);
   /* Step-2: Generate the mask on const fp.  */
-  rtx const_fp = gen_floor_const_fp (GET_MODE_INNER (vec_fp_mode));
+  rtx const_fp = get_fp_rounding_coefficient (GET_MODE_INNER (vec_fp_mode));
   rtx mask = emit_vec_float_cmp_mask (op_0, LT, const_fp, vec_fp_mode);
   /* Step-3: Convert to integer on mask, with rounding down (aka floor).  */
-- 
2.34.1

[PATCH v1] RISC-V: Rename rounding const fp function for refactor

2023-09-25 Thread pan2 . li

From: Pan Li 

The rounding related API shared one const, rename it to avoid
unnecessary redundant code.

gcc/ChangeLog:

* config/riscv/riscv-v.cc (gen_ceil_const_fp): Remove.
(get_fp_rounding_coefficient): Rename.
(gen_floor_const_fp): Remove.
(expand_vec_ceil): Take renamed func.
(expand_vec_floor): Ditto.

Signed-off-by: Pan Li 
---
 gcc/config/riscv/riscv-v.cc | 13 +++--
 1 file changed, 3 insertions(+), 10 deletions(-)

diff --git a/gcc/config/riscv/riscv-v.cc b/gcc/config/riscv/riscv-v.cc
index a1ffefb23f3..9a1df950d58 100644
--- a/gcc/config/riscv/riscv-v.cc
+++ b/gcc/config/riscv/riscv-v.cc
@@ -3548,7 +3548,7 @@ cmp_lmul_gt_one (machine_mode mode)
   greater than and equal to 4503599627370496.
  */
 static rtx
-gen_ceil_const_fp (machine_mode inner_mode)
+get_fp_rounding_coefficient (machine_mode inner_mode)
 {
   REAL_VALUE_TYPE real;
 
@@ -3564,13 +3564,6 @@ gen_ceil_const_fp (machine_mode inner_mode)
   return const_double_from_real_value (real, inner_mode);
 }
 
-static rtx
-gen_floor_const_fp (machine_mode inner_mode)
-{
-  /* The floor needs the same floating point const as ceil.  */
-  return gen_ceil_const_fp (inner_mode);
-}
-
 static rtx
 emit_vec_float_cmp_mask (rtx fp_vector, rtx_code code, rtx fp_scalar,
 machine_mode vec_fp_mode)
@@ -3637,7 +3630,7 @@ expand_vec_ceil (rtx op_0, rtx op_1, machine_mode 
vec_fp_mode,
   emit_vec_abs (op_0, op_1, vec_fp_mode);
 
   /* Step-2: Generate the mask on const fp.  */
-  rtx const_fp = gen_ceil_const_fp (GET_MODE_INNER (vec_fp_mode));
+  rtx const_fp = get_fp_rounding_coefficient (GET_MODE_INNER (vec_fp_mode));
   rtx mask = emit_vec_float_cmp_mask (op_0, LT, const_fp, vec_fp_mode);
 
   /* Step-3: Convert to integer on mask, with rounding up (aka ceil).  */
@@ -3662,7 +3655,7 @@ expand_vec_floor (rtx op_0, rtx op_1, machine_mode 
vec_fp_mode,
   emit_vec_abs (op_0, op_1, vec_fp_mode);
 
   /* Step-2: Generate the mask on const fp.  */
-  rtx const_fp = gen_floor_const_fp (GET_MODE_INNER (vec_fp_mode));
+  rtx const_fp = get_fp_rounding_coefficient (GET_MODE_INNER (vec_fp_mode));
   rtx mask = emit_vec_float_cmp_mask (op_0, LT, const_fp, vec_fp_mode);
 
   /* Step-3: Convert to integer on mask, with rounding down (aka floor).  */
-- 
2.34.1

Re: [PATCH] Checking undefined_p before using the vr

2023-09-25 Thread Jiufu Guo



Hi Andrew,

Thanks for your explanation! And sorry for the late reply.

Andrew MacLeod  writes:

> On 9/14/23 22:07, Jiufu Guo wrote:
>>>
>>> undefined is a perfectly acceptable range.  It can be used to
>>> represent either values which has not been initialized, or more
>>> frequently it identifies values that cannot occur due to
>>> conflicting/unreachable code.  VARYING means it can be any range,
>>> UNDEFINED means this is unusable, so treat it accordingly.  Its
>>> propagated like any other range.
>> "undefined" means the ranger is unusable. So, for this ranger, it
>> seems only "undefined_p ()" can be checked, and it seems no other
>> functions of this ranger can be called.
>
> not at all. It means ranger has determined that there is no valid
> range for the item you are asking about probably due to conflicting
> conditions, which imparts important information about the range.. or
> lack of range :-)
>
> Quite frequently it means you are looking at a block of code that
> ranger knows is unreachable, but a pass of the compiler which removes
> such blocks has not been called yet.. so the awareness imparted is
> that there isn't much point in doing optimizations on it because its
> probably going to get thrown away by a following pass.
>
>>
>> I'm thinking that it may be ok to let "range_of_expr" return false
>> if the "vr" is "undefined_p".  I know this may change the meaning
>> of "range_of_expr" slightly :)
>
> No.  That would be like saying NULL is not a valid value for a
> pointer.  undefined_p has very specific meaning that we use.. it just
> has no type.

Oh, got it. :)

BR,
Jeff (Jiufu Guo)
>
> Andrew

Re: [PATCH v1] RISC-V: Support FP nearbyint auto-vectorization

2023-09-25 Thread juzhe.zh...@rivai.ai

+static rtx
+gen_nearbyint_const_fp (machine_mode inner_mode)
+{
+  /* The nearbyint needs the same floating point const as ceil.  */
+  return gen_ceil_const_fp (inner_mode);
+}
This is redundant.

Also, this is also redundant:
static rtx
gen_floor_const_fp (machine_mode inner_mode)
{
  /* The floor needs the same floating point const as ceil.  */
  return gen_ceil_const_fp (inner_mode);
}

So rename it :
gen_ceil_const_fp (machine_mode inner_mode)

into:
get_fp_rounding_coefficient



juzhe.zh...@rivai.ai
 
From: pan2.li
Date: 2023-09-26 10:39
To: gcc-patches
CC: juzhe.zhong; pan2.li; yanzhang.wang; kito.cheng
Subject: [PATCH v1] RISC-V: Support FP nearbyint auto-vectorization
From: Pan Li 
 
This patch would like to support auto-vectorization for the
nearbyint API in math.h. It depends on the -ffast-math option.
 
When we would like to call nearbyint/nearbyintf like v2 = nearbyint (v1),
we will convert it into below insns (reference the implementation of llvm).
 
* frflags a5
* vfcvt.x.f v3, v1, RDN
* vfcvt.f.x v2, v3
* fsflags a5
 
However, the floating point value may not need the cvt as above if
its mantissa is zero. Take single precision floating point as example:
 
Assume we have RTZ rounding mode
 
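  +------------+---------------+-----------------+
  | raw float  | binary layout | after nearbyint |
  +------------+---------------+-----------------+
  | 8388607.5  | 0x4affffff    | 8388607.0       |
  | 8388608.0  | 0x4b000000    | 8388608.0       |
  | 8388609.0  | 0x4b000001    | 8388609.0       |
  +------------+---------------+-----------------+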
 
All single floating point >= 8388608.0 will have all zero mantissa.
We leverage vmflt and mask to filter them out in vector and only do the
cvt on mask.
 
Before this patch:
math-nearbyint-1.c:21:1: missed: couldn't vectorize loop
  ...
.L3:
  flw fa0,0(s0)
  addi s0,s0,4
  addi s1,s1,4
  call nearbyint
  fsw fa0,-4(s1)
  bne s0,s2,.L3
 
After this patch:
  vfabs.v v2,v1
  vmflt.vf v0,v2,fa5
  frflags a7
  vfcvt.x.f.v v4,v1,v0.t
  vfcvt.f.x.v v2,v4,v0.t
  fsflags a7
  vfsgnj.vv   v2,v2,v1
 
Please note VLS mode is also involved in this patch and covered by the
test cases.
 
gcc/ChangeLog:
 
* config/riscv/autovec.md (nearbyint2): New pattern.
* config/riscv/riscv-protos.h (enum insn_type): New enum.
(expand_vec_nearbyint): New function decl.
* config/riscv/riscv-v.cc (gen_nearbyint_const_fp): New function impl.
(expand_vec_nearbyint): Ditto.
 
gcc/testsuite/ChangeLog:
 
* gcc.target/riscv/rvv/autovec/unop/test-math.h: Add helper function.
* gcc.target/riscv/rvv/autovec/unop/math-nearbyint-0.c: New test.
* gcc.target/riscv/rvv/autovec/unop/math-nearbyint-1.c: New test.
* gcc.target/riscv/rvv/autovec/unop/math-nearbyint-2.c: New test.
* gcc.target/riscv/rvv/autovec/unop/math-nearbyint-3.c: New test.
* gcc.target/riscv/rvv/autovec/unop/math-nearbyint-run-1.c: New test.
* gcc.target/riscv/rvv/autovec/unop/math-nearbyint-run-2.c: New test.
* gcc.target/riscv/rvv/autovec/vls/math-nearbyint-1.c: New test.
 
Signed-off-by: Pan Li 
---
gcc/config/riscv/autovec.md   | 11 
gcc/config/riscv/riscv-protos.h   |  2 +
gcc/config/riscv/riscv-v.cc   | 36 
.../riscv/rvv/autovec/unop/math-nearbyint-0.c | 20 +++
.../riscv/rvv/autovec/unop/math-nearbyint-1.c | 20 +++
.../riscv/rvv/autovec/unop/math-nearbyint-2.c | 20 +++
.../riscv/rvv/autovec/unop/math-nearbyint-3.c | 22 +++
.../rvv/autovec/unop/math-nearbyint-run-1.c   | 48 +++
.../rvv/autovec/unop/math-nearbyint-run-2.c   | 48 +++
.../riscv/rvv/autovec/unop/test-math.h| 33 +++
.../riscv/rvv/autovec/vls/math-nearbyint-1.c  | 58 +++
11 files changed, 318 insertions(+)
create mode 100644 
gcc/testsuite/gcc.target/riscv/rvv/autovec/unop/math-nearbyint-0.c
create mode 100644 
gcc/testsuite/gcc.target/riscv/rvv/autovec/unop/math-nearbyint-1.c
create mode 100644 
gcc/testsuite/gcc.target/riscv/rvv/autovec/unop/math-nearbyint-2.c
create mode 100644 
gcc/testsuite/gcc.target/riscv/rvv/autovec/unop/math-nearbyint-3.c
create mode 100644 
gcc/testsuite/gcc.target/riscv/rvv/autovec/unop/math-nearbyint-run-1.c
create mode 100644 
gcc/testsuite/gcc.target/riscv/rvv/autovec/unop/math-nearbyint-run-2.c
create mode 100644 
gcc/testsuite/gcc.target/riscv/rvv/autovec/vls/math-nearbyint-1.c
 
diff --git a/gcc/config/riscv/autovec.md b/gcc/config/riscv/autovec.md
index a005e17457e..b47f086f5e6 100644
--- a/gcc/config/riscv/autovec.md
+++ b/gcc/config/riscv/autovec.md
@@ -2210,6 +2210,7 @@ (define_expand "avg3_ceil"
;; Includes:
;; - ceil/ceilf
;; - floor/floorf
+;; - nearbyint/nearbyintf
;; -
(define_expand "ceil2"
   [(match_operand:V_VLSF 0 "register_operand")
@@ -2230,3 +2231,13 @@ (define_expand "floor2"
 DONE;
   }
)
+
+(define_expand "nearbyint2"
+  [(match_operand:V_VLSF 0 "register_operand")
+

Ping^2 [PATCH V5 1/4] rs6000: build constant via li;rotldi

2023-09-25 Thread Jiufu Guo

Hi,

Gentle ping...

BR,
Jeff (Jiufu Guo)

Jiufu Guo via Gcc-patches  writes:

> Hi,
>
> Gentle ping...
>
> BR,
> Jeff (Jiufu Guo)
>
> Jiufu Guo  writes:
>
>> Hi,
>>
>> If a constant can be rotated to/from a positive or negative
>> value which "li" can generate, then "li;rotldi" can be used to build
>> the constant.
>>
>> Compare with the previous version:
>> https://gcc.gnu.org/pipermail/gcc-patches/2023-July/623528.html
>> This patch just did minor changes to the comments according to previous
>> review.
>>
>> Bootstrap and regtest pass on ppc64{,le}.
>>
>> Is this ok for trunk?
>>
>>
>> BR,
>> Jeff (Jiufu)
>>
>> gcc/ChangeLog:
>>
>>  * config/rs6000/rs6000.cc (can_be_built_by_li_and_rotldi): New function.
>>  (rs6000_emit_set_long_const): Call can_be_built_by_li_and_rotldi.
>>
>> gcc/testsuite/ChangeLog:
>>
>>  * gcc.target/powerpc/const-build.c: New test.
>> ---
>>  gcc/config/rs6000/rs6000.cc   | 47 +--
>>  .../gcc.target/powerpc/const-build.c  | 57 +++
>>  2 files changed, 98 insertions(+), 6 deletions(-)
>>  create mode 100644 gcc/testsuite/gcc.target/powerpc/const-build.c
>>
>> diff --git a/gcc/config/rs6000/rs6000.cc b/gcc/config/rs6000/rs6000.cc
>> index 42f49e4a56b..acc332acc05 100644
>> --- a/gcc/config/rs6000/rs6000.cc
>> +++ b/gcc/config/rs6000/rs6000.cc
>> @@ -10258,6 +10258,31 @@ rs6000_emit_set_const (rtx dest, rtx source)
>>return true;
>>  }
>>  
>> +/* Check if value C can be built by 2 instructions: one is 'li', another is
>> +   'rotldi'.
>> +
>> +   If so, *SHIFT is set to the shift operand of rotldi(rldicl), and *MASK
>> +   is set to the mask operand of rotldi(rldicl), and return true.
>> +   Return false otherwise.  */
>> +
>> +static bool
>> +can_be_built_by_li_and_rotldi (HOST_WIDE_INT c, int *shift,
>> +   HOST_WIDE_INT *mask)
>> +{
>> +  /* If C or ~C contains at least 49 successive zeros, then C can be rotated
>> + to/from a positive or negative value that 'li' is able to load.  */
>> +  int n;
>> +  if (can_be_rotated_to_lowbits (c, 15, &n)
>> +  || can_be_rotated_to_lowbits (~c, 15, &n))
>> +{
>> +  *mask = HOST_WIDE_INT_M1;
>> +  *shift = HOST_BITS_PER_WIDE_INT - n;
>> +  return true;
>> +}
>> +
>> +  return false;
>> +}
>> +
>>  /* Subroutine of rs6000_emit_set_const, handling PowerPC64 DImode.
>> Output insns to set DEST equal to the constant C as a series of
>> lis, ori and shl instructions.  */
>> @@ -10266,15 +10291,14 @@ static void
>>  rs6000_emit_set_long_const (rtx dest, HOST_WIDE_INT c)
>>  {
>>rtx temp;
>> +  int shift;
>> +  HOST_WIDE_INT mask;
>>HOST_WIDE_INT ud1, ud2, ud3, ud4;
>>  
>>ud1 = c & 0xffff;
>> -  c = c >> 16;
>> -  ud2 = c & 0xffff;
>> -  c = c >> 16;
>> -  ud3 = c & 0xffff;
>> -  c = c >> 16;
>> -  ud4 = c & 0xffff;
>> +  ud2 = (c >> 16) & 0xffff;
>> +  ud3 = (c >> 32) & 0xffff;
>> +  ud4 = (c >> 48) & 0xffff;
>>  
>>if ((ud4 == 0xffff && ud3 == 0xffff && ud2 == 0xffff && (ud1 & 0x8000))
>>|| (ud4 == 0 && ud3 == 0 && ud2 == 0 && ! (ud1 & 0x8000)))
>> @@ -10305,6 +10329,17 @@ rs6000_emit_set_long_const (rtx dest, HOST_WIDE_INT 
>> c)
>>emit_move_insn (dest, gen_rtx_XOR (DImode, temp,
>>   GEN_INT ((ud2 ^ 0xffff) << 16)));
>>  }
>> +  else if (can_be_built_by_li_and_rotldi (c, &shift, &mask))
>> +{
>> +  temp = !can_create_pseudo_p () ? dest : gen_reg_rtx (DImode);
>> +  unsigned HOST_WIDE_INT imm = (c | ~mask);
>> +  imm = (imm >> shift) | (imm << (HOST_BITS_PER_WIDE_INT - shift));
>> +
>> +  emit_move_insn (temp, GEN_INT (imm));
>> +  if (shift != 0)
>> +temp = gen_rtx_ROTATE (DImode, temp, GEN_INT (shift));
>> +  emit_move_insn (dest, temp);
>> +}
>>else if (ud3 == 0 && ud4 == 0)
>>  {
>>temp = !can_create_pseudo_p () ? dest : gen_reg_rtx (DImode);
>> diff --git a/gcc/testsuite/gcc.target/powerpc/const-build.c 
>> b/gcc/testsuite/gcc.target/powerpc/const-build.c
>> new file mode 100644
>> index 000..69b37e2bb53
>> --- /dev/null
>> +++ b/gcc/testsuite/gcc.target/powerpc/const-build.c
>> @@ -0,1 +1,57 @@
>> +/* { dg-do run } */
>> +/* { dg-options "-O2 -save-temps" } */
>> +/* { dg-require-effective-target has_arch_ppc64 } */
>> +
>> +/* Verify that two instructions are successfully used to build constants.
>> +   One insn is li, another is rotate: rldicl.  */
>> +
>> +#define NOIPA __attribute__ ((noipa))
>> +
>> +struct fun
>> +{
>> +  long long (*f) (void);
>> +  long long val;
>> +};
>> +
>> +long long NOIPA
>> +li_rotldi_1 (void)
>> +{
>> +  return 0x75310LL;
>> +}
>> +
>> +long long NOIPA
>> +li_rotldi_2 (void)
>> +{
>> +  return 0x2164LL;
>> +}
>> +
>> +long long NOIPA
>> +li_rotldi_3 (void)
>> +{
>> +  return 0x8531LL;
>> +}
>> +
>> +long long NOIPA
>> +li_rotldi_4 (void)
>> +{
>> +  return 0x2194LL;
>> +}
>> +
>> +struct fun arr[] = {
>>

[PATCH] RISC-V: Add opaque integer modes to fix ICE on DSE[PR111590]

2023-09-25 Thread Juzhe-Zhong

When running the Fortran tests with the 'V' extension enabled on the RISC-V
port, I saw multiple ICEs: https://gcc.gnu.org/bugzilla/show_bug.cgi?id=111590

The root cause is on DSE:

internal compiler error: in smallest_mode_for_size, at stor-layout.cc:356
0x1918f70 smallest_mode_for_size(poly_int<2u, unsigned long>, mode_class)
../../../../gcc/gcc/stor-layout.cc:356
0x11f75bb smallest_int_mode_for_size(poly_int<2u, unsigned long>)
../../../../gcc/gcc/machmode.h:916
0x3304141 find_shift_sequence
../../../../gcc/gcc/dse.cc:1738
0x3304f1a get_stored_val
../../../../gcc/gcc/dse.cc:1906
0x3305377 replace_read
../../../../gcc/gcc/dse.cc:2010
0x3306226 check_mem_read_rtx
../../../../gcc/gcc/dse.cc:2310
0x330667b check_mem_read_use
../../../../gcc/gcc/dse.cc:2415

After investigation, DSE is trying to optimize code like the 
following:

(insn 86 85 87 9 (set (reg:V4DI 168)
(mem/u/c:V4DI (reg/f:DI 171) [0  S32 A128])) "bug.f90":6:18 discrim 6 
1167 {*movv4di}
 (expr_list:REG_EQUAL (const_vector:V4DI [
(const_int 4 [0x4])
(const_int 1 [0x1]) repeated x2
(const_int 3 [0x3])
])
(nil)))

(set (mem) (reg:V4DI 168))

Then it ICEs on: auto new_mode = smallest_int_mode_for_size (access_size * 
BITS_PER_UNIT);

The access_size may be 24 or 32. We don't have integer modes of these 
sizes, so it ICEs.

I saw that both aarch64 and ARM have EI/OI/CI/XI opaque modes.

So I added them to work around the ICE in DSE; it works, as all the ICEs are resolved.

CC Richard to review to make sure I am doing the right thing to fix the bug.

Hi, Richard, could you help me with this issue ? Thanks.

gcc/ChangeLog:

* config/riscv/riscv-modes.def (INT_MODE): Add opaque modes

---
 gcc/config/riscv/riscv-modes.def | 6 ++
 1 file changed, 6 insertions(+)

diff --git a/gcc/config/riscv/riscv-modes.def b/gcc/config/riscv/riscv-modes.def
index e3c6ccb2809..ab86032c914 100644
--- a/gcc/config/riscv/riscv-modes.def
+++ b/gcc/config/riscv/riscv-modes.def
@@ -393,6 +393,12 @@ VLS_MODES (1024); /* V1024QI  V512HI  V256SI V128DI  
V512HF  V256SF V128DF */
 VLS_MODES (2048); /* V2048QI V1024HI  V512SI V256DI V1024HF  V512SF V256DF */
 VLS_MODES (4096); /* V4096QI V2048HI V1024SI V512DI V2048HF V1024SF V512DF */
 
+/* Opaque integer modes 3, 4, 6 or 8 general double registers.  */
+INT_MODE (EI, 24);
+INT_MODE (OI, 32);
+INT_MODE (CI, 48);
+INT_MODE (XI, 64);
+
 /* TODO: According to RISC-V 'V' ISA spec, the maximun vector length can
be 65536 for a single vector register which means the vector mode in
GCC can be maximum = 65536 * 8 bits (LMUL=8).
-- 
2.36.3

[PATCH v1] RISC-V: Support FP nearbyint auto-vectorization

2023-09-25 Thread pan2 . li

From: Pan Li 

This patch would like to support auto-vectorization for the
nearbyint API in math.h. It depends on the -ffast-math option.

When we would like to call nearbyint/nearbyintf like v2 = nearbyint (v1),
we will convert it into below insns (reference the implementation of llvm).

* frflags a5
* vfcvt.x.f v3, v1, RDN
* vfcvt.f.x v2, v3
* fsflags a5

However, the floating point value may not need the cvt as above if
its mantissa is zero. Take single precision floating point as example:

Assume we have RTZ rounding mode

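  +------------+---------------+-----------------+
  | raw float  | binary layout | after nearbyint |
  +------------+---------------+-----------------+
  | 8388607.5  | 0x4affffff    | 8388607.0       |
  | 8388608.0  | 0x4b000000    | 8388608.0       |
  | 8388609.0  | 0x4b000001    | 8388609.0       |
  +------------+---------------+-----------------+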

All single floating point >= 8388608.0 will have all zero mantissa.
We leverage vmflt and mask to filter them out in vector and only do the
cvt on mask.

Before this patch:
math-nearbyint-1.c:21:1: missed: couldn't vectorize loop
  ...
.L3:
  flw fa0,0(s0)
  addi s0,s0,4
  addi s1,s1,4
  call nearbyint
  fsw fa0,-4(s1)
  bne s0,s2,.L3

After this patch:
  vfabs.v v2,v1
  vmflt.vf v0,v2,fa5
  frflags a7
  vfcvt.x.f.v v4,v1,v0.t
  vfcvt.f.x.v v2,v4,v0.t
  fsflags a7
  vfsgnj.vv   v2,v2,v1

Please note VLS mode is also involved in this patch and covered by the
test cases.

gcc/ChangeLog:

* config/riscv/autovec.md (nearbyint2): New pattern.
* config/riscv/riscv-protos.h (enum insn_type): New enum.
(expand_vec_nearbyint): New function decl.
* config/riscv/riscv-v.cc (gen_nearbyint_const_fp): New function impl.
(expand_vec_nearbyint): Ditto.

gcc/testsuite/ChangeLog:

* gcc.target/riscv/rvv/autovec/unop/test-math.h: Add helper function.
* gcc.target/riscv/rvv/autovec/unop/math-nearbyint-0.c: New test.
* gcc.target/riscv/rvv/autovec/unop/math-nearbyint-1.c: New test.
* gcc.target/riscv/rvv/autovec/unop/math-nearbyint-2.c: New test.
* gcc.target/riscv/rvv/autovec/unop/math-nearbyint-3.c: New test.
* gcc.target/riscv/rvv/autovec/unop/math-nearbyint-run-1.c: New test.
* gcc.target/riscv/rvv/autovec/unop/math-nearbyint-run-2.c: New test.
* gcc.target/riscv/rvv/autovec/vls/math-nearbyint-1.c: New test.

Signed-off-by: Pan Li 
---
 gcc/config/riscv/autovec.md   | 11 
 gcc/config/riscv/riscv-protos.h   |  2 +
 gcc/config/riscv/riscv-v.cc   | 36 
 .../riscv/rvv/autovec/unop/math-nearbyint-0.c | 20 +++
 .../riscv/rvv/autovec/unop/math-nearbyint-1.c | 20 +++
 .../riscv/rvv/autovec/unop/math-nearbyint-2.c | 20 +++
 .../riscv/rvv/autovec/unop/math-nearbyint-3.c | 22 +++
 .../rvv/autovec/unop/math-nearbyint-run-1.c   | 48 +++
 .../rvv/autovec/unop/math-nearbyint-run-2.c   | 48 +++
 .../riscv/rvv/autovec/unop/test-math.h| 33 +++
 .../riscv/rvv/autovec/vls/math-nearbyint-1.c  | 58 +++
 11 files changed, 318 insertions(+)
 create mode 100644 
gcc/testsuite/gcc.target/riscv/rvv/autovec/unop/math-nearbyint-0.c
 create mode 100644 
gcc/testsuite/gcc.target/riscv/rvv/autovec/unop/math-nearbyint-1.c
 create mode 100644 
gcc/testsuite/gcc.target/riscv/rvv/autovec/unop/math-nearbyint-2.c
 create mode 100644 
gcc/testsuite/gcc.target/riscv/rvv/autovec/unop/math-nearbyint-3.c
 create mode 100644 
gcc/testsuite/gcc.target/riscv/rvv/autovec/unop/math-nearbyint-run-1.c
 create mode 100644 
gcc/testsuite/gcc.target/riscv/rvv/autovec/unop/math-nearbyint-run-2.c
 create mode 100644 
gcc/testsuite/gcc.target/riscv/rvv/autovec/vls/math-nearbyint-1.c

diff --git a/gcc/config/riscv/autovec.md b/gcc/config/riscv/autovec.md
index a005e17457e..b47f086f5e6 100644
--- a/gcc/config/riscv/autovec.md
+++ b/gcc/config/riscv/autovec.md
@@ -2210,6 +2210,7 @@ (define_expand "avg3_ceil"
 ;; Includes:
 ;; - ceil/ceilf
 ;; - floor/floorf
+;; - nearbyint/nearbyintf
 ;; -
 (define_expand "ceil2"
   [(match_operand:V_VLSF 0 "register_operand")
@@ -2230,3 +2231,13 @@ (define_expand "floor2"
 DONE;
   }
 )
+
+(define_expand "nearbyint2"
+  [(match_operand:V_VLSF 0 "register_operand")
+   (match_operand:V_VLSF 1 "register_operand")]
+  "TARGET_VECTOR && !flag_trapping_math && !flag_rounding_math"
+  {
+riscv_vector::expand_vec_nearbyint (operands[0], operands[1], mode, 
mode);
+DONE;
+  }
+)
diff --git a/gcc/config/riscv/riscv-protos.h b/gcc/config/riscv/riscv-protos.h
index 63eb2475705..f87bdef0f71 100644
--- a/gcc/config/riscv/riscv-protos.h
+++ b/gcc/config/riscv/riscv-protos.h
@@ -296,6 +296,7 @@ enum insn_type : unsigned int
   UNARY_OP_TAMA = __MASK_OP_TAMA | UNARY_OP_P,
   UNARY_OP_TAMU = __MASK_OP_TAMU | UNARY_OP_P,
   UNARY_OP_FRM_DYN

[PATCH v3 1/2] c++: Initial support for P0847R7 (Deducing This) [PR102609]

2023-09-25 Thread waffl3x

> Yes, but I'll warn you that grokdeclarator has resisted refactoring for
> a long time...

That will certainly be what I work on after this is squared off then,
I've been up and down grokdeclarator so I'm confident I'll be able to
do it.

As for the patch, I sure took my sweet time with it, but here it is. I
hope to work on the diagnostics patch tomorrow, but as you've probably
figured out it's best not to take my word on timeframes :^).

On the plus side, I took my time to figure out how best to pass down
information about whether a param is an xobj param. My initial
impression on what you were suggesting was to push another node on the
front of the list, but I stared at it for a few hours and didn't think
it would work out. However, eventually I realized that the purpose
member is free for xobj params as it is illegal for them to have
default arguments. So I ended up passing it over the TREE_LIST after
all, maybe this is what you meant in the first place anyway too.

I am pretty confident that this version is all good, with only a few
possible issues.

An update on my copyright assignment, I sent an e-mail and haven't
gotten a response yet. From what I saw, I am confident that it's my
preferred option going forward though. Hopefully they get back to me
soon.

Also, just a quick update on my copyright assignment, I have sent an
e-mail to the FSF and haven't gotten a response yet. From what I was
reading, I am confident that it's my preferred option going forward
though. Hopefully they get back to me soon.

Bootstrapped and regtested on x86_64-pc-linux-gnu.

From bbfbcc72e8c0868559284352c71731394c98441e Mon Sep 17 00:00:00 2001
From: waffl3x 
Date: Mon, 25 Sep 2023 16:59:10 -0600
Subject: [PATCH] c++: Initial support for C++23 P0847R7 (Deducing This)
 [PR102609]

This patch implements initial support for P0847R7, without additions to
diagnostics.  Almost everything should work correctly, barring a few
limitations which are listed below.  I attempted to minimize changes to the
existing code, treating explicit object member functions as static functions,
while flagging them to give them extra powers seemed to be the best way of
achieving this.  For this patch, the flag is only utilized in call.cc for
resolving overloads and making the actual function call.

Internally, the difference between a static member function and an implicit
object member function appears to be whether the type node of the decl is a
FUNCTION_TYPE or a METHOD_TYPE.  So to get the desired behavior, it seems to be
sufficient to simply prevent conversion from FUNC_TYPE to METHOD_TYPE in
grokdeclarator when the first parameter is an explicit object parameter.  To
achieve this, explicit object parameters are flagged as such through each
TREE_LIST's purpose member in declarator->u.function.parameters.  Typically the
purpose member is used for default arguments,  as those are not allowed for
explicit object parameters, we are able to repurpose purpose for our purposes.
The value used as a flag is the "this_identifier" global tree, as it seemed to
be the most fitting of the current global trees.  Even though it is obviously
illegal for any parameter except the first to be an explicit object parameter,
each parameter parsed as an explicit object parameter will be flagged in this
manner.  This will be used for diagnostics in the following patch.  When an
explicit object parameter is encountered in grokdeclarator, the purpose member
is nulled before the list is passed elsewhere to maintain compatibility with
any code that assumes that a non-null purpose member indicates a default
argument.  This patch only checks for and nulls the first parameter however.

As for the previously mentioned limitations, lambdas do not work correctly yet,
but I suspect that a few tweaks are all it will take to have them fully
functional.  User defined conversion functions are not called when an explicit
object member function with an explicit object parameter of an unrelated type
is called.  The following case does not behave correctly because of this.

struct S {
  operator size_t() {
return 42;
  }
  size_t f(this size_t n) {
return n;
  }
};

int main()
{
  S s{};
  size_t a = s.f();
}

Currently, it appears that the object argument is simply reinterpreted as
a size_t instead of properly calling the user defined conversion function.
The validity of such a conversion is still considered, however: if there is no
way to convert S to a size_t, an appropriate compile error will be emitted.
I have an idea of what changes need to be made to fix this, but I did not
pursue this for the initial implementation patch.
This bug can be observed in the explicit-object-param4.C test case, while
explicit-object-param3.C demonstrates the non functioning lambdas.

	PR c++/102609

gcc/cp/ChangeLog:
	PR c++/102609
	Initial support for C++23 P0847R7 - Deducing this.
	* call.cc (add_candidates): Check if fn is an xobj member function.
	(build_over_call): Ditto.
	*

Re: [PATCH] AArch64: Fix __sync_val_compare_and_swap [PR111404]

2023-09-25 Thread Wilco Dijkstra

Hi Ramana,

>> __sync_val_compare_and_swap may be used on 128-bit types and either calls the
>> outline atomic code or uses an inline loop.  On AArch64 LDXP is only atomic 
>> if
>> the value is stored successfully using STXP, but the current implementations
>> do not perform the store if the comparison fails.  In this case the value 
>> returned
>> is not read atomically.
>
> IIRC, the previous discussions in this space revolved around the
> difficulty with the store writing to readonly memory which is why I
> think we went with LDXP in this form.

That's not related to this patch - this fixes a serious atomicity bug that may
affect the Linux kernel since it uses the older sync primitives. Given that LDXP
is not atomic on its own, you have to execute the STXP even in the failure case.
Note that you can't rely on compare not to write memory: load-exclusive
loops may either always write or avoid writes in the failure case if the load is
atomic. CAS instructions always write.

> Has something changed from then ?

Yes, we now know that using locking atomics was a bad decision. Developers
actually require efficient and lock-free atomics. Since we didn't support them,
many applications were forced to add their own atomic implementations using
hacky inline assembler. It also resulted in a nasty ABI incompatibility between
GCC and LLVM. Yes - atomics are part of the ABI!

All that is much worse than worrying about a theoretical corner case that
can't happen in real applications - atomics only work on writeable memory
since their purpose is to synchronize reads with writes.

Cheers,
Wilco

Re: [wwwdocs, committed] gcc-14/changes.html (OpenMP): Tweak manual-update wording

2023-09-25 Thread Gerald Pfeifer

On Mon, 25 Sep 2023, Tobias Burnus wrote:
> The 'description' words looked a bit misplaced when reading the full 
> sentence. Likewise "the libnuma" - I changed that to simply "libnuma". 
> (Alternatives would be "the libnuma library" or "the numa library".)
> 
> Hence, I fixed my own wording :-)

Looks good (for the record).

Thanks,
Gerald

Re: [PATCH] RISC-V/testsuite: Fix ILP32 RVV failures from missing

2023-09-25 Thread Maciej W. Rozycki

On Mon, 25 Sep 2023, Maciej W. Rozycki wrote:

>  NB the use of this specific <assert.h> header, still in place elsewhere, 
> seems gratuitous to me.  We don't need or indeed want to print anything in 
> the test cases (unless verifying something specific to the print facility) 
> and if we want to avoid minor code duplication (i.e. not to have explicit:
> 
>   if (...)
> __builtin_abort ();
> 
> replicated across test cases), we can easily implement this via a local 
> header, there's no need to pull in a complex system facility.

 Overall we ought not to require any system headers in compile tests and 
then link and run tests need a functional target environment anyway.  So 
maybe the use of <assert.h> in run tests isn't as bad after all if not for 
the -DNDEBUG peculiarity.  However I still think the less we depend in 
verification on external components the better, that's one variable to 
exclude.

  Maciej

Re: [PATCH v3] aarch64: Fine-grained policies to control ldp-stp formation.

2023-09-25 Thread Andrew Pinski

On Mon, Sep 25, 2023 at 1:04 PM Andrew Pinski  wrote:
>
> On Mon, Sep 25, 2023 at 12:59 PM Philipp Tomsich
>  wrote:
> >
> > On Mon, 25 Sept 2023 at 21:54, Andrew Pinski  wrote:
> > >
> > > On Mon, Sep 25, 2023 at 12:50 PM Manos Anagnostakis
> > >  wrote:
> > > >
> > > > This patch implements the following TODO in 
> > > > gcc/config/aarch64/aarch64.cc
> > > > to provide the requested behaviour for handling ldp and stp:
> > > >
> > > >   /* Allow the tuning structure to disable LDP instruction formation
> > > >  from combining instructions (e.g., in peephole2).
> > > >  TODO: Implement fine-grained tuning control for LDP and STP:
> > > >1. control policies for load and store separately;
> > > >2. support the following policies:
> > > >   - default (use what is in the tuning structure)
> > > >   - always
> > > >   - never
> > > >   - aligned (only if the compiler can prove that the
> > > > load will be aligned to 2 * element_size)  */
> > > >
> > > > It provides two new and concrete target-specific command-line parameters
> > > > -param=aarch64-ldp-policy= and -param=aarch64-stp-policy=
> > > > > to give the ability to control load and store policies separately as
> > > > stated in part 1 of the TODO.
> > > >
> > > > The accepted values for both parameters are:
> > > > - 0: Use the policy of the tuning structure (default).
> > > > - 1: Emit ldp/stp regardless of alignment.
> > > > - 2: Do not emit ldp/stp.
> > > > - 3: In order to emit ldp/stp, first check if the load/store will
> > > >   be aligned to 2 * element_size.
> > >
> > > > Instead of a number, does it make sense to instead use a string
> > > > (ENUM) for this param?
> > > Also I think using --param is a bad idea if it is going to be
> > > documented in the user manual.
> > > Maybe a -m option should be used instead.
> >
> > See https://gcc.gnu.org/pipermail/gcc-patches/2023-September/631283.html
> > for the discussion triggering the change from -m... to --param and the
> > change to using a number instead of a string.
>
> That is the opposite of the current GCC practice across all targets.
> Things like this should be consistent and if one target decides to do
> it differently, then maybe it should NOT.
> Anyways we should document the correct coding style for options so we
> don't have these back and forths again.

Kyrylo:
>  It will have to take a number rather than a string but that should be okay, 
> as long as the right values are documented in invoke.texi.

No, it does not need to be a number. --param=ranger-debug= does not
take a number; it takes an enum.
One of the benefits of moving --param support over to .opt was to allow
more than just numbers.

Thanks,
Andrew


>
>
> Thanks,
> Andrew
>
> >
> > Thanks,
> > Philipp.
> >
> > >
> > > Thanks,
> > > Andrew
> > >
> > > >
> > > > gcc/ChangeLog:
> > > > * config/aarch64/aarch64-protos.h (struct tune_params): Add
> > > > appropriate enums for the policies.
> > > > * config/aarch64/aarch64-tuning-flags.def
> > > > (AARCH64_EXTRA_TUNING_OPTION): Remove superseded tuning
> > > > options.
> > > > * config/aarch64/aarch64.cc (aarch64_parse_ldp_policy): New
> > > > function to parse ldp-policy parameter.
> > > > (aarch64_parse_stp_policy): New function to parse stp-policy 
> > > > parameter.
> > > > (aarch64_override_options_internal): Call parsing functions.
> > > > (aarch64_operands_ok_for_ldpstp): Add parameter-value check and
> > > > alignment check and remove superseded ones.
> > > > (aarch64_operands_adjust_ok_for_ldpstp): Add parameter-value 
> > > > check and
> > > > alignment check and remove superseded ones.
> > > > * config/aarch64/aarch64.opt: Add options.
> > > > * doc/invoke.texi: Document the parameters accordingly.
> > > >
> > > > gcc/testsuite/ChangeLog:
> > > > * gcc.target/aarch64/ampere1-no_ldp_combine.c: Removed.
> > > > * gcc.target/aarch64/ldp_aligned.c: New test.
> > > > * gcc.target/aarch64/ldp_always.c: New test.
> > > > * gcc.target/aarch64/ldp_never.c: New test.
> > > > * gcc.target/aarch64/stp_aligned.c: New test.
> > > > * gcc.target/aarch64/stp_always.c: New test.
> > > > * gcc.target/aarch64/stp_never.c: New test.
> > > >
> > > > Signed-off-by: Manos Anagnostakis 
> > > > ---
> > > > Changes in v3:
> > > > - Changed command-line options to target-specific parameters
> > > >   and documented them accordingly in doc/invoke.texi.
> > > > - Removed ampere1-no_ldp_combine.c test as superseded.
> > > >
> > > >  gcc/config/aarch64/aarch64-protos.h   |  24 ++
> > > >  gcc/config/aarch64/aarch64-tuning-flags.def   |   8 -
> > > >  gcc/config/aarch64/aarch64.cc | 215 +-
> > > >  gcc/config/aarch64/aarch64.opt|   8 +
> > > >

Re: [PATCH] AArch64: Fix __sync_val_compare_and_swap [PR111404]

2023-09-25 Thread Ramana Radhakrishnan

On Wed, Sep 13, 2023 at 3:55 PM Wilco Dijkstra via Gcc-patches
 wrote:
>
>
> __sync_val_compare_and_swap may be used on 128-bit types and either calls the
> outline atomic code or uses an inline loop.  On AArch64 LDXP is only atomic if
> the value is stored successfully using STXP, but the current implementations
> do not perform the store if the comparison fails.  In this case the value 
> returned
> is not read atomically.

IIRC, the previous discussions in this space revolved around the
difficulty with the store writing to readonly memory which is why I
think we went with LDXP in this form.
Has something changed from then ?

Reviewed-by : Ramana Radhakrishnan  

regards
Ramana




>
> Passes regress/bootstrap, OK for commit?
>
> gcc/ChangeLog/
> PR target/111404
> * config/aarch64/aarch64.cc (aarch64_split_compare_and_swap):
> For 128-bit store the loaded value and loop if needed.
>
> libgcc/ChangeLog/
> PR target/111404
> * config/aarch64/lse.S (__aarch64_cas16_acq_rel): Execute STLXP using
> either new value or loaded value.
>
> ---
>
> diff --git a/gcc/config/aarch64/aarch64.cc b/gcc/config/aarch64/aarch64.cc
> index 
> 5e8d0a0c91bc7719de2a8c5627b354cf905a4db0..c44c0b979d0cc3755c61dcf566cfddedccebf1ea
>  100644
> --- a/gcc/config/aarch64/aarch64.cc
> +++ b/gcc/config/aarch64/aarch64.cc
> @@ -23413,11 +23413,11 @@ aarch64_split_compare_and_swap (rtx operands[])
>mem = operands[1];
>oldval = operands[2];
>newval = operands[3];
> -  is_weak = (operands[4] != const0_rtx);
>model_rtx = operands[5];
>scratch = operands[7];
>mode = GET_MODE (mem);
>model = memmodel_from_int (INTVAL (model_rtx));
> +  is_weak = operands[4] != const0_rtx && mode != TImode;
>
>/* When OLDVAL is zero and we want the strong version we can emit a tighter
>  loop:
> @@ -23478,6 +23478,33 @@ aarch64_split_compare_and_swap (rtx operands[])
>else
>  aarch64_gen_compare_reg (NE, scratch, const0_rtx);
>
> +  /* 128-bit LDAXP is not atomic unless STLXP succeeds.  So for a mismatch,
> + store the returned value and loop if the STLXP fails.  */
> +  if (mode == TImode)
> +{
> +  rtx_code_label *label3 = gen_label_rtx ();
> +  emit_jump_insn (gen_rtx_SET (pc_rtx, gen_rtx_LABEL_REF (Pmode, 
> label3)));
> +  emit_barrier ();
> +
> +  emit_label (label2);
> +  aarch64_emit_store_exclusive (mode, scratch, mem, rval, model_rtx);
> +
> +  if (aarch64_track_speculation)
> +   {
> + /* Emit an explicit compare instruction, so that we can correctly
> +track the condition codes.  */
> + rtx cc_reg = aarch64_gen_compare_reg (NE, scratch, const0_rtx);
> + x = gen_rtx_NE (GET_MODE (cc_reg), cc_reg, const0_rtx);
> +   }
> +  else
> +   x = gen_rtx_NE (VOIDmode, scratch, const0_rtx);
> +  x = gen_rtx_IF_THEN_ELSE (VOIDmode, x,
> +   gen_rtx_LABEL_REF (Pmode, label1), pc_rtx);
> +  aarch64_emit_unlikely_jump (gen_rtx_SET (pc_rtx, x));
> +
> +  label2 = label3;
> +}
> +
>emit_label (label2);
>
>/* If we used a CBNZ in the exchange loop emit an explicit compare with 
> RVAL
> diff --git a/libgcc/config/aarch64/lse.S b/libgcc/config/aarch64/lse.S
> index 
> dde3a28e07b13669533dfc5e8fac0a9a6ac33dbd..ba05047ff02b6fc5752235bffa924fc4a2f48c04
>  100644
> --- a/libgcc/config/aarch64/lse.S
> +++ b/libgcc/config/aarch64/lse.S
> @@ -160,6 +160,8 @@ see the files COPYING3 and COPYING.RUNTIME respectively.  
> If not, see
>  #define tmp0   16
>  #define tmp1   17
>  #define tmp2   15
> +#define tmp3   14
> +#define tmp4   13
>
>  #define BTI_C  hint34
>
> @@ -233,10 +235,11 @@ STARTFN   NAME(cas)
>  0: LDXPx0, x1, [x4]
> cmp x0, x(tmp0)
> ccmpx1, x(tmp1), #0, eq
> -   bne 1f
> -   STXPw(tmp2), x2, x3, [x4]
> -   cbnzw(tmp2), 0b
> -1: BARRIER
> +   cselx(tmp2), x2, x0, eq
> +   cselx(tmp3), x3, x1, eq
> +   STXPw(tmp4), x(tmp2), x(tmp3), [x4]
> +   cbnzw(tmp4), 0b
> +   BARRIER
> ret
>
>  #endif
>

Re: [PATCH] RISC-V/testsuite: Fix ILP32 RVV failures from missing

2023-09-25 Thread Maciej W. Rozycki

On Sun, 24 Sep 2023, Vineet Gupta wrote:

> This fix is great but is there a more general solution to the problem when the
> toolchain is built for say just rv64 (and thus only those headers) vs. test
> building for say rv32 (and failing to build due to lack of headers) or
> vice-versa.

 The MIPS port has logic in its target test script for combining test 
options and excluding ones that are mutually incompatible due to ABI or 
ISA restrictions.  It wasn't written by me and I have only minimally 
tweaked it (and then many years ago), so I can't remember all the details 
offhand.  See the top comment in gcc/testsuite/gcc.target/mips/mips.exp 
for further information including usage.

 I guess it would make sense to pinch that logic for our port, especially 
given our growing number of machine options.  I think it was mentioned at 
one of the patch review calls (Jeff?).

 NB the use of this specific <assert.h> header, still in place elsewhere, 
seems gratuitous to me.  We don't need or indeed want to print anything in 
the test cases (unless verifying something specific to the print facility) 
and if we want to avoid minor code duplication (i.e. not to have explicit:

  if (...)
__builtin_abort ();

replicated across test cases), we can easily implement this via a local 
header, there's no need to pull in a complex system facility.

 Also I find the use of this facility questionable in the first place: do 
we want these test cases to pass even in the case of an issue if run with 
-DNDEBUG as a target board option (which would cause some tests to be 
optimised away in their entirety)?

  Maciej

[PATCH 1/2] c++: remove NON_DEPENDENT_EXPR, part 1

2023-09-25 Thread Patrick Palka

Bootstrapped and regtested on x86_64-pc-linux-gnu, does this look OK
for trunk?

-- >8 --

This tree code dates all the way back to r69130[1] which implemented
typing of non-dependent expressions.  Its motivation was never clear (to
me at least) since the documentation for it in e.g. cp-tree.def doesn't
seem accurate anymore.  build_non_dependent_expr has since gained
a bunch of edge cases about whether (or how) to wrap certain templated
trees, making it hard to reason about in general.

So this patch removes this tree code, and temporarily turns
build_non_dependent_expr into the identity function.  The subsequent
patch will remove build_non_dependent_expr and adjust its callers
appropriately.

We now need to gracefully handle templated (sub)trees in a couple of
places, places which previously didn't see templated trees since they
didn't look through NON_DEPENDENT_EXPR.

[1]: https://gcc.gnu.org/pipermail/gcc-patches/2003-July/109355.html

gcc/c-family/ChangeLog:

* c-warn.cc (check_address_or_pointer_of_packed_member): Handle
templated CALL_EXPR naming a local extern function.

gcc/cp/ChangeLog:

* class.cc (instantiate_type): Remove NON_DEPENDENT_EXPR
handling.
* constexpr.cc (cxx_eval_constant_expression): Likewise.
(potential_constant_expression_1): Likewise.
* coroutines.cc (coro_validate_builtin_call): Don't
expect ALIGNOF_EXPR to be wrapped in NON_DEPENDENT_EXPR.
* cp-objcp-common.cc (cp_common_init_ts): Remove
NON_DEPENDENT_EXPR handling.
* cp-tree.def (NON_DEPENDENT_EXPR): Remove.
* cp-tree.h (build_non_dependent_expr): Temporarily redefine as
the identity function.
* cvt.cc (maybe_warn_nodiscard): Handle templated CALL_EXPR
naming a local extern function.
* cxx-pretty-print.cc (cxx_pretty_printer::expression): Remove
NON_DEPENDENT_EXPR handling.
* error.cc (dump_decl): Likewise.
(dump_expr): Likewise.
* expr.cc (mark_use): Likewise.
(mark_exp_read): Likewise.
* pt.cc (build_non_dependent_expr): Remove.
* tree.cc (lvalue_kind): Remove NON_DEPENDENT_EXPR handling.
(cp_stabilize_reference): Likewise.
* typeck.cc (warn_for_null_address): Likewise.
(cp_build_binary_op): Handle type-dependent SIZEOF_EXPR operands.
(cp_build_unary_op) : Don't fold inside a
template.

gcc/testsuite/ChangeLog:

* g++.dg/concepts/var-concept3.C: Adjust expected diagnostic
for attempting to call a variable concept.
---
 gcc/c-family/c-warn.cc   |  2 +-
 gcc/cp/class.cc  |  9 --
 gcc/cp/constexpr.cc  |  9 --
 gcc/cp/coroutines.cc |  3 +-
 gcc/cp/cp-objcp-common.cc|  1 -
 gcc/cp/cp-tree.def   | 11 ---
 gcc/cp/cp-tree.h |  2 +-
 gcc/cp/cvt.cc|  4 +-
 gcc/cp/cxx-pretty-print.cc   |  1 -
 gcc/cp/error.cc  |  8 --
 gcc/cp/expr.cc   |  2 -
 gcc/cp/pt.cc | 92 
 gcc/cp/tree.cc   |  5 --
 gcc/cp/typeck.cc | 13 +--
 gcc/testsuite/g++.dg/concepts/var-concept3.C |  2 +-
 15 files changed, 15 insertions(+), 149 deletions(-)

diff --git a/gcc/c-family/c-warn.cc b/gcc/c-family/c-warn.cc
index e67dd87a773..c07770394bf 100644
--- a/gcc/c-family/c-warn.cc
+++ b/gcc/c-family/c-warn.cc
@@ -3029,7 +3029,7 @@ check_address_or_pointer_of_packed_member (tree type, 
tree rhs)
   if (TREE_CODE (rhs) == CALL_EXPR)
{
  rhs = CALL_EXPR_FN (rhs); /* Pointer expression.  */
- if (rhs == NULL_TREE)
+ if (rhs == NULL_TREE || TREE_CODE (rhs) == IDENTIFIER_NODE)
return NULL_TREE;
  rhs = TREE_TYPE (rhs);/* Pointer type.  */
  /* We could be called while processing a template and RHS could be
diff --git a/gcc/cp/class.cc b/gcc/cp/class.cc
index b71333af1f8..10de0437242 100644
--- a/gcc/cp/class.cc
+++ b/gcc/cp/class.cc
@@ -8843,15 +8843,6 @@ instantiate_type (tree lhstype, tree rhs, tsubst_flags_t 
complain)
   rhs = BASELINK_FUNCTIONS (rhs);
 }
 
-  /* If we are in a template, and have a NON_DEPENDENT_EXPR, we cannot
- deduce any type information.  */
-  if (TREE_CODE (rhs) == NON_DEPENDENT_EXPR)
-{
-  if (complain & tf_error)
-   error ("not enough type information");
-  return error_mark_node;
-}
-
   /* There are only a few kinds of expressions that may have a type
  dependent on overload resolution.  */
   gcc_assert (TREE_CODE (rhs) == ADDR_EXPR
diff --git a/gcc/cp/constexpr.cc b/gcc/cp/constexpr.cc
index 2a6601c0cbc..8c9abeeec1b 100644
--- a/gcc/cp/constexpr.cc
+++ b/gcc/cp/constexpr.cc
@@ -8054,7 +8054,6 @@

[PATCH 2/2] c++: remove NON_DEPENDENT_EXPR, part 2

2023-09-25 Thread Patrick Palka

This much more mechanical patch removes build_non_dependent_expr
(and make_args_non_dependent) and adjusts callers accordingly,
no functional change.

gcc/cp/ChangeLog:

* call.cc (build_new_method_call): Remove calls to
build_non_dependent_expr and/or make_args_non_dependent.
* coroutines.cc (finish_co_return_stmt): Likewise.
* cp-tree.h (build_non_dependent_expr): Remove.
(make_args_non_dependent): Remove.
* decl2.cc (grok_array_decl): Remove calls to
build_non_dependent_expr and/or make_args_non_dependent.
(build_offset_ref_call_from_tree): Likewise.
* init.cc (build_new): Likewise.
* pt.cc (make_args_non_dependent): Remove.
(test_build_non_dependent_expr): Remove.
(cp_pt_cc_tests): Adjust.
* semantics.cc (finish_expr_stmt): Remove calls to
build_non_dependent_expr and/or make_args_non_dependent.
(finish_for_expr): Likewise.
(finish_call_expr): Likewise.
(finish_omp_atomic): Likewise.
* typeck.cc (finish_class_member_access_expr): Likewise.
(build_x_indirect_ref): Likewise.
(build_x_binary_op): Likewise.
(build_x_array_ref): Likewise.
(build_x_vec_perm_expr): Likewise.
(build_x_shufflevector): Likewise.
(build_x_unary_op): Likewise.
(cp_build_addressof): Likewise.
(build_x_conditional_expr): Likewise.
(build_x_compound_expr): Likewise.
(build_static_cast): Likewise.
(build_x_modify_expr): Likewise.
(check_return_expr): Likewise.
* typeck2.cc (build_x_arrow): Likewise.
---
 gcc/cp/call.cc   |  7 +--
 gcc/cp/coroutines.cc |  3 ---
 gcc/cp/cp-tree.h |  2 --
 gcc/cp/decl2.cc  | 17 +++-
 gcc/cp/init.cc   |  5 -
 gcc/cp/pt.cc | 46 
 gcc/cp/semantics.cc  | 25 ++--
 gcc/cp/typeck.cc | 31 -
 gcc/cp/typeck2.cc|  1 -
 9 files changed, 6 insertions(+), 131 deletions(-)

diff --git a/gcc/cp/call.cc b/gcc/cp/call.cc
index e8dafbd8ba6..15079ddf6dc 100644
--- a/gcc/cp/call.cc
+++ b/gcc/cp/call.cc
@@ -11430,12 +11430,7 @@ build_new_method_call (tree instance, tree fns, 
vec **args,
 }
 
   if (processing_template_decl)
-{
-  orig_args = args == NULL ? NULL : make_tree_vector_copy (*args);
-  instance = build_non_dependent_expr (instance);
-  if (args != NULL)
-   make_args_non_dependent (*args);
-}
+orig_args = args == NULL ? NULL : make_tree_vector_copy (*args);
 
   /* Process the argument list.  */
   if (args != NULL && *args != NULL)
diff --git a/gcc/cp/coroutines.cc b/gcc/cp/coroutines.cc
index df3cc820797..a5464becf7f 100644
--- a/gcc/cp/coroutines.cc
+++ b/gcc/cp/coroutines.cc
@@ -1351,9 +1351,6 @@ finish_co_return_stmt (location_t kw, tree expr)
 to undo it so we can try to treat it as an rvalue below.  */
   expr = maybe_undo_parenthesized_ref (expr);
 
-  if (processing_template_decl)
-   expr = build_non_dependent_expr (expr);
-
   if (error_operand_p (expr))
return error_mark_node;
 }
diff --git a/gcc/cp/cp-tree.h b/gcc/cp/cp-tree.h
index 66b9a9c4b9a..8b9a7d58462 100644
--- a/gcc/cp/cp-tree.h
+++ b/gcc/cp/cp-tree.h
@@ -7488,8 +7488,6 @@ extern bool any_value_dependent_elements_p  
(const_tree);
 extern bool dependent_omp_for_p(tree, tree, tree, 
tree);
 extern tree resolve_typename_type  (tree, bool);
 extern tree template_for_substitution  (tree);
-inline tree build_non_dependent_expr   (tree t) { return t; } // XXX 
remove
-extern void make_args_non_dependent(vec *);
 extern bool reregister_specialization  (tree, tree, tree);
 extern tree instantiate_non_dependent_expr (tree, tsubst_flags_t = 
tf_error);
 extern tree instantiate_non_dependent_expr_internal (tree, tsubst_flags_t);
diff --git a/gcc/cp/decl2.cc b/gcc/cp/decl2.cc
index 344e19ec98b..0aa1e355972 100644
--- a/gcc/cp/decl2.cc
+++ b/gcc/cp/decl2.cc
@@ -427,14 +427,8 @@ grok_array_decl (location_t loc, tree array_expr, tree 
index_exp,
  return build_min_nt_loc (loc, ARRAY_REF, array_expr, index_exp,
   NULL_TREE, NULL_TREE);
}
-  array_expr = build_non_dependent_expr (array_expr);
-  if (index_exp)
-   index_exp = build_non_dependent_expr (index_exp);
-  else
-   {
- orig_index_exp_list = make_tree_vector_copy (*index_exp_list);
- make_args_non_dependent (*index_exp_list);
-   }
+  if (!index_exp)
+   orig_index_exp_list = make_tree_vector_copy (*index_exp_list);
 }
 
   type = TREE_TYPE (array_expr);
@@ -5435,18 +5429,13 @@ build_offset_ref_call_from_tree (tree fn, vec **args,
   orig_args = make_tree_vector_copy (*args);
 
   /* Transform the arguments and add the implicit "this"
-parameter.  That must be done

[pushed] [PR111497][LRA]: Copy substituted equivalence

2023-09-25 Thread Vladimir Makarov


The following patch solves

https://gcc.gnu.org/bugzilla/show_bug.cgi?id=111497

The patch was successfully tested and bootstrapped on x86-64 and aarch64.
commit 3c23defed384cf17518ad6c817d94463a445d21b
Author: Vladimir N. Makarov 
Date:   Mon Sep 25 16:19:50 2023 -0400

[PR111497][LRA]: Copy substituted equivalence

When we substitute the equivalence and it becomes shared, we can fail
to correctly update reg info used by LRA.  This can result in wrong
code generation, e.g. because of incorrect live analysis.  It can also
result in a compiler crash as the pseudo survives RA.  This is exactly
what happened for the PR.  This patch solves this problem by
unsharing substituted equivalences.

gcc/ChangeLog:

PR middle-end/111497
* lra-constraints.cc (lra_constraints): Copy substituted
equivalence.
* lra.cc (lra): Change comment for calling unshare_all_rtl_again.

gcc/testsuite/ChangeLog:

PR middle-end/111497
* g++.target/i386/pr111497.C: new test.

diff --git a/gcc/lra-constraints.cc b/gcc/lra-constraints.cc
index 3aaa4906999..76a1393ab23 100644
--- a/gcc/lra-constraints.cc
+++ b/gcc/lra-constraints.cc
@@ -5424,6 +5424,11 @@ lra_constraints (bool first_p)
 	   loc_equivalence_callback, curr_insn);
 	  if (old != *curr_id->operand_loc[0])
 		{
+		  /* If we substitute pseudo by shared equivalence, we can fail
+		 to update LRA reg info and this can result in many
+		 unexpected consequences.  So keep rtl unshared:  */
+		  *curr_id->operand_loc[0]
+		= copy_rtx (*curr_id->operand_loc[0]);
 		  lra_update_insn_regno_info (curr_insn);
 		  changed_p = true;
 		}
diff --git a/gcc/lra.cc b/gcc/lra.cc
index 563aff10b96..361f84fdacb 100644
--- a/gcc/lra.cc
+++ b/gcc/lra.cc
@@ -2579,9 +2579,8 @@ lra (FILE *f)
   if (inserted_p)
 commit_edge_insertions ();
 
-  /* Replacing pseudos with their memory equivalents might have
- created shared rtx.  Subsequent passes would get confused
- by this, so unshare everything here.  */
+  /* Subsequent passes expect that rtl is unshared, so unshare everything
+ here.  */
   unshare_all_rtl_again (get_insns ());
 
   if (flag_checking)
diff --git a/gcc/testsuite/g++.target/i386/pr111497.C b/gcc/testsuite/g++.target/i386/pr111497.C
new file mode 100644
index 000..a645bb95907
--- /dev/null
+++ b/gcc/testsuite/g++.target/i386/pr111497.C
@@ -0,0 +1,22 @@
+// { dg-do compile { target ia32 } }
+// { dg-options "-march=i686 -mtune=generic -fPIC -O2 -g" }
+
+class A;
+struct B { const char *b1; int b2; };
+struct C : B { C (const char *x, int y) { b1 = x; b2 = y; } };
+struct D : C { D (B x) : C (x.b1, x.b2) {} };
+struct E { E (A *); };
+struct F : E { D f1, f2, f3, f4, f5, f6; F (A *, const B &, const B &, const B &); };
+struct G : F { G (A *, const B &, const B &, const B &); };
+struct H { int h; };
+struct I { H i; };
+struct J { I *j; };
+struct A : J {};
+inline F::F (A *x, const B &y, const B &z, const B &w)
+  : E(x), f1(y), f2(z), f3(w), f4(y), f5(z), f6(w) {}
+G::G (A *x, const B &y, const B &z, const B &w) : F(x, y, z, w)
+{
+  H *h = &x->j->i;
+  if (h)
+h->h++;
+}

Re: [PATCH v3] aarch64: Fine-grained policies to control ldp-stp formation.

2023-09-25 Thread Andrew Pinski

On Mon, Sep 25, 2023 at 12:59 PM Philipp Tomsich
 wrote:
>
> On Mon, 25 Sept 2023 at 21:54, Andrew Pinski  wrote:
> >
> > On Mon, Sep 25, 2023 at 12:50 PM Manos Anagnostakis
> >  wrote:
> > >
> > > This patch implements the following TODO in gcc/config/aarch64/aarch64.cc
> > > to provide the requested behaviour for handling ldp and stp:
> > >
> > >   /* Allow the tuning structure to disable LDP instruction formation
> > >  from combining instructions (e.g., in peephole2).
> > >  TODO: Implement fine-grained tuning control for LDP and STP:
> > >1. control policies for load and store separately;
> > >2. support the following policies:
> > >   - default (use what is in the tuning structure)
> > >   - always
> > >   - never
> > >   - aligned (only if the compiler can prove that the
> > > load will be aligned to 2 * element_size)  */
> > >
> > > It provides two new and concrete target-specific command-line parameters
> > > --param=aarch64-ldp-policy= and --param=aarch64-stp-policy=
> > > to give the ability to control load and store policies separately as
> > > stated in part 1 of the TODO.
> > >
> > > The accepted values for both parameters are:
> > > - 0: Use the policy of the tuning structure (default).
> > > - 1: Emit ldp/stp regardless of alignment.
> > > - 2: Do not emit ldp/stp.
> > > - 3: In order to emit ldp/stp, first check if the load/store will
> > >   be aligned to 2 * element_size.
> >
> > Instead of a number, does it make sense to instead use a string
> > (ENUM) for this param?
> > Also I think using --param is a bad idea if it is going to be
> > documented in the user manual.
> > Maybe a -m option should be used instead.
>
> See https://gcc.gnu.org/pipermail/gcc-patches/2023-September/631283.html
> for the discussion triggering the change from -m... to --param and the
> change to using a number instead of a string.

That is the opposite of the current GCC practice across all targets.
Things like this should be consistent and if one target decides to do
it differently, then maybe it should NOT.
Anyways we should document the correct coding style for options so we
don't have these back and forths again.


Thanks,
Andrew

>
> Thanks,
> Philipp.
>
> >
> > Thanks,
> > Andrew
> >
> > >
> > > gcc/ChangeLog:
> > > * config/aarch64/aarch64-protos.h (struct tune_params): Add
> > > appropriate enums for the policies.
> > > * config/aarch64/aarch64-tuning-flags.def
> > > (AARCH64_EXTRA_TUNING_OPTION): Remove superseded tuning
> > > options.
> > > * config/aarch64/aarch64.cc (aarch64_parse_ldp_policy): New
> > > function to parse ldp-policy parameter.
> > > (aarch64_parse_stp_policy): New function to parse stp-policy 
> > > parameter.
> > > (aarch64_override_options_internal): Call parsing functions.
> > > (aarch64_operands_ok_for_ldpstp): Add parameter-value check and
> > > alignment check and remove superseded ones.
> > > (aarch64_operands_adjust_ok_for_ldpstp): Add parameter-value 
> > > check and
> > > alignment check and remove superseded ones.
> > > * config/aarch64/aarch64.opt: Add options.
> > > * doc/invoke.texi: Document the parameters accordingly.
> > >
> > > gcc/testsuite/ChangeLog:
> > > * gcc.target/aarch64/ampere1-no_ldp_combine.c: Removed.
> > > * gcc.target/aarch64/ldp_aligned.c: New test.
> > > * gcc.target/aarch64/ldp_always.c: New test.
> > > * gcc.target/aarch64/ldp_never.c: New test.
> > > * gcc.target/aarch64/stp_aligned.c: New test.
> > > * gcc.target/aarch64/stp_always.c: New test.
> > > * gcc.target/aarch64/stp_never.c: New test.
> > >
> > > Signed-off-by: Manos Anagnostakis 
> > > ---
> > > Changes in v3:
> > > - Changed command-line options to target-specific parameters
> > >   and documented them accordingly in doc/invoke.texi.
> > > - Removed ampere1-no_ldp_combine.c test as superseded.
> > >
> > >  gcc/config/aarch64/aarch64-protos.h   |  24 ++
> > >  gcc/config/aarch64/aarch64-tuning-flags.def   |   8 -
> > >  gcc/config/aarch64/aarch64.cc | 215 +-
> > >  gcc/config/aarch64/aarch64.opt|   8 +
> > >  gcc/doc/invoke.texi   |  30 +++
> > >  .../aarch64/ampere1-no_ldp_combine.c  |  11 -
> > >  .../gcc.target/aarch64/ldp_aligned.c  |  66 ++
> > >  gcc/testsuite/gcc.target/aarch64/ldp_always.c |  66 ++
> > >  gcc/testsuite/gcc.target/aarch64/ldp_never.c  |  66 ++
> > >  .../gcc.target/aarch64/stp_aligned.c  |  60 +
> > >  gcc/testsuite/gcc.target/aarch64/stp_always.c |  60 +
> > >  gcc/testsuite/gcc.target/aarch64/stp_never.c  |  60 +
> > >  12 files changed, 600 insertions(+), 74 deletions(-)
> > >  delete mode 100644 
> > >

Re: [PATCH v3] aarch64: Fine-grained policies to control ldp-stp formation.

2023-09-25 Thread Manos Anagnostakis

Hello Andrew,

what you describe was my previous version, but @Kyrylo Tkachov
 prompted me to use -param.

Thank you for taking a look anyway!

Manos Anagnostakis | Compiler Engineer
| E: manos.anagnosta...@vrull.eu

VRULL GmbH | Beatrixgasse 32 1030 Vienna | W: www.vrull.eu

Στις Δευ 25 Σεπ 2023, 22:54 ο χρήστης Andrew Pinski 
έγραψε:

> On Mon, Sep 25, 2023 at 12:50 PM Manos Anagnostakis
>  wrote:
> >
> > This patch implements the following TODO in gcc/config/aarch64/aarch64.cc
> > to provide the requested behaviour for handling ldp and stp:
> >
> >   /* Allow the tuning structure to disable LDP instruction formation
> >  from combining instructions (e.g., in peephole2).
> >  TODO: Implement fine-grained tuning control for LDP and STP:
> >1. control policies for load and store separately;
> >2. support the following policies:
> >   - default (use what is in the tuning structure)
> >   - always
> >   - never
> >   - aligned (only if the compiler can prove that the
> > load will be aligned to 2 * element_size)  */
> >
> > It provides two new and concrete target-specific command-line parameters
> > --param=aarch64-ldp-policy= and --param=aarch64-stp-policy=
> > to give the ability to control load and store policies separately as
> > stated in part 1 of the TODO.
> >
> > The accepted values for both parameters are:
> > - 0: Use the policy of the tuning structure (default).
> > - 1: Emit ldp/stp regardless of alignment.
> > - 2: Do not emit ldp/stp.
> > - 3: In order to emit ldp/stp, first check if the load/store will
> >   be aligned to 2 * element_size.
>
> Instead of a number, does it make sense to instead use a string
> (ENUM) for this param?
> Also I think using --param is a bad idea if it is going to be
> documented in the user manual.
> Maybe a -m option should be used instead.
>
> Thanks,
> Andrew
>
> >
> > gcc/ChangeLog:
> > * config/aarch64/aarch64-protos.h (struct tune_params): Add
> > appropriate enums for the policies.
> > * config/aarch64/aarch64-tuning-flags.def
> > (AARCH64_EXTRA_TUNING_OPTION): Remove superseded tuning
> > options.
> > * config/aarch64/aarch64.cc (aarch64_parse_ldp_policy): New
> > function to parse ldp-policy parameter.
> > (aarch64_parse_stp_policy): New function to parse stp-policy
> parameter.
> > (aarch64_override_options_internal): Call parsing functions.
> > (aarch64_operands_ok_for_ldpstp): Add parameter-value check and
> > alignment check and remove superseded ones.
> > (aarch64_operands_adjust_ok_for_ldpstp): Add parameter-value
> check and
> > alignment check and remove superseded ones.
> > * config/aarch64/aarch64.opt: Add options.
> > * doc/invoke.texi: Document the parameters accordingly.
> >
> > gcc/testsuite/ChangeLog:
> > * gcc.target/aarch64/ampere1-no_ldp_combine.c: Removed.
> > * gcc.target/aarch64/ldp_aligned.c: New test.
> > * gcc.target/aarch64/ldp_always.c: New test.
> > * gcc.target/aarch64/ldp_never.c: New test.
> > * gcc.target/aarch64/stp_aligned.c: New test.
> > * gcc.target/aarch64/stp_always.c: New test.
> > * gcc.target/aarch64/stp_never.c: New test.
> >
> > Signed-off-by: Manos Anagnostakis 
> > ---
> > Changes in v3:
> > - Changed command-line options to target-specific parameters
> >   and documented them accordingly in doc/invoke.texi.
> > - Removed ampere1-no_ldp_combine.c test as superseded.
> >
> >  gcc/config/aarch64/aarch64-protos.h   |  24 ++
> >  gcc/config/aarch64/aarch64-tuning-flags.def   |   8 -
> >  gcc/config/aarch64/aarch64.cc | 215 +-
> >  gcc/config/aarch64/aarch64.opt|   8 +
> >  gcc/doc/invoke.texi   |  30 +++
> >  .../aarch64/ampere1-no_ldp_combine.c  |  11 -
> >  .../gcc.target/aarch64/ldp_aligned.c  |  66 ++
> >  gcc/testsuite/gcc.target/aarch64/ldp_always.c |  66 ++
> >  gcc/testsuite/gcc.target/aarch64/ldp_never.c  |  66 ++
> >  .../gcc.target/aarch64/stp_aligned.c  |  60 +
> >  gcc/testsuite/gcc.target/aarch64/stp_always.c |  60 +
> >  gcc/testsuite/gcc.target/aarch64/stp_never.c  |  60 +
> >  12 files changed, 600 insertions(+), 74 deletions(-)
> >  delete mode 100644
> gcc/testsuite/gcc.target/aarch64/ampere1-no_ldp_combine.c
> >  create mode 100644 gcc/testsuite/gcc.target/aarch64/ldp_aligned.c
> >  create mode 100644 gcc/testsuite/gcc.target/aarch64/ldp_always.c
> >  create mode 100644 gcc/testsuite/gcc.target/aarch64/ldp_never.c
> >  create mode 100644 gcc/testsuite/gcc.target/aarch64/stp_aligned.c
> >  create mode 100644 gcc/testsuite/gcc.target/aarch64/stp_always.c
> >  create mode 100644 gcc/testsuite/gcc.target/aarch64/stp_never.c
> >
> > diff --git a/gcc/config/aarch64/aarch64-protos.h
>

Re: [PATCH v3] aarch64: Fine-grained policies to control ldp-stp formation.

2023-09-25 Thread Philipp Tomsich

On Mon, 25 Sept 2023 at 21:54, Andrew Pinski  wrote:
>
> On Mon, Sep 25, 2023 at 12:50 PM Manos Anagnostakis
>  wrote:
> >
> > This patch implements the following TODO in gcc/config/aarch64/aarch64.cc
> > to provide the requested behaviour for handling ldp and stp:
> >
> >   /* Allow the tuning structure to disable LDP instruction formation
> >  from combining instructions (e.g., in peephole2).
> >  TODO: Implement fine-grained tuning control for LDP and STP:
> >1. control policies for load and store separately;
> >2. support the following policies:
> >   - default (use what is in the tuning structure)
> >   - always
> >   - never
> >   - aligned (only if the compiler can prove that the
> > load will be aligned to 2 * element_size)  */
> >
> > It provides two new and concrete target-specific command-line parameters
> > --param=aarch64-ldp-policy= and --param=aarch64-stp-policy=
> > to give the ability to control load and store policies separately as
> > stated in part 1 of the TODO.
> >
> > The accepted values for both parameters are:
> > - 0: Use the policy of the tuning structure (default).
> > - 1: Emit ldp/stp regardless of alignment.
> > - 2: Do not emit ldp/stp.
> > - 3: In order to emit ldp/stp, first check if the load/store will
> >   be aligned to 2 * element_size.
>
> Instead of a number, does it make sense to instead use a string
> (ENUM) for this param?
> Also I think using --param is a bad idea if it is going to be
> documented in the user manual.
> Maybe a -m option should be used instead.

See https://gcc.gnu.org/pipermail/gcc-patches/2023-September/631283.html
for the discussion triggering the change from -m... to --param and the
change to using a number instead of a string.

Thanks,
Philipp.

>
> Thanks,
> Andrew
>
> >
> > gcc/ChangeLog:
> > * config/aarch64/aarch64-protos.h (struct tune_params): Add
> > appropriate enums for the policies.
> > * config/aarch64/aarch64-tuning-flags.def
> > (AARCH64_EXTRA_TUNING_OPTION): Remove superseded tuning
> > options.
> > * config/aarch64/aarch64.cc (aarch64_parse_ldp_policy): New
> > function to parse ldp-policy parameter.
> > (aarch64_parse_stp_policy): New function to parse stp-policy 
> > parameter.
> > (aarch64_override_options_internal): Call parsing functions.
> > (aarch64_operands_ok_for_ldpstp): Add parameter-value check and
> > alignment check and remove superseded ones.
> > (aarch64_operands_adjust_ok_for_ldpstp): Add parameter-value check 
> > and
> > alignment check and remove superseded ones.
> > * config/aarch64/aarch64.opt: Add options.
> > * doc/invoke.texi: Document the parameters accordingly.
> >
> > gcc/testsuite/ChangeLog:
> > * gcc.target/aarch64/ampere1-no_ldp_combine.c: Removed.
> > * gcc.target/aarch64/ldp_aligned.c: New test.
> > * gcc.target/aarch64/ldp_always.c: New test.
> > * gcc.target/aarch64/ldp_never.c: New test.
> > * gcc.target/aarch64/stp_aligned.c: New test.
> > * gcc.target/aarch64/stp_always.c: New test.
> > * gcc.target/aarch64/stp_never.c: New test.
> >
> > Signed-off-by: Manos Anagnostakis 
> > ---
> > Changes in v3:
> > - Changed command-line options to target-specific parameters
> >   and documented them accordingly in doc/invoke.texi.
> > - Removed ampere1-no_ldp_combine.c test as superseded.
> >
> >  gcc/config/aarch64/aarch64-protos.h   |  24 ++
> >  gcc/config/aarch64/aarch64-tuning-flags.def   |   8 -
> >  gcc/config/aarch64/aarch64.cc | 215 +-
> >  gcc/config/aarch64/aarch64.opt|   8 +
> >  gcc/doc/invoke.texi   |  30 +++
> >  .../aarch64/ampere1-no_ldp_combine.c  |  11 -
> >  .../gcc.target/aarch64/ldp_aligned.c  |  66 ++
> >  gcc/testsuite/gcc.target/aarch64/ldp_always.c |  66 ++
> >  gcc/testsuite/gcc.target/aarch64/ldp_never.c  |  66 ++
> >  .../gcc.target/aarch64/stp_aligned.c  |  60 +
> >  gcc/testsuite/gcc.target/aarch64/stp_always.c |  60 +
> >  gcc/testsuite/gcc.target/aarch64/stp_never.c  |  60 +
> >  12 files changed, 600 insertions(+), 74 deletions(-)
> >  delete mode 100644 
> > gcc/testsuite/gcc.target/aarch64/ampere1-no_ldp_combine.c
> >  create mode 100644 gcc/testsuite/gcc.target/aarch64/ldp_aligned.c
> >  create mode 100644 gcc/testsuite/gcc.target/aarch64/ldp_always.c
> >  create mode 100644 gcc/testsuite/gcc.target/aarch64/ldp_never.c
> >  create mode 100644 gcc/testsuite/gcc.target/aarch64/stp_aligned.c
> >  create mode 100644 gcc/testsuite/gcc.target/aarch64/stp_always.c
> >  create mode 100644 gcc/testsuite/gcc.target/aarch64/stp_never.c
> >
> > diff --git a/gcc/config/aarch64/aarch64-protos.h 
> > b/gcc/config/aarch64/aarch64-protos.h
> > index

Re: [PATCH v3] aarch64: Fine-grained policies to control ldp-stp formation.

2023-09-25 Thread Andrew Pinski

On Mon, Sep 25, 2023 at 12:50 PM Manos Anagnostakis
 wrote:
>
> This patch implements the following TODO in gcc/config/aarch64/aarch64.cc
> to provide the requested behaviour for handling ldp and stp:
>
>   /* Allow the tuning structure to disable LDP instruction formation
>  from combining instructions (e.g., in peephole2).
>  TODO: Implement fine-grained tuning control for LDP and STP:
>1. control policies for load and store separately;
>2. support the following policies:
>   - default (use what is in the tuning structure)
>   - always
>   - never
>   - aligned (only if the compiler can prove that the
> load will be aligned to 2 * element_size)  */
>
> It provides two new and concrete target-specific command-line parameters
> --param=aarch64-ldp-policy= and --param=aarch64-stp-policy=
> to give the ability to control load and store policies separately as
> stated in part 1 of the TODO.
>
> The accepted values for both parameters are:
> - 0: Use the policy of the tuning structure (default).
> - 1: Emit ldp/stp regardless of alignment.
> - 2: Do not emit ldp/stp.
> - 3: In order to emit ldp/stp, first check if the load/store will
>   be aligned to 2 * element_size.

Instead of a number, does it make sense to instead use a string
(ENUM) for this param?
Also I think using --param is a bad idea if it is going to be
documented in the user manual.
Maybe a -m option should be used instead.

Thanks,
Andrew

>
> gcc/ChangeLog:
> * config/aarch64/aarch64-protos.h (struct tune_params): Add
> appropriate enums for the policies.
> * config/aarch64/aarch64-tuning-flags.def
> (AARCH64_EXTRA_TUNING_OPTION): Remove superseded tuning
> options.
> * config/aarch64/aarch64.cc (aarch64_parse_ldp_policy): New
> function to parse ldp-policy parameter.
> (aarch64_parse_stp_policy): New function to parse stp-policy 
> parameter.
> (aarch64_override_options_internal): Call parsing functions.
> (aarch64_operands_ok_for_ldpstp): Add parameter-value check and
> alignment check and remove superseded ones.
> (aarch64_operands_adjust_ok_for_ldpstp): Add parameter-value check and
> alignment check and remove superseded ones.
> * config/aarch64/aarch64.opt: Add options.
> * doc/invoke.texi: Document the parameters accordingly.
>
> gcc/testsuite/ChangeLog:
> * gcc.target/aarch64/ampere1-no_ldp_combine.c: Removed.
> * gcc.target/aarch64/ldp_aligned.c: New test.
> * gcc.target/aarch64/ldp_always.c: New test.
> * gcc.target/aarch64/ldp_never.c: New test.
> * gcc.target/aarch64/stp_aligned.c: New test.
> * gcc.target/aarch64/stp_always.c: New test.
> * gcc.target/aarch64/stp_never.c: New test.
>
> Signed-off-by: Manos Anagnostakis 
> ---
> Changes in v3:
> - Changed command-line options to target-specific parameters
>   and documented them accordingly in doc/invoke.texi.
> - Removed ampere1-no_ldp_combine.c test as superseded.
>
>  gcc/config/aarch64/aarch64-protos.h   |  24 ++
>  gcc/config/aarch64/aarch64-tuning-flags.def   |   8 -
>  gcc/config/aarch64/aarch64.cc | 215 +-
>  gcc/config/aarch64/aarch64.opt|   8 +
>  gcc/doc/invoke.texi   |  30 +++
>  .../aarch64/ampere1-no_ldp_combine.c  |  11 -
>  .../gcc.target/aarch64/ldp_aligned.c  |  66 ++
>  gcc/testsuite/gcc.target/aarch64/ldp_always.c |  66 ++
>  gcc/testsuite/gcc.target/aarch64/ldp_never.c  |  66 ++
>  .../gcc.target/aarch64/stp_aligned.c  |  60 +
>  gcc/testsuite/gcc.target/aarch64/stp_always.c |  60 +
>  gcc/testsuite/gcc.target/aarch64/stp_never.c  |  60 +
>  12 files changed, 600 insertions(+), 74 deletions(-)
>  delete mode 100644 gcc/testsuite/gcc.target/aarch64/ampere1-no_ldp_combine.c
>  create mode 100644 gcc/testsuite/gcc.target/aarch64/ldp_aligned.c
>  create mode 100644 gcc/testsuite/gcc.target/aarch64/ldp_always.c
>  create mode 100644 gcc/testsuite/gcc.target/aarch64/ldp_never.c
>  create mode 100644 gcc/testsuite/gcc.target/aarch64/stp_aligned.c
>  create mode 100644 gcc/testsuite/gcc.target/aarch64/stp_always.c
>  create mode 100644 gcc/testsuite/gcc.target/aarch64/stp_never.c
>
> diff --git a/gcc/config/aarch64/aarch64-protos.h 
> b/gcc/config/aarch64/aarch64-protos.h
> index 70303d6fd95..be1d73490ed 100644
> --- a/gcc/config/aarch64/aarch64-protos.h
> +++ b/gcc/config/aarch64/aarch64-protos.h
> @@ -568,6 +568,30 @@ struct tune_params
>/* Place prefetch struct pointer at the end to enable type checking
>   errors when tune_params misses elements (e.g., from erroneous merges).  
> */
>const struct cpu_prefetch_tune *prefetch;
> +/* An enum specifying how to handle load pairs using a fine-grained policy:
> +   - LDP_POLICY_ALIGNED: Emit ldp

[PATCH v3] aarch64: Fine-grained policies to control ldp-stp formation.

2023-09-25 Thread Manos Anagnostakis

This patch implements the following TODO in gcc/config/aarch64/aarch64.cc
to provide the requested behaviour for handling ldp and stp:

  /* Allow the tuning structure to disable LDP instruction formation
 from combining instructions (e.g., in peephole2).
 TODO: Implement fine-grained tuning control for LDP and STP:
   1. control policies for load and store separately;
   2. support the following policies:
  - default (use what is in the tuning structure)
  - always
  - never
  - aligned (only if the compiler can prove that the
load will be aligned to 2 * element_size)  */

It provides two new and concrete target-specific command-line parameters
--param=aarch64-ldp-policy= and --param=aarch64-stp-policy=
to give the ability to control load and store policies separately as
stated in part 1 of the TODO.

The accepted values for both parameters are:
- 0: Use the policy of the tuning structure (default).
- 1: Emit ldp/stp regardless of alignment.
- 2: Do not emit ldp/stp.
- 3: In order to emit ldp/stp, first check if the load/store will
  be aligned to 2 * element_size.

gcc/ChangeLog:
* config/aarch64/aarch64-protos.h (struct tune_params): Add
appropriate enums for the policies.
* config/aarch64/aarch64-tuning-flags.def
(AARCH64_EXTRA_TUNING_OPTION): Remove superseded tuning
options.
* config/aarch64/aarch64.cc (aarch64_parse_ldp_policy): New
function to parse ldp-policy parameter.
(aarch64_parse_stp_policy): New function to parse stp-policy parameter.
(aarch64_override_options_internal): Call parsing functions.
(aarch64_operands_ok_for_ldpstp): Add parameter-value check and
alignment check and remove superseded ones.
(aarch64_operands_adjust_ok_for_ldpstp): Add parameter-value check and
alignment check and remove superseded ones.
* config/aarch64/aarch64.opt: Add options.
* doc/invoke.texi: Document the parameters accordingly.

gcc/testsuite/ChangeLog:
* gcc.target/aarch64/ampere1-no_ldp_combine.c: Removed.
* gcc.target/aarch64/ldp_aligned.c: New test.
* gcc.target/aarch64/ldp_always.c: New test.
* gcc.target/aarch64/ldp_never.c: New test.
* gcc.target/aarch64/stp_aligned.c: New test.
* gcc.target/aarch64/stp_always.c: New test.
* gcc.target/aarch64/stp_never.c: New test.

Signed-off-by: Manos Anagnostakis 
---
Changes in v3:
- Changed command-line options to target-specific parameters
  and documented them accordingly in doc/invoke.texi.
- Removed ampere1-no_ldp_combine.c test as superseded.

 gcc/config/aarch64/aarch64-protos.h   |  24 ++
 gcc/config/aarch64/aarch64-tuning-flags.def   |   8 -
 gcc/config/aarch64/aarch64.cc | 215 +-
 gcc/config/aarch64/aarch64.opt|   8 +
 gcc/doc/invoke.texi   |  30 +++
 .../aarch64/ampere1-no_ldp_combine.c  |  11 -
 .../gcc.target/aarch64/ldp_aligned.c  |  66 ++
 gcc/testsuite/gcc.target/aarch64/ldp_always.c |  66 ++
 gcc/testsuite/gcc.target/aarch64/ldp_never.c  |  66 ++
 .../gcc.target/aarch64/stp_aligned.c  |  60 +
 gcc/testsuite/gcc.target/aarch64/stp_always.c |  60 +
 gcc/testsuite/gcc.target/aarch64/stp_never.c  |  60 +
 12 files changed, 600 insertions(+), 74 deletions(-)
 delete mode 100644 gcc/testsuite/gcc.target/aarch64/ampere1-no_ldp_combine.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/ldp_aligned.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/ldp_always.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/ldp_never.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/stp_aligned.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/stp_always.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/stp_never.c

diff --git a/gcc/config/aarch64/aarch64-protos.h 
b/gcc/config/aarch64/aarch64-protos.h
index 70303d6fd95..be1d73490ed 100644
--- a/gcc/config/aarch64/aarch64-protos.h
+++ b/gcc/config/aarch64/aarch64-protos.h
@@ -568,6 +568,30 @@ struct tune_params
   /* Place prefetch struct pointer at the end to enable type checking
  errors when tune_params misses elements (e.g., from erroneous merges).  */
   const struct cpu_prefetch_tune *prefetch;
+/* An enum specifying how to handle load pairs using a fine-grained policy:
+   - LDP_POLICY_ALIGNED: Emit ldp if the source pointer is aligned
+   to at least double the alignment of the type.
+   - LDP_POLICY_ALWAYS: Emit ldp regardless of alignment.
+   - LDP_POLICY_NEVER: Do not emit ldp.  */
+
+  enum aarch64_ldp_policy_model
+  {
+LDP_POLICY_ALIGNED,
+LDP_POLICY_ALWAYS,
+LDP_POLICY_NEVER
+  } ldp_policy_model;
+/* An enum specifying how to handle store pairs using a fine-grained policy:
+   - STP_POLICY_ALIGNED: Emit stp if the source pointer is aligned
+   to at least

[wwwdocs, committed] gcc-14/changes.html (OpenMP): Tweak manual-update wording

2023-09-25 Thread Tobias Burnus


The 'description' words looked a bit misplaced when reading the full sentence.
Likewise "the libnuma" - I changed that to simply "libnuma". (Alternatives 
would be
"the libnuma library" or "the numa library".)

Hence, I fixed my own wording :-)

Committed as attached. See also https://gcc.gnu.org/gcc-14/changes.html

Tobias
-
Siemens Electronic Design Automation GmbH; Anschrift: Arnulfstraße 201, 80634 
München; Gesellschaft mit beschränkter Haftung; Geschäftsführer: Thomas 
Heurung, Frank Thürauf; Sitz der Gesellschaft: München; Registergericht 
München, HRB 106955
commit 50c5c9f94be7b26a2853f64909fa61ebf60086aa
Author: Tobias Burnus 
Date:   Mon Sep 25 19:36:31 2023 +0200

gcc-14/changes.html (OpenMP): Tweak manual-update wording
---
 htdocs/gcc-14/changes.html | 10 +-
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/htdocs/gcc-14/changes.html b/htdocs/gcc-14/changes.html
index 2ca05ad0..c817dde4 100644
--- a/htdocs/gcc-14/changes.html
+++ b/htdocs/gcc-14/changes.html
@@ -74,11 +74,11 @@ a work-in-progress.
 
   The https://gcc.gnu.org/onlinedocs/libgomp/;>GNU Offloading and
   Multi Processing Runtime Library Manual has been updated and extended,
-  improving especially the ICV description, memory allocation, and the
-  description of the environment variables and OpenMP routines. On Linux,
-  the https://github.com/numactl/numactl;>libnuma is now used
-  for allocators requesting the nearest-partition trait as detailed in the
-  manual.
+  improving especially the description of ICVs, memory allocation, environment variables and OpenMP
+  routines. On Linux, https://github.com/numactl/numactl;>libnuma
+  is now used for allocators requesting the nearest-partition trait as
+  detailed in the manual.

[patch] invoke.texi: Update -fopenmp and -fopenmp-simd for omp::decl and loop semantic

2023-09-25 Thread Tobias Burnus


I stumbled over this during the ARM64 talk at the cauldron as they
consider using -fopenmp-simd by default.

→ https://gcc.gnu.org/wiki/cauldron2023 (I put my talk/BoF slides up;
others aren't, yet)

I did stumble over 'omp loop' with SIMD. It turns out that -fopenmp-simd
just turns 'loop' into 'simd', ignoring whatever value the user has
specified for the bind value.

Additionally, [[omp::decl(...)]] was missing.

Any comment to that patch before I commit it?

Tobias

PS: the [[omp::...]] needs a 'C++' → 'C/C++' change once omp:: support
with C23's attributes is in.
-
Siemens Electronic Design Automation GmbH; Anschrift: Arnulfstraße 201, 80634 
München; Gesellschaft mit beschränkter Haftung; Geschäftsführer: Thomas 
Heurung, Frank Thürauf; Sitz der Gesellschaft: München; Registergericht 
München, HRB 106955
invoke.texi: Update -fopenmp and -fopenmp-simd for omp::decl and loop semantic

gcc/ChangeLog:

	PR middle-end/111547
	* doc/invoke.texi (-fopenmp): Mention C++11 [[omp::decl(...)]] syntax.
	(-fopenmp-simd): Likewise. Clarify 'loop' directive semantic.

 gcc/doc/invoke.texi | 16 ++--
 1 file changed, 10 insertions(+), 6 deletions(-)

diff --git a/gcc/doc/invoke.texi b/gcc/doc/invoke.texi
index 146b40414b0..89c539f06c2 100644
--- a/gcc/doc/invoke.texi
+++ b/gcc/doc/invoke.texi
@@ -2766,8 +2766,9 @@ can be omitted, to use a target-specific default value.
 @cindex OpenMP parallel
 @item -fopenmp
 Enable handling of OpenMP directives @code{#pragma omp} in C/C++,
-@code{[[omp::directive(...)]]} and @code{[[omp::sequence(...)]]} in C++ and
-@code{!$omp} in Fortran.  When @option{-fopenmp} is specified, the
+@code{[[omp::directive(...)]]}, @code{[[omp::sequence(...)]]} and
+@code{[[omp::decl(...)]]} in C++ and @code{!$omp} in Fortran.
+When @option{-fopenmp} is specified, the
 compiler generates parallel code according to the OpenMP Application
 Program Interface v4.5 @w{@uref{https://www.openmp.org}}.  This option
 implies @option{-pthread}, and thus is only supported on targets that
@@ -2779,11 +2780,14 @@ have support for @option{-pthread}. @option{-fopenmp} implies
 @cindex SIMD
 @item -fopenmp-simd
 Enable handling of OpenMP's @code{simd}, @code{declare simd},
-@code{declare reduction}, @code{assume}, @code{ordered}, @code{scan},
-@code{loop} directives and combined or composite directives with
+@code{declare reduction}, @code{assume}, @code{ordered}, @code{scan}
+and @code{loop} directive, and of combined or composite directives with
 @code{simd} as constituent with @code{#pragma omp} in C/C++,
-@code{[[omp::directive(...)]]} and @code{[[omp::sequence(...)]]} in C++
-and @code{!$omp} in Fortran.  Other OpenMP directives are ignored.
+@code{[[omp::directive(...)]]}, @code{[[omp::sequence(...)]]} and
+@code{[[omp::decl(...)]]} in C++ and @code{!$omp} in Fortran.
+Other OpenMP directives are ignored.  Unless @option{-fopenmp} is
+additionally specified, the @code{loop} region binds to the current
+task region, independent of the specified @code{bind} clause.
 
 @opindex fopenmp-target-simd-clone
 @cindex OpenMP target SIMD clone

Improve -Wflex-array-member-not-at-end changes.html wording |Plus: and warning bug? (was: [V2][PATCH] gcc-14/changes.html: Deprecate a GCC C extension on flexible array members.)

2023-09-25 Thread Tobias Burnus


Hi all,

I stumbled over this as I found the wording in the release notes rather
unclear.


First, the following gives only a -pedantic warning and not a 
-Wflex-array-member-not-at-end:

  struct t { int b; int x[]; };
  struct q { int b; struct t a[2]; int c; };

warning: invalid use of structure with flexible array member [-Wpedantic]

If I remove the "[2]", it shows additionally:
  warning: structure containing a flexible array member is not at the end of 
another structure [-Wflex-array-member-not-at-end]

It seems as if it should print the latter warning also inside the struct.

Qing? Joseph? Thoughts?

* * *

Secondly, if this is deprecated, shouldn't the warning then be enabled by, e.g., 
-Wall or made
otherwise more prominent? (-std=?) - Currently, one either has to find the new 
flag or use
-pedantic.

Or is this not really regarded as deprecated? But then (IMHO) we should not 
really claim so and just
add the warning without deprecation.

BTW; clang-15 prints the -Wgnu-variable-sized-type-not-at-end warning by 
default.

Joseph, all: Thoughts?

* * *

Cross ref: The patch adding the new warning is r14-2197-g070a6bf0bdc6761
https://gcc.gnu.org/pipermail/gcc-cvs/2023-June/385730.html (cf. previously in 
this thread)


* * *

Regarding the changes.html wording:

On 07.08.23 16:22, Qing Zhao via Gcc-patches wrote:


Compared to the 1st version, the only change is to address Richard's
comment on referring to a warning option for diagnosing deprecated behavior.

...

+++ b/htdocs/gcc-14/changes.html
@@ -30,7 +30,18 @@ a work-in-progress.
  
  Caveats
  
-  ...
+  C:
+  Support for the GCC extension, a structure containing a C99 flexible 
array
+  member, or a union containing such a structure, is not the last field of
+  another structure, is deprecated. Refer to
+  https://gcc.gnu.org/onlinedocs/gcc/Zero-Length.html;>
+  Zero Length Arrays.


...

I find the first sentence difficult to read. What do you think of the following?
(It is hard to come up with some good wording.)

--- a/htdocs/gcc-14/changes.html
+++ b/htdocs/gcc-14/changes.html
@@ -31,9 +31,10 @@ a work-in-progress.
 Caveats
 
   C:
-  Support for the GCC extension, a structure containing a C99 flexible 
array
-  member, or a union containing such a structure, is not the last field of
-  another structure, is deprecated. Refer to
+  Support for the GCC extension that a structure containing a C99 flexible
+  array (and any union containing a member of such structure) can be a
+  member of a structure has been deprecated for the case that it is not
+  the last member. Refer to
   https://gcc.gnu.org/onlinedocs/gcc/Zero-Length.html;>
   Zero Length Arrays.
   Any code relying on this extension should be modifed to ensure that


Tobias

PS:  C17 has:
"A structure or union shall not contain a member with incomplete or function 
type (hence, a structure
 shall not contain an instance of itself, but may contain a pointer to an 
instance of itself), except that
 the last member of a structure with more than one named member may have 
incomplete array type;
 such a structure (and any union containing, possibly recursively, a member 
that is such a structure)
 shall not be a member of a structure or an element of an array."

-
Siemens Electronic Design Automation GmbH; Anschrift: Arnulfstraße 201, 80634 
München; Gesellschaft mit beschränkter Haftung; Geschäftsführer: Thomas 
Heurung, Frank Thürauf; Sitz der Gesellschaft: München; Registergericht 
München, HRB 106955

Re: [PATCH 0/2] Replace intl/ with out-of-tree GNU gettext

2023-09-25 Thread Arsen Arsenović


Xi Ruoyao  writes:

> On Mon, 2023-09-25 at 17:00 +0200, Arsen Arsenović wrote:
>> Afternoon,
>> 
>> This patch series replaces the old (early 2000s era, AFAICT) libintl
>> implementation in-tree, which relies on C constructs some compilers
>> (newer clang, hopefully GCC 14) refuse to compile by default with
>> out-of-tree gettext, in a manner similar to GMP et al, and adds gettext
>> to download_prerequisites.
>
> I think we need to update install.texi to mention the new dependency.

Ah, thanks.  I had forgotten to update it.  What do you think of the
following prose?

1:  2ac5c8240c0f ! 1:  2cc0029921fb *: add modern gettext
@@ Commit message
 * aclocal.m4: Regenerate.
 * Makefile.in (LIBDEPS): Remove (potential) ./ prefix from
 LIBINTL_DEP.
-* doc/install.texi: Document new (notable) flags added by the 
optional
-gettext tree and by AM_GNU_GETTEXT.
 
 libcpp/ChangeLog:
 
@@ gcc/configure: $as_echo "$as_me: executing $ac_file commands" >&6;}
  "collect-ld":F) chmod +x collect-ld ;;
  "nm":F) chmod +x nm ;;
 
- ## gcc/doc/install.texi ##
-@@ gcc/doc/install.texi: is shown below:
- @code{sys} and @code{time}.
- @end table
- 
-+@item GNU gettext
-+
-+Necessary to build GCC with internationalization support via
-+@option{--enable-nls}.  It can be downloaded from
-+@uref{https://gnu.org/s/gettext/}.  If a GNU gettext distribution is
-+found in a subdirectory of your GCC sources named @file{gettext}, it
-+will be built together with GCC, unless present in the system (either in
-+libc or as a stand-alone library).
-+
-+The in-tree configuration requires GNU gettext v0.22.
-+
- @end table
- 
- @heading Tools/packages necessary for modifying GCC
-@@ gcc/doc/install.texi: components of the binutils you intend to build 
alongside the compiler
- (@file{bfd}, @file{binutils}, @file{gas}, @file{gprof}, @file{ld},
- @file{opcodes}, @dots{}) to the directory containing the GCC sources.
- 
--Likewise the GMP, MPFR and MPC libraries can be automatically built
--together with GCC.  You may simply run the
-+Likewise the GMP, MPFR, MPC and Gettext libraries can be automatically
-+built together with GCC.  You may simply run the
- @command{contrib/download_prerequisites} script in the GCC source 
directory
- to set up everything.
--Otherwise unpack the GMP, MPFR and/or MPC source
-+Otherwise unpack the GMP, MPFR, MPC and/or Gettext source
- distributions in the directory containing the GCC sources and rename
--their directories to @file{gmp}, @file{mpfr} and @file{mpc},
--respectively (or use symbolic links with the same name).
-+their directories to @file{gmp}, @file{mpfr}, @file{mpc} and
-+@file{gettext}, respectively (or use symbolic links with the same name).
- 
- @html
- 
-@@ gcc/doc/install.texi: which lets GCC output diagnostics in languages 
other than American
- English.  Native Language Support is enabled by default if not doing a
- canadian cross build.  The @option{--disable-nls} option disables NLS@.
- 
-+@item --with-libintl-prefix=@var{dir}
-+@itemx --without-libintl-prefix
-+Searches for libintl in @file{@var{dir}/include} and
-+@file{@var{dir}/lib}, or disables manual searching for it, letting the
-+linker handle it.
-+
-+@item --with-libintl-type=@var{type}
-+Specifies the type of library to search for when looking for libintl.
-+@var{type} can be one of @code{auto}, @code{static} or @code{shared}.
-+
- @item --with-included-gettext
--If NLS is enabled, the @option{--with-included-gettext} option causes the 
build
--procedure to prefer its copy of GNU @command{gettext}.
-+Only available if @file{gettext} is present in the source tree.
-+
-+Forces the gettext tree to be configured to build and use a new static
-+libintl, overriding the system libintl.
- 
- @item --with-catgets
- If NLS is enabled, and if the host lacks @code{gettext} but has the
-
  ## libcpp/aclocal.m4 ##
 @@ libcpp/aclocal.m4: m4_include([../config/codeset.m4])
  m4_include([../config/depstand.m4])

Perhaps this is easier to read when not a range-diff..
https://git.sr.ht/~arsen/gcc/commit/2ac5c8240c0f1a670f100c8e38baf40b13cc50b2#gcc/doc/install.texi

> And IIUC if --disable-nls is used, we can still build GCC with neither
> system gettext nor in-tree gettext.  Or am I wrong?  (I'm asking because
> we'll need to adjust Linux From Scratch [1-3] for this change if it's
> applied.)

Yes, this doesn't change how --disable-nls works.

> [1]:https://www.linuxfromscratch.org/lfs/view/development/chapter05/gcc-pass1.html
> [2]:https://www.linuxfromscratch.org/lfs/view/development/chapter06/gcc-pass2.html
>

Re: [PATCH 0/2] Replace intl/ with out-of-tree GNU gettext

2023-09-25 Thread Xi Ruoyao

On Mon, 2023-09-25 at 17:00 +0200, Arsen Arsenović wrote:
> Afternoon,
> 
> This patch series replaces the old (early 2000s era, AFAICT) libintl
> implementation in-tree, which relies on C constructs some compilers
> (newer clang, hopefully GCC 14) refuse to compile by default with
> out-of-tree gettext, in a manner similar to GMP et al, and adds gettext
> to download_prerequisites.

I think we need to update install.texi to mention the new dependency.

And IIUC if --disable-nls is used, we can still build GCC with neither
system gettext nor in-tree gettext.  Or am I wrong?  (I'm asking because
we'll need to adjust Linux From Scratch [1-3] for this change if it's
applied.)

[1]:https://www.linuxfromscratch.org/lfs/view/development/chapter05/gcc-pass1.html
[2]:https://www.linuxfromscratch.org/lfs/view/development/chapter06/gcc-pass2.html
[3]:https://www.linuxfromscratch.org/lfs/view/development/chapter08/gcc.html

-- 
Xi Ruoyao 
School of Aerospace Science and Technology, Xidian University

Re: [PATCH] Add missing return in gori_compute::logical_combine

2023-09-25 Thread Andrew MacLeod

OK for trunk at least.   Thanks.  I presume it'll be fine for the other 
releases.


Andrew

On 9/25/23 11:51, Eric Botcazou wrote:

Hi,

the varying case currently falls through to the 1/true case.

Tested on x86_64-suse-linux, OK for mainline, 13 and 12 branches?


2023-09-25  Eric Botcazou  

* gimple-range-gori.cc (gori_compute::logical_combine): Add missing
return statement in the varying case.


2023-09-25  Eric Botcazou  

* gnat.dg/opt102.adb: New test.
* gnat.dg/opt102_pkg.adb, gnat.dg/opt102_pkg.ads: New helper.

[PATCH] Add missing return in gori_compute::logical_combine

2023-09-25 Thread Eric Botcazou

Hi,

the varying case currently falls through to the 1/true case.

Tested on x86_64-suse-linux, OK for mainline, 13 and 12 branches?


2023-09-25  Eric Botcazou  

* gimple-range-gori.cc (gori_compute::logical_combine): Add missing
return statement in the varying case.


2023-09-25  Eric Botcazou  

* gnat.dg/opt102.adb: New test.
* gnat.dg/opt102_pkg.adb, gnat.dg/opt102_pkg.ads: New helper.

-- 
Eric Botcazou
diff --git a/gcc/gimple-range-gori.cc b/gcc/gimple-range-gori.cc
index 51fb542a19c..2694e551d73 100644
--- a/gcc/gimple-range-gori.cc
+++ b/gcc/gimple-range-gori.cc
@@ -876,6 +876,7 @@ gori_compute::logical_combine (vrange , enum tree_code code,
 	  r.dump (dump_file);
 	  fputc ('\n', dump_file);
 	}
+  return res;
 }
 
   switch (code)
package body Opt102_Pkg is

  function Get (E : Enum; F, M : access Integer) return Integer is
  begin
case E is
  when One   => return 0;
  when Two   => return F.all;
  when Three => return M.all;
end case;
  end;

end Opt102_Pkg;
-- { dg-do run }
-- { dg-options "-O2 -gnata" }

with Opt102_Pkg; use Opt102_Pkg;

procedure Opt102 is
  I, F : aliased Integer;
begin
  I := Get (Two, F'Access, null);
end;
package Opt102_Pkg is

  type Enum is (One, Two, Three);

  function Get (E : Enum; F, M : access Integer) return Integer
with Pre => (E = One) = (F = null and M = null) and
(E = Two) = (F /= null) and
(E = Three) = (M /= null);

end Opt102_Pkg;

[committed] hppa: Update baseline symbols for hppa-linux

2023-09-25 Thread John David Anglin

Committed to trunk.

Dave
---

Update baseline symbols for hppa-linux.

2023-09-25  John David Anglin  

libstdc++-v3/ChangeLog:

* config/abi/post/hppa-linux-gnu/baseline_symbols.txt: Update.

diff --git a/libstdc++-v3/config/abi/post/hppa-linux-gnu/baseline_symbols.txt 
b/libstdc++-v3/config/abi/post/hppa-linux-gnu/baseline_symbols.txt
index ff40f201eb8..b41e57125ef 100644
--- a/libstdc++-v3/config/abi/post/hppa-linux-gnu/baseline_symbols.txt
+++ b/libstdc++-v3/config/abi/post/hppa-linux-gnu/baseline_symbols.txt
@@ -983,8 +983,18 @@ FUNC:_ZNKSt11__timepunctIwE9_M_monthsEPPKw
 FUNC:_ZNKSt11__timepunctIwE9_M_monthsEPPKw@@GLIBCXX_3.4
 FUNC:_ZNKSt11logic_error4whatEv
 FUNC:_ZNKSt11logic_error4whatEv@@GLIBCXX_3.4
+FUNC:_ZNKSt12__basic_fileIcE13native_handleEv
+FUNC:_ZNKSt12__basic_fileIcE13native_handleEv@@GLIBCXX_3.4.32
 FUNC:_ZNKSt12__basic_fileIcE7is_openEv
 FUNC:_ZNKSt12__basic_fileIcE7is_openEv@@GLIBCXX_3.4
+FUNC:_ZNKSt12__shared_ptrINSt10filesystem28recursive_directory_iterator10_Dir_stackELN9__gnu_cxx12_Lock_policyE2EEcvbEv
+FUNC:_ZNKSt12__shared_ptrINSt10filesystem28recursive_directory_iterator10_Dir_stackELN9__gnu_cxx12_Lock_policyE2EEcvbEv@@GLIBCXX_3.4.31
+FUNC:_ZNKSt12__shared_ptrINSt10filesystem4_DirELN9__gnu_cxx12_Lock_policyE2EEcvbEv
+FUNC:_ZNKSt12__shared_ptrINSt10filesystem4_DirELN9__gnu_cxx12_Lock_policyE2EEcvbEv@@GLIBCXX_3.4.31
+FUNC:_ZNKSt12__shared_ptrINSt10filesystem7__cxx1128recursive_directory_iterator10_Dir_stackELN9__gnu_cxx12_Lock_policyE2EEcvbEv
+FUNC:_ZNKSt12__shared_ptrINSt10filesystem7__cxx1128recursive_directory_iterator10_Dir_stackELN9__gnu_cxx12_Lock_policyE2EEcvbEv@@GLIBCXX_3.4.31
+FUNC:_ZNKSt12__shared_ptrINSt10filesystem7__cxx114_DirELN9__gnu_cxx12_Lock_policyE2EEcvbEv
+FUNC:_ZNKSt12__shared_ptrINSt10filesystem7__cxx114_DirELN9__gnu_cxx12_Lock_policyE2EEcvbEv@@GLIBCXX_3.4.31
 FUNC:_ZNKSt12bad_weak_ptr4whatEv
 FUNC:_ZNKSt12bad_weak_ptr4whatEv@@GLIBCXX_3.4.15
 FUNC:_ZNKSt12future_error4whatEv
@@ -1313,6 +1323,20 @@ FUNC:_ZNKSt5ctypeIwE9do_narrowEPKwS2_cPc
 FUNC:_ZNKSt5ctypeIwE9do_narrowEPKwS2_cPc@@GLIBCXX_3.4
 FUNC:_ZNKSt5ctypeIwE9do_narrowEwc
 FUNC:_ZNKSt5ctypeIwE9do_narrowEwc@@GLIBCXX_3.4
+FUNC:_ZNKSt6chrono4tzdb11locate_zoneESt17basic_string_viewIcSt11char_traitsIcEE
+FUNC:_ZNKSt6chrono4tzdb11locate_zoneESt17basic_string_viewIcSt11char_traitsIcEE@@GLIBCXX_3.4.31
+FUNC:_ZNKSt6chrono4tzdb12current_zoneEv
+FUNC:_ZNKSt6chrono4tzdb12current_zoneEv@@GLIBCXX_3.4.31
+FUNC:_ZNKSt6chrono9time_zone15_M_get_sys_infoENS_10time_pointINS_3_V212system_clockENS_8durationIxSt5ratioILx1ELx1EE
+FUNC:_ZNKSt6chrono9time_zone15_M_get_sys_infoENS_10time_pointINS_3_V212system_clockENS_8durationIxSt5ratioILx1ELx1EE@@GLIBCXX_3.4.31
+FUNC:_ZNKSt6chrono9time_zone17_M_get_local_infoENS_10time_pointINS_7local_tENS_8durationIxSt5ratioILx1ELx1EE
+FUNC:_ZNKSt6chrono9time_zone17_M_get_local_infoENS_10time_pointINS_7local_tENS_8durationIxSt5ratioILx1ELx1EE@@GLIBCXX_3.4.31
+FUNC:_ZNKSt6chrono9tzdb_list14const_iteratordeEv
+FUNC:_ZNKSt6chrono9tzdb_list14const_iteratordeEv@@GLIBCXX_3.4.31
+FUNC:_ZNKSt6chrono9tzdb_list5beginEv
+FUNC:_ZNKSt6chrono9tzdb_list5beginEv@@GLIBCXX_3.4.31
+FUNC:_ZNKSt6chrono9tzdb_list5frontEv
+FUNC:_ZNKSt6chrono9tzdb_list5frontEv@@GLIBCXX_3.4.31
 FUNC:_ZNKSt6locale2id5_M_idEv
 FUNC:_ZNKSt6locale2id5_M_idEv@@GLIBCXX_3.4
 FUNC:_ZNKSt6locale4nameB5cxx11Ev
@@ -6134,12 +6158,30 @@ FUNC:_ZNSt6__norm15_List_node_base8transferEPS0_S1_
 FUNC:_ZNSt6__norm15_List_node_base8transferEPS0_S1_@@GLIBCXX_3.4.9
 FUNC:_ZNSt6__norm15_List_node_base9_M_unhookEv
 FUNC:_ZNSt6__norm15_List_node_base9_M_unhookEv@@GLIBCXX_3.4.14
+FUNC:_ZNSt6chrono11locate_zoneESt17basic_string_viewIcSt11char_traitsIcEE
+FUNC:_ZNSt6chrono11locate_zoneESt17basic_string_viewIcSt11char_traitsIcEE@@GLIBCXX_3.4.31
+FUNC:_ZNSt6chrono11reload_tzdbEv
+FUNC:_ZNSt6chrono11reload_tzdbEv@@GLIBCXX_3.4.31
+FUNC:_ZNSt6chrono12current_zoneEv
+FUNC:_ZNSt6chrono12current_zoneEv@@GLIBCXX_3.4.31
 FUNC:_ZNSt6chrono12system_clock3nowEv
 FUNC:_ZNSt6chrono12system_clock3nowEv@@GLIBCXX_3.4.11
+FUNC:_ZNSt6chrono13get_tzdb_listEv
+FUNC:_ZNSt6chrono13get_tzdb_listEv@@GLIBCXX_3.4.31
+FUNC:_ZNSt6chrono14remote_versionB5cxx11Ev
+FUNC:_ZNSt6chrono14remote_versionB5cxx11Ev@@GLIBCXX_3.4.31
 FUNC:_ZNSt6chrono3_V212steady_clock3nowEv
 FUNC:_ZNSt6chrono3_V212steady_clock3nowEv@@GLIBCXX_3.4.19
 FUNC:_ZNSt6chrono3_V212system_clock3nowEv
 FUNC:_ZNSt6chrono3_V212system_clock3nowEv@@GLIBCXX_3.4.19
+FUNC:_ZNSt6chrono8get_tzdbEv
+FUNC:_ZNSt6chrono8get_tzdbEv@@GLIBCXX_3.4.31
+FUNC:_ZNSt6chrono9tzdb_list11erase_afterENS0_14const_iteratorE
+FUNC:_ZNSt6chrono9tzdb_list11erase_afterENS0_14const_iteratorE@@GLIBCXX_3.4.31
+FUNC:_ZNSt6chrono9tzdb_list14const_iteratorppEi
+FUNC:_ZNSt6chrono9tzdb_list14const_iteratorppEi@@GLIBCXX_3.4.31
+FUNC:_ZNSt6chrono9tzdb_list14const_iteratorppEv
+FUNC:_ZNSt6chrono9tzdb_list14const_iteratorppEv@@GLIBCXX_3.4.31
 FUNC:_ZNSt6gslice8_IndexerC1EjRKSt8valarrayIjES4_

Re: [PATCH v2 0/1] Add LoongArch64 support for D frontend

2023-09-25 Thread liushuyu


Hi Yujie,

Sorry, I did not know Loongson Technologies is also working on this.

However, you can jump onto that GitHub pull request to review my changes 
so that they align with your implementation and nobody's effort would go 
to waste.


Thanks,

Zixing

On 2023/9/25 04:04, Yang Yujie wrote:

Hi Zixing,

We are also working on a patch series that could pass the libphobos regression 
tests.
Will post this later once all failed items are fixed.

Yujie

On Sun, Sep 24, 2023 at 03:40:32PM -0600, Zixing Liu wrote:

This patch adds the LoongArch64 support for GCC D frontend.

The runtime support is submitted as a separate patch here:
https://github.com/dlang/dmd/pull/15628.

You can find more information about the LoongArch architecture on this
website:
https://loongson.github.io/LoongArch-Documentation/README-EN.html.

--

Changes since the last revision of the patch:

* Corrected copyright years in loongarch-d.cc and loongarch-d.h.
* Removed changes to the tests, the changes have been rolled into the DMD
   changes in:
   
https://github.com/dlang/dmd/pull/15628/commits/eb84b8a2bc86aa751ad6f472422e8abad63ff500
   .
* Removed D_LP32 and D_LP64 bits. Since LoongArch ABIs are somewhat
   complicated, we may introduce the ABI information in the form of target
   traits in the future.

Zixing Liu (1):

  gcc/config.gcc |  1 +
  gcc/config/loongarch/loongarch-d.cc| 77 ++
  gcc/config/loongarch/loongarch-d.h | 26 
  gcc/config/loongarch/t-loongarch   |  4 ++
  libphobos/configure.tgt|  3 +
  libphobos/libdruntime/gcc/sections/elf.d   |  2 +
  libphobos/libdruntime/gcc/unwind/generic.d |  1 +
  7 files changed, 114 insertions(+)
  create mode 100644 gcc/config/loongarch/loongarch-d.cc
  create mode 100644 gcc/config/loongarch/loongarch-d.h

--
2.42.0

[PATCH 0/2] Replace intl/ with out-of-tree GNU gettext

2023-09-25 Thread Arsen Arsenović

Afternoon,

This patch series replaces the old (early 2000s era, AFAICT) libintl
implementation in-tree, which relies on C constructs some compilers
(newer clang, hopefully GCC 14) refuse to compile by default with
out-of-tree gettext, in a manner similar to GMP et al, and adds gettext
to download_prerequisites.

Regstrapped on x86_64-pc-linux-gnu --with-included-gettext and all
languages enabled.  Tested for localization on x86_64-pc-linux-gnu,
x86_64-unknown-freebsd13.2, x86_64-darwin21, i686-darwin9 (thanks,
Iain!).

Example from FreeBSD:

  [arsen@fbsd132 ~/gcc-bld/_pfx/bin]$ LANG=sr_RS.UTF-8 ./gcc
  gcc: кобна грешка: нема улазних датотека
  компиловање прекинуто.
  [arsen@fbsd132 ~/gcc-bld/_pfx/bin]$ ldd ./gcc
  ./gcc:
libiconv.so.2 => /usr/local/lib/libiconv.so.2 (0x258b24264000)
libm.so.5 => /lib/libm.so.5 (0x258b2314b000)
libc.so.7 => /lib/libc.so.7 (0x258b25acc000)
[vdso] (0x7fffe5d0)

OK for trunk (if passing review on the binutils and GDB sides)?

Thanks in advance, have a lovely day.

Arsen Arsenović (2):
  intl: remove, in favor of out-of-tree gettext
  *: add modern gettext

 .gitignore |1 +
 Makefile.def   |   72 +-
 Makefile.in| 1612 +++
 config/gettext-sister.m4   |   35 +-
 config/gettext.m4  |  357 +-
 config/iconv.m4|  313 +-
 config/intlmacosx.m4   |   65 +
 configure  |   44 +-
 configure.ac   |   44 +-
 contrib/download_prerequisites |2 +
 contrib/prerequisites.md5  |1 +
 contrib/prerequisites.sha512   |1 +
 gcc/Makefile.in|8 +-
 gcc/aclocal.m4 |4 +
 gcc/configure  | 2001 +++-
 intl/ChangeLog |  306 --
 intl/Makefile.in   |  264 -
 intl/README|   21 -
 intl/VERSION   |1 -
 intl/aclocal.m4|   33 -
 intl/bindtextdom.c |  374 --
 intl/config.h.in   |  280 --
 intl/config.intl.in|   12 -
 intl/configure | 8288 
 intl/configure.ac  |  108 -
 intl/dcgettext.c   |   59 -
 intl/dcigettext.c  | 1238 -
 intl/dcngettext.c  |   60 -
 intl/dgettext.c|   60 -
 intl/dngettext.c   |   62 -
 intl/eval-plural.h |  114 -
 intl/explodename.c |  192 -
 intl/finddomain.c  |  195 -
 intl/gettext.c |   64 -
 intl/gettextP.h|  224 -
 intl/gmo.h |  148 -
 intl/hash-string.h |   59 -
 intl/intl-compat.c |  151 -
 intl/l10nflist.c   |  453 --
 intl/libgnuintl.h  |  341 --
 intl/loadinfo.h|  156 -
 intl/loadmsgcat.c  | 1322 -
 intl/localcharset.c|  398 --
 intl/localcharset.h|   42 -
 intl/locale.alias  |   78 -
 intl/localealias.c |  419 --
 intl/localename.c  |  772 ---
 intl/log.c |  104 -
 intl/ngettext.c|   68 -
 intl/osdep.c   |   24 -
 intl/plural-config.h   |1 -
 intl/plural-exp.c  |  156 -
 intl/plural-exp.h  |  132 -
 intl/plural.c  | 1540 --
 intl/plural.y  |  434 --
 intl/relocatable.c |  439 --
 intl/relocatable.h |   67 -
 intl/textdomain.c  |  142 -
 libcpp/aclocal.m4  |5 +
 libcpp/configure   | 2139 -
 libstdc++-v3/configure |  727 +--
 61 files changed, 5398 insertions(+), 21434 deletions(-)
 create mode 100644 config/intlmacosx.m4
 delete mode 100644 intl/ChangeLog
 delete mode 100644 intl/Makefile.in
 delete mode 100644 intl/README
 delete mode 100644 intl/VERSION
 delete mode 100644 intl/aclocal.m4
 delete mode 100644 intl/bindtextdom.c
 delete mode 100644 intl/config.h.in
 delete mode 100644 intl/config.intl.in
 delete mode 100755 intl/configure
 delete mode 100644 intl/configure.ac
 delete mode 100644 intl/dcgettext.c
 delete mode 100644 intl/dcigettext.c
 delete mode 100644 intl/dcngettext.c
 delete mode 100644 intl/dgettext.c
 delete mode 100644 intl/dngettext.c
 delete mode 100644 intl/eval-plural.h
 delete mode 100644 intl/explodename.c
 delete mode 100644 intl/finddomain.c
 delete mode 100644 intl/gettext.c
 delete mode 100644 intl/gettextP.h
 delete mode 100644 intl/gmo.h
 delete mode 100644 intl/hash-string.h
 delete mode 100644 intl/intl-compat.c
 delete mode 100644 intl/l10nflist.c
 delete mode 100644 intl/libgnuintl.h
 delete mode 100644 intl/loadinfo.h
 delete mode 100644 intl/loadmsgcat.c
 delete mode 100644 intl/localcharset.c
 delete mode 100644 intl/localcharset.h
 delete mode 100644 intl/locale.alias
 delete mode 100644 intl/localealias.c
 delete mode 100644

Re: [PATCH] [testsuite] Remove undefined behavior from gcc.dg/tree-ssa/pr44306.c

2023-09-25 Thread Richard Biener




> Am 25.09.2023 um 14:18 schrieb Aldy Hernandez :
> 
> In auditing the DOM code to see what the scoped tables catch that
> ranger doesn't, I've run across this test, which seems to
> have uninitialized reads from both j and present[].
> 
> From the original PR, it looks like this came from a reduction of a
> failing test in SPEC's 464.h264ref.  A google search of the
> CalculateQuant8Param() in the test yields:
> 
> https://github.com/microsoft/test-suite/blob/master/MultiSource/Applications/JM/lencod/q_matrix.c
> 
> Assuming the above source is similar to the original testcase, it looks
> like both "j" and "present" were initialized before use, so our testcase
> just got reduced a bit too far.
> 
> I tried to build the offending commit to see if my adjustments to the
> test still caused it to fail:
> 
> commit e1449456c0a88f5b3122db5452f7e91f5a9535f6 (HEAD -> master)
> Author: Sebastian Pop 
> Date:   Wed May 26 16:46:59 2010 +
> 
>Reorganize the analysis of basic block predication.
> 
> ...but alas it no longer builds with a recent compiler.  Perhaps
> someone has a ./cc1 of that revision around to verify?
> 
> OK?

Ok


> gcc/testsuite/ChangeLog:
> 
>* gcc.dg/tree-ssa/pr44306.c: Remove undefined behavior.
> ---
> gcc/testsuite/gcc.dg/tree-ssa/pr44306.c | 6 +++---
> 1 file changed, 3 insertions(+), 3 deletions(-)
> 
> diff --git a/gcc/testsuite/gcc.dg/tree-ssa/pr44306.c 
> b/gcc/testsuite/gcc.dg/tree-ssa/pr44306.c
> index 1ea04ce3a98..d322fe048b5 100644
> --- a/gcc/testsuite/gcc.dg/tree-ssa/pr44306.c
> +++ b/gcc/testsuite/gcc.dg/tree-ssa/pr44306.c
> @@ -8,10 +8,10 @@ int LevelScale8x8Luma_Inter[6][8][8];
> int InvLevelScale8x8Luma_Intra[6][8][8];
> int InvLevelScale8x8Luma_Inter[6][8][8];
> short UseDefaultScalingMatrix8x8Flag[2];
> -void CalculateQuant8Param()
> +int present[2];
> +void CalculateQuant8Param(int j)
> {
> - int i, j, k, temp;
> - int present[2];
> + int i, k, temp;
>  for(k=0; j<8; j++)
>for(i=0; i<8; i++)
>  {
> -- 
> 2.41.0
>

[PATCH] [testsuite] Remove undefined behavior from gcc.dg/tree-ssa/pr44306.c

2023-09-25 Thread Aldy Hernandez

In auditing the DOM code to see what the scoped tables catch that
ranger doesn't, I've run across this test, which seems to
have uninitialized reads from both j and present[].

From the original PR, it looks like this came from a reduction of a
failing test in SPEC's 464.h264ref.  A google search of the
CalculateQuant8Param() in the test yields:

https://github.com/microsoft/test-suite/blob/master/MultiSource/Applications/JM/lencod/q_matrix.c

Assuming the above source is similar to the original testcase, it looks
like both "j" and "present" were initialized before use, so our testcase
just got reduced a bit too far.

I tried to build the offending commit to see if my adjustments to the
test still caused it to fail:

commit e1449456c0a88f5b3122db5452f7e91f5a9535f6 (HEAD -> master)
Author: Sebastian Pop 
Date:   Wed May 26 16:46:59 2010 +

Reorganize the analysis of basic block predication.

...but alas it no longer builds with a recent compiler.  Perhaps
someone has a ./cc1 of that revision around to verify?

OK?

gcc/testsuite/ChangeLog:

* gcc.dg/tree-ssa/pr44306.c: Remove undefined behavior.
---
 gcc/testsuite/gcc.dg/tree-ssa/pr44306.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/gcc/testsuite/gcc.dg/tree-ssa/pr44306.c 
b/gcc/testsuite/gcc.dg/tree-ssa/pr44306.c
index 1ea04ce3a98..d322fe048b5 100644
--- a/gcc/testsuite/gcc.dg/tree-ssa/pr44306.c
+++ b/gcc/testsuite/gcc.dg/tree-ssa/pr44306.c
@@ -8,10 +8,10 @@ int LevelScale8x8Luma_Inter[6][8][8];
 int InvLevelScale8x8Luma_Intra[6][8][8];
 int InvLevelScale8x8Luma_Inter[6][8][8];
 short UseDefaultScalingMatrix8x8Flag[2];
-void CalculateQuant8Param()
+int present[2];
+void CalculateQuant8Param(int j)
 {
- int i, j, k, temp;
- int present[2];
+ int i, k, temp;
  for(k=0; j<8; j++)
for(i=0; i<8; i++)
  {
-- 
2.41.0

Re: [PATCH] Always generate else-block in gimplify

2023-09-25 Thread Jørgen Kvalsvik


On 25/09/2023 19:51, Richard Biener wrote:

On Sun, Sep 24, 2023 at 3:09 PM Jørgen Kvalsvik  wrote:


This is a request for feedback and a proof-of-concept, not something I
intend to merge as-is.  It would be nice if gcc, maybe just under some
circumstances, always generated an else-block for coverage purposes.

I have been working on the MC/DC support by CFG analysis for a while
https://gcc.gnu.org/pipermail/gcc-patches/2023-June/621449.html and have
ironed out a lot of problems. The last problem I know about, which is
impossible to actually fix right now, is the "fusing" of nested ifs.
Here is an example:

 if (a) if (b) if (c) { ... } // 3 conditions, 6 outcomes
 if (a && b && c) { ... } // 3 conditions, 6 outcomes

These form isomorphic CFGs which means there is no way for my algorithm
to distinguish them. This is sort-of acceptable since the coverage
measurements more accurately measure the semantics (and not the syntax),
but this also happens when there is code in-between the nesting:

 if (a) // measures to 2 conditions, 4 outcomes
 {
 a += b * 10;
 b -= a + 2;
 if (b)
 {
 ...
 }
 }

You would expect this to be measured as:

 if (a) // 1 condition, 2 outcomes
 {
 a += b * 10;
 b -= a + 2;
 if (b) // 1 condition, 2 outcomes
 {
 ...
 }
 }

The source of the problem is the missing (or empty) else block, as the
algorithm uses the outcome (then/else) edges to determine the limits of
expressions. If, however, the else blocks are generated, the conditions
are counted as you would expect.

So I have a few questions:

1. Is something like this even acceptable? The semantics of the program
should not change, assuming the else-block only exists but is without
significant behavior. It will only be generated if there is no
explicit else in source.
2. Should this only be generated when necessary (e.g. under condition
coverage? No optimization?)
3. I used a simple int-init { int __mcdc_barrier = 0; } but there might
be better contents for the block that does not add anything
operationally. I am not very familiar with this part of gcc and would
like to see something better. Any suggestions?


Can you in theory handle this by splitting the 'else' edge before
coverage instrumentation rather than using a stmt inserted during
gimplification?
I don't think so. By the time we get to the instrumentation we do not 
know if the false edge is to a proper else. The simplest example is 
really:


if (a) if (b) if (c) { ... }
if (a && b && c) { ... }

And the dot representation for both graphs:

digraph {
subgraph cluster_ifs {
label = "ifs";
A0 -> A2 [label="fallthru "];
A2 -> A3 [label="true "];
A2 -> A6 [label="false "];
A3 -> A4 [label="true "];
A3 -> A6 [label="false "];
A6 -> A7 [label="fallthru "];
A7 -> A1 [label=""];
A4 -> A5 [label="true "];
A4 -> A6 [label="false "];
A5 -> A7 [label="fallthru "];
}

subgraph cluster_and {
label = "and";
B0 -> B2 [label="fallthru "];
B2 -> B3 [label="true "];
B2 -> B6 [label="false "];
B3 -> B4 [label="true "];
B3 -> B6 [label="false "];
B6 -> B7 [label="fallthru "];
B7 -> B1 [label=""];
B4 -> B5 [label="true "];
B4 -> B6 [label="false "];
B5 -> B7 [label="fallthru "];
}
} 




The CFGs are identical, so there is no way to recover the else block at 
this stage.


Now, it might be possible to do or recover this in other phases than the 
gimplify, and I am very open for suggestions to where and how.


PS. my patch (maybe unsurprisingly) breaks a bunch of tests, so it is 
obviously not fit as-is.


Thanks,
Jørgen




---
  gcc/gimplify.cc | 8 
  1 file changed, 8 insertions(+)

diff --git a/gcc/gimplify.cc b/gcc/gimplify.cc
index ade6e335da7..43af38df742 100644
--- a/gcc/gimplify.cc
+++ b/gcc/gimplify.cc
@@ -4370,6 +4370,14 @@ gimplify_cond_expr (tree *expr_p, gimple_seq *pre_p, 
fallback_t fallback)
enum tree_code pred_code;
gimple_seq seq = NULL;

+  if (TREE_OPERAND (expr, 2) == NULL_TREE)
+  {
+  tree var = build_decl (UNKNOWN_LOCATION, VAR_DECL, get_identifier
+   ("__mcdc_barrier"), integer_type_node);
+  tree val = build_int_cst (integer_type_node, 0);
+  TREE_OPERAND (expr, 2) = build2 (INIT_EXPR, TREE_TYPE (var), var, val);
+  }
+
/* If this COND_EXPR has a value, copy the values into a temporary within
   the arms.  */
if (!VOID_TYPE_P (type))
--
2.30.2

Re: [PATCH] aarch64: Fine-grained ldp and stp policies with test-cases.

2023-09-25 Thread Manos Anagnostakis

Thanks for the feedback, Kyrill.

I'll resend it as a V3. I believe you have also checked V2 containing just
a small test adjustment.

Manos Anagnostakis | Compiler Engineer
| E: manos.anagnosta...@vrull.eu

VRULL GmbH | Beatrixgasse 32 1030 Vienna | W: www.vrull.eu

Στις Δευ 25 Σεπ 2023, 13:59 ο χρήστης Kyrylo Tkachov 
έγραψε:

> Hi Manos,
>
> Apologies for the long delay.
>
> > -Original Message-
> > From: Manos Anagnostakis 
> > Sent: Friday, August 18, 2023 8:50 AM
> > To: gcc-patches@gcc.gnu.org
> > Cc: Kyrylo Tkachov ; Philipp Tomsich
> > ; Manos Anagnostakis
> > 
> > Subject: [PATCH] aarch64: Fine-grained ldp and stp policies with
> test-cases.
> >
> > This patch implements the following TODO in gcc/config/aarch64/aarch64.cc
> > to provide the requested behaviour for handling ldp and stp:
> >
> >   /* Allow the tuning structure to disable LDP instruction formation
> >  from combining instructions (e.g., in peephole2).
> >  TODO: Implement fine-grained tuning control for LDP and STP:
> >1. control policies for load and store separately;
> >2. support the following policies:
> >   - default (use what is in the tuning structure)
> >   - always
> >   - never
> >   - aligned (only if the compiler can prove that the
> > load will be aligned to 2 * element_size)  */
> >
> > It provides two new and concrete command-line options -mldp-policy and -
> > mstp-policy
> > to give the ability to control load and store policies separately as
> > stated in part 1 of the TODO.
> >
> > The accepted values for both options are:
> > - default: Use the ldp/stp policy defined in the corresponding tuning
> >   structure.
> > - always: Emit ldp/stp regardless of alignment.
> > - never: Do not emit ldp/stp.
> > - aligned: In order to emit ldp/stp, first check if the load/store will
> >   be aligned to 2 * element_size.
> >
> > gcc/ChangeLog:
> > * config/aarch64/aarch64-protos.h (struct tune_params): Add
> >   appropriate enums for the policies.
> > * config/aarch64/aarch64-tuning-flags.def
> >   (AARCH64_EXTRA_TUNING_OPTION): Remove superseded tuning
> >   options.
> > * config/aarch64/aarch64.cc (aarch64_parse_ldp_policy): New
> >   function to parse ldp-policy option.
> > (aarch64_parse_stp_policy): New function to parse stp-policy
> option.
> > (aarch64_override_options_internal): Call parsing functions.
> > (aarch64_operands_ok_for_ldpstp): Add option-value check and
> >   alignment check and remove superseded ones
> > (aarch64_operands_adjust_ok_for_ldpstp): Add option-value check
> and
> >   alignment check and remove superseded ones.
> > * config/aarch64/aarch64.opt: Add options.
> >
> > gcc/testsuite/ChangeLog:
> > * gcc.target/aarch64/ldp_aligned.c: New test.
> > * gcc.target/aarch64/ldp_always.c: New test.
> > * gcc.target/aarch64/ldp_never.c: New test.
> > * gcc.target/aarch64/stp_aligned.c: New test.
> > * gcc.target/aarch64/stp_always.c: New test.
> > * gcc.target/aarch64/stp_never.c: New test.
> >
> > Signed-off-by: Manos Anagnostakis 
> > ---
> >
> >  gcc/config/aarch64/aarch64-protos.h   |  24 ++
> >  gcc/config/aarch64/aarch64-tuning-flags.def   |   8 -
> >  gcc/config/aarch64/aarch64.cc | 229 ++
> >  gcc/config/aarch64/aarch64.opt|   8 +
> >  .../gcc.target/aarch64/ldp_aligned.c  |  64 +
> >  gcc/testsuite/gcc.target/aarch64/ldp_always.c |  64 +
> >  gcc/testsuite/gcc.target/aarch64/ldp_never.c  |  64 +
> >  .../gcc.target/aarch64/stp_aligned.c  |  60 +
> >  gcc/testsuite/gcc.target/aarch64/stp_always.c |  60 +
> >  gcc/testsuite/gcc.target/aarch64/stp_never.c  |  60 +
> >  10 files changed, 580 insertions(+), 61 deletions(-)
> >  create mode 100644 gcc/testsuite/gcc.target/aarch64/ldp_aligned.c
> >  create mode 100644 gcc/testsuite/gcc.target/aarch64/ldp_always.c
> >  create mode 100644 gcc/testsuite/gcc.target/aarch64/ldp_never.c
> >  create mode 100644 gcc/testsuite/gcc.target/aarch64/stp_aligned.c
> >  create mode 100644 gcc/testsuite/gcc.target/aarch64/stp_always.c
> >  create mode 100644 gcc/testsuite/gcc.target/aarch64/stp_never.c
> >
> > diff --git a/gcc/config/aarch64/aarch64-protos.h
> > b/gcc/config/aarch64/aarch64-protos.h
> > index 70303d6fd95..be1d73490ed 100644
> > --- a/gcc/config/aarch64/aarch64-protos.h
> > +++ b/gcc/config/aarch64/aarch64-protos.h
> > @@ -568,6 +568,30 @@ struct tune_params
> >/* Place prefetch struct pointer at the end to enable type checking
> >   errors when tune_params misses elements (e.g., from erroneous
> merges).
> > */
> >const struct cpu_prefetch_tune *prefetch;
> > +/* An enum specifying how to handle load pairs using a fine-grained
> policy:
> > +   - LDP_POLICY_ALIGNED: Emit ldp if the source pointer is aligned
> >

Re: On a Plane During Tomorrow's RISC-V GCC Patchwork Meeting

2023-09-25 Thread Jeff Law





On 9/25/23 04:18, Palmer Dabbelt wrote:

On Mon, 18 Sep 2023 15:13:04 PDT (-0700), Vineet Gupta wrote:

On 9/18/23 09:11, Jeff Law wrote:



On 9/18/23 09:24, Kito Cheng wrote:

I may missed that one time too, not on plane yet, but need to go bed
earlier due to my flight is in next day early morning...

I'm unavailable as well, though I don't get on a plane until Wednesday
evening.


This is one meeting I really look forward to :-)
I'll be on a plane Wednesday evening as  well - see you all soon.


Looks like I'll also be traveling for this week's meeting, so I'll have 
to skip again.
I ran into Vineet at Heathrow and we concluded that we were going to 
skip tomorrow :-)


jeff

RE: [PATCH] aarch64: Fine-grained ldp and stp policies with test-cases.

2023-09-25 Thread Kyrylo Tkachov

Hi Manos,

Apologies for the long delay.

> -Original Message-
> From: Manos Anagnostakis 
> Sent: Friday, August 18, 2023 8:50 AM
> To: gcc-patches@gcc.gnu.org
> Cc: Kyrylo Tkachov ; Philipp Tomsich
> ; Manos Anagnostakis
> 
> Subject: [PATCH] aarch64: Fine-grained ldp and stp policies with test-cases.
> 
> This patch implements the following TODO in gcc/config/aarch64/aarch64.cc
> to provide the requested behaviour for handling ldp and stp:
> 
>   /* Allow the tuning structure to disable LDP instruction formation
>  from combining instructions (e.g., in peephole2).
>  TODO: Implement fine-grained tuning control for LDP and STP:
>1. control policies for load and store separately;
>2. support the following policies:
>   - default (use what is in the tuning structure)
>   - always
>   - never
>   - aligned (only if the compiler can prove that the
> load will be aligned to 2 * element_size)  */
> 
> It provides two new and concrete command-line options -mldp-policy and -
> mstp-policy
> to give the ability to control load and store policies separately as
> stated in part 1 of the TODO.
> 
> The accepted values for both options are:
> - default: Use the ldp/stp policy defined in the corresponding tuning
>   structure.
> - always: Emit ldp/stp regardless of alignment.
> - never: Do not emit ldp/stp.
> - aligned: In order to emit ldp/stp, first check if the load/store will
>   be aligned to 2 * element_size.
> 
> gcc/ChangeLog:
> * config/aarch64/aarch64-protos.h (struct tune_params): Add
>   appropriate enums for the policies.
> * config/aarch64/aarch64-tuning-flags.def
>   (AARCH64_EXTRA_TUNING_OPTION): Remove superseded tuning
>   options.
> * config/aarch64/aarch64.cc (aarch64_parse_ldp_policy): New
>   function to parse ldp-policy option.
> (aarch64_parse_stp_policy): New function to parse stp-policy option.
> (aarch64_override_options_internal): Call parsing functions.
> (aarch64_operands_ok_for_ldpstp): Add option-value check and
>   alignment check and remove superseded ones
> (aarch64_operands_adjust_ok_for_ldpstp): Add option-value check and
>   alignment check and remove superseded ones.
> * config/aarch64/aarch64.opt: Add options.
> 
> gcc/testsuite/ChangeLog:
> * gcc.target/aarch64/ldp_aligned.c: New test.
> * gcc.target/aarch64/ldp_always.c: New test.
> * gcc.target/aarch64/ldp_never.c: New test.
> * gcc.target/aarch64/stp_aligned.c: New test.
> * gcc.target/aarch64/stp_always.c: New test.
> * gcc.target/aarch64/stp_never.c: New test.
> 
> Signed-off-by: Manos Anagnostakis 
> ---
> 
>  gcc/config/aarch64/aarch64-protos.h   |  24 ++
>  gcc/config/aarch64/aarch64-tuning-flags.def   |   8 -
>  gcc/config/aarch64/aarch64.cc | 229 ++
>  gcc/config/aarch64/aarch64.opt|   8 +
>  .../gcc.target/aarch64/ldp_aligned.c  |  64 +
>  gcc/testsuite/gcc.target/aarch64/ldp_always.c |  64 +
>  gcc/testsuite/gcc.target/aarch64/ldp_never.c  |  64 +
>  .../gcc.target/aarch64/stp_aligned.c  |  60 +
>  gcc/testsuite/gcc.target/aarch64/stp_always.c |  60 +
>  gcc/testsuite/gcc.target/aarch64/stp_never.c  |  60 +
>  10 files changed, 580 insertions(+), 61 deletions(-)
>  create mode 100644 gcc/testsuite/gcc.target/aarch64/ldp_aligned.c
>  create mode 100644 gcc/testsuite/gcc.target/aarch64/ldp_always.c
>  create mode 100644 gcc/testsuite/gcc.target/aarch64/ldp_never.c
>  create mode 100644 gcc/testsuite/gcc.target/aarch64/stp_aligned.c
>  create mode 100644 gcc/testsuite/gcc.target/aarch64/stp_always.c
>  create mode 100644 gcc/testsuite/gcc.target/aarch64/stp_never.c
> 
> diff --git a/gcc/config/aarch64/aarch64-protos.h
> b/gcc/config/aarch64/aarch64-protos.h
> index 70303d6fd95..be1d73490ed 100644
> --- a/gcc/config/aarch64/aarch64-protos.h
> +++ b/gcc/config/aarch64/aarch64-protos.h
> @@ -568,6 +568,30 @@ struct tune_params
>/* Place prefetch struct pointer at the end to enable type checking
>   errors when tune_params misses elements (e.g., from erroneous merges).
> */
>const struct cpu_prefetch_tune *prefetch;
> +/* An enum specifying how to handle load pairs using a fine-grained policy:
> +   - LDP_POLICY_ALIGNED: Emit ldp if the source pointer is aligned
> +   to at least double the alignment of the type.
> +   - LDP_POLICY_ALWAYS: Emit ldp regardless of alignment.
> +   - LDP_POLICY_NEVER: Do not emit ldp.  */
> +
> +  enum aarch64_ldp_policy_model
> +  {
> +LDP_POLICY_ALIGNED,
> +LDP_POLICY_ALWAYS,
> +LDP_POLICY_NEVER
> +  } ldp_policy_model;
> +/* An enum specifying how to handle store pairs using a fine-grained policy:
> +   - STP_POLICY_ALIGNED: Emit stp if the source pointer is aligned
> +   to at least double the alignment of the type.
> +   -

Re: PING^5: [PATCH] rtl-optimization/110939 Really fix narrow comparison of memory and constant

2023-09-25 Thread Eric Botcazou

> This is why I got a bit uncertain and hoped to get some feedback whether
> my intuition is correct or not.  Meanwhile I also found a comment in
> the internals book at "14.7 Constant Expression Types" where we have:
> 
>"Constants generated for modes with fewer bits than in HOST_WIDE_INT
> must be sign extended to full width (e.g., with gen_int_mode).
> [...]
> Note however that values are neither inherently signed nor
> inherently unsigned; where necessary, signedness is determined by
> the rtl operation instead."
> 
> At least this and the assert statement document that the normal form of
> a CONST_INT is kind of special w.r.t. unsigned integers.  Is there
> anyone who can shed some light on _why_ such a normal form was chosen?

In RTL integral values have no sign, they just represent a given pattern of 
bits so, in order to have a 1-to-1 mapping, you need to choose a canonical 
form.  The signed form is probably more natural and, since CONST_INTs have no 
mode, the same objects are used for e.g. QImode and HImode, which means that 
you need to sign-extend the bit pattern.

-- 
Eric Botcazou

Re: [PATCH] Always generate else-block in gimplify

2023-09-25 Thread Richard Biener

On Sun, Sep 24, 2023 at 3:09 PM Jørgen Kvalsvik  wrote:
>
> This is a request for feedback and a proof-of-concept, not something I
> intend to merge as-is.  It would be nice if gcc, maybe just under some
> circumstances, always generated an else-block for coverage purposes.
>
> I am working on the MC/DC support by CFG analysis for a while
> https://gcc.gnu.org/pipermail/gcc-patches/2023-June/621449.html and have
> ironed out a lot of problems. The last problem I know about, which is
> impossible to actually fix right now, is the "fusing" of nested ifs.
> Here is an example:
>
> if (a) if (b) if (c) { ... } // 3 conditions, 6 outcomes
> if (a && b && c) { ... } // 3 conditions, 6 outcomes
>
> These form isomorphic CFGs which means there is no way for my algorithm
> to distinguish them. This is sort-of acceptable since the coverage
> measurements more accurately measure the semantics (and not the syntax),
> but this also happens when there is code in-between the nesting:
>
> if (a) // measures to 2 conditions, 4 outcomes
> {
> a += b * 10;
> b -= a + 2;
> if (b)
> {
> ...
> }
> }
>
> You would expect this to be measured as:
>
> if (a) // 1 condition, 2 outcomes
> {
> a += b * 10;
> b -= a + 2;
> if (b) // 1 condition, 2 outcomes
> {
> ...
> }
> }
>
> The source of the problem is the missing (or empty) else block, as the
> algorithm uses the outcome (then/else) edges to determine the limits of
> expressions. If, however, the else blocks are generated, the conditions
> are counted as you would expect.
>
> So I have a few questions:
>
> 1. Is something like this even acceptable? The semantics of the program
>should not change, assuming the else-block only exists but is without
>significant behavior. It will only be generated if there is no
>explicit else in source.
> 2. Should this only be generated when necessary (e.g. under condition
>coverage? No optimization?)
> 3. I used a simple int-init { int __mcdc_barrier = 0; } but there might
>be better contents for the block that does not add anything
>operationally. I am not very familiar with this part of gcc and would
>like to see something better. Any suggestions?

Can you in theory handle this by splitting the 'else' edge before
coverage instrumentation rather than using a stmt inserted during
gimplification?

> ---
>  gcc/gimplify.cc | 8 
>  1 file changed, 8 insertions(+)
>
> diff --git a/gcc/gimplify.cc b/gcc/gimplify.cc
> index ade6e335da7..43af38df742 100644
> --- a/gcc/gimplify.cc
> +++ b/gcc/gimplify.cc
> @@ -4370,6 +4370,14 @@ gimplify_cond_expr (tree *expr_p, gimple_seq *pre_p, 
> fallback_t fallback)
>enum tree_code pred_code;
>gimple_seq seq = NULL;
>
> +  if (TREE_OPERAND (expr, 2) == NULL_TREE)
> +  {
> +  tree var = build_decl (UNKNOWN_LOCATION, VAR_DECL, get_identifier
> +   ("__mcdc_barrier"), integer_type_node);
> +  tree val = build_int_cst (integer_type_node, 0);
> +  TREE_OPERAND (expr, 2) = build2 (INIT_EXPR, TREE_TYPE (var), var, val);
> +  }
> +
>/* If this COND_EXPR has a value, copy the values into a temporary within
>   the arms.  */
>if (!VOID_TYPE_P (type))
> --
> 2.30.2
>

Re: On a Plane During Tomorrow's RISC-V GCC Patchwork Meeting

2023-09-25 Thread Palmer Dabbelt


On Mon, 18 Sep 2023 15:13:04 PDT (-0700), Vineet Gupta wrote:

On 9/18/23 09:11, Jeff Law wrote:



On 9/18/23 09:24, Kito Cheng wrote:

I may missed that one time too, not on plane yet, but need to go bed
earlier due to my flight is in next day early morning...

I'm unavailable as well, though I don't get on a plane until Wednesday
evening.


This is one meeting I really look forward to :-)
I'll be on a plane Wednesday evening as  well - see you all soon.


Looks like I'll also be traveling for this week's meeting, so I'll have 
to skip again.




-Vineet

Re: [PATCH v2 0/1] Add LoongArch64 support for D frontend

2023-09-25 Thread Yang Yujie

Hi Zixing,

We are also working on a patch series that could pass the libphobos regression 
tests.
Will post this later once all failed items are fixed.

Yujie

On Sun, Sep 24, 2023 at 03:40:32PM -0600, Zixing Liu wrote:
> This patch adds the LoongArch64 support for GCC D frontend.
> 
> The runtime support is submitted as a separate patch here:
> https://github.com/dlang/dmd/pull/15628.
> 
> You can find more information about the LoongArch architecture on this
> website:
> https://loongson.github.io/LoongArch-Documentation/README-EN.html.
> 
> --
> 
> Changes since the last revision of the patch:
> 
> * Corrected copyright years in loongarch-d.cc and loongarch-d.h.
> * Removed changes to the tests, the changes have been rolled into the DMD
>   changes in:
>   
> https://github.com/dlang/dmd/pull/15628/commits/eb84b8a2bc86aa751ad6f472422e8abad63ff500
>   .
> * Removed D_LP32 and D_LP64 bits. Since LoongArch ABIs are somewhat
>   complicated, we may introduce the ABI information in the form of target
>   traits in the future.
> 
> Zixing Liu (1):
> 
>  gcc/config.gcc |  1 +
>  gcc/config/loongarch/loongarch-d.cc| 77 ++
>  gcc/config/loongarch/loongarch-d.h | 26 
>  gcc/config/loongarch/t-loongarch   |  4 ++
>  libphobos/configure.tgt|  3 +
>  libphobos/libdruntime/gcc/sections/elf.d   |  2 +
>  libphobos/libdruntime/gcc/unwind/generic.d |  1 +
>  7 files changed, 114 insertions(+)
>  create mode 100644 gcc/config/loongarch/loongarch-d.cc
>  create mode 100644 gcc/config/loongarch/loongarch-d.h
> 
> -- 
> 2.42.0

[PING] [PATCH] Harmonize headers between both dg-extract-results scripts

2023-09-25 Thread Paul Iannetta

On Mon, Sep 18, 2023 at 08:39:34AM +0200, Paul Iannetta wrote:
> On Thu, Sep 14, 2023 at 04:24:33PM +0200, Paul Iannetta wrote:
> > Hi,
> > 
> > This is a small patch so that both dg-extract-results.py and
> > dg-extract-results.sh share the same header.  In particular, it fixes
> > the fact that the regexp r'^Test Run By (\S+) on (.*)$' was never
> > matched in the python file.
> 
> By the way, the bash script dg-extract-results.sh checks whether
> python is available by invoking python.  However, it seems that the
> policy on newer machines is to not provide python as a symlink (at
> least on Ubuntu 22.04 and above; and RHEL 8).  Therefore, we might
> want to also check against python3 so that the bash script does not
> fail to find python even though it is available.
> 
> Thanks,
> Paul
> 
> 
> > Author: Paul Iannetta 
> > Date:   Thu Sep 14 15:43:58 2023 +0200
> > 
> > Harmonize headers between both dg-extract-results scripts
> > 
> > The header of the python version looked like:
> > Target is ...
> > Host   is ...
> > The header of the bash version looked like:
> > Test run by ... on ...
> > Target is ...
> > 
> > After this change both headers look like:
> > Test run by ... on ...
> > Target is ...
> > Host   is ...
> > 
> > The order of the tests is not the same but since dg-cmp-results.sh it
> > does not matter much.
> > 
> > contrib/ChangeLog:
> > 
> > 2023-09-14  Paul Iannetta  
> > 
> > * dg-extract-results.py: Print the "Test run" line.
> > * dg-extract-results.sh: Print the "Host" line.
> > 
> > diff --git a/contrib/dg-extract-results.py b/contrib/dg-extract-results.py
> > index 30aa68771d4..34da1808c5f 100644
> > --- a/contrib/dg-extract-results.py
> > +++ b/contrib/dg-extract-results.py
> > @@ -113,7 +113,7 @@ class Prog:
> >  # Whether to create .sum rather than .log output.
> >  self.do_sum = True
> >  # Regexps used while parsing.
> > -self.test_run_re = re.compile (r'^Test Run By (\S+) on (.*)$')
> > +self.test_run_re = re.compile (r'^Test run by (\S+) on (.*)$')
> >  self.tool_re = re.compile (r'^\t\t=== (.*) tests ===$')
> >  self.result_re = re.compile (r'^(PASS|XPASS|FAIL|XFAIL|UNRESOLVED'
> >   r'|WARNING|ERROR|UNSUPPORTED|UNTESTED'
> > diff --git a/contrib/dg-extract-results.sh b/contrib/dg-extract-results.sh
> > index ff6c50d029c..57f6fe0e997 100755
> > --- a/contrib/dg-extract-results.sh
> > +++ b/contrib/dg-extract-results.sh
> > @@ -271,7 +271,7 @@ cat $SUM_FILES \
> >  
> >  # Write the begining of the combined summary file.
> >  
> > -head -n 2 $FIRST_SUM
> > +head -n 3 $FIRST_SUM
> >  echo
> >  echo " === $TOOL tests ==="
> >  echo

Re: [PATCH v1] Update check_effective_target_vect_int_mod according to LoongArch SX/ASX capabilities.

2023-09-25 Thread Chenghui Pan

Thanks! I will try to improve it.

On Mon, 2023-09-25 at 17:44 +0800, Xi Ruoyao wrote:
> On Mon, 2023-09-25 at 17:38 +0800, Chenghui Pan wrote:
> > Hi!
> > 
> > After some attempts, I think we still need to check
> > "check_effective_target_loongarch_sx" in vect_int_mod. I wrote some
> > temporary logic in gcc/testsuite/lib/target-supports.exp like this:
> > 
> > diff --git a/gcc/testsuite/lib/target-supports.exp
> > b/gcc/testsuite/lib/target-supports.exp
> > index 2de41cef2f6..91e1c22a6e1 100644
> > --- a/gcc/testsuite/lib/target-supports.exp
> > +++ b/gcc/testsuite/lib/target-supports.exp
> > @@ -8586,7 +8586,8 @@ proc check_effective_target_vect_int_mod { }
> > {
> >  return [check_cached_effective_target_indexed vect_int_mod {
> >    expr { ([istarget powerpc*-*-*]
> >   && [check_effective_target_has_arch_pwr10])
> > - || [istarget amdgcn-*-*] }}]
> > + || [istarget loongarch*-*-*]
> > + || [istarget amdgcn-*-*] }}]
> >  }
> >  
> >  # Return 1 if the target supports vector even/odd elements
> > extraction,
> > 0 otherwise.
> > @@ -11174,6 +11175,12 @@ proc check_vect_support_and_set_flags { }
> > {
> >     lappend DEFAULT_VECTCFLAGS "--param" "riscv-vector-abi"
> >     set dg-do-what-default compile
> >     }
> > +    } elseif [istarget loongarch*-*-*] {
> > +  if [check_effective_target_loongarch_asx_hw] {
> > + lappend DEFAULT_VECTCFLAGS "-mdouble-float" "-mlasx"
> > +  } elseif [check_effective_target_loongarch_sx_hw] {
> > + lappend DEFAULT_VECTCFLAGS "-mdouble-float" "-mlsx"
> > +  }
> 
> I think we can always enable LASX in DEFAULT_VECTCFLAGS, but set dg-
> do-
> what-default to "run" only if both the hardware and the kernel
> supports
> LASX.  If the kernel or the hardware is not capable we set dg-do-
> what-
> default to "compile".
> 
> >  } else {
> >  return 0
> >  }
> > \* temp impl of sx/asx hw proc *\
> > 
> > And then in make check without --target_board=unix/-mlasx, vect.exp
> > is
> > invoked with expected vector isa options, but pr104992.c failed
> > because
> > it expected result with "vect_int_mod returns 1" but it was
> > compiled
> > without -mlsx/-mlasx. Seems pr104992.c is invoked by gcc.dg/dg.exp,
> > pr104992.c is not affected by DEFAULT_CFLAGS, so we still need to
> > check
> > if LSX/LASX is available in vect_int_mod. 
> > 
> > Other parts of the new patch are still WIP.
>

Re: [PATCH v1] Update check_effective_target_vect_int_mod according to LoongArch SX/ASX capabilities.

2023-09-25 Thread Xi Ruoyao

On Mon, 2023-09-25 at 17:38 +0800, Chenghui Pan wrote:
> Hi!
> 
> After some attempts, I think we still need to check
> "check_effective_target_loongarch_sx" in vect_int_mod. I wrote some
> temporary logic in gcc/testsuite/lib/target-supports.exp like this:
> 
> diff --git a/gcc/testsuite/lib/target-supports.exp
> b/gcc/testsuite/lib/target-supports.exp
> index 2de41cef2f6..91e1c22a6e1 100644
> --- a/gcc/testsuite/lib/target-supports.exp
> +++ b/gcc/testsuite/lib/target-supports.exp
> @@ -8586,7 +8586,8 @@ proc check_effective_target_vect_int_mod { } {
>  return [check_cached_effective_target_indexed vect_int_mod {
>    expr { ([istarget powerpc*-*-*]
>   && [check_effective_target_has_arch_pwr10])
> - || [istarget amdgcn-*-*] }}]
> + || [istarget loongarch*-*-*]
> + || [istarget amdgcn-*-*] }}]
>  }
>  
>  # Return 1 if the target supports vector even/odd elements extraction,
> 0 otherwise.
> @@ -11174,6 +11175,12 @@ proc check_vect_support_and_set_flags { } {
>     lappend DEFAULT_VECTCFLAGS "--param" "riscv-vector-abi"
>     set dg-do-what-default compile
>     }
> +    } elseif [istarget loongarch*-*-*] {
> +  if [check_effective_target_loongarch_asx_hw] {
> + lappend DEFAULT_VECTCFLAGS "-mdouble-float" "-mlasx"
> +  } elseif [check_effective_target_loongarch_sx_hw] {
> + lappend DEFAULT_VECTCFLAGS "-mdouble-float" "-mlsx"
> +  }

I think we can always enable LASX in DEFAULT_VECTCFLAGS, but set dg-do-
what-default to "run" only if both the hardware and the kernel supports
LASX.  If the kernel or the hardware is not capable we set dg-do-what-
default to "compile".

>  } else {
>  return 0
>  }
> \* temp impl of sx/asx hw proc *\
> 
> And then in make check without --target_board=unix/-mlasx, vect.exp is
> invoked with expected vector isa options, but pr104992.c failed because
> it expected result with "vect_int_mod returns 1" but it was compiled
> without -mlsx/-mlasx. Seems pr104992.c is invoked by gcc.dg/dg.exp,
> pr104992.c is not affected by DEFAULT_CFLAGS, so we still need to check
> if LSX/LASX is available in vect_int_mod. 
> 
> Other parts of the new patch are still WIP.

-- 
Xi Ruoyao 
School of Aerospace Science and Technology, Xidian University

Re: [PATCH v1] Update check_effective_target_vect_int_mod according to LoongArch SX/ASX capabilities.

2023-09-25 Thread Chenghui Pan

Hi!

After some attempts, I think we still need to check
"check_effective_target_loongarch_sx" in vect_int_mod. I wrote some
temporary logic in gcc/testsuite/lib/target-supports.exp like this:

diff --git a/gcc/testsuite/lib/target-supports.exp
b/gcc/testsuite/lib/target-supports.exp
index 2de41cef2f6..91e1c22a6e1 100644
--- a/gcc/testsuite/lib/target-supports.exp
+++ b/gcc/testsuite/lib/target-supports.exp
@@ -8586,7 +8586,8 @@ proc check_effective_target_vect_int_mod { } {
 return [check_cached_effective_target_indexed vect_int_mod {
   expr { ([istarget powerpc*-*-*]
  && [check_effective_target_has_arch_pwr10])
- || [istarget amdgcn-*-*] }}]
+ || [istarget loongarch*-*-*]
+ || [istarget amdgcn-*-*] }}]
 }
 
 # Return 1 if the target supports vector even/odd elements extraction,
0 otherwise.
@@ -11174,6 +11175,12 @@ proc check_vect_support_and_set_flags { } {
lappend DEFAULT_VECTCFLAGS "--param" "riscv-vector-abi"
set dg-do-what-default compile
}
+} elseif [istarget loongarch*-*-*] {
+  if [check_effective_target_loongarch_asx_hw] {
+ lappend DEFAULT_VECTCFLAGS "-mdouble-float" "-mlasx"
+  } elseif [check_effective_target_loongarch_sx_hw] {
+ lappend DEFAULT_VECTCFLAGS "-mdouble-float" "-mlsx"
+  }
 } else {
 return 0
 }
\* temp impl of sx/asx hw proc *\

And then in make check without --target_board=unix/-mlasx, vect.exp is
invoked with expected vector isa options, but pr104992.c failed because
it expected result with "vect_int_mod returns 1" but it was compiled
without -mlsx/-mlasx. Seems pr104992.c is invoked by gcc.dg/dg.exp,
pr104992.c is not affected by DEFAULT_CFLAGS, so we still need to check
if LSX/LASX is available in vect_int_mod. 

Other parts of the new patch are still WIP.

On Sun, 2023-09-24 at 18:05 +0800, Xi Ruoyao wrote:
> On Wed, 2023-09-20 at 09:15 +0800, Chenghui Pan wrote:
> > LoongArch failed to pass gcc.dg/pr104992.c with -mlsx and -mlasx.
> > This test uses
> > different dg-final directives depending on the vect_int_mod result,
> > LoongArch
> > SX/ASX supports this operation but the corresponding description is
> > not defined in
> > target-supports.exp. This patch solves the problem above with some
> > modification in proc check_effective_target_vect_int_mod.
> 
> I think we can just add -mdouble-float -mlasx into DEFAULT_VECTCFLAGS
> and always enable vect_int_mod for LoongArch.  This will make
> vect.exp
> tests automatically run for every "make check" on LoongArch.
> 
> > gcc/testsuite/ChangeLog:
> > 
> > * lib/target-supports.exp: Update
> > check_effective_target_vect_int_mod according to
> > LoongArch SX/ASX capabilities.
> > ---
> >  gcc/testsuite/lib/target-supports.exp | 18 ++
> >  1 file changed, 18 insertions(+)
> > 
> > diff --git a/gcc/testsuite/lib/target-supports.exp
> > b/gcc/testsuite/lib/target-supports.exp
> > index 2de41cef2f6..b253dc578d2 100644
> > --- a/gcc/testsuite/lib/target-supports.exp
> > +++ b/gcc/testsuite/lib/target-supports.exp
> > @@ -8586,6 +8586,8 @@ proc check_effective_target_vect_int_mod { }
> > {
> >  return [check_cached_effective_target_indexed vect_int_mod {
> >    expr { ([istarget powerpc*-*-*]
> >   && [check_effective_target_has_arch_pwr10])
> > +    || ([istarget loongarch*-*-*]
> > +    && [check_effective_target_loongarch_sx])
> >   || [istarget amdgcn-*-*] }}]
> >  }
> >  
> > @@ -12656,6 +12658,22 @@ proc
> > check_effective_target_const_volatile_readonly_section { } {
> >    return 1
> >  }
> >  
> > +proc check_effective_target_loongarch_sx { } {
> > +    return [check_no_compiler_messages loongarch_lsx assembly {
> > +   #if !defined(__loongarch_sx)
> > +   #error "LSX not defined"
> > +   #endif
> > +    }]
> > +}
> > +
> > +proc check_effective_target_loongarch_asx { } {
> > +    return [check_no_compiler_messages loongarch_asx assembly {
> > +   #if !defined(__loongarch_asx)
> > +   #error "LASX not defined"
> > +   #endif
> > +    }]
> > +}
> > +
> >  # Appends necessary Python flags to extra-tool-flags if Python.h
> > is supported.
> >  # Otherwise, modifies dg-do-what.
> >  proc dg-require-python-h { args } {
>

[committed] libstdc++: Prevent unwanted ADL in std::to_array [PR111512]

2023-09-25 Thread Jonathan Wakely

Tested x86_64-linux. Pushed to trunk.

-- >8 --

As noted in PR c++/111512, GCC does ADL for __builtin_memcpy if it is
unqualified, which can cause errors for template argument types which
cannot be completed.

Casting the memcpy arguments to void* prevents ADL from considering the
problem type.

libstdc++-v3/ChangeLog:

PR libstdc++/111511
PR c++/111512
* include/std/array (to_array): Cast memcpy arguments to void*.
* testsuite/23_containers/array/creation/111512.cc: New test.
---
 libstdc++-v3/include/std/array|  4 ++--
 .../23_containers/array/creation/111512.cc| 24 +++
 2 files changed, 26 insertions(+), 2 deletions(-)
 create mode 100644 
libstdc++-v3/testsuite/23_containers/array/creation/111512.cc

diff --git a/libstdc++-v3/include/std/array b/libstdc++-v3/include/std/array
index 0e32d7b52d0..c4d534c3a34 100644
--- a/libstdc++-v3/include/std/array
+++ b/libstdc++-v3/include/std/array
@@ -432,7 +432,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
{
  array<remove_cv_t<_Tp>, _Nm> __arr;
  if (!__is_constant_evaluated() && _Nm != 0)
-   __builtin_memcpy(__arr.data(), __a, sizeof(__a));
+   __builtin_memcpy((void*)__arr.data(), (void*)__a, sizeof(__a));
  else
for (size_t __i = 0; __i < _Nm; ++__i)
  __arr._M_elems[__i] = __a[__i];
@@ -461,7 +461,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
{
  array<remove_cv_t<_Tp>, _Nm> __arr;
  if (!__is_constant_evaluated() && _Nm != 0)
-   __builtin_memcpy(__arr.data(), __a, sizeof(__a));
+   __builtin_memcpy((void*)__arr.data(), (void*)__a, sizeof(__a));
  else
for (size_t __i = 0; __i < _Nm; ++__i)
  __arr._M_elems[__i] = __a[__i];
diff --git a/libstdc++-v3/testsuite/23_containers/array/creation/111512.cc 
b/libstdc++-v3/testsuite/23_containers/array/creation/111512.cc
new file mode 100644
index 000..b0f25a62153
--- /dev/null
+++ b/libstdc++-v3/testsuite/23_containers/array/creation/111512.cc
@@ -0,0 +1,24 @@
+// { dg-do compile { target c++20 } }
+
+// Bug libstdc++/111511 - Incorrect ADL in std::to_array in GCC 11/12/13
+// Bug c++/111512 - GCC's __builtin_memcpy can trigger ADL
+
+#include <array>
+#include <utility>
+
+struct incomplete;
+
+template<typename T>
+struct holder {
+T t; // { dg-bogus "'holder<T>::t' has incomplete type" }
+};
+
+// A complete type that cannot be used as an associated type for ADL.
+using adl_bomb = holder<holder<incomplete>>*;
+
+int main()
+{
+adl_bomb a[1]{};
+(void) std::to_array(a);
+(void) std::to_array(std::move(a));
+}
-- 
2.41.0

[committed] libstdc++: Define C++23 std::forward_like (P2445R1)

2023-09-25 Thread Jonathan Wakely

Tested x86_64-linux. Pushed to trunk.

-- >8 --

libstdc++-v3/ChangeLog:

* include/bits/move.h (forward_like): Define for C++23.
* include/bits/version.def (forward_like): Define.
* include/bits/version.h: Regenerate.
* include/std/utility (__glibcxx_want_forward_like): Define.
* testsuite/20_util/forward_like/1.cc: New test.
* testsuite/20_util/forward_like/2_neg.cc: New test.
* testsuite/20_util/forward_like/version.cc: New test.
---
 libstdc++-v3/include/bits/move.h  | 26 
 libstdc++-v3/include/bits/version.def |  8 +++
 libstdc++-v3/include/bits/version.h   | 27 ++---
 libstdc++-v3/include/std/utility  |  5 +-
 .../testsuite/20_util/forward_like/1.cc   | 59 +++
 .../testsuite/20_util/forward_like/2_neg.cc   | 10 
 .../testsuite/20_util/forward_like/version.cc | 10 
 7 files changed, 135 insertions(+), 10 deletions(-)
 create mode 100644 libstdc++-v3/testsuite/20_util/forward_like/1.cc
 create mode 100644 libstdc++-v3/testsuite/20_util/forward_like/2_neg.cc
 create mode 100644 libstdc++-v3/testsuite/20_util/forward_like/version.cc

diff --git a/libstdc++-v3/include/bits/move.h b/libstdc++-v3/include/bits/move.h
index 00997d6f1fb..0151d78aff9 100644
--- a/libstdc++-v3/include/bits/move.h
+++ b/libstdc++-v3/include/bits/move.h
@@ -89,6 +89,32 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
   return static_cast<_Tp&&>(__t);
 }
 
+#if __glibcxx_forward_like // C++ >= 23
+  template<typename _Tp, typename _Up>
+  [[nodiscard]]
+  constexpr decltype(auto)
+  forward_like(_Up&& __x) noexcept
+  {
+constexpr bool __as_rval = is_rvalue_reference_v<_Tp&&>;
+
+if constexpr (is_const_v<remove_reference_t<_Tp>>)
+  {
+   using _Up2 = remove_reference_t<_Up>;
+   if constexpr (__as_rval)
+ return static_cast<const _Up2&&>(__x);
+   else
+ return static_cast<const _Up2&>(__x);
+  }
+else
+  {
+   if constexpr (__as_rval)
+ return static_cast<remove_reference_t<_Up>&&>(__x);
+   else
+ return static_cast<_Up&>(__x);
+  }
+  }
+#endif
+
   /**
*  @brief  Convert a value to an rvalue.
*  @param  __t  A thing of arbitrary type.
diff --git a/libstdc++-v3/include/bits/version.def 
b/libstdc++-v3/include/bits/version.def
index 6252f5478e0..8f008f9048f 100644
--- a/libstdc++-v3/include/bits/version.def
+++ b/libstdc++-v3/include/bits/version.def
@@ -1542,6 +1542,14 @@ ftms = {
   };
 };
 
+ftms = {
+  name = forward_like;
+  values = {
+v = 202207;
+cxxmin = 23;
+  };
+};
+
 ftms = {
   name = ios_noreplace;
   values = {
diff --git a/libstdc++-v3/include/std/utility b/libstdc++-v3/include/std/utility
index f30e802a88d..bdaf5d4c31b 100644
--- a/libstdc++-v3/include/std/utility
+++ b/libstdc++-v3/include/std/utility
@@ -68,9 +68,10 @@
 #include 
 #include 
 
-#define __glibcxx_want_exchange_function
-#define __glibcxx_want_constexpr_algorithms
 #define __glibcxx_want_as_const
+#define __glibcxx_want_constexpr_algorithms
+#define __glibcxx_want_exchange_function
+#define __glibcxx_want_forward_like
 #define __glibcxx_want_integer_comparison_functions
 #define __glibcxx_want_to_underlying
 #define __glibcxx_want_unreachable
diff --git a/libstdc++-v3/testsuite/20_util/forward_like/1.cc 
b/libstdc++-v3/testsuite/20_util/forward_like/1.cc
new file mode 100644
index 000..928e60094a3
--- /dev/null
+++ b/libstdc++-v3/testsuite/20_util/forward_like/1.cc
@@ -0,0 +1,59 @@
+// { dg-do compile { target c++23 } }
+// { dg-add-options no_pch }
+
+#include <utility>
+
+#ifndef __cpp_lib_forward_like
+# error "Feature-test macro for forward_like missing in <utility>"
+#elif __cpp_lib_forward_like != 202207L
+# error "Feature-test macro for forward_like has wrong value in <utility>"
+#endif
+
+template<typename T, typename U>
+using forward_like_t = decltype(std::forward_like<T>(std::declval<U>()));
+
+#if 0
+using std::is_same_v;
+#else
+#include <concepts>
+template<class T, class U> concept is_same_v = std::same_as<T, U>;
+#endif
+
+static_assert( is_same_v, long&&> );
+static_assert( is_same_v, long&> );
+static_assert( is_same_v, long&&> );
+
+static_assert( is_same_v, const long&&> );
+static_assert( is_same_v, const long&> );
+static_assert( is_same_v, const long&&> );
+
+static_assert( is_same_v, const long&&> );
+static_assert( is_same_v, const long&> );
+static_assert( is_same_v, const long&&> );
+
+static_assert( is_same_v, const long&&> );
+static_assert( is_same_v, const long&> );
+static_assert( is_same_v, const long&&> );
+
+static_assert( is_same_v, const long&&> );
+static_assert( is_same_v, const long&> );
+static_assert( is_same_v, const long&&> );
+
+static_assert( is_same_v, const long&&> );
+static_assert( is_same_v, const long&> );
+static_assert( is_same_v, const long&&> );
+
+static_assert( is_same_v, const long&&> );
+static_assert( is_same_v, const long&> );
+static_assert( is_same_v, const long&&> );
+
+static_assert( is_same_v, long&&> );
+static_assert( is_same_v, long&> );
+static_assert( is_same_v, long&&> );
+
+static_assert( is_same_v,
+const

Re: [PATCH] ipa: Self-DCE of uses of removed call LHSs (PR 108007)

2023-09-25 Thread Jan Hubicka

> >> PR 108007 is another manifestation where we rely on DCE to clean-up
> >> after IPA-SRA and if the user explicitly switches DCE off, IPA-SRA
> >> can leave behind statements which are fed uninitialized values and
> >> trap, even though their results are themselves never used.
> >>
> >> I have already fixed this for unused parameters in callees, this bug
> >> shows that almost the same thing can happen for removed returns, on
> >> the side of callers.  This means that the issue has to be fixed
> >> elsewhere, in call redirection.  This patch adds a function which
> >> recursively looks for uses of operations fed specific SSA names and
> >> removes them all.
> >>
> >> That would have been easy if it wasn't for debug statements during
> >> tree-inline (from which call redirection is also invoked).  Debug
> >> statements are decoupled from the rest at this point and iterating
> >> over uses of SSAs does not bring them up.  During tree-inline they are
> >> handled especially at the end, I assume in order to make sure that
> >> relative ordering of UIDs are the same with and without debug info.
> >>
> >> This means that during tree-inline we need to make a hash of killed
> >> SSAs, that we already have in copy_body_data, available to the
> >> function making the purging.  So the patch duly does also that, making
> >> the interface slightly ugly.
> >>
> >> Bootstrapped and tested on x86_64-linux.  OK for master?  (I am not sure
> >> the problem is grave enough to warrant backporting to release branches
> >> but can do that as well if people think I should.)
> >>
> >> Thanks,
> >>
> >> Martin
> >>
> >>
> >> gcc/ChangeLog:
> >>
> >> 2023-05-11  Martin Jambor  
> >>
> >>PR ipa/108007
> >>* cgraph.h (cgraph_edge): Add a parameter to
> >>redirect_call_stmt_to_callee.
> >>* ipa-param-manipulation.h (ipa_param_adjustments): Added a
> >>parameter to modify_call.
> >>* cgraph.cc (cgraph_edge::redirect_call_stmt_to_callee): New
> >>parameter killed_ssas, pass it to padjs->modify_call.
> >>* ipa-param-manipulation.cc (purge_transitive_uses): New function.
> >>(ipa_param_adjustments::modify_call): New parameter killed_ssas.
> >>Instead of substituting uses, invoke purge_transitive_uses.  If
> >>hash of killed SSAs has not been provided, create a temporary one
> >>and release SSAs that have been added to it.
> >>* tree-inline.cc (redirect_all_calls): Create
> >>id->killed_new_ssa_names earlier, pass it to edge redirection,
> >>adjust a comment.
> >>(copy_body): Release SSAs in id->killed_new_ssa_names.
> >>
> >> gcc/testsuite/ChangeLog:
> >>
> >> 2023-05-11  Martin Jambor  
> >>
> >>PR ipa/108007
> >>* gcc.dg/ipa/pr108007.c: New test.
> >> ---
> >>  gcc/cgraph.cc   | 10 +++-
> >>  gcc/cgraph.h|  9 ++-
> >>  gcc/ipa-param-manipulation.cc   | 85 +
> >>  gcc/ipa-param-manipulation.h|  3 +-
> >>  gcc/testsuite/gcc.dg/ipa/pr108007.c | 32 +++
> >>  gcc/tree-inline.cc  | 28 ++
> >>  6 files changed, 129 insertions(+), 38 deletions(-)
> >>  create mode 100644 gcc/testsuite/gcc.dg/ipa/pr108007.c
> >>
> >> +/* Remove all statements that use NAME and transitively those that use the
> >> +   result of such statements.  KILLED_SSAS contains the SSA_NAMEs that are
> >> +   already being or have been processed and new ones need to be added to 
> >> it.
> >> +   The function only has to process situations handled by
> >> +   ssa_name_only_returned_p in ipa-sra.cc with the exception that it can 
> >> assume
> >> +   it must never reach a use in a return statement.  */
> >> +
> >> +static void
> >> +purge_transitive_uses (tree name, hash_set <tree> *killed_ssas)
> >> +{
> >> +  imm_use_iterator imm_iter;
> >> +  gimple *stmt;
> >> +
> >> +  FOR_EACH_IMM_USE_STMT (stmt, imm_iter, name)
> >> +{
> >> +  if (gimple_debug_bind_p (stmt))
> >> +  {
> >> +/* When running within tree-inline, we will never end up here but
> >> +   adding the SSAs to killed_ssas will do the trick in this case and
> >> +   the respective debug statements will get reset. */
> >> +
> >> +gimple_debug_bind_reset_value (stmt);
> >> +update_stmt (stmt);
> >> +continue;
> >> +  }
> >> +
> >> +  tree lhs = NULL_TREE;
> >> +  if (is_gimple_assign (stmt))
> >> +  lhs = gimple_assign_lhs (stmt);
> >> +  else if (gimple_code (stmt) == GIMPLE_PHI)
> >> +  lhs = gimple_phi_result (stmt);
> >> +  gcc_assert (lhs
> >> +&& (TREE_CODE (lhs) == SSA_NAME)
> >> +&& !gimple_vdef (stmt));
> >> +
> >> +  if (!killed_ssas->contains (lhs))
> >> +  {
> >> +killed_ssas->add (lhs);
> >> +purge_transitive_uses (lhs, killed_ssas);

The SSA graph may be deep, so this recursion may cause a stack overflow; I think we
should use a worklist here (it is also easy to do).

OK with that change.
Honza

Re: [PATCH] ipa-sra: Allow IPA-SRA in presence of returns which will be removed

2023-09-25 Thread Jan Hubicka

> >> gcc/ChangeLog:
> >>
> >> 2023-08-18  Martin Jambor  
> >>
> >>PR ipa/110378
> >>* ipa-param-manipulation.cc
> >>(ipa_param_body_adjustments::mark_dead_statements): Verify that any
> >>return uses of PARAM will be removed.
> >>(ipa_param_body_adjustments::mark_clobbers_dead): Likewise.
> >>* ipa-sra.cc (isra_param_desc): New fields
> >>remove_only_when_retval_removed and split_only_when_retval_removed.
> >>(struct gensum_param_desc): Likewise.  Fix comment long line.
> >>(ipa_sra_function_summaries::duplicate): Copy the new flags.
> >>(dump_gensum_param_descriptor): Dump the new flags.
> >>(dump_isra_param_descriptor): Likewise.
> >>(isra_track_scalar_value_uses): New parameter desc.  Set its flag
> >>remove_only_when_retval_removed when encountering a simple return.
> >>(isra_track_scalar_param_local_uses): Replace parameter call_uses_p
> >>with desc.  Pass it to isra_track_scalar_value_uses and set its
> >>call_uses.
> >>(ptr_parm_has_nonarg_uses): Accept parameter descriptor as a
> >>parameter.  If there is a direct return use, mark any..
> >>(create_parameter_descriptors): Pass the whole parameter descriptor to
> >>isra_track_scalar_param_local_uses and ptr_parm_has_nonarg_uses.
> >>(process_scan_results): Copy the new flags.
> >>(isra_write_node_summary): Stream the new flags.
> >>(isra_read_node_info): Likewise.
> >>(adjust_parameter_descriptions): Check that transformations
> >>requiring return removal only happen when return value is removed.
> >>Restructure main loop.  Adjust dump message.
> >>
> >> gcc/testsuite/ChangeLog:
> >>
> >> 2023-08-18  Martin Jambor  
> >>
> >>PR ipa/110378
> >>* gcc.dg/ipa/ipa-sra-32.c: New test.
> >>* gcc.dg/ipa/pr110378-4.c: Likewise.
> >>* gcc.dg/ipa/ipa-sra-4.c: Use a return value.
> >> ---
> >>  gcc/ipa-param-manipulation.cc |   7 +-
> >>  gcc/ipa-sra.cc| 247 +-
> >>  gcc/testsuite/gcc.dg/ipa/ipa-sra-32.c |  30 
> >>  gcc/testsuite/gcc.dg/ipa/ipa-sra-4.c  |   4 +-
> >>  gcc/testsuite/gcc.dg/ipa/pr110378-4.c |  50 ++
> >>  5 files changed, 251 insertions(+), 87 deletions(-)
> >>  create mode 100644 gcc/testsuite/gcc.dg/ipa/ipa-sra-32.c
> >>  create mode 100644 gcc/testsuite/gcc.dg/ipa/pr110378-4.c
> >>

OK,
Thanks
Honza

Re: [PATCH] LoongArch: doc: Update -m[no-]explicit-relocs for r14-4160

2023-09-25 Thread Xi Ruoyao

On Mon, 2023-09-25 at 16:26 +0800, chenglulu wrote:
> LGTM!
> 
> Thank you for your modification!

Pushed r14-4250.

> 在 2023/9/25 下午4:13, Xi Ruoyao 写道:
> > gcc/ChangeLog:
> > 
> > * doc/invoke.texi: Update -m[no-]explicit-relocs for r14-4160.
> > ---
> > 
> > I've not regtested this as it's only a doc change.  Ok for trunk?
> > 
> >   gcc/doc/invoke.texi | 10 ++
> >   1 file changed, 6 insertions(+), 4 deletions(-)
> > 
> > diff --git a/gcc/doc/invoke.texi b/gcc/doc/invoke.texi
> > index ba7984bcb7e..146b40414b0 100644
> > --- a/gcc/doc/invoke.texi
> > +++ b/gcc/doc/invoke.texi
> > @@ -26159,10 +26159,12 @@ The default code model is @code{normal}.
> >   @itemx -mno-explicit-relocs
> >   Use or do not use assembler relocation operators when dealing with 
> > symbolic
> >   addresses.  The alternative is to use assembler macros instead, which may
> > -limit optimization.  The default value for the option is determined during
> > -GCC build-time by detecting corresponding assembler support:
> > -@code{-mexplicit-relocs} if said support is present,
> > -@code{-mno-explicit-relocs} otherwise.  This option is mostly useful for
> > +limit instruction scheduling but allow linker relaxation.  The default
> > +value for the option is determined during GCC build-time by detecting
> > +corresponding assembler support:
> > +@code{-mno-explicit-relocs} if the assembler supports relaxation or it
> > +does not support relocation operators at all,
> > +@code{-mexplicit-relocs} otherwise.  This option is mostly useful for
> >   debugging, or interoperation with assemblers different from the build-time
> >   one.
> >   
> 

-- 
Xi Ruoyao 
School of Aerospace Science and Technology, Xidian University

Re: [PATCH] LoongArch: doc: Update -m[no-]explicit-relocs for r14-4160

2023-09-25 Thread chenglulu


LGTM!

Thank you for your modification!

在 2023/9/25 下午4:13, Xi Ruoyao 写道:

gcc/ChangeLog:

* doc/invoke.texi: Update -m[no-]explicit-relocs for r14-4160.
---

I've not regtested this as it's only a doc change.  Ok for trunk?

  gcc/doc/invoke.texi | 10 ++
  1 file changed, 6 insertions(+), 4 deletions(-)

diff --git a/gcc/doc/invoke.texi b/gcc/doc/invoke.texi
index ba7984bcb7e..146b40414b0 100644
--- a/gcc/doc/invoke.texi
+++ b/gcc/doc/invoke.texi
@@ -26159,10 +26159,12 @@ The default code model is @code{normal}.
  @itemx -mno-explicit-relocs
  Use or do not use assembler relocation operators when dealing with symbolic
  addresses.  The alternative is to use assembler macros instead, which may
-limit optimization.  The default value for the option is determined during
-GCC build-time by detecting corresponding assembler support:
-@code{-mexplicit-relocs} if said support is present,
-@code{-mno-explicit-relocs} otherwise.  This option is mostly useful for
+limit instruction scheduling but allow linker relaxation.  The default
+value for the option is determined during GCC build-time by detecting
+corresponding assembler support:
+@code{-mno-explicit-relocs} if the assembler supports relaxation or it
+does not support relocation operators at all,
+@code{-mexplicit-relocs} otherwise.  This option is mostly useful for
  debugging, or interoperation with assemblers different from the build-time
  one.

[PATCH] LoongArch: doc: Update -m[no-]explicit-relocs for r14-4160

2023-09-25 Thread Xi Ruoyao

gcc/ChangeLog:

* doc/invoke.texi: Update -m[no-]explicit-relocs for r14-4160.
---

I've not regtested this as it's only a doc change.  Ok for trunk?

 gcc/doc/invoke.texi | 10 ++
 1 file changed, 6 insertions(+), 4 deletions(-)

diff --git a/gcc/doc/invoke.texi b/gcc/doc/invoke.texi
index ba7984bcb7e..146b40414b0 100644
--- a/gcc/doc/invoke.texi
+++ b/gcc/doc/invoke.texi
@@ -26159,10 +26159,12 @@ The default code model is @code{normal}.
 @itemx -mno-explicit-relocs
 Use or do not use assembler relocation operators when dealing with symbolic
 addresses.  The alternative is to use assembler macros instead, which may
-limit optimization.  The default value for the option is determined during
-GCC build-time by detecting corresponding assembler support:
-@code{-mexplicit-relocs} if said support is present,
-@code{-mno-explicit-relocs} otherwise.  This option is mostly useful for
+limit instruction scheduling but allow linker relaxation.  The default
+value for the option is determined during GCC build-time by detecting
+corresponding assembler support:
+@code{-mno-explicit-relocs} if the assembler supports relaxation or it
+does not support relocation operators at all,
+@code{-mexplicit-relocs} otherwise.  This option is mostly useful for
 debugging, or interoperation with assemblers different from the build-time
 one.
 
-- 
2.42.0

Re: [PATCH, rs6000] Enable vector compare for 16-byte memory equality compare [PR111449]

2023-09-25 Thread Kewen.Lin

Hi,

on 2023/9/20 16:49, HAO CHEN GUI wrote:
> Hi,
>   This patch enables vector compare for 16-byte memory equality compare.
> The 16-byte memory equality compare can be efficiently implemented by
> instruction "vcmpequb." It reduces one branch and one compare compared
> with two 8-byte compare sequence.

It looks nice to exploit vcmpequb. for this comparison.

> 
>   16-byte vector compare is not enabled on 32bit sub-targets as TImode
> hasn't been supported well on 32bit sub-targets.

But it sounds weird to say it is with TImode but the underlying instruction
is V16QImode.  This does NOT necessarily depend on TImode, so if it's coded
with V16QImode it would not suffer this unsupported issue.

The reason why you hacked it with TImode seems to be that the generic part of the
code only considers scalar modes?  I wonder if we can extend the generic code
to consider vector modes as well.  It would also make things better if we
have a wider vector mode one day.

I guess there is nothing blocking us from considering vector modes?
CC some experts.

BR,
Kewen

> 
>   Bootstrapped and tested on powerpc64-linux BE and LE with no regressions.
> 
> Thanks
> Gui Haochen
> 
> ChangeLog
> rs6000: Enable vector compare for 16-byte memory equality compare
> 
> gcc/
>   PR target/111449
>   * config/rs6000/altivec.md (cbranchti4): New expand pattern.
>   * config/rs6000/rs6000.cc (rs6000_generate_compare): Generate insn
>   sequence for TImode vector equality compare.
>   * config/rs6000/rs6000.h (MOVE_MAX_PIECES): Define.
>   (COMPARE_MAX_PIECES): Define.
> 
> gcc/testsuite/
>   PR target/111449
>   * gcc.target/powerpc/pr111449.c: New.
> 
> patch.diff
> diff --git a/gcc/config/rs6000/altivec.md b/gcc/config/rs6000/altivec.md
> index e8a596fb7e9..99264235cbe 100644
> --- a/gcc/config/rs6000/altivec.md
> +++ b/gcc/config/rs6000/altivec.md
> @@ -2605,6 +2605,24 @@ (define_insn "altivec_vupklpx"
>  }
>[(set_attr "type" "vecperm")])
> 
> +(define_expand "cbranchti4"
> +  [(use (match_operator 0 "equality_operator"
> + [(match_operand:TI 1 "memory_operand")
> +  (match_operand:TI 2 "memory_operand")]))
> +   (use (match_operand 3))]
> +  "VECTOR_UNIT_ALTIVEC_P (V16QImode)"
> +{
> +  rtx op1 = simplify_subreg (V16QImode, operands[1], TImode, 0);
> +  rtx op2 = simplify_subreg (V16QImode, operands[2], TImode, 0);
> +  operands[1] = force_reg (V16QImode, op1);
> +  operands[2] = force_reg (V16QImode, op2);
> +  rtx_code code = GET_CODE (operands[0]);
> +  operands[0] = gen_rtx_fmt_ee (code, V16QImode, operands[1],
> + operands[2]);
> +  rs6000_emit_cbranch (TImode, operands);
> +  DONE;
> +})
> +
>  ;; Compare vectors producing a vector result and a predicate, setting CR6 to
>  ;; indicate a combined status
>  (define_insn "altivec_vcmpequ_p"
> diff --git a/gcc/config/rs6000/rs6000.cc b/gcc/config/rs6000/rs6000.cc
> index efe9adce1f8..c6b935a64e7 100644
> --- a/gcc/config/rs6000/rs6000.cc
> +++ b/gcc/config/rs6000/rs6000.cc
> @@ -15264,6 +15264,15 @@ rs6000_generate_compare (rtx cmp, machine_mode mode)
> else
>   emit_insn (gen_stack_protect_testsi (compare_result, op0, op1b));
>   }
> +  else if (mode == TImode)
> + {
> +   gcc_assert (code == EQ || code == NE);
> +
> +   rtx result_vector = gen_reg_rtx (V16QImode);
> +   compare_result = gen_rtx_REG (CCmode, CR6_REGNO);
> +   emit_insn (gen_altivec_vcmpequb_p (result_vector, op0, op1));
> +   code = (code == NE) ? GE : LT;
> + }
>else
>   emit_insn (gen_rtx_SET (compare_result,
>   gen_rtx_COMPARE (comp_mode, op0, op1)));
> diff --git a/gcc/config/rs6000/rs6000.h b/gcc/config/rs6000/rs6000.h
> index 3503614efbd..dc33bca0802 100644
> --- a/gcc/config/rs6000/rs6000.h
> +++ b/gcc/config/rs6000/rs6000.h
> @@ -1730,6 +1730,8 @@ typedef struct rs6000_args
> in one reasonably fast instruction.  */
>  #define MOVE_MAX (! TARGET_POWERPC64 ? 4 : 8)
>  #define MAX_MOVE_MAX 8
> +#define MOVE_MAX_PIECES (!TARGET_POWERPC64 ? 4 : 16)
> +#define COMPARE_MAX_PIECES (!TARGET_POWERPC64 ? 4 : 16)
> 
>  /* Nonzero if access to memory by bytes is no faster than for words.
> Also nonzero if doing byte operations (specifically shifts) in registers
> diff --git a/gcc/testsuite/gcc.target/powerpc/pr111449.c 
> b/gcc/testsuite/gcc.target/powerpc/pr111449.c
> new file mode 100644
> index 000..ab9583f47bb
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/powerpc/pr111449.c
> @@ -0,0 +1,14 @@
> +/* { dg-do compile } */
> +/* { dg-require-effective-target powerpc_p8vector_ok } */
> +/* { dg-options "-maltivec -O2" } */
> +/* { dg-require-effective-target has_arch_ppc64 } */
> +
> +/* Ensure vector comparison is used for 16-byte memory equality compare.  */
> +
> +int compare (const char* s1, const char* s2)
> +{
> +  return __builtin_memcmp (s1, s2, 16) == 0;
> +}
> +
> +/* { dg-final { scan-assembler-times {\mvcmpequb\M} 1 } } */
>

55 matches

Mail list logo