[PATCH v2] xtensa: Optimize '(x & CST1_POW2) != 0 ? CST2_POW2 : 0'

2023-05-22 Thread Takayuki 'January June' Suwa via Gcc-patches
On 2023/05/23 11:27, Max Filippov wrote:
> Hi Suwa-san,

Hi!

> This change introduces a bunch of test failures on big endian configuration.
> I believe that's because the starting bit position for zero_extract is counted
> from different ends depending on the endianness.

Oops, what a stupid mistake... X(

===
This patch removes one machine instruction from the "single bit extraction
with shifting" operation, and tries to eliminate the conditional
branch if CST2_POW2 doesn't fit into signed 12 bits, with the help
of the ifcvt optimization.

/* example #1 */
int test0(int x) {
  return (x & 1048576) != 0 ? 1024 : 0;
}
extern int foo(void);
int test1(void) {
  return (foo() & 1048576) != 0 ? 16777216 : 0;
}

;; before
test0:
movi    a9, 0x400
srai    a2, a2, 10
and     a2, a2, a9
ret.n
test1:
addi    sp, sp, -16
s32i.n  a0, sp, 12
call0   foo
extui   a2, a2, 20, 1
slli    a2, a2, 20
beqz.n  a2, .L2
movi.n  a2, 1
slli    a2, a2, 24
.L2:
l32i.n  a0, sp, 12
addi    sp, sp, 16
ret.n

;; after
test0:
extui   a2, a2, 20, 1
slli    a2, a2, 10
ret.n
test1:
addi    sp, sp, -16
s32i.n  a0, sp, 12
call0   foo
l32i.n  a0, sp, 12
extui   a2, a2, 20, 1
slli    a2, a2, 24
addi    sp, sp, 16
ret.n

In addition, if the left shift amount ('exact_log2(CST2_POW2)') is
between 1 and 3 and either an addition or a subtraction with another
register follows, emit an ADDX[248] or SUBX[248] machine instruction
instead of separate left shift and add/subtract ones.

/* example #2 */
int test2(int x, int y) {
  return ((x & 1048576) != 0 ? 4 : 0) + y;
}
int test3(int x, int y) {
  return ((x & 2) != 0 ? 8 : 0) - y;
}

;; before
test2:
movi.n  a9, 4
srai    a2, a2, 18
and     a2, a2, a9
add.n   a2, a2, a3
ret.n
test3:
movi.n  a9, 8
slli    a2, a2, 2
and     a2, a2, a9
sub     a2, a2, a3
ret.n

;; after
test2:
extui   a2, a2, 20, 1
addx4   a2, a2, a3
ret.n
test3:
extui   a2, a2, 1, 1
subx8   a2, a2, a3
ret.n

gcc/ChangeLog:

* config/xtensa/predicates.md (addsub_operator): New.
* config/xtensa/xtensa.md (*extzvsi-1bit_ashlsi3,
*extzvsi-1bit_addsubx): New insn_and_split patterns.
* config/xtensa/xtensa.cc (xtensa_rtx_costs):
Add a special case about ifcvt 'noce_try_cmove()' to handle
constant loads that do not fit into signed 12 bits in the
patterns added above.
---
 gcc/config/xtensa/predicates.md |  3 ++
 gcc/config/xtensa/xtensa.cc |  3 +-
 gcc/config/xtensa/xtensa.md | 83 +
 3 files changed, 88 insertions(+), 1 deletion(-)

diff --git a/gcc/config/xtensa/predicates.md b/gcc/config/xtensa/predicates.md
index 2dac193373a..5faf1be8c15 100644
--- a/gcc/config/xtensa/predicates.md
+++ b/gcc/config/xtensa/predicates.md
@@ -191,6 +191,9 @@
 (define_predicate "logical_shift_operator"
   (match_code "ashift,lshiftrt"))
 
+(define_predicate "addsub_operator"
+  (match_code "plus,minus"))
+
 (define_predicate "xtensa_cstoresi_operator"
   (match_code "eq,ne,gt,ge,lt,le"))
 
diff --git a/gcc/config/xtensa/xtensa.cc b/gcc/config/xtensa/xtensa.cc
index bb1444c44b6..e3af78cd228 100644
--- a/gcc/config/xtensa/xtensa.cc
+++ b/gcc/config/xtensa/xtensa.cc
@@ -4355,7 +4355,8 @@ xtensa_rtx_costs (rtx x, machine_mode mode, int 
outer_code,
   switch (outer_code)
{
case SET:
- if (xtensa_simm12b (INTVAL (x)))
+ if (xtensa_simm12b (INTVAL (x))
+ || (current_pass && current_pass->tv_id == TV_IFCVT))
{
  *total = speed ? COSTS_N_INSNS (1) : 0;
  return true;
diff --git a/gcc/config/xtensa/xtensa.md b/gcc/config/xtensa/xtensa.md
index 3521fa33b47..c75fde1023a 100644
--- a/gcc/config/xtensa/xtensa.md
+++ b/gcc/config/xtensa/xtensa.md
@@ -997,6 +997,89 @@
(set_attr "mode""SI")
(set_attr "length"  "3")])
 
+(define_insn_and_split "*extzvsi-1bit_ashlsi3"
+  [(set (match_operand:SI 0 "register_operand" "=a")
+   (and:SI (match_operator:SI 4 "logical_shift_operator"
+   [(match_operand:SI 1 "register_operand" "r")
+(match_operand:SI 2 "const_int_operand" "i")])
+   (match_operand:SI 3 "const_int_operand" "i")))]
+  "exact_log2 (INTVAL (operands[3])) > 0"
+  "#"
+  "&& 1"
+  [(set (match_dup 0)
+   (zero_extract:SI (match_dup 1)
+(const_int 1)
+(match_dup 2)))
+   (set (match_dup 0)
+   (ashift:SI (match_dup 0)
+  (match_dup 3)))]
+{
+  int pos = INTVAL (operands[2]),
+  shift = floor_log2 (INTVAL (operands[3]));
+ 

Re: [PATCH v2] rs6000: Add buildin for mffscrn instructions

2023-05-22 Thread Kewen.Lin via Gcc-patches
on 2023/5/23 01:31, Carl Love wrote:
> On Mon, 2023-05-22 at 14:36 +0800, Kewen.Lin wrote:
>> Hi Carl,
>>
>> on 2023/5/19 05:12, Carl Love via Gcc-patches wrote:
>>> GCC maintainers:
>>>
>>> version 2.  Fixed an issue with the test case.  The dg-options line
>>> was
>>> missing.
>>>
>>> The following patch adds an overloaded builtin.  There are two
>>> possible
>>> arguments for the builtin.  The builtin definitions are:
>>>
>>>   double __builtin_mffscrn (unsigned long int);
>>>   double __builtin_mffscrn (double);
>>>
>>
>> We already have one  bif __builtin_set_fpscr_rn for RN setting,
>> apparently
>> these two are mainly for direct mapping to mffscr[ni] and want the
>> FPSCR bits.
>> I'm curious what's the requirements requesting these two built-in
>> functions?
> 
> The builtins were requested for use in GLibC.  As of version 2.31 they
> were added as inline asm.  They requested a builtin so the asm could be
> removed.
> 

OK, thanks for the information.

>>
>>> The patch has been tested on Power 10 with no regressions.  
>>>
>>> Please let me know if the patch is acceptable for
>>> mainline.  Thanks.
>>>
>>> Carl
>>>
>>> 
>>> rs6000: Add buildin for mffscrn instructions
>>>
>>
>> s/buildin/built-in/
> 
> fixed
>>
>>> This patch adds overloaded __builtin_mffscrn for the move From
>>> FPSCR
>>> Control & Set R instruction with an immediate argument.  It also
>>> adds the
>>> builtin with a floating point register argument.  A new runnable
>>> test is
>>> added for the new builtin.
>>
>> s/Set R/Set RN/
> 
> fixed
> 
>>> gcc/
>>>
>>> * config/rs6000/rs6000-builtins.def (__builtin_mffscrni,
>>> __builtin_mffscrnd): Add builtin definitions.
>>> * config/rs6000/rs6000-overload.def (__builtin_mffscrn): Add
>>> overloaded definition.
>>> * doc/extend.texi: Add documentation for __builtin_mffscrn.
>>>
>>> gcc/testsuite/
>>>
>>> * gcc.target/powerpc/builtin-mffscrn.c: Add testcase for new
>>> builtin.
>>> ---
>>>  gcc/config/rs6000/rs6000-builtins.def |   7 ++
>>>  gcc/config/rs6000/rs6000-overload.def |   5 +
>>>  gcc/doc/extend.texi   |   8 ++
>>>  .../gcc.target/powerpc/builtin-mffscrn.c  | 106
>>> ++
>>>  4 files changed, 126 insertions(+)
>>>  create mode 100644 gcc/testsuite/gcc.target/powerpc/builtin-
>>> mffscrn.c
>>>
>>> diff --git a/gcc/config/rs6000/rs6000-builtins.def
>>> b/gcc/config/rs6000/rs6000-builtins.def
>>> index 92d9b46e1b9..67125473684 100644
>>> --- a/gcc/config/rs6000/rs6000-builtins.def
>>> +++ b/gcc/config/rs6000/rs6000-builtins.def
>>> @@ -2875,6 +2875,13 @@
>>>pure vsc __builtin_vsx_xl_len_r (void *, signed long);
>>>  XL_LEN_R xl_len_r {}
>>>  
>>> +; Immediate instruction only uses the least significant two bits
>>> of the
>>> +; const int.
>>> +  double __builtin_mffscrni (const int<2>);
>>> +MFFSCRNI rs6000_mffscrni {}
>>> +
>>> +  double __builtin_mffscrnd (double);
>>> +MFFSCRNF rs6000_mffscrn {}
>>>  
>>
>> Why are they put in [power9-64] rather than [power9]?  IMHO [power9]
>> is the
>> correct stanza for them.
> 
> Moved them to power 9 stanza.
> 
>>   Besides, {nosoft} attribute is required.
> 
> OK, added that.  I was trying to figure out why nosoft is needed.  The
> instructions are manipulating bits in a physical register that controls
> the hardware floating point instructions.  It looks to me like that
> would be why.  Because if you were using msoft float then the floating
> point HW registers are disabled and the floating point operations are
> done using software.  Did I figure this out correctly?

Yes, and also the destination of these two instructions is hardware float
register, its relatives mffs and mffsl have that as well.

> 
>  
>>
>>>  ; Builtins requiring hardware support for IEEE-128 floating-point.
>>>  [ieee128-hw]
>>> diff --git a/gcc/config/rs6000/rs6000-overload.def
>>> b/gcc/config/rs6000/rs6000-overload.def
>>> index c582490c084..adda2df69ea 100644
>>> --- a/gcc/config/rs6000/rs6000-overload.def
>>> +++ b/gcc/config/rs6000/rs6000-overload.def
>>> @@ -78,6 +78,11 @@
>>>  ; like after a required newline, but nowhere else.  Lines
>>> beginning with
>>>  ; a semicolon are also treated as blank lines.
>>>  
>>> +[MFFSCR, __builtin_mffscrn, __builtin_mffscrn]
>>> +  double __builtin_mffscrn (const int<2>);
>>> +MFFSCRNI
>>> +  double __builtin_mffscrn (double);
>>> +MFFSCRNF
>>>  
>>>  [BCDADD, __builtin_bcdadd, __builtin_vec_bcdadd]
>>>vsq __builtin_vec_bcdadd (vsq, vsq, const int);
>>> diff --git a/gcc/doc/extend.texi b/gcc/doc/extend.texi
>>> index ed8b9c8a87b..f16c046051a 100644
>>> --- a/gcc/doc/extend.texi
>>> +++ b/gcc/doc/extend.texi
>>> @@ -18455,6 +18455,9 @@ int __builtin_dfp_dtstsfi_ov_td (unsigned
>>> int comparison, _Decimal128 value);
>>>  
>>>  double __builtin_mffsl(void);
>>>  
>>> +double __builtin_mffscrn (unsigned long int);
>>> +double 

[PATCH] RISC-V: Add the option "-mdisable-multilib-check" to avoid multilib checks breaking the compilation.

2023-05-22 Thread Jin Ma via Gcc-patches
When testing an extension, it is often necessary to build a certain program
without some extension, such as the bitmanip extension, in order to evaluate
the performance or code size impact of that extension. However, the current
multilib rules report an error when the requested configuration is not a
superset of an entry in the MULTILIB_REQUIRED list, which prevents the program
from linking normally and so defeats the purpose of the test.

Therefore, the compilation option is added to avoid riscv_multi_lib_check()
interruption of compilation.

gcc/ChangeLog:

* config/riscv/elf.h (LIB_SPEC): Do not run riscv_multi_lib_check() when
-mdisable-multilib-check.
* config/riscv/riscv.opt: New.
---
 gcc/config/riscv/elf.h | 2 +-
 gcc/config/riscv/riscv.opt | 4 
 2 files changed, 5 insertions(+), 1 deletion(-)

diff --git a/gcc/config/riscv/elf.h b/gcc/config/riscv/elf.h
index 4b7e5c988ca..afde1b12d36 100644
--- a/gcc/config/riscv/elf.h
+++ b/gcc/config/riscv/elf.h
@@ -29,7 +29,7 @@ along with GCC; see the file COPYING3.  If not see
 #undef  LIB_SPEC
 #define LIB_SPEC \
   "--start-group -lc %{!specs=nosys.specs:-lgloss} --end-group " \
-  
"%{!nostartfiles:%{!nodefaultlibs:%{!nolibc:%{!nostdlib:%:riscv_multi_lib_check()}}}}"
+  
"%{!mdisable-multilib-check:%{!nostartfiles:%{!nodefaultlibs:%{!nolibc:%{!nostdlib:%:riscv_multi_lib_check()}}}}}"
 
 #undef  STARTFILE_SPEC
 #define STARTFILE_SPEC "crt0%O%s crtbegin%O%s"
diff --git a/gcc/config/riscv/riscv.opt b/gcc/config/riscv/riscv.opt
index 63d4710cb15..9940a24a7f9 100644
--- a/gcc/config/riscv/riscv.opt
+++ b/gcc/config/riscv/riscv.opt
@@ -295,3 +295,7 @@ Enum(riscv_autovec_lmul) String(m8) Value(RVV_M8)
 -param=riscv-autovec-lmul=
 Target RejectNegative Joined Enum(riscv_autovec_lmul) Var(riscv_autovec_lmul) 
Init(RVV_M1)
 -param=riscv-autovec-lmul= Set the RVV LMUL of auto-vectorization 
in the RISC-V port.
+
+mdisable-multilib-check
+Target Bool Var(riscv_disable_multilib_check) Init(0)
+Disable multilib checking by riscv_multi_lib_check().
-- 
2.17.1



Re: [PATCH] RISC-V: Add missing torture-init and torture-finish for rvv.exp

2023-05-22 Thread Vineet Gupta

On 5/22/23 02:17, Kito Cheng wrote:

Ooops, seems still some issue around here,


Yep still 5000 fails :-(


  but I found something might
related this issue:

https://github.com/gcc-mirror/gcc/commit/d6654a4be3ba44c0d57be7c8a51d76d9721345e1
https://github.com/gcc-mirror/gcc/commit/23c49bb8d09bc3bfce9a08be637cf32ac014de56


It seems both of these patches are essentially doing what yours did. So 
something else is amiss still.


Thx,
-Vineet



On Mon, May 22, 2023 at 2:42 PM Kito Cheng  wrote:

Hi Vineet:

Could you help to test this patch, this could resolve that issue on our
machine, but I would like to also work for other env.

Thanks :)

---

We got a bunch of the following error messages for multi-lib runs:

ERROR: torture-init: torture_without_loops is not empty as expected
ERROR: tcl error code NONE

And seems we need torture-init and torture-finish around the test
loop.

gcc/testsuite/ChangeLog:

 * gcc.target/riscv/rvv/rvv.exp: Add torture-init and
 torture-finish.
---
  gcc/testsuite/gcc.target/riscv/rvv/rvv.exp | 3 +++
  1 file changed, 3 insertions(+)

diff --git a/gcc/testsuite/gcc.target/riscv/rvv/rvv.exp 
b/gcc/testsuite/gcc.target/riscv/rvv/rvv.exp
index bc99cc0c3cf4..19179564361a 100644
--- a/gcc/testsuite/gcc.target/riscv/rvv/rvv.exp
+++ b/gcc/testsuite/gcc.target/riscv/rvv/rvv.exp
@@ -39,6 +39,7 @@ if [istarget riscv32-*-*] then {

  # Initialize `dg'.
  dg-init
+torture-init

  # Main loop.
  set CFLAGS "$DEFAULT_CFLAGS -march=$gcc_march -mabi=$gcc_mabi -O3"
@@ -69,5 +70,7 @@ foreach op $AUTOVEC_TEST_OPTS {
  dg-runtest [lsort [glob -nocomplain 
$srcdir/$subdir/autovec/vls-vlmax/*.\[cS\]]] \
 "-std=c99 -O3 -ftree-vectorize --param 
riscv-autovec-preference=fixed-vlmax" $CFLAGS

+torture-finish
+
  # All done.
  dg-finish
--
2.40.1





Re: [PATCH] rs6000: Fix __builtin_vec_xst_trunc definition

2023-05-22 Thread Kewen.Lin via Gcc-patches
on 2023/5/23 03:50, Carl Love wrote:
> On Mon, 2023-05-22 at 17:04 +0800, Kewen.Lin wrote:
>> Hi Carl,
>>
>> on 2023/5/11 02:06, Carl Love via Gcc-patches wrote:
>>> GCC maintainers:
>>>
>>> The following patch fixes errors in the arguments in the
>>> __builtin_altivec_tr_stxvrhx,   __builtin_altivec_tr_stxvrwx
>>> builtin
>>> definitions.  Note, these builtins are used by the overloaded
>>> __builtin_vec_xst_trunc builtin.
>>>
>>> The patch adds a new overloaded builtin definition for
>>> __builtin_vec_xst_trunc for the third argument to be unsigned and
>>> signed long int.
>>>
>>> A new testcase is added for the various overloaded versions of
>>> __builtin_vec_xst_trunc.
>>>
>>> The patch has been tested on Power 10 with no new regressions.
>>>
>>> Please let me know if the patch is acceptable for
>>> mainline.  Thanks.
>>>
>>> Carl
>>>
>>> ---
>>> rs6000: Fix __builtin_vec_xst_trunc definition
>>>
>>> Built-in __builtin_vec_xst_trunc calls __builtin_altivec_tr_stxvrhx
>>> and __builtin_altivec_tr_stxvrwx to handle the short and word
>>> cases.  The
>>> arguments for these two builtins are wrong.  This patch fixes the
>>> wrong
>>> arguments for the builtins.
>>>
>>> Additionally, the patch adds a new __builtin_vec_xst_trunc
>>> overloaded
>>> version for the destination being signed or unsigned long int.
>>>
>>> A runnable test case is added to test each of the overloaded
>>> definitions
>>> of __builtin_vec_xst_trunc.
>>>
>>> gcc/
>>> * config/rs6000/builtins.def (__builtin_altivec_tr_stxvrhx,
>>> __builtin_altivec_tr_stxvrwx): Fix type of second argument.
>>> Add, definition for send argument to be signed long.
>>> * config/rs6000/rs6000-overload.def (__builtin_vec_xst_trunc):
>>> add definition with third argument signed and unsigned long.
>>> * doc/extend.texi (__builtin_vec_xst_trunc): Add documentation
>>> for
>>> new unsigned long and signed long versions.
>>>
>>> gcc/testsuite/
>>> * gcc.target/powerpc/vsx-builtin-vec_xst_trunc.c: New test case
>>> for __builtin_vec_xst_trunc builtin.
>>> ---
>>>  gcc/config/rs6000/rs6000-builtins.def |   7 +-
>>>  gcc/config/rs6000/rs6000-overload.def |   4 +
>>>  gcc/doc/extend.texi   |   2 +
>>>  .../powerpc/vsx-builtin-vec_xst_trunc.c   | 217
>>> ++
>>>  4 files changed, 228 insertions(+), 2 deletions(-)
>>>  create mode 100644 gcc/testsuite/gcc.target/powerpc/vsx-builtin-
>>> vec_xst_trunc.c
>>>
>>> diff --git a/gcc/config/rs6000/rs6000-builtins.def
>>> b/gcc/config/rs6000/rs6000-builtins.def
>>> index 638d0bc72ca..a378491b358 100644
>>> --- a/gcc/config/rs6000/rs6000-builtins.def
>>> +++ b/gcc/config/rs6000/rs6000-builtins.def
>>> @@ -3161,12 +3161,15 @@
>>>void __builtin_altivec_tr_stxvrbx (vsq, signed long, signed char
>>> *);
>>>  TR_STXVRBX vsx_stxvrbx {stvec}
>>>  
>>> -  void __builtin_altivec_tr_stxvrhx (vsq, signed long, signed int
>>> *);
>>> +  void __builtin_altivec_tr_stxvrhx (vsq, signed long, signed
>>> short *);
>>>  TR_STXVRHX vsx_stxvrhx {stvec}
>>>  
>>> -  void __builtin_altivec_tr_stxvrwx (vsq, signed long, signed
>>> short *);
>>> +  void __builtin_altivec_tr_stxvrwx (vsq, signed long, signed int
>>> *);
>>>  TR_STXVRWX vsx_stxvrwx {stvec}
>>
>> Good catch!
>>
>>>  
>>> +  void __builtin_altivec_tr_stxvrlx (vsq, signed long, signed long
>>> *);
>>> +TR_STXVRLX vsx_stxvrdx {stvec}
>>> +
>>
>> This is mapped to the one used for type long long, it's a hard
>> mapping,
>> IMHO it's wrong and not consistent with what the users expect, since
>> on Power
>> the size of type long int is 4 bytes at -m32 while 8 bytes at -m64,
>> this
>> implementation binding to 8 bytes can cause trouble in 32-bit.  I
>> wonder if
>> it's a good idea to add one overloaded version for type long int, for
>> now
>> openxl also emits error message for long int type pointer (see its
>> doc [1]),
>> users can use casting to make it to the acceptable pointer types
>> (long long
>> or int as its size).
>>
>> [1] 
>> https://www.ibm.com/docs/en/openxl-c-and-cpp-lop/17.1.1?topic=functions-vec-xst-trunc
>>
>>
> 
> If I understand this correctly, the "signed long" is mapped to type
> "long long int"?  Just curious, where is the mapping done?

Sorry for the confusion, the mapping here is for your implementation,

>>> +  void __builtin_altivec_tr_stxvrlx (vsq, signed long, signed long
>>> *);
>>> +TR_STXVRLX vsx_stxvrdx {stvec}

you used the one **vsx_stxvrdx** which is for "signed long long int"
(doubleword, exactly 8 bytes size element), so I said you have a hard
mapping, takes "signed long int" as "signed long long int".

But again type signed long int isn't guaranteed to be with the same size
as type signed long long int, on Power it has the same size as int (4 bytes)
on 32-bit env, while has the same size as long long (8 bytes) on 64-bit
env, since your test case is guarded with power10, 

Re: [PATCH 2/2] vect: Enhance cost evaluation in vect_transform_slp_perm_load_1

2023-05-22 Thread Kewen.Lin via Gcc-patches
Hi Richi,

Thanks for the review!

on 2023/5/22 21:44, Richard Biener wrote:
> On Wed, May 17, 2023 at 8:15 AM Kewen.Lin  wrote:
>>
>> Hi,
>>
>> Following Richi's suggestion in [1], I'm working on deferring
>> cost evaluation next to the transformation, this patch is
>> to enhance function vect_transform_slp_perm_load_1 which
>> could under-cost for vector permutation, since the costing
>> doesn't try to consider nvectors_per_build, it's inconsistent
>> with the transformation part.
>>
>> Bootstrapped and regtested on x86_64-redhat-linux,
>> aarch64-linux-gnu and powerpc64{,le}-linux-gnu.
>>
>> Is it ok for trunk?
>>
>> [1] https://gcc.gnu.org/pipermail/gcc-patches/2021-January/563624.html
>>
>> BR,
>> Kewen
>> -
>> gcc/ChangeLog:
>>
>> * tree-vect-slp.cc (vect_transform_slp_perm_load_1): Adjust the
>> calculation on n_perms by considering nvectors_per_build.
>>
>> gcc/testsuite/ChangeLog:
>>
>> * gcc.dg/vect/costmodel/ppc/costmodel-slp-perm.c: New test.
>> ---
>>  .../vect/costmodel/ppc/costmodel-slp-perm.c   | 23 +++
>>  gcc/tree-vect-slp.cc  | 66 ++-
>>  2 files changed, 57 insertions(+), 32 deletions(-)
>>  create mode 100644 
>> gcc/testsuite/gcc.dg/vect/costmodel/ppc/costmodel-slp-perm.c
>>
>> diff --git a/gcc/testsuite/gcc.dg/vect/costmodel/ppc/costmodel-slp-perm.c 
>> b/gcc/testsuite/gcc.dg/vect/costmodel/ppc/costmodel-slp-perm.c
>> new file mode 100644
>> index 000..e5c4dceddfb
>> --- /dev/null
>> +++ b/gcc/testsuite/gcc.dg/vect/costmodel/ppc/costmodel-slp-perm.c
>> @@ -0,0 +1,23 @@
>> +/* { dg-do compile } */
>> +/* { dg-require-effective-target vect_int } */
>> +/* { dg-require-effective-target powerpc_p9vector_ok } */
>> +/* Specify power9 to ensure the vectorization is profitable
>> +   and test point stands, otherwise it could be not profitable
>> +   to vectorize.  */
>> +/* { dg-additional-options "-mdejagnu-cpu=power9 -mpower9-vector" } */
>> +
>> +/* Verify we cost the exact count for required vec_perm.  */
>> +
>> +int x[1024], y[1024];
>> +
>> +void
>> +foo ()
>> +{
>> +  for (int i = 0; i < 512; ++i)
>> +{
>> +  x[2 * i] = y[1023 - (2 * i)];
>> +  x[2 * i + 1] = y[1023 - (2 * i + 1)];
>> +}
>> +}
>> +
>> +/* { dg-final { scan-tree-dump-times "2 times vec_perm" 1 "vect" } } */
>> diff --git a/gcc/tree-vect-slp.cc b/gcc/tree-vect-slp.cc
>> index e5c9d7e766e..af9a6dd4fa9 100644
>> --- a/gcc/tree-vect-slp.cc
>> +++ b/gcc/tree-vect-slp.cc
>> @@ -8115,12 +8115,12 @@ vect_transform_slp_perm_load_1 (vec_info *vinfo, 
>> slp_tree node,
>>
>>mode = TYPE_MODE (vectype);
>>poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
>> +  unsigned int nstmts = SLP_TREE_NUMBER_OF_VEC_STMTS (node);
>>
>>/* Initialize the vect stmts of NODE to properly insert the generated
>>   stmts later.  */
>>if (! analyze_only)
>> -for (unsigned i = SLP_TREE_VEC_STMTS (node).length ();
>> -i < SLP_TREE_NUMBER_OF_VEC_STMTS (node); i++)
>> +for (unsigned i = SLP_TREE_VEC_STMTS (node).length (); i < nstmts; i++)
>>SLP_TREE_VEC_STMTS (node).quick_push (NULL);
>>
>>/* Generate permutation masks for every NODE. Number of masks for each 
>> NODE
>> @@ -8161,7 +8161,10 @@ vect_transform_slp_perm_load_1 (vec_info *vinfo, 
>> slp_tree node,
>>  (b) the permutes only need a single vector input.  */
>>mask.new_vector (nunits, group_size, 3);
>>nelts_to_build = mask.encoded_nelts ();
>> -  nvectors_per_build = SLP_TREE_VEC_STMTS (node).length ();
>> +  /* It's possible to obtain zero nstmts during analyze_only, so make
>> +it at least one to ensure the later computation for n_perms
>> +proceed.  */
>> +  nvectors_per_build = nstmts > 0 ? nstmts : 1;
>>in_nlanes = DR_GROUP_SIZE (stmt_info) * 3;
>>  }
>>else
>> @@ -8252,40 +8255,39 @@ vect_transform_slp_perm_load_1 (vec_info *vinfo, 
>> slp_tree node,
>>   return false;
>> }
>>
>> - ++*n_perms;
>> -
>> + tree mask_vec = NULL_TREE;
>>   if (!analyze_only)
>> -   {
>> - tree mask_vec = vect_gen_perm_mask_checked (vectype, 
>> indices);
>> +   mask_vec = vect_gen_perm_mask_checked (vectype, indices);
>>
>> - if (second_vec_index == -1)
>> -   second_vec_index = first_vec_index;
>> + if (second_vec_index == -1)
>> +   second_vec_index = first_vec_index;
>>
>> - for (unsigned int ri = 0; ri < nvectors_per_build; ++ri)
>> + for (unsigned int ri = 0; ri < nvectors_per_build; ++ri)
>> +   {
>> + ++*n_perms;
> 
> So the "real" change is doing
> 
>   *n_perms += nvectors_per_build;
> 
> and *n_perms was unused when !analyze_only?  And since at

Yes, although both !analyze_only and analyze_only calls pass n_perms in, now
only the call sites with analyze_only will use 

Re: [PATCH v4] libgfortran: Replace mutex with rwlock

2023-05-22 Thread Zhu, Lipeng via Gcc-patches




On 5/16/2023 3:08 PM, Zhu, Lipeng wrote:



On 5/9/2023 10:32 AM, Zhu, Lipeng wrote:



On 1/1/1970 8:00 AM, Bernhard Reutner-Fischer wrote:

On Mon,  8 May 2023 17:44:43 +0800
Lipeng Zhu  wrote:


This patch introduces an rwlock and splits the read and write accesses to
the unit_root tree and unit_cache, using the rwlock instead of the mutex to
increase CPU efficiency. In the get_gfc_unit function, the percentage
of calls that step into the insert_unit function is around 30%; in most
instances, we can get the unit in the phase of reading the unit_cache
or unit_root tree. So splitting the read/write phases with an rwlock is an
approach to make it more parallel.

BTW, the IPC metrics can gain around 9x in our test server with 220
cores. The benchmark we used is https://github.com/rwesson/NEAT


See commentary typos below.
You did not state if you regression tested the patch?
I use valgrind --tool=helgrind or --tool=drd to test 'make 
check-fortran'. Is it necessary to add an additional unit test for 
this patch?



Other than that it LGTM but i cannot approve it.
Thank you for your kind help for this patch, is there anything that I 
can do or can you help to push this patch forward?



Hi Bernhard,

Is there any other refinement that I need to do for this patch?

Thanks.




May I know any comment or concern on this patch, thanks for your time :)




diff --git a/libgfortran/io/async.h b/libgfortran/io/async.h index
ad226c8e856..0033cc74252 100644
--- a/libgfortran/io/async.h
+++ b/libgfortran/io/async.h
@@ -210,6 +210,128 @@
  DEBUG_PRINTF ("%s" DEBUG_RED "ACQ:" DEBUG_NORM " %-30s 
%78p\n", aio_prefix, #mutex,

mutex); \

Thanks, corrected in Patch v5.


    } while (0)
+#ifdef __GTHREAD_RWLOCK_INIT
+#define RWLOCK_DEBUG_ADD(rwlock) do {    \
+    aio_rwlock_debug *n;    \
+    n = xmalloc (sizeof(aio_rwlock_debug));    \


Missing space before the open parenthesis: sizeof (


Thanks, corrected in Patch v5.


diff --git a/libgfortran/io/unit.c b/libgfortran/io/unit.c index
82664dc5f98..62f1db21d34 100644
--- a/libgfortran/io/unit.c
+++ b/libgfortran/io/unit.c
@@ -33,34 +33,36 @@ see the files COPYING3 and COPYING.RUNTIME
respectively.  If not, see
  /* IO locking rules:
-   UNIT_LOCK is a master lock, protecting UNIT_ROOT tree and 
UNIT_CACHE.
+   UNIT_RWLOCK is a master lock, protecting UNIT_ROOT tree and 
UNIT_CACHE.

+   And use the rwlock to spilt read and write phase to UNIT_ROOT tree
+   and UNIT_CACHE to increase CPU efficiency.


s/spilt/split. Maybe:

Using an rwlock improves efficiency by allowing us to separate 
readers and writers of both UNIT_ROOT

and UNIT_CACHE.


Thanks, corrected in Patch v5.


@@ -350,6 +356,17 @@ retry:
    if (c == 0)
  break;
  }
+  /* We did not find a unit in the cache nor in the unit list, 
create a new

+    (locked) unit and insert into the unit list and cache.
+    Manipulating either or both the unit list and the unit cache 
requires to

+    hold a write-lock [for obvious reasons]:
+    1. By separating the read/write lock, it will greatly reduce 
the contention
+   at the read part, while write part is not always necessary 
or most

+   unlikely once the unit hit in cache.


+    By separating the read/write lock, we will greatly reduce the 
contention
+    on the read part, while the write part is unlikely once the unit 
hits

+    the cache.

+    2. We try to balance the implementation complexity and the 
performance

+   gains that fit into current cases we observed by just using a
+   pthread_rwlock. */


Let's drop 2.


Got it, thanks!

thanks,


Re: [PATCH 1/2] xtensa: Optimize '(x & CST1_POW2) != 0 ? CST2_POW2 : 0'

2023-05-22 Thread Andrew Pinski via Gcc-patches
On Mon, May 22, 2023 at 7:28 PM Max Filippov via Gcc-patches
 wrote:
>
> Hi Suwa-san,
>
> On Mon, May 22, 2023 at 12:06 AM Takayuki 'January June' Suwa
>  wrote:
> >
> > This patch removes one machine instruction from the "single bit extraction
> > with shifting" operation, and tries to eliminate the conditional
> > branch if CST2_POW2 doesn't fit into signed 12 bits with the help
> > of ifcvt optimization.
> >
> > /* example #1 */
> > int test0(int x) {
> >   return (x & 1048576) != 0 ? 1024 : 0;
> > }
> > extern int foo(void);
> > int test1(void) {
> >   return (foo() & 1048576) != 0 ? 16777216 : 0;
> > }
> >
> > ;; before
> > test0:
> > movia9, 0x400
> > sraia2, a2, 10
> > and a2, a2, a9
> > ret.n
> > test1:
> > addisp, sp, -16
> > s32i.n  a0, sp, 12
> > call0   foo
> > extui   a2, a2, 20, 1
> > sllia2, a2, 20
> > beqz.n  a2, .L2
> > movi.n  a2, 1
> > sllia2, a2, 24
> > .L2:
> > l32i.n  a0, sp, 12
> > addisp, sp, 16
> > ret.n
> >
> > ;; after
> > test0:
> > extui   a2, a2, 20, 1
> > sllia2, a2, 10
> > ret.n
> > test1:
> > addisp, sp, -16
> > s32i.n  a0, sp, 12
> > call0   foo
> > l32i.n  a0, sp, 12
> > extui   a2, a2, 20, 1
> > sllia2, a2, 24
> > addisp, sp, 16
> > ret.n
> >
> > In addition, if the left shift amount ('exact_log2(CST2_POW2)') is
> > between 1 and 3 and either an addition or a subtraction with another
> > register follows, emit an ADDX[248] or SUBX[248] machine instruction
> > instead of separate left shift and add/subtract ones.
> >
> > /* example #2 */
> > int test2(int x, int y) {
> >   return ((x & 1048576) != 0 ? 4 : 0) + y;
> > }
> > int test3(int x, int y) {
> >   return ((x & 2) != 0 ? 8 : 0) - y;
> > }
> >
> > ;; before
> > test2:
> > movi.n  a9, 4
> > sraia2, a2, 18
> > and a2, a2, a9
> > add.n   a2, a2, a3
> > ret.n
> > test3:
> > movi.n  a9, 8
> > sllia2, a2, 2
> > and a2, a2, a9
> > sub a2, a2, a3
> > ret.n
> >
> > ;; after
> > test2:
> > extui   a2, a2, 20, 1
> > addx4   a2, a2, a3
> > ret.n
> > test3:
> > extui   a2, a2, 1, 1
> > subx8   a2, a2, a3
> > ret.n
> >
> > gcc/ChangeLog:
> >
> > * config/xtensa/predicates.md (addsub_operator): New.
> > * config/xtensa/xtensa.md (*extzvsi-1bit_ashlsi3,
> > *extzvsi-1bit_addsubx): New insn_and_split patterns.
> > * config/xtensa/xtensa.cc (xtensa_rtx_costs):
> > Add a special case about ifcvt 'noce_try_cmove()' to handle
> > constant loads that do not fit into signed 12 bits in the
> > patterns added above.
> > ---
> >  gcc/config/xtensa/predicates.md |  3 ++
> >  gcc/config/xtensa/xtensa.cc |  3 +-
> >  gcc/config/xtensa/xtensa.md | 75 +
> >  3 files changed, 80 insertions(+), 1 deletion(-)
>
> This change introduces a bunch of test failures on big endian configuration.
> I believe that's because the starting bit position for zero_extract is counted
> from different ends depending on the endianness.

Yes I ran into something similar just recently when I was improving a
similar thing in expand.

Thanks,
Andrew

>
> --
> Thanks.
> -- Max


Re: [PATCH] Fold _mm{,256,512}_abs_{epi8,epi16,epi32,epi64} into gimple ABS_EXPR.

2023-05-22 Thread Hongtao Liu via Gcc-patches
On Mon, May 22, 2023 at 3:35 PM liuhongt  wrote:
>
> Also for 64-bit vector abs intrinsics _mm_abs_{pi8,pi16,pi32}.
>
> Bootstrapped and regtested on x86_64-pc-linux-gnu{-m32,}.
> Ok for trunk?
Ready to push to main trunk.
>
> gcc/ChangeLog:
>
> PR target/109900
> * config/i386/i386.cc (ix86_gimple_fold_builtin): Fold
> _mm{,256,512}_abs_{epi8,epi16,epi32,epi64} and
> _mm_abs_{pi8,pi16,pi32} into gimple ABS_EXPR.
> (ix86_masked_all_ones): Handle 64-bit mask.
> * config/i386/i386-builtin.def: Replace icode of related
> non-mask simd abs builtins with CODE_FOR_nothing.
>
> gcc/testsuite/ChangeLog:
>
> * gcc.target/i386/pr109900.c: New test.
> ---
>  gcc/config/i386/i386-builtin.def | 18 ++---
>  gcc/config/i386/i386.cc  | 86 +++--
>  gcc/testsuite/gcc.target/i386/pr109900.c | 95 
>  3 files changed, 166 insertions(+), 33 deletions(-)
>  create mode 100644 gcc/testsuite/gcc.target/i386/pr109900.c
>
> diff --git a/gcc/config/i386/i386-builtin.def 
> b/gcc/config/i386/i386-builtin.def
> index f7b10a6ab1e..c91e3809c75 100644
> --- a/gcc/config/i386/i386-builtin.def
> +++ b/gcc/config/i386/i386-builtin.def
> @@ -899,12 +899,12 @@ BDESC (OPTION_MASK_ISA_SSE3, 0, 
> CODE_FOR_sse3_hsubv4sf3, "__builtin_ia32_hsubps"
>  BDESC (OPTION_MASK_ISA_SSE3, 0, CODE_FOR_sse3_hsubv2df3, 
> "__builtin_ia32_hsubpd", IX86_BUILTIN_HSUBPD, UNKNOWN, (int) 
> V2DF_FTYPE_V2DF_V2DF)
>
>  /* SSSE3 */
> -BDESC (OPTION_MASK_ISA_SSSE3, 0, CODE_FOR_absv16qi2, 
> "__builtin_ia32_pabsb128", IX86_BUILTIN_PABSB128, UNKNOWN, (int) 
> V16QI_FTYPE_V16QI)
> -BDESC (OPTION_MASK_ISA_SSSE3 | OPTION_MASK_ISA_MMX, 0, 
> CODE_FOR_ssse3_absv8qi2, "__builtin_ia32_pabsb", IX86_BUILTIN_PABSB, UNKNOWN, 
> (int) V8QI_FTYPE_V8QI)
> -BDESC (OPTION_MASK_ISA_SSSE3, 0, CODE_FOR_absv8hi2, 
> "__builtin_ia32_pabsw128", IX86_BUILTIN_PABSW128, UNKNOWN, (int) 
> V8HI_FTYPE_V8HI)
> -BDESC (OPTION_MASK_ISA_SSSE3 | OPTION_MASK_ISA_MMX, 0, 
> CODE_FOR_ssse3_absv4hi2, "__builtin_ia32_pabsw", IX86_BUILTIN_PABSW, UNKNOWN, 
> (int) V4HI_FTYPE_V4HI)
> -BDESC (OPTION_MASK_ISA_SSSE3, 0, CODE_FOR_absv4si2, 
> "__builtin_ia32_pabsd128", IX86_BUILTIN_PABSD128, UNKNOWN, (int) 
> V4SI_FTYPE_V4SI)
> -BDESC (OPTION_MASK_ISA_SSSE3 | OPTION_MASK_ISA_MMX, 0, 
> CODE_FOR_ssse3_absv2si2, "__builtin_ia32_pabsd", IX86_BUILTIN_PABSD, UNKNOWN, 
> (int) V2SI_FTYPE_V2SI)
> +BDESC (OPTION_MASK_ISA_SSSE3, 0, CODE_FOR_nothing, 
> "__builtin_ia32_pabsb128", IX86_BUILTIN_PABSB128, UNKNOWN, (int) 
> V16QI_FTYPE_V16QI)
> +BDESC (OPTION_MASK_ISA_SSSE3 | OPTION_MASK_ISA_MMX, 0, CODE_FOR_nothing, 
> "__builtin_ia32_pabsb", IX86_BUILTIN_PABSB, UNKNOWN, (int) V8QI_FTYPE_V8QI)
> +BDESC (OPTION_MASK_ISA_SSSE3, 0, CODE_FOR_nothing, 
> "__builtin_ia32_pabsw128", IX86_BUILTIN_PABSW128, UNKNOWN, (int) 
> V8HI_FTYPE_V8HI)
> +BDESC (OPTION_MASK_ISA_SSSE3 | OPTION_MASK_ISA_MMX, 0, CODE_FOR_nothing, 
> "__builtin_ia32_pabsw", IX86_BUILTIN_PABSW, UNKNOWN, (int) V4HI_FTYPE_V4HI)
> +BDESC (OPTION_MASK_ISA_SSSE3, 0, CODE_FOR_nothing, 
> "__builtin_ia32_pabsd128", IX86_BUILTIN_PABSD128, UNKNOWN, (int) 
> V4SI_FTYPE_V4SI)
> +BDESC (OPTION_MASK_ISA_SSSE3 | OPTION_MASK_ISA_MMX, 0, CODE_FOR_nothing, 
> "__builtin_ia32_pabsd", IX86_BUILTIN_PABSD, UNKNOWN, (int) V2SI_FTYPE_V2SI)
>
>  BDESC (OPTION_MASK_ISA_SSSE3, 0, CODE_FOR_ssse3_phaddwv8hi3, 
> "__builtin_ia32_phaddw128", IX86_BUILTIN_PHADDW128, UNKNOWN, (int) 
> V8HI_FTYPE_V8HI_V8HI)
>  BDESC (OPTION_MASK_ISA_SSSE3 | OPTION_MASK_ISA_MMX, 0, 
> CODE_FOR_ssse3_phaddwv4hi3, "__builtin_ia32_phaddw", IX86_BUILTIN_PHADDW, 
> UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI)
> @@ -1178,9 +1178,9 @@ BDESC (OPTION_MASK_ISA_AVX, 0, 
> CODE_FOR_vec_pack_sfix_v4df, "__builtin_ia32_vec_
>
>  /* AVX2 */
>  BDESC (OPTION_MASK_ISA_AVX2, 0, CODE_FOR_avx2_mpsadbw, 
> "__builtin_ia32_mpsadbw256", IX86_BUILTIN_MPSADBW256, UNKNOWN, (int) 
> V32QI_FTYPE_V32QI_V32QI_INT)
> -BDESC (OPTION_MASK_ISA_AVX2, 0, CODE_FOR_absv32qi2, 
> "__builtin_ia32_pabsb256", IX86_BUILTIN_PABSB256, UNKNOWN, (int) 
> V32QI_FTYPE_V32QI)
> -BDESC (OPTION_MASK_ISA_AVX2, 0, CODE_FOR_absv16hi2, 
> "__builtin_ia32_pabsw256", IX86_BUILTIN_PABSW256, UNKNOWN, (int) 
> V16HI_FTYPE_V16HI)
> -BDESC (OPTION_MASK_ISA_AVX2, 0, CODE_FOR_absv8si2, 
> "__builtin_ia32_pabsd256", IX86_BUILTIN_PABSD256, UNKNOWN, (int) 
> V8SI_FTYPE_V8SI)
> +BDESC (OPTION_MASK_ISA_AVX2, 0, CODE_FOR_nothing, "__builtin_ia32_pabsb256", 
> IX86_BUILTIN_PABSB256, UNKNOWN, (int) V32QI_FTYPE_V32QI)
> +BDESC (OPTION_MASK_ISA_AVX2, 0, CODE_FOR_nothing, "__builtin_ia32_pabsw256", 
> IX86_BUILTIN_PABSW256, UNKNOWN, (int) V16HI_FTYPE_V16HI)
> +BDESC (OPTION_MASK_ISA_AVX2, 0, CODE_FOR_nothing, "__builtin_ia32_pabsd256", 
> IX86_BUILTIN_PABSD256, UNKNOWN, (int) V8SI_FTYPE_V8SI)
>  BDESC (OPTION_MASK_ISA_AVX2, 0, CODE_FOR_avx2_packssdw, 
> "__builtin_ia32_packssdw256",  IX86_BUILTIN_PACKSSDW256, UNKNOWN, (int) 
> V16HI_FTYPE_V8SI_V8SI)
>  BDESC 

Re: [PATCH 1/2] xtensa: Optimize '(x & CST1_POW2) != 0 ? CST2_POW2 : 0'

2023-05-22 Thread Max Filippov via Gcc-patches
Hi Suwa-san,

On Mon, May 22, 2023 at 12:06 AM Takayuki 'January June' Suwa
 wrote:
>
> This patch decreases one machine instruction from "single bit extraction
> with shifting" operation, and tries to eliminate the conditional
> branch if CST2_POW2 doesn't fit into signed 12 bits with the help
> of ifcvt optimization.
>
> /* example #1 */
> int test0(int x) {
>   return (x & 1048576) != 0 ? 1024 : 0;
> }
> extern int foo(void);
> int test1(void) {
>   return (foo() & 1048576) != 0 ? 16777216 : 0;
> }
>
> ;; before
> test0:
> movia9, 0x400
> sraia2, a2, 10
> and a2, a2, a9
> ret.n
> test1:
> addisp, sp, -16
> s32i.n  a0, sp, 12
> call0   foo
> extui   a2, a2, 20, 1
> sllia2, a2, 20
> beqz.n  a2, .L2
> movi.n  a2, 1
> sllia2, a2, 24
> .L2:
> l32i.n  a0, sp, 12
> addisp, sp, 16
> ret.n
>
> ;; after
> test0:
> extui   a2, a2, 20, 1
> sllia2, a2, 10
> ret.n
> test1:
> addisp, sp, -16
> s32i.n  a0, sp, 12
> call0   foo
> l32i.n  a0, sp, 12
> extui   a2, a2, 20, 1
> sllia2, a2, 24
> addisp, sp, 16
> ret.n
>
> In addition, if the left shift amount ('exact_log2(CST2_POW2)') is
> between 1 and 3 and either an addition or a subtraction with another
> register follows, emit an ADDX[248] or SUBX[248] machine instruction
> instead of separate left shift and add/subtract ones.
>
> /* example #2 */
> int test2(int x, int y) {
>   return ((x & 1048576) != 0 ? 4 : 0) + y;
> }
> int test3(int x, int y) {
>   return ((x & 2) != 0 ? 8 : 0) - y;
> }
>
> ;; before
> test2:
> movi.n  a9, 4
> sraia2, a2, 18
> and a2, a2, a9
> add.n   a2, a2, a3
> ret.n
> test3:
> movi.n  a9, 8
> sllia2, a2, 2
> and a2, a2, a9
> sub a2, a2, a3
> ret.n
>
> ;; after
> test2:
> extui   a2, a2, 20, 1
> addx4   a2, a2, a3
> ret.n
> test3:
> extui   a2, a2, 1, 1
> subx8   a2, a2, a3
> ret.n
>
> gcc/ChangeLog:
>
> * config/xtensa/predicates.md (addsub_operator): New.
> * config/xtensa/xtensa.md (*extzvsi-1bit_ashlsi3,
> *extzvsi-1bit_addsubx): New insn_and_split patterns.
> * config/xtensa/xtensa.cc (xtensa_rtx_costs):
> Add a special case about ifcvt 'noce_try_cmove()' to handle
> constant loads that do not fit into signed 12 bits in the
> patterns added above.
> ---
>  gcc/config/xtensa/predicates.md |  3 ++
>  gcc/config/xtensa/xtensa.cc |  3 +-
>  gcc/config/xtensa/xtensa.md | 75 +
>  3 files changed, 80 insertions(+), 1 deletion(-)

This change introduces a bunch of test failures on big endian configuration.
I believe that's because the starting bit position for zero_extract is counted
from different ends depending on the endianness.

-- 
Thanks.
-- Max


[patch] mcore: Fix sprintf length warning

2023-05-22 Thread Jan-Benedict Glaw
Hi!

One of the supplied argument strings is unnecessarily long (c-sky, using
basically the same code, fixed it to a shorter length) and this fixes overflow
warnings, as GCC fails to deduce that the full 256 bytes for load_op[] are
not used at all.


make[1]: Entering directory 
'/var/lib/laminar/run/gcc-mcore-elf/38/toolchain-build/gcc'
[...]
/usr/lib/gcc-snapshot/bin/g++  -fno-PIE -c   -g -O2   -DIN_GCC  
-DCROSS_DIRECTORY_STRUCTURE   -fno-exceptions -fno-rtti 
-fasynchronous-unwind-tables -W -Wall -Wno-narrowing -Wwrite-strings 
-Wcast-qual -Wmissing-format-attribute -Wconditionally-supported 
-Woverloaded-virtual -pedantic -Wno-long-long -Wno-variadic-macros 
-Wno-overlength-strings -Werror -fno-common  -DHAVE_CONFIG_H -I. -I. 
-I../../gcc/gcc -I../../gcc/gcc/. -I../../gcc/gcc/../include  
-I../../gcc/gcc/../libcpp/include -I../../gcc/gcc/../libcody  
-I../../gcc/gcc/../libdecnumber -I../../gcc/gcc/../libdecnumber/dpd 
-I../libdecnumber -I../../gcc/gcc/../libbacktrace   -o mcore.o -MT mcore.o -MMD 
-MP -MF ./.deps/mcore.TPo ../../gcc/gcc/config/mcore/mcore.cc
../../gcc/gcc/config/mcore/mcore.cc: In function 'const char* 
output_inline_const(machine_mode, rtx_def**)':
../../gcc/gcc/config/mcore/mcore.cc:1264:24: error: '
ixw ' directive writing 6 bytes into a region of size between 1 and 
256 [-Werror=format-overflow=]
 1264 |   sprintf (buf, "%s\n\tixw\t%s,%s\t// %ld 0x%lx", load_op, dst_fmt, 
dst_fmt, value, value);
  |^
../../gcc/gcc/config/mcore/mcore.cc:1264:21: note: using the range [0, 
18446744073709551615] for directive argument
 1264 |   sprintf (buf, "%s\n\tixw\t%s,%s\t// %ld 0x%lx", load_op, dst_fmt, 
dst_fmt, value, value);
  | ^~~~
../../gcc/gcc/config/mcore/mcore.cc:1264:15: note: 'sprintf' output between 21 
and 310 bytes into a destination of size 256
 1264 |   sprintf (buf, "%s\n\tixw\t%s,%s\t// %ld 0x%lx", load_op, dst_fmt, 
dst_fmt, value, value);
  |   
^~~~
../../gcc/gcc/config/mcore/mcore.cc:1261:24: error: '
ixh ' directive writing 6 bytes into a region of size between 1 and 
256 [-Werror=format-overflow=]
 1261 |   sprintf (buf, "%s\n\tixh\t%s,%s\t// %ld 0x%lx", load_op, dst_fmt, 
dst_fmt, value, value);
  |^
../../gcc/gcc/config/mcore/mcore.cc:1261:21: note: using the range [0, 
18446744073709551615] for directive argument
 1261 |   sprintf (buf, "%s\n\tixh\t%s,%s\t// %ld 0x%lx", load_op, dst_fmt, 
dst_fmt, value, value);
  | ^~~~
../../gcc/gcc/config/mcore/mcore.cc:1261:15: note: 'sprintf' output between 21 
and 310 bytes into a destination of size 256
 1261 |   sprintf (buf, "%s\n\tixh\t%s,%s\t// %ld 0x%lx", load_op, dst_fmt, 
dst_fmt, value, value);
  |   
^~~~
../../gcc/gcc/config/mcore/mcore.cc:1258:24: error: '
lsli' directive writing 7 bytes into a region of size between 1 and 
256 [-Werror=format-overflow=]
 1258 |   sprintf (buf, "%s\n\tlsli\t%s,%%2\t// %ld 0x%lx", load_op, 
dst_fmt, value, value);
  |^~
../../gcc/gcc/config/mcore/mcore.cc:1258:21: note: using the range [0, 
18446744073709551615] for directive argument
 1258 |   sprintf (buf, "%s\n\tlsli\t%s,%%2\t// %ld 0x%lx", load_op, 
dst_fmt, value, value);
  | ^~
../../gcc/gcc/config/mcore/mcore.cc:1258:15: note: 'sprintf' output between 22 
and 311 bytes into a destination of size 256
 1258 |   sprintf (buf, "%s\n\tlsli\t%s,%%2\t// %ld 0x%lx", load_op, 
dst_fmt, value, value);
  |   
^
../../gcc/gcc/config/mcore/mcore.cc:1255:24: error: '
rotli   ' directive writing 8 bytes into a region of size between 1 and 
256 [-Werror=format-overflow=]
 1255 |   sprintf (buf, "%s\n\trotli\t%s,%%2\t// %ld 0x%lx", load_op, 
dst_fmt, value, value);
  |^~~
../../gcc/gcc/config/mcore/mcore.cc:1255:21: note: using the range [0, 
18446744073709551615] for directive argument
 1255 |   sprintf (buf, "%s\n\trotli\t%s,%%2\t// %ld 0x%lx", load_op, 
dst_fmt, value, value);
  | ^~~
../../gcc/gcc/config/mcore/mcore.cc:1255:15: note: 'sprintf' output between 23 
and 312 bytes into a destination of size 256
 1255 |   sprintf (buf, "%s\n\trotli\t%s,%%2\t// %ld 0x%lx", load_op, 
dst_fmt, value, value);
  |   
^~
../../gcc/gcc/config/mcore/mcore.cc:1252:24: error: '
bclri   ' directive writing 8 bytes into a region of size between 1 and 

[pushed] libobjc: Add local macros to support encode generation [P109913].

2023-05-22 Thread Iain Sandoe via Gcc-patches
This fixes bootstrap on powerpc-darwin, and was tested there for
both GNU and NeXT Objective-C testsuite.  Approved in the PR by Andrew,
pushed to trunk, thanks,
Iain

--- 8< ---

r14-976-g9907413a3a6aa3 alters code to use the preferred _P-style
macros rather than direct comparisons of (for example) tree codes.

In the context of libobjc this does not work, since we do not include
the relevant headers; the encoding implementation contains a local
emulation of the target type layouts.

The fix here provides relevant macros local to the use.

Signed-off-by: Iain Sandoe 

PR libobjc/109913

libobjc/ChangeLog:

* encoding.c (RECORD_OR_UNION_TYPE_P, VECTOR_TYPE_P): New.

Co-authored-by: Andrew Pinski 
---
 libobjc/encoding.c | 5 +
 1 file changed, 5 insertions(+)

diff --git a/libobjc/encoding.c b/libobjc/encoding.c
index 9bd261c52bd..f1bbd6bf6ee 100644
--- a/libobjc/encoding.c
+++ b/libobjc/encoding.c
@@ -67,6 +67,11 @@ see the files COPYING3 and COPYING.RUNTIME respectively.  If 
not, see
 #define UNION_TYPE  _C_UNION_B
 #define QUAL_UNION_TYPE _C_UNION_B
 #define ARRAY_TYPE  _C_ARY_B
+#define RECORD_OR_UNION_TYPE_P(TYPE)   \
+   ((TREE_CODE (TYPE) == RECORD_TYPE)  \
+|| (TREE_CODE (TYPE) == UNION_TYPE)\
+|| (TREE_CODE (TYPE) == QUAL_UNION_TYPE))
+#define VECTOR_TYPE_P(TYPE) (TREE_CODE (TYPE) == VECTOR_TYPE)
 
 #define REAL_TYPE   _C_DBL
 
-- 
2.39.2 (Apple Git-143)



Re: [PATCH] libstdc++: Add missing constexpr to simd

2023-05-22 Thread Marc Glisse via Gcc-patches

On Mon, 22 May 2023, Jonathan Wakely via Libstdc++ wrote:


* subscripting vector builtins is not allowed in constant expressions


Is that just because nobody made it work (yet)?


Yes.

https://gcc.gnu.org/bugzilla/show_bug.cgi?id=101651 and others.


* if the implementation would otherwise call SIMD intrinsics/builtins


https://gcc.gnu.org/bugzilla/show_bug.cgi?id=80517 and others.

Makes sense to work around them for now.

--
Marc Glisse


Re: [PATCH] Replace __gnu_cxx::__ops::__negate with std::not_fn

2023-05-22 Thread Jonathan Wakely via Gcc-patches
On Mon, 22 May 2023 at 21:51, François Dumont via Libstdc++ <
libstd...@gcc.gnu.org> wrote:

> I was thinking that it might be nice to get rid of predefined_ops.h
> content.
>
> So here is a start with __negate. Drawback is that stl_algo.h has to
> include <functional>.


We definitely don't want that. std::not_fn could be moved to its own header.

But I'm not sure this is a good change anyway, as we can't do it
unconditionally. Pre-C++17 code would still be using the predefined_ops.h
function objects, so we can't remove that code. And we'll get template
bloat from instantiating the algos twice, once with the old function
objects and once with std::not_fn.



> For now I just get rid of stl_algo.h include in
> <functional> to rather use stl_algobase.h. But maybe it would be better
> to also isolate std::not_fn in a dedicated header file so that
> stl_algo.h do not have to include all <functional>.
>
>  libstdc++: Replace __gnu_cxx::__ops::__negate with std::not_fn
>
>  Replace the internal __gnu_cxx::__ops::__negate function and
> associated
>  __gnu_cxx::__ops::_Iter_negate by the C++17 std::not_fn.
>
>  libstdc++-v3/ChangeLog:
>
>  * include/bits/predefined_ops.h: Include .
>

No, please don't include <version> anywhere. If you do that, it means
<algorithm> now defines every feature test macro in the entire library,
which makes it look like you can get smart pointers and ranges and
constexpr math all from <algorithm>.



>  [__cpp_lib_not_fn](__gnu_cxx::__ops::_Iter_negate): Remove.
>  [__cpp_lib_not_fn](__gnu_cxx::__ops::__negate): Remove.
>  * include/bits/stl_algo.h: Include  for C++17
> and later.
>  [__cpp_lib_not_fn](__find_if_not): Use std::not_fn.
>  (std::__search, std::search(_FwdIt1, _FwdIt1, _FwdIt2,
> _FwdIt2, _BinPred)): Move...
>  * include/bits/stl_algobase.h: ...here.
>  * include/std/functional: Replace  include by
> .
>
> Tests still running.
>
> François
>
>


Re: [PATCH] libstdc++: Add missing constexpr to simd

2023-05-22 Thread Jonathan Wakely via Gcc-patches
On Mon, 22 May 2023 at 21:27, Matthias Kretz  wrote:

> On Monday, 22 May 2023 18:25:15 CEST Jonathan Wakely wrote:
> > I note that using if (not __builtin_constant_evaluated()) will fail if
> > compiled with -fno-operator-names, which is why we don't use 'not',
> 'and',
> > etc. elsewhere in libstdc++. I don't know if (or why) anybody uses that
> > option though, so I don't think you need to change anything in stdx::simd.
>
> Ah, I just recently convinced myself that "operator-names" are more
> readable
> (=> easier to maintain).


I tend to agree, but every time I decide to start using them some testcases
start to fail and I remember why we don't use them :-(



> But OTOH a mix isn't necessarily better. I'm fine
> with keeping it consistent.
>
> > > * subscripting vector builtins is not allowed in constant expressions
> >
> > Is that just because nobody made it work (yet)?
>
> That is a good question. I guess I should open a PR.
>
> > * if the implementation needs/uses memcpy
> >
> > > * if the implementation would otherwise call SIMD intrinsics/builtins
> >
> > The indentation looks off here and in the _M_set member function
> following
> > it:
>
> Yes. I had to put an #if between an else and an if. Looks like this:
>
>   else
> #ifdef _GLIBCXX_SIMD_USE_ALIASING_LOADS
> if (not __builtin_is_constant_evaluated())
> return reinterpret_cast*>(this)[__i];
>   else
> #endif
> if constexpr (__is_scalar_abi<_Abi0>())
>
>
Ah yes, so the if is indented two spaces from the else above it.
What looks wrong to me is that the return is at the same indentation as
the if controlling it.



> Should the `if` be aligned to the `else` instead?
>

How about moving the two else tokens?

 #ifdef _GLIBCXX_SIMD_USE_ALIASING_LOADS
   else if (not __builtin_is_constant_evaluated())
 return reinterpret_cast*>(this)[__i];
 #endif
   else if constexpr (__is_scalar_abi<_Abi0>())

I think that avoids the issue.



>
> > Are the copyright years on
> > testsuite/experimental/simd/pr109261_constexpr_simd.cc correct, or just
> > copy?
>
> Right, copy Should I simply remove the complete header?
>
>
You could do. I don't think there's much in that test that's novel or worth
asserting copyright over - but if you disagree and want to assign whatever
is copyrightable to the FSF, keep the header but fix the years. Either way
is fine by me.

OK for trunk and backports, with the comments above suitably resolved.


[PATCH] Replace __gnu_cxx::__ops::__negate with std::not_fn

2023-05-22 Thread François Dumont via Gcc-patches

I was thinking that it might be nice to get rid of predefined_ops.h content.

So here is a start with __negate. Drawback is that stl_algo.h has to 
include <functional>. For now I just get rid of stl_algo.h include in 
<functional> to rather use stl_algobase.h. But maybe it would be better 
to also isolate std::not_fn in a dedicated header file so that 
stl_algo.h do not have to include all <functional>.


    libstdc++: Replace __gnu_cxx::__ops::__negate with std::not_fn

    Replace the internal __gnu_cxx::__ops::__negate function and associated
    __gnu_cxx::__ops::_Iter_negate by the C++17 std::not_fn.

    libstdc++-v3/ChangeLog:

    * include/bits/predefined_ops.h: Include .
    [__cpp_lib_not_fn](__gnu_cxx::__ops::_Iter_negate): Remove.
    [__cpp_lib_not_fn](__gnu_cxx::__ops::__negate): Remove.
    * include/bits/stl_algo.h: Include  for C++17 
and later.

    [__cpp_lib_not_fn](__find_if_not): Use std::not_fn.
    (std::__search, std::search(_FwdIt1, _FwdIt1, _FwdIt2, 
_FwdIt2, _BinPred)): Move...

    * include/bits/stl_algobase.h: ...here.
    * include/std/functional: Replace  include by 
.


Tests still running.

François

diff --git a/libstdc++-v3/include/bits/predefined_ops.h b/libstdc++-v3/include/bits/predefined_ops.h
index e9933373ed9..8fdb11ea84b 100644
--- a/libstdc++-v3/include/bits/predefined_ops.h
+++ b/libstdc++-v3/include/bits/predefined_ops.h
@@ -30,6 +30,7 @@
 #ifndef _GLIBCXX_PREDEFINED_OPS_H
 #define _GLIBCXX_PREDEFINED_OPS_H	1
 
+#include 
 #include 
 
 namespace __gnu_cxx
@@ -377,6 +378,7 @@ namespace __ops
 	  _GLIBCXX_MOVE(__comp._M_comp), __it);
 }
 
+#if !__cpp_lib_not_fn
   template
 struct _Iter_negate
 {
@@ -400,6 +402,7 @@ namespace __ops
 inline _Iter_negate<_Predicate>
 __negate(_Iter_pred<_Predicate> __pred)
 { return _Iter_negate<_Predicate>(_GLIBCXX_MOVE(__pred._M_pred)); }
+#endif
 
 } // namespace __ops
 } // namespace __gnu_cxx
diff --git a/libstdc++-v3/include/bits/stl_algo.h b/libstdc++-v3/include/bits/stl_algo.h
index 54695490166..849d8a59ec2 100644
--- a/libstdc++-v3/include/bits/stl_algo.h
+++ b/libstdc++-v3/include/bits/stl_algo.h
@@ -65,6 +65,10 @@
 #include 
 #endif
 
+#if __cplusplus >= 201703L
+#include  // for std::not_fn.
+#endif
+
 #if _GLIBCXX_HOSTED
 # include   // for _Temporary_buffer
 # if (__cplusplus <= 201103L || _GLIBCXX_USE_DEPRECATED)
@@ -110,7 +114,11 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
 		  _Predicate __pred)
 {
   return std::__find_if(__first, __last,
+#if __cpp_lib_not_fn
+			std::not_fn(std::move(__pred)),
+#else
 			__gnu_cxx::__ops::__negate(__pred),
+#endif
 			std::__iterator_category(__first));
 }
 
@@ -140,54 +148,6 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
   // count
   // count_if
   // search
-
-  template
-_GLIBCXX20_CONSTEXPR
-_ForwardIterator1
-__search(_ForwardIterator1 __first1, _ForwardIterator1 __last1,
-	 _ForwardIterator2 __first2, _ForwardIterator2 __last2,
-	 _BinaryPredicate  __predicate)
-{
-  // Test for empty ranges
-  if (__first1 == __last1 || __first2 == __last2)
-	return __first1;
-
-  // Test for a pattern of length 1.
-  _ForwardIterator2 __p1(__first2);
-  if (++__p1 == __last2)
-	return std::__find_if(__first1, __last1,
-		__gnu_cxx::__ops::__iter_comp_iter(__predicate, __first2));
-
-  // General case.
-  _ForwardIterator1 __current = __first1;
-
-  for (;;)
-	{
-	  __first1 =
-	std::__find_if(__first1, __last1,
-		__gnu_cxx::__ops::__iter_comp_iter(__predicate, __first2));
-
-	  if (__first1 == __last1)
-	return __last1;
-
-	  _ForwardIterator2 __p = __p1;
-	  __current = __first1;
-	  if (++__current == __last1)
-	return __last1;
-
-	  while (__predicate(__current, __p))
-	{
-	  if (++__p == __last2)
-		return __first1;
-	  if (++__current == __last1)
-		return __last1;
-	}
-	  ++__first1;
-	}
-  return __first1;
-}
-
   // search_n
 
   /**
@@ -4147,48 +4107,6 @@ _GLIBCXX_BEGIN_NAMESPACE_ALGO
 			   __gnu_cxx::__ops::__iter_equal_to_iter());
 }
 
-  /**
-   *  @brief Search a sequence for a matching sub-sequence using a predicate.
-   *  @ingroup non_mutating_algorithms
-   *  @param  __first1 A forward iterator.
-   *  @param  __last1  A forward iterator.
-   *  @param  __first2 A forward iterator.
-   *  @param  __last2  A forward iterator.
-   *  @param  __predicate  A binary predicate.
-   *  @return   The first iterator @c i in the range
-   *  @p [__first1,__last1-(__last2-__first2)) such that
-   *  @p __predicate(*(i+N),*(__first2+N)) is true for each @c N in the range
-   *  @p [0,__last2-__first2), or @p __last1 if no such iterator exists.
-   *
-   *  Searches the range @p [__first1,__last1) for a sub-sequence that
-   *  compares equal value-by-value with the sequence given by @p
-   *  [__first2,__last2), using @p __predicate to determine equality,
-   *  and returns an iterator to the first element 

[COMMITTED] i386: Adjust emulated integer vector mode shift costs

2023-05-22 Thread Uros Bizjak via Gcc-patches
Returned integer vector mode costs of emulated instructions in
ix86_shift_rotate_cost are wrong and do not reflect generated
instruction sequences.  Rewrite handling of different integer vector
modes and different target ABIs to return real instruction
counts in order to calculate better costs of various emulated modes.

Also add the cost of a memory read, when the instruction in the
sequence reads memory.

gcc/ChangeLog:

* config/i386/i386.cc (ix86_shift_rotate_cost): Correct
calculation of integer vector mode costs to reflect generated
instruction sequences of different integer vector modes and
different target ABIs.  Remove "speed" function argument.
(ix86_rtx_costs): Update call for removed function argument.
(ix86_vector_costs::add_stmt_cost): Ditto.

gcc/testsuite/ChangeLog:

* gcc.target/i386/sse2-shiftqihi-constant-1.c: Remove XFAILs.

Bootstrapped and regression tested on x86_64-linux-gnu {,-m32}.

Uros.
diff --git a/gcc/config/i386/i386.cc b/gcc/config/i386/i386.cc
index a36e625342d..38125ce284a 100644
--- a/gcc/config/i386/i386.cc
+++ b/gcc/config/i386/i386.cc
@@ -20565,20 +20565,23 @@ ix86_shift_rotate_cost (const struct processor_costs 
*cost,
enum rtx_code code,
enum machine_mode mode, bool constant_op1,
HOST_WIDE_INT op1_val,
-   bool speed,
bool and_in_op1,
bool shift_and_truncate,
bool *skip_op0, bool *skip_op1)
 {
   if (skip_op0)
 *skip_op0 = *skip_op1 = false;
+
   if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
 {
-  /* V*QImode is emulated with 1-11 insns.  */
-  if (mode == V16QImode || mode == V32QImode)
+  int count;
+  /* Cost of reading the memory.  */
+  int extra;
+
+  switch (mode)
{
- int count = 11;
- if (TARGET_XOP && mode == V16QImode)
+   case V16QImode:
+ if (TARGET_XOP)
{
  /* For XOP we use vpshab, which requires a broadcast of the
 value to the variable shift insn.  For constants this
@@ -20586,37 +20589,65 @@ ix86_shift_rotate_cost (const struct processor_costs 
*cost,
 shift with one insn set the cost to prefer paddb.  */
  if (constant_op1)
{
- if (skip_op1)
-   *skip_op1 = true;
- return ix86_vec_cost (mode,
-   cost->sse_op
-   + (speed
-  ? 2
-  : COSTS_N_BYTES
-  (GET_MODE_UNIT_SIZE (mode;
+ extra = cost->sse_load[2];
+ return ix86_vec_cost (mode, cost->sse_op) + extra;
+   }
+ else
+   {
+ count = (code == ASHIFT) ? 2 : 3;
+ return ix86_vec_cost (mode, cost->sse_op * count);
+   }
+   }
+ /* FALLTHRU */
+   case V32QImode:
+ extra = (mode == V16QImode) ? cost->sse_load[2] : cost->sse_load[3];
+ if (constant_op1)
+   {
+ if (code == ASHIFTRT)
+   {
+ count = 4;
+ extra *= 2;
+   }
+ else
+   count = 2;
+   }
+ else if (TARGET_SSE4_1)
+   count = 8;
+ else if (code == ASHIFTRT)
+   count = 9;
+ else
+   count = 8;
+ return ix86_vec_cost (mode, cost->sse_op * count) + extra;
+
+   case V2DImode:
+   case V4DImode:
+ /* V*DImode arithmetic right shift is emulated.  */
+ if (code == ASHIFTRT && !TARGET_AVX512VL)
+   {
+ if (constant_op1)
+   {
+ if (op1_val == 63)
+   count = TARGET_SSE4_2 ? 1 : 2;
+ else if (TARGET_XOP)
+   count = 2;
+ else
+   count = 4;
}
- count = 3;
+ else if (TARGET_XOP)
+   count = 3;
+ else if (TARGET_SSE4_2)
+   count = 4;
+ else
+   count = 5;
+
+ return ix86_vec_cost (mode, cost->sse_op * count);
}
- else if (TARGET_SSSE3)
-   count = 7;
- return ix86_vec_cost (mode, cost->sse_op * count);
-   }
-  /* V*DImode arithmetic right shift is emulated.  */
-  else if (code == ASHIFTRT
-  && (mode == V2DImode || mode == V4DImode)
-  && !TARGET_XOP
-  && !TARGET_AVX512VL)
-   {
- int count = 4;
- if (constant_op1 && op1_val == 63 && TARGET_SSE4_2)
-   count = 2;
- else if (constant_op1)
-   count = 3;
- return ix86_vec_cost (mode, cost->sse_op * count);
+ /* 

Re: [PATCH] libstdc++: Add missing constexpr to simd

2023-05-22 Thread Matthias Kretz via Gcc-patches
On Monday, 22 May 2023 18:25:15 CEST Jonathan Wakely wrote:
> I note that using if (not __builtin_constant_evaluated()) will fail if
> compiled with -fno-operator-names, which is why we don't use 'not', 'and',
> etc. elsewhere in libstdc++. I don't know if (or why) anybody uses that
> option though, so I don't think you need to change anything in stdx::simd.

Ah, I just recently convinced myself that "operator-names" are more readable 
(=> easier to maintain). But OTOH a mix isn't necessarily better. I'm fine 
with keeping it consistent.

> > * subscripting vector builtins is not allowed in constant expressions
> 
> Is that just because nobody made it work (yet)?

That is a good question. I guess I should open a PR.

> * if the implementation needs/uses memcpy
> 
> > * if the implementation would otherwise call SIMD intrinsics/builtins
> 
> The indentation looks off here and in the _M_set member function following
> it:

Yes. I had to put an #if between an else and an if. Looks like this:

  else
#ifdef _GLIBCXX_SIMD_USE_ALIASING_LOADS
if (not __builtin_is_constant_evaluated())
return reinterpret_cast*>(this)[__i];
  else
#endif
if constexpr (__is_scalar_abi<_Abi0>())

Should the `if` be aligned to the `else` instead?

> Are the copyright years on
> testsuite/experimental/simd/pr109261_constexpr_simd.cc correct, or just
> copy?

Right, copy&paste. Should I simply remove the complete header?

- Matthias
-- 
──
 Dr. Matthias Kretz   https://mattkretz.github.io
 GSI Helmholtz Centre for Heavy Ion Research   https://gsi.de
 stdₓ::simd
──


Re: [C PATCH v3] Fix ICEs related to VM types in C 2/2

2023-05-22 Thread Joseph Myers
On Mon, 22 May 2023, Martin Uecker via Gcc-patches wrote:

> +static void
> +add_decl_expr(location_t loc, enum decl_context decl_context, tree type, 
> tree *expr)

Missing space before '(', and the line should be wrapped to be no more 
than 80 columns.

The C front-end changes are OK with those fixes.  The testsuite changes 
are also OK.

-- 
Joseph S. Myers
jos...@codesourcery.com


[PATCH ver 2] rs6000: Fix __builtin_vec_xst_trunc definition

2023-05-22 Thread Carl Love via Gcc-patches
Kewen, GCC maintainers:

Version 2, addressed comments from Kewen. Added an additional
overloaded builtin:  
   void __builtin_vec_xst_trunc (vuq, signed long long, long *);


The following patch fixes errors in the arguments in the
__builtin_altivec_tr_stxvrhx,   __builtin_altivec_tr_stxvrwx builtin
definitions.  Note, these builtins are used by the overloaded
__builtin_vec_xst_trunc builtin.

The patch adds a new overloaded builtin definition for
__builtin_vec_xst_trunc for the third argument to be unsigned and
signed long int.

A new testcase is added for the various overloaded versions of
__builtin_vec_xst_trunc.

The patch has been tested on Power 10 with no new regressions.

Please let me know if the patch is acceptable for mainline.  Thanks.

Carl

-
rs6000: Fix __builtin_vec_xst_trunc definition

Built-in __builtin_vec_xst_trunc calls __builtin_altivec_tr_stxvrhx
and __builtin_altivec_tr_stxvrwx to handle the short and word cases.  The
arguments for these two builtins are wrong.  This patch fixes the wrong
arguments for the builtins.

Additionally, the patch adds a new __builtin_vec_xst_trunc overloaded
version for the destination being signed or unsigned long int.

A runnable test case is added to test each of the overloaded definitions
of __builtin_vec_xst_trunc.

gcc/
* config/rs6000/rs6000-builtins.def (__builtin_altivec_tr_stxvrhx,
__builtin_altivec_tr_stxvrwx): Fix type of third argument.
Add definition for third argument to be signed long.
* config/rs6000/rs6000-overload.def (__builtin_vec_xst_trunc):
Add definition with third argument signed and unsigned long.
* doc/extend.texi (__builtin_vec_xst_trunc): Add documentation for
new unsigned long and signed long versions.

gcc/testsuite/
* gcc.target/powerpc/vsx-builtin-vec_xst_trunc.c: New test case
for __builtin_vec_xst_trunc builtin.
---
 gcc/config/rs6000/rs6000-builtins.def |   7 +-
 gcc/config/rs6000/rs6000-overload.def |   6 +
 gcc/doc/extend.texi   |   2 +
 .../powerpc/vsx-builtin-vec_xst_trunc.c   | 241 ++
 4 files changed, 254 insertions(+), 2 deletions(-)
 create mode 100644 gcc/testsuite/gcc.target/powerpc/vsx-builtin-vec_xst_trunc.c

diff --git a/gcc/config/rs6000/rs6000-builtins.def 
b/gcc/config/rs6000/rs6000-builtins.def
index 638d0bc72ca..a378491b358 100644
--- a/gcc/config/rs6000/rs6000-builtins.def
+++ b/gcc/config/rs6000/rs6000-builtins.def
@@ -3161,12 +3161,15 @@
   void __builtin_altivec_tr_stxvrbx (vsq, signed long, signed char *);
 TR_STXVRBX vsx_stxvrbx {stvec}
 
-  void __builtin_altivec_tr_stxvrhx (vsq, signed long, signed int *);
+  void __builtin_altivec_tr_stxvrhx (vsq, signed long, signed short *);
 TR_STXVRHX vsx_stxvrhx {stvec}
 
-  void __builtin_altivec_tr_stxvrwx (vsq, signed long, signed short *);
+  void __builtin_altivec_tr_stxvrwx (vsq, signed long, signed int *);
 TR_STXVRWX vsx_stxvrwx {stvec}
 
+  void __builtin_altivec_tr_stxvrlx (vsq, signed long, signed long *);
+TR_STXVRLX vsx_stxvrdx {stvec}
+
   void __builtin_altivec_tr_stxvrdx (vsq, signed long, signed long long *);
 TR_STXVRDX vsx_stxvrdx {stvec}
 
diff --git a/gcc/config/rs6000/rs6000-overload.def 
b/gcc/config/rs6000/rs6000-overload.def
index c582490c084..fd47f5b24e8 100644
--- a/gcc/config/rs6000/rs6000-overload.def
+++ b/gcc/config/rs6000/rs6000-overload.def
@@ -4872,6 +4872,12 @@
 TR_STXVRWX  TR_STXVRWX_S
   void __builtin_vec_xst_trunc (vuq, signed long long, unsigned int *);
 TR_STXVRWX  TR_STXVRWX_U
+  void __builtin_vec_xst_trunc (vsq, signed long long, signed long *);
+TR_STXVRLX  TR_STXVRLX_S
+  void __builtin_vec_xst_trunc (vuq, signed long long, unsigned long *);
+TR_STXVRLX  TR_STXVRLX_U
+  void __builtin_vec_xst_trunc (vuq, signed long long, long *);
+TR_STXVRLX  TR_STXVRLX_I
   void __builtin_vec_xst_trunc (vsq, signed long long, signed long long *);
 TR_STXVRDX  TR_STXVRDX_S
   void __builtin_vec_xst_trunc (vuq, signed long long, unsigned long long *);
diff --git a/gcc/doc/extend.texi b/gcc/doc/extend.texi
index e426a2eb7d8..7e2ae790ab3 100644
--- a/gcc/doc/extend.texi
+++ b/gcc/doc/extend.texi
@@ -18570,10 +18570,12 @@ instructions.
 @defbuiltin{{void} vec_xst_trunc (vector signed __int128, signed long long, 
signed char *)}
 @defbuiltinx{{void} vec_xst_trunc (vector signed __int128, signed long long, 
signed short *)}
 @defbuiltinx{{void} vec_xst_trunc (vector signed __int128, signed long long, 
signed int *)}
+@defbuiltinx{{void} vec_xst_trunc (vector signed __int128, signed long long, 
signed long *)}
 @defbuiltinx{{void} vec_xst_trunc (vector signed __int128, signed long long, 
signed long long *)}
 @defbuiltinx{{void} vec_xst_trunc (vector unsigned __int128, signed long long, 
unsigned char *)}
 @defbuiltinx{{void} vec_xst_trunc (vector unsigned __int128, signed long long, 

Re: [PATCH] rs6000: Fix __builtin_vec_xst_trunc definition

2023-05-22 Thread Carl Love via Gcc-patches
On Mon, 2023-05-22 at 17:04 +0800, Kewen.Lin wrote:
> Hi Carl,
> 
> on 2023/5/11 02:06, Carl Love via Gcc-patches wrote:
> > GCC maintainers:
> > 
> > The following patch fixes errors in the arguments in the
> > __builtin_altivec_tr_stxvrhx,   __builtin_altivec_tr_stxvrwx
> > builtin
> > definitions.  Note, these builtins are used by the overloaded
> > __builtin_vec_xst_trunc builtin.
> > 
> > The patch adds a new overloaded builtin definition for
> > __builtin_vec_xst_trunc for the third argument to be unsigned and
> > signed long int.
> > 
> > A new testcase is added for the various overloaded versions of
> > __builtin_vec_xst_trunc.
> > 
> > The patch has been tested on Power 10 with no new regressions.
> > 
> > Please let me know if the patch is acceptable for
> > mainline.  Thanks.
> > 
> > Carl
> > 
> > ---
> > rs6000: Fix __builtin_vec_xst_trunc definition
> > 
> > Built-in __builtin_vec_xst_trunc calls __builtin_altivec_tr_stxvrhx
> > and __builtin_altivec_tr_stxvrwx to handle the short and word
> > cases.  The
> > arguments for these two builtins are wrong.  This patch fixes the
> > wrong
> > arguments for the builtins.
> > 
> > Additionally, the patch adds a new __builtin_vec_xst_trunc
> > overloaded
> > version for the destination being signed or unsigned long int.
> > 
> > A runnable test case is added to test each of the overloaded
> > definitions
> > of __builtin_vec_xst_trunc.
> > 
> > gcc/
> > * config/rs6000/builtins.def (__builtin_altivec_tr_stxvrhx,
> > __builtin_altivec_tr_stxvrwx): Fix type of second argument.
> > Add, definition for send argument to be signed long.
> > * config/rs6000/rs6000-overload.def (__builtin_vec_xst_trunc):
> > add definition with third argument signed and unsigned long.
> > * doc/extend.texi (__builtin_vec_xst_trunc): Add documentation
> > for
> > new unsigned long and signed long versions.
> > 
> > gcc/testsuite/
> > * gcc.target/powerpc/vsx-builtin-vec_xst_trunc.c: New test case
> > for __builtin_vec_xst_trunc builtin.
> > ---
> >  gcc/config/rs6000/rs6000-builtins.def |   7 +-
> >  gcc/config/rs6000/rs6000-overload.def |   4 +
> >  gcc/doc/extend.texi   |   2 +
> >  .../powerpc/vsx-builtin-vec_xst_trunc.c   | 217
> > ++
> >  4 files changed, 228 insertions(+), 2 deletions(-)
> >  create mode 100644 gcc/testsuite/gcc.target/powerpc/vsx-builtin-
> > vec_xst_trunc.c
> > 
> > diff --git a/gcc/config/rs6000/rs6000-builtins.def
> > b/gcc/config/rs6000/rs6000-builtins.def
> > index 638d0bc72ca..a378491b358 100644
> > --- a/gcc/config/rs6000/rs6000-builtins.def
> > +++ b/gcc/config/rs6000/rs6000-builtins.def
> > @@ -3161,12 +3161,15 @@
> >void __builtin_altivec_tr_stxvrbx (vsq, signed long, signed char
> > *);
> >  TR_STXVRBX vsx_stxvrbx {stvec}
> >  
> > -  void __builtin_altivec_tr_stxvrhx (vsq, signed long, signed int
> > *);
> > +  void __builtin_altivec_tr_stxvrhx (vsq, signed long, signed
> > short *);
> >  TR_STXVRHX vsx_stxvrhx {stvec}
> >  
> > -  void __builtin_altivec_tr_stxvrwx (vsq, signed long, signed
> > short *);
> > +  void __builtin_altivec_tr_stxvrwx (vsq, signed long, signed int
> > *);
> >  TR_STXVRWX vsx_stxvrwx {stvec}
> 
> Good catch!
> 
> >  
> > +  void __builtin_altivec_tr_stxvrlx (vsq, signed long, signed long
> > *);
> > +TR_STXVRLX vsx_stxvrdx {stvec}
> > +
> 
> This is mapped to the one used for type long long, it's a hard
> mapping,
> IMHO it's wrong and not consistent with what the users expect, since
> on Power
> the size of type long int is 4 bytes at -m32 while 8 bytes at -m64,
> this
> implementation binding to 8 bytes can cause trouble in 32-bit.  I
> wonder if
> it's a good idea to add one overloaded version for type long int, for
> now
> openxl also emits error message for long int type pointer (see its
> doc [1]),
> users can use casting to make it to the acceptable pointer types
> (long long
> or int as its size).
> 
> [1] 
> https://www.ibm.com/docs/en/openxl-c-and-cpp-lop/17.1.1?topic=functions-vec-xst-trunc
> 
> 

If I understand this correctly, the "signed long" is mapped to type
"long long int"?  Just curious, where is the mapping done?

So I believe you would like to have an additional overloaded
definition:

 void __builtin_vec_xst_trunc (vuq, signed long long, long *);

Am I understanding this correctly?  I added the above definition.

> >void __builtin_altivec_tr_stxvrdx (vsq, signed long, signed long
> > long *);
> >  TR_STXVRDX vsx_stxvrdx {stvec}
> >  
> > diff --git a/gcc/config/rs6000/rs6000-overload.def
> > b/gcc/config/rs6000/rs6000-overload.def
> > index c582490c084..54b7ae5e51b 100644
> > --- a/gcc/config/rs6000/rs6000-overload.def
> > +++ b/gcc/config/rs6000/rs6000-overload.def
> > @@ -4872,6 +4872,10 @@
> >  TR_STXVRWX  TR_STXVRWX_S
> >void __builtin_vec_xst_trunc (vuq, signed long long, unsigned
> > int *);
> 

[PATCH] Convert remaining uses of value_range in ipa-*.cc to Value_Range.

2023-05-22 Thread Aldy Hernandez via Gcc-patches
Minor cleanups to get rid of value_range in IPA.  There's only one left,
but it's in the switch code which is integer specific.

OK?

gcc/ChangeLog:

* ipa-cp.cc (decide_whether_version_node): Adjust comment.
* ipa-fnsummary.cc (evaluate_conditions_for_known_args): Adjust
for Value_Range.
(set_switch_stmt_execution_predicate): Same.
* ipa-prop.cc (ipa_compute_jump_functions_for_edge): Same.
---
 gcc/ipa-cp.cc|  3 +--
 gcc/ipa-fnsummary.cc | 22 ++
 gcc/ipa-prop.cc  |  9 +++--
 3 files changed, 18 insertions(+), 16 deletions(-)

diff --git a/gcc/ipa-cp.cc b/gcc/ipa-cp.cc
index 03273666ea2..2e64415096e 100644
--- a/gcc/ipa-cp.cc
+++ b/gcc/ipa-cp.cc
@@ -6287,8 +6287,7 @@ decide_whether_version_node (struct cgraph_node *node)
{
  /* If some values generated for self-recursive calls with
 arithmetic jump functions fall outside of the known
-value_range for the parameter, we can skip them.  VR interface
-supports this only for integers now.  */
+range for the parameter, we can skip them.  */
  if (TREE_CODE (val->value) == INTEGER_CST
  && !plats->m_value_range.bottom_p ()
  && !ipa_range_contains_p (plats->m_value_range.m_vr,
diff --git a/gcc/ipa-fnsummary.cc b/gcc/ipa-fnsummary.cc
index 0474af8991e..1ce8501fe85 100644
--- a/gcc/ipa-fnsummary.cc
+++ b/gcc/ipa-fnsummary.cc
@@ -488,19 +488,20 @@ evaluate_conditions_for_known_args (struct cgraph_node 
*node,
  if (vr.varying_p () || vr.undefined_p ())
break;
 
- value_range res;
+ Value_Range res (op->type);
  if (!op->val[0])
{
+ Value_Range varying (op->type);
+ varying.set_varying (op->type);
  range_op_handler handler (op->code, op->type);
  if (!handler
  || !res.supports_type_p (op->type)
- || !handler.fold_range (res, op->type, vr,
- value_range (op->type)))
+ || !handler.fold_range (res, op->type, vr, varying))
res.set_varying (op->type);
}
  else if (!op->val[1])
{
- value_range op0;
+ Value_Range op0 (op->type);
  range_op_handler handler (op->code, op->type);
 
  ipa_range_set_and_normalize (op0, op->val[0]);
@@ -518,14 +519,14 @@ evaluate_conditions_for_known_args (struct cgraph_node 
*node,
}
  if (!vr.varying_p () && !vr.undefined_p ())
{
- value_range res;
- value_range val_vr;
+ int_range<2> res;
+ Value_Range val_vr (TREE_TYPE (c->val));
  range_op_handler handler (c->code, boolean_type_node);
 
  ipa_range_set_and_normalize (val_vr, c->val);
 
  if (!handler
- || !res.supports_type_p (boolean_type_node)
+ || !val_vr.supports_type_p (TREE_TYPE (c->val))
  || !handler.fold_range (res, boolean_type_node, vr, 
val_vr))
res.set_varying (boolean_type_node);
 
@@ -1687,12 +1688,17 @@ set_switch_stmt_execution_predicate (struct 
ipa_func_body_info *fbi,
   int bound_limit = opt_for_fn (fbi->node->decl,
param_ipa_max_switch_predicate_bounds);
   int bound_count = 0;
-  value_range vr;
+  // This can safely be an integer range, as switches can only hold
+  // integers.
+  int_range<2> vr;
 
   get_range_query (cfun)->range_of_expr (vr, op);
   if (vr.undefined_p ())
 vr.set_varying (TREE_TYPE (op));
   tree vr_min, vr_max;
+  // ?? This entire function could use a rewrite to use the irange
+  // API, instead of trying to recreate its intersection/union logic.
+  // Any use of get_legacy_range() is a serious code smell.
   value_range_kind vr_type = get_legacy_range (vr, vr_min, vr_max);
   wide_int vr_wmin = wi::to_wide (vr_min);
   wide_int vr_wmax = wi::to_wide (vr_max);
diff --git a/gcc/ipa-prop.cc b/gcc/ipa-prop.cc
index 6383bc11e0a..5f9e6dbbff2 100644
--- a/gcc/ipa-prop.cc
+++ b/gcc/ipa-prop.cc
@@ -2348,7 +2348,6 @@ ipa_compute_jump_functions_for_edge (struct 
ipa_func_body_info *fbi,
   gcall *call = cs->call_stmt;
   int n, arg_num = gimple_call_num_args (call);
   bool useful_context = false;
-  value_range vr;
 
   if (arg_num == 0 || args->jump_functions)
 return;
@@ -2379,6 +2378,7 @@ ipa_compute_jump_functions_for_edge (struct 
ipa_func_body_info *fbi,
useful_context = true;
}
 
+  Value_Range vr (TREE_TYPE (arg));
   if (POINTER_TYPE_P (TREE_TYPE (arg)))
{
  bool 

[PATCH] Implement ipa_vr hashing.

2023-05-22 Thread Aldy Hernandez via Gcc-patches
Implement hashing for ipa_vr.  When all is said and done, all these
patches incur a 7.64% slowdown for ipa-cp, which is entirely covered by
the similar 7% increase in this area last week.  So we get type agnostic
ranges with "infinite" range precision close to free.

There is no change in overall compilation.

OK?

gcc/ChangeLog:

* ipa-prop.cc (struct ipa_vr_ggc_hash_traits): Adjust for use with
ipa_vr instead of value_range.
(gt_pch_nx): Same.
(gt_ggc_mx): Same.
(ipa_get_value_range): Same.
* value-range.cc (gt_pch_nx): Move to ipa-prop.cc and adjust for
ipa_vr.
(gt_ggc_mx): Same.
---
 gcc/ipa-prop.cc| 76 +++---
 gcc/value-range.cc | 15 -
 2 files changed, 45 insertions(+), 46 deletions(-)

diff --git a/gcc/ipa-prop.cc b/gcc/ipa-prop.cc
index c46a89f1b49..6383bc11e0a 100644
--- a/gcc/ipa-prop.cc
+++ b/gcc/ipa-prop.cc
@@ -109,53 +109,53 @@ struct ipa_bit_ggc_hash_traits : public ggc_cache_remove 

 /* Hash table for avoid repeated allocations of equal ipa_bits.  */
 static GTY ((cache)) hash_table *ipa_bits_hash_table;
 
-/* Traits for a hash table for reusing value_ranges used for IPA.  Note that
-   the equiv bitmap is not hashed and is expected to be NULL.  */
+/* Traits for a hash table for reusing ranges.  */
 
-struct ipa_vr_ggc_hash_traits : public ggc_cache_remove 
+struct ipa_vr_ggc_hash_traits : public ggc_cache_remove 
 {
-  typedef value_range *value_type;
-  typedef value_range *compare_type;
+  typedef ipa_vr *value_type;
+  typedef const vrange *compare_type;
   static hashval_t
-  hash (const value_range *p)
+  hash (const ipa_vr *p)
 {
-  tree min, max;
-  value_range_kind kind = get_legacy_range (*p, min, max);
-  inchash::hash hstate (kind);
-  inchash::add_expr (min, hstate);
-  inchash::add_expr (max, hstate);
+  // This never get called, except in the verification code, as
+  // ipa_get_value_range() calculates the hash itself.  This
+  // function is mostly here for completness' sake.
+  Value_Range vr;
+  p->get_vrange (vr);
+  inchash::hash hstate;
+  add_vrange (vr, hstate);
   return hstate.end ();
 }
   static bool
-  equal (const value_range *a, const value_range *b)
+  equal (const ipa_vr *a, const vrange *b)
 {
-  return (types_compatible_p (a->type (), b->type ())
- && *a == *b);
+  return a->equal_p (*b);
 }
   static const bool empty_zero_p = true;
   static void
-  mark_empty (value_range *)
+  mark_empty (ipa_vr *)
 {
   p = NULL;
 }
   static bool
-  is_empty (const value_range *p)
+  is_empty (const ipa_vr *p)
 {
   return p == NULL;
 }
   static bool
-  is_deleted (const value_range *p)
+  is_deleted (const ipa_vr *p)
 {
-  return p == reinterpret_cast (1);
+  return p == reinterpret_cast (1);
 }
   static void
-  mark_deleted (value_range *)
+  mark_deleted (ipa_vr *)
 {
-  p = reinterpret_cast (1);
+  p = reinterpret_cast (1);
 }
 };
 
-/* Hash table for avoid repeated allocations of equal value_ranges.  */
+/* Hash table for avoid repeated allocations of equal ranges.  */
 static GTY ((cache)) hash_table *ipa_vr_hash_table;
 
 /* Holders of ipa cgraph hooks: */
@@ -265,6 +265,22 @@ ipa_vr::dump (FILE *out) const
 fprintf (out, "NO RANGE");
 }
 
+// ?? These stubs are because we use an ipa_vr in a hash_traits and
+// hash-traits.h defines an extern of gt_ggc_mx (T &) instead of
+// picking up the gt_ggc_mx (T *) version.
+void
+gt_pch_nx (ipa_vr *)
+{
+  return gt_pch_nx ((ipa_vr *) x);
+}
+
+void
+gt_ggc_mx (ipa_vr *)
+{
+  return gt_ggc_mx ((ipa_vr *) x);
+}
+
+
 /* Return true if DECL_FUNCTION_SPECIFIC_OPTIMIZATION of the decl associated
with NODE should prevent us from analyzing it for the purposes of IPA-CP.  
*/
 
@@ -2284,27 +2300,25 @@ ipa_set_jfunc_bits (ipa_jump_func *jf, const widest_int 
,
   jf->bits = ipa_get_ipa_bits_for_value (value, mask);
 }
 
-/* Return a pointer to a value_range just like *TMP, but either find it in
-   ipa_vr_hash_table or allocate it in GC memory.  TMP->equiv must be NULL.  */
+/* Return a pointer to an ipa_vr just like TMP, but either find it in
+   ipa_vr_hash_table or allocate it in GC memory.  */
 
 static ipa_vr *
 ipa_get_value_range (const vrange )
 {
-  /* FIXME: Add hashing support.
-  value_range **slot = ipa_vr_hash_table->find_slot (tmp, INSERT);
+  inchash::hash hstate;
+  inchash::add_vrange (tmp, hstate);
+  hashval_t hash = hstate.end ();
+  ipa_vr **slot = ipa_vr_hash_table->find_slot_with_hash (, hash, INSERT);
   if (*slot)
 return *slot;
 
-  value_range *vr = new (ggc_alloc ()) value_range;
-  *vr = *tmp;
-  *slot = vr;
-  */
   ipa_vr *vr = new (ggc_alloc ()) ipa_vr (tmp);
-
+  *slot = vr;
   return vr;
 }
 
-/* Assign to JF a pointer to a value_range just like TMP but either fetch a
+/* Assign to JF a pointer to a range just like TMP 

[PATCH] Convert ipa_jump_func to use ipa_vr instead of a value_range.

2023-05-22 Thread Aldy Hernandez via Gcc-patches
This patch converts the ipa_jump_func code to use the type agnostic
ipa_vr suitable for GC instead of value_range which is integer specific.

I've disabled the range caching to simplify the patch for review, but
it is handled in the next patch in the series.

OK?

gcc/ChangeLog:

* ipa-cp.cc (ipa_vr_operation_and_type_effects): New.
* ipa-prop.cc (ipa_get_value_range): Adjust for ipa_vr.
(ipa_set_jfunc_vr): Take a range.
(ipa_compute_jump_functions_for_edge): Pass range to
ipa_set_jfunc_vr.
(ipa_write_jump_function): Call streamer write helper.
(ipa_read_jump_function): Call streamer read helper.
* ipa-prop.h (class ipa_vr): Change m_vr to an ipa_vr.
---
 gcc/ipa-cp.cc   | 15 +++
 gcc/ipa-prop.cc | 70 ++---
 gcc/ipa-prop.h  |  5 +++-
 3 files changed, 44 insertions(+), 46 deletions(-)

diff --git a/gcc/ipa-cp.cc b/gcc/ipa-cp.cc
index bdbc2184b5f..03273666ea2 100644
--- a/gcc/ipa-cp.cc
+++ b/gcc/ipa-cp.cc
@@ -1928,6 +1928,21 @@ ipa_vr_operation_and_type_effects (vrange _vr,
  && !dst_vr.undefined_p ());
 }
 
+/* Same as above, but the SRC_VR argument is an IPA_VR which must
+   first be extracted onto a vrange.  */
+
+static bool
+ipa_vr_operation_and_type_effects (vrange _vr,
+  const ipa_vr _vr,
+  enum tree_code operation,
+  tree dst_type, tree src_type)
+{
+  Value_Range tmp;
+  src_vr.get_vrange (tmp);
+  return ipa_vr_operation_and_type_effects (dst_vr, tmp, operation,
+   dst_type, src_type);
+}
+
 /* Determine range of JFUNC given that INFO describes the caller node or
the one it is inlined to, CS is the call graph edge corresponding to JFUNC
and PARM_TYPE of the parameter.  */
diff --git a/gcc/ipa-prop.cc b/gcc/ipa-prop.cc
index bbfe0f8aa45..c46a89f1b49 100644
--- a/gcc/ipa-prop.cc
+++ b/gcc/ipa-prop.cc
@@ -2287,9 +2287,10 @@ ipa_set_jfunc_bits (ipa_jump_func *jf, const widest_int 
,
 /* Return a pointer to a value_range just like *TMP, but either find it in
ipa_vr_hash_table or allocate it in GC memory.  TMP->equiv must be NULL.  */
 
-static value_range *
-ipa_get_value_range (value_range *tmp)
+static ipa_vr *
+ipa_get_value_range (const vrange )
 {
+  /* FIXME: Add hashing support.
   value_range **slot = ipa_vr_hash_table->find_slot (tmp, INSERT);
   if (*slot)
 return *slot;
@@ -2297,40 +2298,27 @@ ipa_get_value_range (value_range *tmp)
   value_range *vr = new (ggc_alloc ()) value_range;
   *vr = *tmp;
   *slot = vr;
+  */
+  ipa_vr *vr = new (ggc_alloc ()) ipa_vr (tmp);
 
   return vr;
 }
 
-/* Return a pointer to a value range consisting of TYPE, MIN, MAX and an empty
-   equiv set. Use hash table in order to avoid creating multiple same copies of
-   value_ranges.  */
-
-static value_range *
-ipa_get_value_range (enum value_range_kind kind, tree min, tree max)
-{
-  value_range tmp (TREE_TYPE (min),
-  wi::to_wide (min), wi::to_wide (max), kind);
-  return ipa_get_value_range ();
-}
-
-/* Assign to JF a pointer to a value_range structure with TYPE, MIN and MAX and
-   a NULL equiv bitmap.  Use hash table in order to avoid creating multiple
-   same value_range structures.  */
+/* Assign to JF a pointer to a value_range just like TMP but either fetch a
+   copy from ipa_vr_hash_table or allocate a new on in GC memory.  */
 
 static void
-ipa_set_jfunc_vr (ipa_jump_func *jf, enum value_range_kind type,
- tree min, tree max)
+ipa_set_jfunc_vr (ipa_jump_func *jf, const vrange )
 {
-  jf->m_vr = ipa_get_value_range (type, min, max);
+  jf->m_vr = ipa_get_value_range (tmp);
 }
 
-/* Assign to JF a pointer to a value_range just like TMP but either fetch a
-   copy from ipa_vr_hash_table or allocate a new on in GC memory.  */
-
 static void
-ipa_set_jfunc_vr (ipa_jump_func *jf, value_range *tmp)
+ipa_set_jfunc_vr (ipa_jump_func *jf, const ipa_vr )
 {
-  jf->m_vr = ipa_get_value_range (tmp);
+  Value_Range tmp;
+  vr.get_vrange (tmp);
+  ipa_set_jfunc_vr (jf, tmp);
 }
 
 /* Compute jump function for all arguments of callsite CS and insert the
@@ -2392,8 +2380,8 @@ ipa_compute_jump_functions_for_edge (struct 
ipa_func_body_info *fbi,
 
  if (addr_nonzero)
{
- tree z = build_int_cst (TREE_TYPE (arg), 0);
- ipa_set_jfunc_vr (jfunc, VR_ANTI_RANGE, z, z);
+ vr.set_nonzero (TREE_TYPE (arg));
+ ipa_set_jfunc_vr (jfunc, vr);
}
  else
gcc_assert (!jfunc->m_vr);
@@ -2412,7 +2400,7 @@ ipa_compute_jump_functions_for_edge (struct 
ipa_func_body_info *fbi,
  value_range resvr = vr;
  range_cast (resvr, param_type);
  if (!resvr.undefined_p () && !resvr.varying_p ())
-   ipa_set_jfunc_vr (jfunc, );
+   ipa_set_jfunc_vr (jfunc, resvr);
  

Re: [PATCH] Convert ipcp_vr_lattice to type agnostic framework.

2023-05-22 Thread Aldy Hernandez via Gcc-patches
I've adjusted the patch with some minor cleanups that came up when I
implemented the rest of the IPA revamp.

Retested.  OK?

On Wed, May 17, 2023 at 4:31 PM Aldy Hernandez  wrote:
>
> This converts the lattice to store ranges in Value_Range instead of
> value_range (*) to make it type agnostic, and adjust all users
> accordingly.
>
> I think it is a good example of converting from static ranges to more
> general, type agnostic ones.
>
> I've been careful to make sure Value_Range never ends up on GC, since
> it contains an int_range_max and can expand on-demand onto the heap.
> Longer term storage for ranges should be done with vrange_storage, as
> per the previous patch ("Provide an API for ipa_vr").
>
> (*) I do know the Value_Range naming versus value_range is quite
> annoying, but it was a judgement call last release for the eventual
> migration to having "value_range" be a type agnostic range object.  We
> will ultimately rename Value_Range to value_range.
>
> OK for trunk?
>
> gcc/ChangeLog:
>
> * ipa-cp.cc (ipcp_vr_lattice::init): Take type argument.
> (ipcp_vr_lattice::print): Call dump method.
> (ipcp_vr_lattice::meet_with): Adjust for m_vr being a
> Value_Range.
> (ipcp_vr_lattice::meet_with_1): Make argument a reference.
> (ipcp_vr_lattice::set_to_bottom): Add type argument.
> (set_all_contains_variable): Same.
> (initialize_node_lattices): Pass type when appropriate.
> (ipa_vr_operation_and_type_effects): Make type agnostic.
> (ipa_value_range_from_jfunc): Same.
> (propagate_vr_across_jump_function): Same.
> (propagate_constants_across_call): Same.
> * ipa-fnsummary.cc (evaluate_conditions_for_known_args): Same.
> (evaluate_properties_for_edge): Same.
> * ipa-prop.cc (ipcp_update_vr): Same.
> * ipa-prop.h (ipa_value_range_from_jfunc): Same.
> (ipa_range_set_and_normalize): Same.
> ---
>  gcc/ipa-cp.cc| 159 +++
>  gcc/ipa-fnsummary.cc |  16 ++---
>  gcc/ipa-prop.cc  |   2 +-
>  gcc/ipa-prop.h   |  19 ++
>  4 files changed, 101 insertions(+), 95 deletions(-)
>
> diff --git a/gcc/ipa-cp.cc b/gcc/ipa-cp.cc
> index d4b9d4ac27e..bd5b1da17b2 100644
> --- a/gcc/ipa-cp.cc
> +++ b/gcc/ipa-cp.cc
> @@ -343,20 +343,29 @@ private:
>  class ipcp_vr_lattice
>  {
>  public:
> -  value_range m_vr;
> +  Value_Range m_vr;
>
>inline bool bottom_p () const;
>inline bool top_p () const;
> -  inline bool set_to_bottom ();
> -  bool meet_with (const value_range *p_vr);
> +  inline bool set_to_bottom (tree type);
> +  bool meet_with (const vrange _vr);
>bool meet_with (const ipcp_vr_lattice );
> -  void init () { gcc_assert (m_vr.undefined_p ()); }
> +  void init (tree type);
>void print (FILE * f);
>
>  private:
> -  bool meet_with_1 (const value_range *other_vr);
> +  bool meet_with_1 (const vrange _vr);
>  };
>
> +inline void
> +ipcp_vr_lattice::init (tree type)
> +{
> +  if (type)
> +m_vr.set_type (type);
> +
> +  // Otherwise m_vr will default to unsupported_range.
> +}
> +
>  /* Structure containing lattices for a parameter itself and for pieces of
> aggregates that are passed in the parameter or by a reference in a 
> parameter
> plus some other useful flags.  */
> @@ -585,7 +594,7 @@ ipcp_bits_lattice::print (FILE *f)
>  void
>  ipcp_vr_lattice::print (FILE * f)
>  {
> -  dump_value_range (f, _vr);
> +  m_vr.dump (f);
>  }
>
>  /* Print all ipcp_lattices of all functions to F.  */
> @@ -1016,14 +1025,14 @@ set_agg_lats_contain_variable (class 
> ipcp_param_lattices *plats)
>  bool
>  ipcp_vr_lattice::meet_with (const ipcp_vr_lattice )
>  {
> -  return meet_with_1 (_vr);
> +  return meet_with_1 (other.m_vr);
>  }
>
>  /* Meet the current value of the lattice with value range described by VR
> lattice.  */
>
>  bool
> -ipcp_vr_lattice::meet_with (const value_range *p_vr)
> +ipcp_vr_lattice::meet_with (const vrange _vr)
>  {
>return meet_with_1 (p_vr);
>  }
> @@ -1032,23 +1041,23 @@ ipcp_vr_lattice::meet_with (const value_range *p_vr)
> OTHER_VR lattice.  Return TRUE if anything changed.  */
>
>  bool
> -ipcp_vr_lattice::meet_with_1 (const value_range *other_vr)
> +ipcp_vr_lattice::meet_with_1 (const vrange _vr)
>  {
>if (bottom_p ())
>  return false;
>
> -  if (other_vr->varying_p ())
> -return set_to_bottom ();
> +  if (other_vr.varying_p ())
> +return set_to_bottom (other_vr.type ());
>
>bool res;
>if (flag_checking)
>  {
> -  value_range save (m_vr);
> -  res = m_vr.union_ (*other_vr);
> +  Value_Range save (m_vr);
> +  res = m_vr.union_ (other_vr);
>gcc_assert (res == (m_vr != save));
>  }
>else
> -res = m_vr.union_ (*other_vr);
> +res = m_vr.union_ (other_vr);
>return res;
>  }
>
> @@ -1073,16 +1082,11 @@ ipcp_vr_lattice::bottom_p () const
> previously was in a different state.  */
>
>  bool
> 

Re: [PATCH] Provide an API for ipa_vr.

2023-05-22 Thread Aldy Hernandez via Gcc-patches
I've adjusted the patch with some minor cleanups that came up when I
implemented the rest of the IPA revamp.

Retested.   OK?

On Wed, May 17, 2023 at 4:16 PM Aldy Hernandez  wrote:
>
> This patch encapsulates the ipa_vr internals into an API.  It also
> makes it type agnostic, in preparation for upcoming changes to IPA.
>
> Interestingly, there's a 0.44% improvement to IPA-cp, which I'm sure
> we'll soak up with future changes in this area :).
>
> BTW, there's a note here:
> +  // vrange_storage is typeless, but we need to know what type of
> +  // range that is being streamed out (irange, frange, etc).  AFAICT,
> +  // there's no way to get at the underlying type by the time we
> +  // stream out in write_ipcp_transformation_info.
> +  tree m_type;
>
> Could someone more IPA savvy double check this is indeed the case?
>
> OK for trunk?
>
> gcc/ChangeLog:
>
> * ipa-cp.cc (ipa_value_range_from_jfunc): Use new ipa_vr API.
> (ipcp_store_vr_results): Same.
> * ipa-prop.cc (ipa_vr::ipa_vr): New.
> (ipa_vr::get_vrange): New.
> (ipa_vr::set_unknown): New.
> (ipa_vr::streamer_read): New.
> (ipa_vr::streamer_write): New.
> (write_ipcp_transformation_info): Use new ipa_vr API.
> (read_ipcp_transformation_info): Same.
> (ipa_vr::nonzero_p): Delete.
> (ipcp_update_vr): Use new ipa_vr API.
> * ipa-prop.h (class ipa_vr): Provide an API and hide internals.
> * ipa-sra.cc (zap_useless_ipcp_results): Use new ipa_vr API.
> * gcc.dg/ipa/pr78121.c: Adjust for vrange::dump use.
> * gcc.dg/ipa/vrp1.c: Same.
> * gcc.dg/ipa/vrp2.c: Same.
> * gcc.dg/ipa/vrp3.c: Same.
> * gcc.dg/ipa/vrp4.c: Same.
> * gcc.dg/ipa/vrp5.c: Same.
> * gcc.dg/ipa/vrp6.c: Same.
> * gcc.dg/ipa/vrp7.c: Same.
> * gcc.dg/ipa/vrp8.c: Same.
> ---
>  gcc/ipa-cp.cc  |  22 ++---
>  gcc/ipa-prop.cc| 129 -
>  gcc/ipa-prop.h |  25 --
>  gcc/ipa-sra.cc |   4 +-
>  gcc/testsuite/gcc.dg/ipa/pr78121.c |   2 +-
>  gcc/testsuite/gcc.dg/ipa/vrp1.c|   4 +-
>  gcc/testsuite/gcc.dg/ipa/vrp2.c|   4 +-
>  gcc/testsuite/gcc.dg/ipa/vrp3.c|   2 +-
>  gcc/testsuite/gcc.dg/ipa/vrp4.c|   2 +-
>  gcc/testsuite/gcc.dg/ipa/vrp5.c|   2 +-
>  gcc/testsuite/gcc.dg/ipa/vrp6.c|   2 +-
>  gcc/testsuite/gcc.dg/ipa/vrp7.c|   2 +-
>  gcc/testsuite/gcc.dg/ipa/vrp8.c|   2 +-
>  13 files changed, 109 insertions(+), 93 deletions(-)
>
> diff --git a/gcc/ipa-cp.cc b/gcc/ipa-cp.cc
> index 8cd0fa2cae7..d4b9d4ac27e 100644
> --- a/gcc/ipa-cp.cc
> +++ b/gcc/ipa-cp.cc
> @@ -1947,13 +1947,11 @@ ipa_value_range_from_jfunc (ipa_node_params *info, 
> cgraph_edge *cs,
>
>idx = ipa_get_jf_pass_through_formal_id (jfunc);
>
> -  if (!(*sum->m_vr)[idx].known)
> +  if (!(*sum->m_vr)[idx].known_p ())
> return vr;
>tree vr_type = ipa_get_type (info, idx);
> -  value_range srcvr (vr_type,
> -(*sum->m_vr)[idx].min,
> -(*sum->m_vr)[idx].max,
> -(*sum->m_vr)[idx].type);
> +  value_range srcvr;
> +  (*sum->m_vr)[idx].get_vrange (srcvr, vr_type);
>
>enum tree_code operation = ipa_get_jf_pass_through_operation (jfunc);
>
> @@ -6621,25 +6619,19 @@ ipcp_store_vr_results (void)
>for (unsigned i = 0; i < count; i++)
> {
>   ipcp_param_lattices *plats = ipa_get_parm_lattices (info, i);
> - ipa_vr vr;
>
>   if (!plats->m_value_range.bottom_p ()
>   && !plats->m_value_range.top_p ()
>   && dbg_cnt (ipa_cp_vr))
> {
> - tree min, max;
> - vr.known = true;
> - vr.type = get_legacy_range (plats->m_value_range.m_vr, min, 
> max);
> - vr.min = wi::to_wide (min);
> - vr.max = wi::to_wide (max);
> + ipa_vr vr (plats->m_value_range.m_vr);
> + ts->m_vr->quick_push (vr);
> }
>   else
> {
> - vr.known = false;
> - vr.type = VR_VARYING;
> - vr.min = vr.max = wi::zero (INT_TYPE_SIZE);
> + ipa_vr vr;
> + ts->m_vr->quick_push (vr);
> }
> - ts->m_vr->quick_push (vr);
> }
>  }
>  }
> diff --git a/gcc/ipa-prop.cc b/gcc/ipa-prop.cc
> index d7d70e5ec68..4ace410de49 100644
> --- a/gcc/ipa-prop.cc
> +++ b/gcc/ipa-prop.cc
> @@ -56,6 +56,7 @@ along with GCC; see the file COPYING3.  If not see
>  #include "symtab-clones.h"
>  #include "attr-fnspec.h"
>  #include "gimple-range.h"
> +#include "value-range-storage.h"
>
>  /* Function summary where the parameter infos are actually stored. */
>  ipa_node_params_t *ipa_node_params_sum = NULL;
> @@ -177,6 +178,66 @@ struct ipa_cst_ref_desc
>  static object_allocator ipa_refdesc_pool
>("IPA-PROP 

[COMMITTED] Implement some miscellaneous zero accessors for Value_Range.

2023-05-22 Thread Aldy Hernandez via Gcc-patches
This adds some missing accessors to the type agnostic Value_Range
class.  They'll be used in the upcoming IPA work.

gcc/ChangeLog:

* value-range.h (class Value_Range): Implement set_zero,
set_nonzero, and nonzero_p.
---
 gcc/value-range.h | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/gcc/value-range.h b/gcc/value-range.h
index af81d6080da..171e6426c6e 100644
--- a/gcc/value-range.h
+++ b/gcc/value-range.h
@@ -542,6 +542,9 @@ public:
   bool contains_p (tree cst) const { return m_vrange->contains_p (cst); }
   bool singleton_p (tree *result = NULL) const
 { return m_vrange->singleton_p (result); }
+  void set_zero (tree type) { return m_vrange->set_zero (type); }
+  void set_nonzero (tree type) { return m_vrange->set_nonzero (type); }
+  bool nonzero_p () const { return m_vrange->nonzero_p (); }
   bool zero_p () const { return m_vrange->zero_p (); }
   wide_int lower_bound () const; // For irange/prange comparability.
   wide_int upper_bound () const; // For irange/prange comparability.
-- 
2.40.1



[testsuite,committed]: PR52641: Fix more of the int=32 assumption fallout.

2023-05-22 Thread Georg-Johann Lay

Applied more fixes for the int=32 assumption fallout.

Johann

--

testsuite/52641: Fix more of implicit int=32 assumption fallout.

gcc/testsuite/
PR testsuite/52641
* gcc.c-torture/compile/pr108892.c: Require int32.
* gcc.c-torture/compile/pr98199.c: Require int32plus.
* gcc.dg/analyzer/call-summaries-pr107072.c: Same.
* gcc.dg/analyzer/null-deref-pr105755.c: Same.
* gcc.dg/tree-ssa/pr102232.c: Same.
* gcc.dg/tree-ssa/pr105860.c: Same.
* gcc.dg/tree-ssa/pr96730.c: Same.
* gcc.dg/tree-ssa/pr96779-disabled.c: Same.
* gcc.dg/tree-ssa/pr96779.c: Same.
* gcc.dg/tree-ssa/pr98513.c: Same.
* gcc.dg/tree-ssa/ssa-sink-18.c
* gcc.dg/analyzer/coreutils-cksum-pr108664.c: Require int32plus,
size24plus.
* gcc.dg/analyzer/doom-s_sound-pr108867.c: Require size32plus.
* gcc.dg/analyzer/malloc-CWE-590-examples.c: Same.
* gcc.dg/debug/btf/btf-bitfields-4.c: Same.
* gcc.dg/tree-ssa/pr93435.c: Same.
* gcc.dg/analyzer/null-deref-pr102671-1.c: Require ptr_eq_long.
* gcc.dg/analyzer/null-deref-pr102671-2.c: Same.
* gcc.dg/analyzer/null-deref-pr108251-smp_fetch_ssl_fc_has_early-O2.c:
Same.
* gcc.dg/analyzer/null-deref-pr108251-smp_fetch_ssl_fc_has_early.c:
Same.
* gcc.dg/tree-ssa/pr103345.c: Use uint32_t.
* gcc.dg/tree-ssa/ssa-ccp-41.c [sizeof(int)==2]: Same.
* gcc.dg/tree-ssa/pr109031-1.c: Use uint16_t, uint32_t.
* gcc.dg/tree-ssa/pr109031-2.c: Same.
* gcc.dg/Warray-bounds-49.c (dg-warning): Discriminate int != short.
* gcc.dg/Warray-bounds-52.c (dg-warning): Discriminate avr.
* gcc.dg/Warray-bounds-33.c: Skip target avr.
* gcc.dg/analyzer/fd-access-mode-target-headers.c: Same.
* gcc.dg/analyzer/flex-with-call-summaries.c: Same.
* gcc.dg/analyzer/isatty-1.c: Same.
* gcc.dg/analyzer/pipe-glibc.c: Same.

diff --git a/gcc/testsuite/gcc.c-torture/compile/pr108892.c 
b/gcc/testsuite/gcc.c-torture/compile/pr108892.c

index d7fecd54ecf..fb0a258cdba 100644
--- a/gcc/testsuite/gcc.c-torture/compile/pr108892.c
+++ b/gcc/testsuite/gcc.c-torture/compile/pr108892.c
@@ -1,3 +1,5 @@
+/* { dg-require-effective-target int32 } */
+
 typedef char __attribute__((__vector_size__ (64))) U;
 typedef int __attribute__((__vector_size__ (64))) V;

diff --git a/gcc/testsuite/gcc.c-torture/compile/pr98199.c 
b/gcc/testsuite/gcc.c-torture/compile/pr98199.c

index b5c8d204f0e..6605d38788c 100644
--- a/gcc/testsuite/gcc.c-torture/compile/pr98199.c
+++ b/gcc/testsuite/gcc.c-torture/compile/pr98199.c
@@ -1,4 +1,5 @@
 /* PR tree-optimization/98199 */
+/* { dg-require-effective-target int32plus } */

 struct A { long a; short d; int c, f, e, g; };
 struct B { int a, i; short j; struct A k; signed : 20; int e, g; } 
__attribute__((packed));
diff --git a/gcc/testsuite/gcc.dg/Warray-bounds-33.c 
b/gcc/testsuite/gcc.dg/Warray-bounds-33.c

index 28f14b4722c..13efabe33b6 100644
--- a/gcc/testsuite/gcc.dg/Warray-bounds-33.c
+++ b/gcc/testsuite/gcc.dg/Warray-bounds-33.c
@@ -2,6 +2,7 @@
an object of incomplete type
{ dg-do compile }
{ dg-options "-O2 -Wall" }  */
+/* { dg-skip-if "acessing data memory with program memory address" { 
"avr-*-*" } } */


 struct S
 {
diff --git a/gcc/testsuite/gcc.dg/Warray-bounds-49.c 
b/gcc/testsuite/gcc.dg/Warray-bounds-49.c

index f271dd526b8..9335f1507e8 100644
--- a/gcc/testsuite/gcc.dg/Warray-bounds-49.c
+++ b/gcc/testsuite/gcc.dg/Warray-bounds-49.c
@@ -17,7 +17,8 @@ void test_a0 (void)
   // The first three elements fit in the tail padding.
   a0.a2[0] = 0; a0.a2[1] = 1; a0.a2[2] = 2;

-  a0.a2[3] = 3; // { dg-warning "array subscript 3 is above array 
bounds of 'short int\\\[]'" }
+  a0.a2[3] = 3; // { dg-warning "array subscript 3 is above array 
bounds of 'short int\\\[]'" "" { target { ! short_eq_int } } }
+  // { dg-warning "array subscript 3 is above array bounds of 
'int\\\[]'" "" { target { short_eq_int } } .-1 }

 }


@@ -27,7 +28,8 @@ void test_a1 (void)
 {
   a1.a2[0] = 0; a1.a2[1] = 1; a1.a2[2] = 2;

-  a1.a2[3] = 3; // { dg-warning "array subscript 3 is above array 
bounds of 'short int\\\[]'" }
+  a1.a2[3] = 3; // { dg-warning "array subscript 3 is above array 
bounds of 'short int\\\[]'" "" { target { ! short_eq_int } } }
+  // { dg-warning "array subscript 3 is above array bounds of 
'int\\\[]'" "" { target { short_eq_int } } .-1 }

 }


@@ -37,7 +39,8 @@ void test_a2 (void)
 {
   a2.a2[0] = 0; a2.a2[1] = 1; a2.a2[2] = 2;

-  a2.a2[3] = 3; // { dg-warning "array subscript 3 is above array 
bounds of 'short int\\\[]'" }
+  a2.a2[3] = 3; // { dg-warning "array subscript 3 is above array 
bounds of 'short int\\\[]'" "" { target { ! short_eq_int } } }
+  // { dg-warning "array subscript 3 is above array bounds of 
'int\\\[]'" "" { target { short_eq_int } } .-1 }

 }


@@ -47,7 +50,8 @@ void test_a3 (void)
 {
   

[PATCH v3] rs6000: Add buildin for mffscrn instructions

2023-05-22 Thread Carl Love via Gcc-patches
Kewen, Segher, GCC maintainers:

Version 3, fixed various issues noted by Kewen.  Retested on Power 10. 
No regression issues.

Version 2,  Fixed an issue with the test case.  The dg-options line was
missing.

The following patch adds an overloaded builtin.  There are two possible
arguments for the builtin.  The builtin definitions are:

  double __builtin_mffscrn (const int);
  double __builtin_mffscrn (double);

The patch has been tested on Power 10 with no regressions.  

Please let me know if the patch is acceptable for mainline.  Thanks.

Carl 
---

rs6000: Add builtin for mffscrn instructions

This patch adds overloaded __builtin_mffscrn for the move From FPSCR
Control & Set RN instruction with an immediate argument.  It also adds the
builtin with a floating point register argument.  A new runnable test is
added for the new builtin.

gcc/

* config/rs6000/rs6000-builtins.def (__builtin_mffscrni,
__builtin_mffscrnd): Add builtin definitions.
* config/rs6000/rs6000-overload.def (__builtin_mffscrn): Add
overloaded definition.
* doc/extend.texi: Add documentation for __builtin_mffscrn.

gcc/testsuite/

* gcc.target/powerpc/builtin-mffscrn.c: Add testcase for new
builtin.

---
 gcc/config/rs6000/rs6000-builtins.def |   9 +-
 gcc/config/rs6000/rs6000-overload.def |   5 +
 gcc/doc/extend.texi   |  10 ++
 .../gcc.target/powerpc/builtin-mffscrn.c  | 106 ++
 4 files changed, 129 insertions(+), 1 deletion(-)
 create mode 100644 gcc/testsuite/gcc.target/powerpc/builtin-mffscrn.c

diff --git a/gcc/config/rs6000/rs6000-builtins.def 
b/gcc/config/rs6000/rs6000-builtins.def
index 92d9b46e1b9..ae08d2fbff7 100644
--- a/gcc/config/rs6000/rs6000-builtins.def
+++ b/gcc/config/rs6000/rs6000-builtins.def
@@ -2849,6 +2849,14 @@
   const signed int  __builtin_vsx_scalar_extract_exp (double);
 VSEEDP xsxexpdp_si {}
 
+; Immediate instruction only uses the least significant two bits of the
+; const int.
+  double __builtin_mffscrni (const int<2>);
+MFFSCRNI rs6000_mffscrni {nosoft}
+
+  double __builtin_mffscrnd (double);
+MFFSCRNF rs6000_mffscrn {nosoft}
+
 [power9-64]
   void __builtin_altivec_xst_len_r (vsc, void *, long);
 XST_LEN_R xst_len_r {}
@@ -2875,7 +2883,6 @@
   pure vsc __builtin_vsx_xl_len_r (void *, signed long);
 XL_LEN_R xl_len_r {}
 
-
 ; Builtins requiring hardware support for IEEE-128 floating-point.
 [ieee128-hw]
   fpmath _Float128 __builtin_addf128_round_to_odd (_Float128, _Float128);
diff --git a/gcc/config/rs6000/rs6000-overload.def 
b/gcc/config/rs6000/rs6000-overload.def
index 26dc662b8fb..39423bcec2b 100644
--- a/gcc/config/rs6000/rs6000-overload.def
+++ b/gcc/config/rs6000/rs6000-overload.def
@@ -78,6 +78,11 @@
 ; like after a required newline, but nowhere else.  Lines beginning with
 ; a semicolon are also treated as blank lines.
 
+[MFFSCR, __builtin_mffscrn, __builtin_mffscrn]
+  double __builtin_mffscrn (const int<2>);
+MFFSCRNI
+  double __builtin_mffscrn (double);
+MFFSCRNF
 
 [BCDADD, __builtin_bcdadd, __builtin_vec_bcdadd]
   vsq __builtin_vec_bcdadd (vsq, vsq, const int);
diff --git a/gcc/doc/extend.texi b/gcc/doc/extend.texi
index ed8b9c8a87b..82f9932666a 100644
--- a/gcc/doc/extend.texi
+++ b/gcc/doc/extend.texi
@@ -18274,6 +18274,16 @@ The @code{__builtin_recipdiv}, and 
@code{__builtin_recipdivf}
 functions generate multiple instructions to implement division using
 the reciprocal estimate instructions.
 
+double __builtin_mffscrn (const int);
+double __builtin_mffscrn (double);
+
+The @code{__builtin_mffscrn} built-in returns the contents of the control bits
+DRN, VE, OE, UE, ZE, XE, NI, RN in the FPSCR, with RN updated appropriately.
+In the case of the const int variant of the builtin, RN is set to the 2-bit
+value specified in the builtin.  In the case of the double builtin variant, the
+2-bit value in the double argument that corresponds to the RN location in the
+FPSCR is updated.
+
 The following functions require @option{-mhard-float} and
 @option{-mmultiple} options.
 
diff --git a/gcc/testsuite/gcc.target/powerpc/builtin-mffscrn.c 
b/gcc/testsuite/gcc.target/powerpc/builtin-mffscrn.c
new file mode 100644
index 000..69a7a17cfc7
--- /dev/null
+++ b/gcc/testsuite/gcc.target/powerpc/builtin-mffscrn.c
@@ -0,0 +1,106 @@
+/* { dg-do run } */
+/* { dg-require-effective-target p9modulo_hw } */
+/* { dg-options "-mdejagnu-cpu=power9" } */
+
+#include 
+
+#ifdef DEBUG
+#include 
+#endif
+
+#define MASK 0x3
+#define EXPECTED1 0x1
+#define EXPECTED2 0x2
+
+void abort (void);
+
+int
+main()
+{
+  unsigned long mask, result, expected;
+  double double_arg;
+  
+  union convert_t {
+double d;
+unsigned long ul;
+  } val;
+
+  /* Test immediate version of __builtin_mffscrn. */
+  /* Read FPSCR and set RN bits in FPSCR[62:63]. */
+  val.d = __builtin_mffscrn 

Re: [PATCH v2] rs6000: Add buildin for mffscrn instructions

2023-05-22 Thread Carl Love via Gcc-patches
On Mon, 2023-05-22 at 14:36 +0800, Kewen.Lin wrote:
> Hi Carl,
> 
> on 2023/5/19 05:12, Carl Love via Gcc-patches wrote:
> > GCC maintainers:
> > 
> > version 2.  Fixed an issue with the test case.  The dg-options line
> > was
> > missing.
> > 
> > The following patch adds an overloaded builtin.  There are two
> > possible
> > arguments for the builtin.  The builtin definitions are:
> > 
> >   double __builtin_mffscrn (unsigned long int);
> >   double __builtin_mffscrn (double);
> > 
> 
> We already have one  bif __builtin_set_fpscr_rn for RN setting,
> apparently
> these two are mainly for direct mapping to mffscr[ni] and want the
> FPSCR bits.
> I'm curious what's the requirements requesting these two built-in
> functions?

The builtins were requested for use in GLibC.  As of version 2.31 they
were added as inline asm.  They requested a builtin so the asm could be
removed.

> 
> > The patch has been tested on Power 10 with no regressions.  
> > 
> > Please let me know if the patch is acceptable for
> > mainline.  Thanks.
> > 
> > Carl
> > 
> > 
> > rs6000: Add buildin for mffscrn instructions
> > 
> 
> s/buildin/built-in/

fixed
> 
> > This patch adds overloaded __builtin_mffscrn for the move From
> > FPSCR
> > Control & Set R instruction with an immediate argument.  It also
> > adds the
> > builtin with a floating point register argument.  A new runnable
> > test is
> > added for the new builtin.
> 
> s/Set R/Set RN/

fixed

> > gcc/
> > 
> > * config/rs6000/rs6000-builtins.def (__builtin_mffscrni,
> > __builtin_mffscrnd): Add builtin definitions.
> > * config/rs6000/rs6000-overload.def (__builtin_mffscrn): Add
> > overloaded definition.
> > * doc/extend.texi: Add documentation for __builtin_mffscrn.
> > 
> > gcc/testsuite/
> > 
> > * gcc.target/powerpc/builtin-mffscrn.c: Add testcase for new
> > builtin.
> > ---
> >  gcc/config/rs6000/rs6000-builtins.def |   7 ++
> >  gcc/config/rs6000/rs6000-overload.def |   5 +
> >  gcc/doc/extend.texi   |   8 ++
> >  .../gcc.target/powerpc/builtin-mffscrn.c  | 106
> > ++
> >  4 files changed, 126 insertions(+)
> >  create mode 100644 gcc/testsuite/gcc.target/powerpc/builtin-
> > mffscrn.c
> > 
> > diff --git a/gcc/config/rs6000/rs6000-builtins.def
> > b/gcc/config/rs6000/rs6000-builtins.def
> > index 92d9b46e1b9..67125473684 100644
> > --- a/gcc/config/rs6000/rs6000-builtins.def
> > +++ b/gcc/config/rs6000/rs6000-builtins.def
> > @@ -2875,6 +2875,13 @@
> >pure vsc __builtin_vsx_xl_len_r (void *, signed long);
> >  XL_LEN_R xl_len_r {}
> >  
> > +; Immediate instruction only uses the least significant two bits
> > of the
> > +; const int.
> > +  double __builtin_mffscrni (const int<2>);
> > +MFFSCRNI rs6000_mffscrni {}
> > +
> > +  double __builtin_mffscrnd (double);
> > +MFFSCRNF rs6000_mffscrn {}
> >  
> 
> Why are them put in [power9-64] rather than [power9]?  IMHO [power9]
> is the
> correct stanza for them.

Moved them to power 9 stanza.

>   Besides, {nosoft} attribute is required.

OK, added that.  I was trying to figure out why nosoft is needed.  The
instructions are manipulating bits in a physical register that controls
the hardware floating point instructions.  It looks to me like that
would be why.  Because if you were using msoft float then the floating
point HW registers are disabled and the floating point operations are
done using software.  Did I figure this out correctly?

 
> 
> >  ; Builtins requiring hardware support for IEEE-128 floating-point.
> >  [ieee128-hw]
> > diff --git a/gcc/config/rs6000/rs6000-overload.def
> > b/gcc/config/rs6000/rs6000-overload.def
> > index c582490c084..adda2df69ea 100644
> > --- a/gcc/config/rs6000/rs6000-overload.def
> > +++ b/gcc/config/rs6000/rs6000-overload.def
> > @@ -78,6 +78,11 @@
> >  ; like after a required newline, but nowhere else.  Lines
> > beginning with
> >  ; a semicolon are also treated as blank lines.
> >  
> > +[MFFSCR, __builtin_mffscrn, __builtin_mffscrn]
> > +  double __builtin_mffscrn (const int<2>);
> > +MFFSCRNI
> > +  double __builtin_mffscrn (double);
> > +MFFSCRNF
> >  
> >  [BCDADD, __builtin_bcdadd, __builtin_vec_bcdadd]
> >vsq __builtin_vec_bcdadd (vsq, vsq, const int);
> > diff --git a/gcc/doc/extend.texi b/gcc/doc/extend.texi
> > index ed8b9c8a87b..f16c046051a 100644
> > --- a/gcc/doc/extend.texi
> > +++ b/gcc/doc/extend.texi
> > @@ -18455,6 +18455,9 @@ int __builtin_dfp_dtstsfi_ov_td (unsigned
> > int comparison, _Decimal128 value);
> >  
> >  double __builtin_mffsl(void);
> >  
> > +double __builtin_mffscrn (unsigned long int);
> > +double __builtin_mffscrn (double);
> 
> s/unsigned long int/const int/

Fixed

> 
> Note that this section is for all configurations and your
> implementation is put
> __builtin_mffscrn power9 only, so if the intention (requirement) is
> to make this
> be for also all 

[C PATCH v3] Fix ICEs related to VM types in C 2/2

2023-05-22 Thread Martin Uecker via Gcc-patches



This version contains the middle-end changes for PR109450
and test cases as before.  The main middle-end change is that
we use gimplify_type_sizes also for parameters and remove
the special code that also walked into pointers (which is
incorrect).  

In addition, in the C FE this patch now also adds DECL_EXPR
for vm-types which are pointed-to by parameters declared
as arrays.  The new function created contains the exact
code previously used only for regular pointers, and is
now also called for parameters declared as arrays.


Martin







Fix ICEs related to VM types in C 2/2 [PR109450]

Size expressions were sometimes lost and not gimplified correctly,
leading to ICEs and incorrect evaluation order.  Fix this by 1) not
recursing into pointers when gimplifying parameters in the middle-end
(the code is merged with gimplify_type_sizes), which is incorrect
because it might access variables declared later for incomplete
structs, and 2) adding a decl expr for variably-modified arrays
that are pointed to by parameters declared as arrays.

PR c/109450

gcc/
* c/c-decl.cc (add_decl_expr): New function.
(grokdeclarator): Add decl expr for size expression in
types pointed to by parameters declared as arrays.
* function.cc (gimplify_parm_type): Remove function.
(gimplify_parameters): Call gimplify_parm_sizes.
* gimplify.cc (gimplify_type_sizes): Make function static.
(gimplify_parm_sizes): New function.

gcc/testsuite/
* gcc.dg/pr109450-1.c: New test.
* gcc.dg/pr109450-2.c: New test.
* gcc.dg/vla-26.c: New test.

diff --git a/gcc/c/c-decl.cc b/gcc/c/c-decl.cc
index 494d3cf1747..c35347734b2 100644
--- a/gcc/c/c-decl.cc
+++ b/gcc/c/c-decl.cc
@@ -6490,6 +6490,55 @@ smallest_type_quals_location (const location_t 
*locations,
   return loc;
 }
 
+
+/* We attach an artificial TYPE_DECL to pointed-to type
+   and arrange for it to be included in a DECL_EXPR.  This
+   forces the sizes evaluation at a safe point and ensures it
+   is not deferred until e.g. within a deeper conditional context.
+
+   PARM contexts have no enclosing statement list that
+   can hold the DECL_EXPR, so we need to use a BIND_EXPR
+   instead, and add it to the list of expressions that
+   need to be evaluated.
+
+   TYPENAME contexts do have an enclosing statement list,
+   but it would be incorrect to use it, as the size should
+   only be evaluated if the containing expression is
+   evaluated.  We might also be in the middle of an
+   expression with side effects on the pointed-to type size
+   "arguments" prior to the pointer declaration point and
+   the fake TYPE_DECL in the enclosing context would force
+   the size evaluation prior to the side effects.  We therefore
+   use BIND_EXPRs in TYPENAME contexts too.  */
+static void
+add_decl_expr(location_t loc, enum decl_context decl_context, tree type, tree 
*expr)
+{
+  tree bind = NULL_TREE;
+  if (decl_context == TYPENAME || decl_context == PARM || decl_context == 
FIELD)
+{
+  bind = build3 (BIND_EXPR, void_type_node, NULL_TREE, NULL_TREE, 
NULL_TREE);
+  TREE_SIDE_EFFECTS (bind) = 1;
+  BIND_EXPR_BODY (bind) = push_stmt_list ();
+  push_scope ();
+}
+
+  tree decl = build_decl (loc, TYPE_DECL, NULL_TREE, type);
+  pushdecl (decl);
+  DECL_ARTIFICIAL (decl) = 1;
+  add_stmt (build_stmt (DECL_SOURCE_LOCATION (decl), DECL_EXPR, decl));
+  TYPE_NAME (type) = decl;
+
+  if (bind)
+{
+  pop_scope ();
+  BIND_EXPR_BODY (bind) = pop_stmt_list (BIND_EXPR_BODY (bind));
+  if (*expr)
+   *expr = build2 (COMPOUND_EXPR, void_type_node, *expr, bind);
+  else
+   *expr = bind;
+}
+}
+
 /* Given declspecs and a declarator,
determine the name and type of the object declared
and construct a ..._DECL node for it.
@@ -7474,58 +7523,9 @@ grokdeclarator (const struct c_declarator *declarator,
 
   This is expected to happen automatically when the pointed-to
   type has a name/declaration of it's own, but special attention
-  is required if the type is anonymous.
-
-  We attach an artificial TYPE_DECL to such pointed-to type
-  and arrange for it to be included in a DECL_EXPR.  This
-  forces the sizes evaluation at a safe point and ensures it
-  is not deferred until e.g. within a deeper conditional context.
-
-  PARM contexts have no enclosing statement list that
-  can hold the DECL_EXPR, so we need to use a BIND_EXPR
-  instead, and add it to the list of expressions that
-  need to be evaluated.
-
-  TYPENAME contexts do have an enclosing statement list,
-  but it would be incorrect to use it, as the size should
-  only be evaluated if the containing expression is
-  

[C PATCH v3] Fix ICEs related to VM types in C 1/2

2023-05-22 Thread Martin Uecker via Gcc-patches


Hi Joseph,

I had to create another revision of the patch because of some
case I had overlooked (vm-types pointed-to by parameters
declared as arrays).  I split the patch up into two parts
for easier reviewing.  The first part only has the FE changes
and most of the tests.  The only minor change is that I replaced
a bug number because there was a duplicate and that I added
the specific example from this bug report (PR70418) as another
test. Since this half of the patch was already reviewed, I will
commit it tomorrow if there are no comments.

The second part contains the middle-end changes and the new
FE fix you haven't seen.

Both patches bootstrapped and regression tested on x86. 


Martin







Fix ICEs related to VM types in C 1/2 [PR70418, PR107557, PR108423]

Size expressions were sometimes lost and not gimplified correctly, leading 
to
ICEs and incorrect evaluation order.  Fix this by 1) not recursing into
pointers when gimplifying parameters in the middle-end (the code is merged 
with
gimplify_type_sizes), which is incorrect because it might access variables
declared later for incomplete structs, 2) tracking size expressions for
struct/union members correctly, and 3) emitting code to evaluate size 
expressions
for missing cases (nested functions, empty declarations, and 
structs/unions).

PR c/70418
PR c/106465
PR c/107557
PR c/108423

gcc/
* c/c-decl.cc (start_decl): Make sure size expressions are
evaluated only in correct context.
(grokdeclarator): Size expression in fields may need a bind
expression, make sure DECL_EXPR is always created.
(grokfield, declspecs_add_type): Pass along size expressions.
(finish_struct): Remove unneeded DECL_EXPR.
(start_function): Evaluate size expressions for nested functions.
* c/c-parser.cc (c_parser_struct_declarations,
c_parser_struct_or_union_specifier): Pass along size expressions.
(c_parser_declaration_or_fndef): Evaluate size expression.
(c_parser_objc_at_property_declaration,
c_parser_objc_class_instance_variables): Adapt.

gcc/testsuite/
* gcc.dg/nested-vla-1.c: New test.
* gcc.dg/nested-vla-2.c: New test.
* gcc.dg/nested-vla-3.c: New test.
* gcc.dg/pr70418.c: New test.
* gcc.dg/pr106465.c: New test.
* gcc.dg/pr107557-1.c: New test.
* gcc.dg/pr107557-2.c: New test.
* gcc.dg/pr108423-1.c: New test.
* gcc.dg/pr108423-2.c: New test.
* gcc.dg/pr108423-3.c: New test.
* gcc.dg/pr108423-4.c: New test.
* gcc.dg/pr108423-5.c: New test.
* gcc.dg/pr108423-6.c: New test.
* gcc.dg/typename-vla-2.c: New test.
* gcc.dg/typename-vla-3.c: New test.
* gcc.dg/typename-vla-4.c: New test.
* gcc.misc-tests/gcov-pr85350.c: Adapt.

diff --git a/gcc/c/c-decl.cc b/gcc/c/c-decl.cc
index 94ce760b55e..494d3cf1747 100644
--- a/gcc/c/c-decl.cc
+++ b/gcc/c/c-decl.cc
@@ -5364,7 +5364,8 @@ start_decl (struct c_declarator *declarator, struct 
c_declspecs *declspecs,
 if (lastdecl != error_mark_node)
   *lastloc = DECL_SOURCE_LOCATION (lastdecl);
 
-  if (expr)
+  /* Make sure the size expression is evaluated at this point.  */
+  if (expr && !current_scope->parm_flag)
 add_stmt (fold_convert (void_type_node, expr));
 
   if (TREE_CODE (decl) != FUNCTION_DECL && MAIN_NAME_P (DECL_NAME (decl))
@@ -7498,7 +7499,8 @@ grokdeclarator (const struct c_declarator *declarator,
&& c_type_variably_modified_p (type))
  {
tree bind = NULL_TREE;
-   if (decl_context == TYPENAME || decl_context == PARM)
+   if (decl_context == TYPENAME || decl_context == PARM
+   || decl_context == FIELD)
  {
bind = build3 (BIND_EXPR, void_type_node, NULL_TREE,
   NULL_TREE, NULL_TREE);
@@ -7507,10 +7509,11 @@ grokdeclarator (const struct c_declarator *declarator,
push_scope ();
  }
tree decl = build_decl (loc, TYPE_DECL, NULL_TREE, type);
-   DECL_ARTIFICIAL (decl) = 1;
pushdecl (decl);
-   finish_decl (decl, loc, NULL_TREE, NULL_TREE, NULL_TREE);
+   DECL_ARTIFICIAL (decl) = 1;
+   add_stmt (build_stmt (DECL_SOURCE_LOCATION (decl), DECL_EXPR, 
decl));
TYPE_NAME (type) = decl;
+
if (bind)
  {
pop_scope ();
@@ -8709,7 +8712,7 @@ start_struct (location_t loc, enum tree_code code, tree 
name,
 tree
 grokfield (location_t loc,
   struct c_declarator *declarator, struct c_declspecs *declspecs,
-  tree 

Re: [PATCH] libstdc++: Add missing constexpr to simd

2023-05-22 Thread Jonathan Wakely via Gcc-patches
On Mon, 22 May 2023 at 16:36, Matthias Kretz via Libstdc++ <
libstd...@gcc.gnu.org> wrote:

> OK for trunk and backporting?
>
> regtested on x86_64-linux and aarch64-linux
>
> The constexpr API is only available with -std=gnu++XX (and proposed for
> C++26). The proposal is to have the complete simd API usable in constant
> expressions.
>
> This patch resolves several issues with using simd in constant
> expressions.
>
> Issues why constant_evaluated branches are necessary:
>

I note that using if (not __builtin_is_constant_evaluated()) will fail if
compiled with -fno-operator-names, which is why we don't use 'not', 'and',
etc. elsewhere in libstdc++. I don't know if (or why) anybody uses that
option though, so I don't think you need to change anything in stdx::simd.




> * subscripting vector builtins is not allowed in constant expressions
>

Is that just because nobody made it work (yet)?


> * if the implementation needs/uses memcpy
> * if the implementation would otherwise call SIMD intrinsics/builtins
>


The indentation looks off here and in the _M_set member function following
it:

 operator[](size_t __i) const noexcept
 {
   if constexpr (_S_tuple_size == 1)
  return _M_subscript_read(__i);
   else
- {
 #ifdef _GLIBCXX_SIMD_USE_ALIASING_LOADS
-  return reinterpret_cast*>(this)[__i];
-#else
-  if constexpr (__is_scalar_abi<_Abi0>())
-{
-  const _Tp* ptr = 
-  return ptr[__i];
-}
-  else
-return __i < simd_size_v<_Tp, _Abi0>
- ? _M_subscript_read(__i)
- : second[__i - simd_size_v<_Tp, _Abi0>];
+ if (not __builtin_is_constant_evaluated())
+ return reinterpret_cast*>(this)[__i];
+  else
 #endif
+ if constexpr (__is_scalar_abi<_Abi0>())
+ {
+  const _Tp* ptr = 
+  return ptr[__i];
  }
+  else
+ return __i < simd_size_v<_Tp, _Abi0> ? _M_subscript_read(__i)
+ : second[__i - simd_size_v<_Tp, _Abi0>];
 }


Are the copyright years on
testsuite/experimental/simd/pr109261_constexpr_simd.cc correct, or just
copy & paste?


Re: [PATCH] add glibc-stdint.h to vax and lm32 linux target (PR target/105525)

2023-05-22 Thread Jan-Benedict Glaw
Hi Mikael!

On Mon, 2023-05-22 17:25:39 +0200, Mikael Pettersson  
wrote:
> On Mon, May 22, 2023 at 3:57 PM Jan-Benedict Glaw  wrote:
> > On Mon, 2023-05-22 14:10:48 +0100, Maciej W. Rozycki  
> > wrote:
> > > On Fri, 19 May 2023, Mikael Pettersson wrote:
> > > > The background is that I maintain a script to build GCC-based crosses to
> > > > as many targets as I can, currently it supports 78 distinct processors 
> > > > and
> > > > 82 triplets (four processors have multiple triplets). I only check that 
> > > > I can
> > > > build the toolchains (full linux-gnu ones where possible).
> > >
> > >  Great work, thanks!
> >
> > I'd be very much interested in running your script as one build
> > variant for my http://toolchain.lug-owl.de/ efforts. Is it available
> > somewhere? That would be nice!
> 
> The script is publicly available as https://github.com/mikpe/buildcross.git.
> Usage for actively maintained toolchains is pretty easy. For example, to build
> a cross to sparc64-unknown-linux-gnu you just run
> 
> buildcross -jN sparc64
> 
> and it will leave the toolchain in cross-sparc64. (Other bits will
> land in downloads/,
> sources/, and host-tools/.)

Thanks, I'll have a look!

> If you're only interested in linux-gnu toolchains for actively
> maintained targets there's
> a build-many-glibcs.py script in glibc that should be a better fit.

My intention is to gain access to as many targets and different
configurations and build strategies as possible. glibc's script is
already included. (As I build with all languages, most of the target
configurations fail right now.)

Thanks!

Jan-Benedict
-- 


signature.asc
Description: PGP signature


[PATCH] libstdc++: Add missing constexpr to simd

2023-05-22 Thread Matthias Kretz via Gcc-patches
OK for trunk and backporting?

regtested on x86_64-linux and aarch64-linux

The constexpr API is only available with -std=gnu++XX (and proposed for
C++26). The proposal is to have the complete simd API usable in constant
expressions.

This patch resolves several issues with using simd in constant
expressions.

Issues why constant_evaluated branches are necessary:
* subscripting vector builtins is not allowed in constant expressions
* if the implementation needs/uses memcpy
* if the implementation would otherwise call SIMD intrinsics/builtins

Signed-off-by: Matthias Kretz 

libstdc++-v3/ChangeLog:

PR libstdc++/109261
* include/experimental/bits/simd.h (_SimdWrapper::_M_set):
Avoid vector builtin subscripting in constant expressions.
(resizing_simd_cast): Avoid memcpy if constant_evaluated.
(const_where_expression, where_expression, where)
(__extract_part, simd_mask, _SimdIntOperators, simd): Add either
_GLIBCXX_SIMD_CONSTEXPR (on public APIs), or constexpr (on
internal APIs).
* include/experimental/bits/simd_builtin.h (__vector_permute)
(__vector_shuffle, __extract_part, _GnuTraits::_SimdCastType1)
(_GnuTraits::_SimdCastType2, _SimdImplBuiltin)
(_MaskImplBuiltin::_S_store): Add constexpr.
(_CommonImplBuiltin::_S_store_bool_array)
(_SimdImplBuiltin::_S_load, _SimdImplBuiltin::_S_store)
(_SimdImplBuiltin::_S_reduce, _MaskImplBuiltin::_S_load): Add
constant_evaluated case.
* include/experimental/bits/simd_fixed_size.h
(_S_masked_load): Reword comment.
(__tuple_element_meta, __make_meta, _SimdTuple::_M_apply_r)
(_SimdTuple::_M_subscript_read, _SimdTuple::_M_subscript_write)
(__make_simd_tuple, __optimize_simd_tuple, __extract_part)
(__autocvt_to_simd, _Fixed::__traits::_SimdBase)
(_Fixed::__traits::_SimdCastType, _SimdImplFixedSize): Add
constexpr.
(_SimdTuple::operator[], _M_set): Add constexpr and add
constant_evaluated case.
(_MaskImplFixedSize::_S_load): Add constant_evaluated case.
* include/experimental/bits/simd_scalar.h: Add constexpr.

* include/experimental/bits/simd_x86.h (_CommonImplX86): Add
constexpr and add constant_evaluated case.
(_SimdImplX86::_S_equal_to, _S_not_equal_to, _S_less)
(_S_less_equal): Value-initialize to satisfy constexpr
evaluation.
(_MaskImplX86::_S_load): Add constant_evaluated case.
(_MaskImplX86::_S_store): Add constexpr and constant_evaluated
case. Value-initialize local variables.
(_MaskImplX86::_S_logical_and, _S_logical_or, _S_bit_not)
(_S_bit_and, _S_bit_or, _S_bit_xor): Add constant_evaluated
case.
* testsuite/experimental/simd/pr109261_constexpr_simd.cc: New
test.
---
 libstdc++-v3/include/experimental/bits/simd.h | 153 ---
 .../include/experimental/bits/simd_builtin.h  | 100 ++
 .../experimental/bits/simd_fixed_size.h   | 177 +-
 .../include/experimental/bits/simd_scalar.h   |  78 
 .../include/experimental/bits/simd_x86.h  |  68 +--
 .../simd/pr109261_constexpr_simd.cc   | 109 +++
 6 files changed, 437 insertions(+), 248 deletions(-)
 create mode 100644 libstdc++-v3/testsuite/experimental/simd/
pr109261_constexpr_simd.cc


--
──
 Dr. Matthias Kretz   https://mattkretz.github.io
 GSI Helmholtz Centre for Heavy Ion Research   https://gsi.de
 stdₓ::simd
──diff --git a/libstdc++-v3/include/experimental/bits/simd.h b/libstdc++-v3/include/experimental/bits/simd.h
index 224153ffbaf..b0571ca26c4 100644
--- a/libstdc++-v3/include/experimental/bits/simd.h
+++ b/libstdc++-v3/include/experimental/bits/simd.h
@@ -2675,7 +2675,14 @@ _SimdWrapper(_V __x)
 
 _GLIBCXX_SIMD_INTRINSIC constexpr void
 _M_set(size_t __i, _Tp __x)
-{ _M_data[__i] = __x; }
+{
+  if (__builtin_is_constant_evaluated())
+	_M_data = __generate_from_n_evaluations<_Width, _BuiltinType>([&](auto __j) {
+		return __j == __i ? __x : _M_data[__j()];
+		  });
+  else
+	_M_data[__i] = __x;
+}
 
 _GLIBCXX_SIMD_INTRINSIC
 constexpr bool
@@ -3186,6 +3193,10 @@ resizing_simd_cast(const simd<_Up, _Ap>& __x)
   {
 if constexpr (is_same_v)
   return __x;
+else if (__builtin_is_constant_evaluated())
+  return _Tp([&](auto __i) constexpr {
+	   return __i < simd_size_v<_Up, _Ap> ? __x[__i] : _Up();
+	 });
 else if constexpr (simd_size_v<_Up, _Ap> == 1)
   {
 	_Tp __r{};
@@ -3321,10 +3332,11 @@ __get_lvalue(const const_where_expression& __x)
 
 const_where_expression& operator=(const const_where_expression&) = delete;
 
-_GLIBCXX_SIMD_INTRINSIC const_where_expression(const _M& 

[committed] libstdc++: Resolve -Wunused-variable warnings in stdx::simd and tests

2023-05-22 Thread Matthias Kretz via Gcc-patches
pushed to master, will backport later

regtested on x86_64-linux

Signed-off-by: Matthias Kretz 

libstdc++-v3/ChangeLog:

* include/experimental/bits/simd_builtin.h (_S_fpclassify): Move
__infn into #ifdef'ed block.
* testsuite/experimental/simd/tests/fpclassify.cc: Declare
constants only when used.
* testsuite/experimental/simd/tests/frexp.cc: Likewise.
* testsuite/experimental/simd/tests/logarithm.cc: Likewise.
* testsuite/experimental/simd/tests/trunc_ceil_floor.cc:
Likewise.
* testsuite/experimental/simd/tests/ldexp_scalbn_scalbln_modf.cc:
Move totest and expect1 into #ifdef'ed block.
---
 libstdc++-v3/include/experimental/bits/simd_builtin.h   | 4 ++--
 .../testsuite/experimental/simd/tests/fpclassify.cc | 2 ++
 libstdc++-v3/testsuite/experimental/simd/tests/frexp.cc | 6 ++
 .../experimental/simd/tests/ldexp_scalbn_scalbln_modf.cc| 4 ++--
 libstdc++-v3/testsuite/experimental/simd/tests/logarithm.cc | 4 +++-
 .../testsuite/experimental/simd/tests/trunc_ceil_floor.cc   | 2 ++
 6 files changed, 17 insertions(+), 5 deletions(-)


--
──
 Dr. Matthias Kretz   https://mattkretz.github.io
 GSI Helmholtz Centre for Heavy Ion Research   https://gsi.de
 stdₓ::simd
──diff --git a/libstdc++-v3/include/experimental/bits/simd_builtin.h b/libstdc++-v3/include/experimental/bits/simd_builtin.h
index 4c008da26e0..3d52bc6c96a 100644
--- a/libstdc++-v3/include/experimental/bits/simd_builtin.h
+++ b/libstdc++-v3/include/experimental/bits/simd_builtin.h
@@ -2370,12 +2370,12 @@ _S_fpclassify(_SimdWrapper<_Tp, _Np> __x)
 	constexpr size_t _NI = sizeof(__xn) / sizeof(_I);
 	_GLIBCXX_SIMD_USE_CONSTEXPR auto __minn
 	  = __vector_bitcast<_I>(__vector_broadcast<_NI>(__norm_min_v<_Tp>));
-	_GLIBCXX_SIMD_USE_CONSTEXPR auto __infn
-	  = __vector_bitcast<_I>(__vector_broadcast<_NI>(__infinity_v<_Tp>));
 
 	_GLIBCXX_SIMD_USE_CONSTEXPR auto __fp_normal
 	  = __vector_broadcast<_NI, _I>(FP_NORMAL);
 #if !__FINITE_MATH_ONLY__
+	_GLIBCXX_SIMD_USE_CONSTEXPR auto __infn
+	  = __vector_bitcast<_I>(__vector_broadcast<_NI>(__infinity_v<_Tp>));
 	_GLIBCXX_SIMD_USE_CONSTEXPR auto __fp_nan
 	  = __vector_broadcast<_NI, _I>(FP_NAN);
 	_GLIBCXX_SIMD_USE_CONSTEXPR auto __fp_infinite
diff --git a/libstdc++-v3/testsuite/experimental/simd/tests/fpclassify.cc b/libstdc++-v3/testsuite/experimental/simd/tests/fpclassify.cc
index 00c608f9530..13262df80ac 100644
--- a/libstdc++-v3/testsuite/experimental/simd/tests/fpclassify.cc
+++ b/libstdc++-v3/testsuite/experimental/simd/tests/fpclassify.cc
@@ -38,9 +38,11 @@ test()
   {
 using T = typename V::value_type;
 using intv = std::experimental::fixed_size_simd;
+#if __GCC_IEC_559 >= 2
 constexpr T inf = std::__infinity_v;
 constexpr T denorm_min = std::__infinity_v;
 constexpr T nan = std::__quiet_NaN_v;
+#endif
 constexpr T max = std::__finite_max_v;
 constexpr T norm_min = std::__norm_min_v;
 test_values(
diff --git a/libstdc++-v3/testsuite/experimental/simd/tests/frexp.cc b/libstdc++-v3/testsuite/experimental/simd/tests/frexp.cc
index f6a47cedd13..2c3f500beee 100644
--- a/libstdc++-v3/testsuite/experimental/simd/tests/frexp.cc
+++ b/libstdc++-v3/testsuite/experimental/simd/tests/frexp.cc
@@ -25,11 +25,17 @@ test()
   {
 using int_v = std::experimental::fixed_size_simd;
 using T = typename V::value_type;
+#if __GCC_IEC_559 >= 2 || defined __STDC_IEC_559__
 constexpr auto denorm_min = std::__denorm_min_v;
+#endif
+#if __GCC_IEC_559 >= 2
 constexpr auto norm_min = std::__norm_min_v;
+#endif
 constexpr auto max = std::__finite_max_v;
+#if defined __STDC_IEC_559__
 constexpr auto nan = std::__quiet_NaN_v;
 constexpr auto inf = std::__infinity_v;
+#endif
 test_values(
   {0, 0.25, 0.5, 1, 3, 4, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19,
20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 32, 31, -0., -0.25, -0.5, -1,
diff --git a/libstdc++-v3/testsuite/experimental/simd/tests/ldexp_scalbn_scalbln_modf.cc b/libstdc++-v3/testsuite/experimental/simd/tests/ldexp_scalbn_scalbln_modf.cc
index 0fb1338fc04..56e275ee4bf 100644
--- a/libstdc++-v3/testsuite/experimental/simd/tests/ldexp_scalbn_scalbln_modf.cc
+++ b/libstdc++-v3/testsuite/experimental/simd/tests/ldexp_scalbn_scalbln_modf.cc
@@ -137,7 +137,6 @@ test()
 	if (modf_is_broken)
 	  return;
 	V integral = {};
-	const V totest = modf(input, &integral);
 	auto&& expected = [&](const auto& v) -> std::pair {
 	  std::pair tmp = {};
 	  using std::modf;
@@ -149,8 +148,9 @@ test()
 	}
 	  return tmp;
 	};
-	const auto expect1 = expected(input);
 #ifdef __STDC_IEC_559__
+	const V totest = modf(input, &integral);
+	const auto expect1 = expected(input);
 	COMPARE(isnan(totest), isnan(expect1.first))
 	  << "modf(" << input << ", iptr) = 

Re: [PATCH] add glibc-stdint.h to vax and lm32 linux target (PR target/105525)

2023-05-22 Thread Mikael Pettersson via Gcc-patches
On Mon, May 22, 2023 at 3:57 PM Jan-Benedict Glaw  wrote:
>
> Hi!
>
> On Mon, 2023-05-22 14:10:48 +0100, Maciej W. Rozycki  
> wrote:
> > On Fri, 19 May 2023, Mikael Pettersson wrote:
> > > The background is that I maintain a script to build GCC-based crosses to
> > > as many targets as I can, currently it supports 78 distinct processors and
> > > 82 triplets (four processors have multiple triplets). I only check that I 
> > > can
> > > build the toolchains (full linux-gnu ones where possible).
> >
> >  Great work, thanks!
>
> I'd be very much interested in running your script as one build
> variant for my http://toolchain.lug-owl.de/ efforts. Is it available
> somewhere? That would be nice!

The script is publicly available as https://github.com/mikpe/buildcross.git.
Usage for actively maintained toolchains is pretty easy. For example, to build
a cross to sparc64-unknown-linux-gnu you just run

buildcross -jN sparc64

and it will leave the toolchain in cross-sparc64. (Other bits will
land in downloads/,
sources/, and host-tools/.)

Many older targets require older gcc versions to even build, and may
not work well
as 64-bit builds, so for those the operator needs to provide overrides
via the environment,
see e.g. the comments for the "a29k" target. None of that is automated, sorry.

If you're only interested in linux-gnu toolchains for actively
maintained targets there's
a build-many-glibcs.py script in glibc that should be a better fit.

/Mikael


Re: [PATCH 1/2] Improve do_store_flag for single bit comparison against 0

2023-05-22 Thread Andrew Pinski via Gcc-patches
On Mon, May 22, 2023 at 4:56 AM Richard Biener via Gcc-patches
 wrote:
>
> On Fri, May 19, 2023 at 4:15 AM Andrew Pinski via Gcc-patches
>  wrote:
> >
> > > While working on something else, I noticed we could improve
> > the following function code generation:
> > ```
> > unsigned f(unsigned t)
> > {
> >   if (t & ~(1<<30)) __builtin_unreachable();
> >   return t != 0;
> > }
> > ```
> > > Right now we just emit a comparison against 0 instead
> > of just a shift right by 30.
> > There is code in do_store_flag which already optimizes
> > `(t & 1<<30) != 0` to `(t >> 30) & 1`. This patch
> > extends it to handle the case where we know t has a
> > nonzero of just one bit set.
> >
> > OK? Bootstrapped and tested on x86_64-linux-gnu with no regressions.
> >
> > gcc/ChangeLog:
> >
> > * expr.cc (do_store_flag): Extend the one bit checking case
> > to handle the case where we don't have an and but rather still
> > one bit is known to be non-zero.
> > ---
> >  gcc/expr.cc | 27 +--
> >  1 file changed, 21 insertions(+), 6 deletions(-)
> >
> > diff --git a/gcc/expr.cc b/gcc/expr.cc
> > index 5ede094e705..91528e734e7 100644
> > --- a/gcc/expr.cc
> > +++ b/gcc/expr.cc
> > @@ -13083,15 +13083,30 @@ do_store_flag (sepops ops, rtx target, 
> > machine_mode mode)
> >&& integer_zerop (arg1)
> >&& (TYPE_PRECISION (ops->type) != 1 || TYPE_UNSIGNED (ops->type)))
> >  {
> > -  gimple *srcstmt = get_def_for_expr (arg0, BIT_AND_EXPR);
> > -  if (srcstmt
> > - && integer_pow2p (gimple_assign_rhs2 (srcstmt)))
> > +  wide_int nz = tree_nonzero_bits (arg0);
> > +
> > +  if (wi::popcount (nz) == 1)
> > {
> > + tree op0;
> > + tree op1;
> > + gimple *srcstmt = get_def_for_expr (arg0, BIT_AND_EXPR);
> > + /* If the defining statement was (x & POW2), then remove the and
> > +as we are going to add it back. */
> > + if (srcstmt
> > + && integer_pow2p (gimple_assign_rhs2 (srcstmt)))
> > +   {
> > + op0 = gimple_assign_rhs1 (srcstmt);
> > + op1 = gimple_assign_rhs2 (srcstmt);
> > +   }
> > + else
> > +   {
> > + op0 = arg0;
> > + op1 = wide_int_to_tree (TREE_TYPE (op0), nz);
> > +   }
> >   enum tree_code tcode = code == NE ? NE_EXPR : EQ_EXPR;
> >   type = lang_hooks.types.type_for_mode (mode, unsignedp);
> > - tree temp = fold_build2_loc (loc, BIT_AND_EXPR, TREE_TYPE (arg1),
> > -  gimple_assign_rhs1 (srcstmt),
> > -  gimple_assign_rhs2 (srcstmt));
> > + tree temp = fold_build2_loc (loc, BIT_AND_EXPR, TREE_TYPE (op0),
> > +  op0, op1);
> >   temp = fold_single_bit_test (loc, tcode, temp, arg1, type);
> >   if (temp)
> > return expand_expr (temp, target, VOIDmode, EXPAND_NORMAL);
>
> I wonder if, instead of expanding expand with these kind of tricks we
> want to instead
> add to ISEL and use direct optab IFNs for things we matched?  In
> particular I think
> we do want to get rid of TER but the above adds another use of 
> get_def_for_expr.

The above does not add another at all. It was there before, it just
moves it around slightly. Instead we depend on the non-zero bits to be
correct before even trying get_def_for_expr .
The get_def_for_expr is there to remove the & if it can be ter'ed.

>
> As Jeff says the above doesn't look like it includes costing so that would be 
> an
> argument to make it a generic match.pd transform (it appears to be "simpler")?

For the TER case, it would be the same number of gimple instructions so
that can happen if we want
t = a & CST
result = t != 0
vs:
t1 = BIT_FIELD_REF 
result = (bool)t1

For the non-TER case (which is what this patch is trying to solve),
we just have `t != 0` (where t has a non-zero value of CST) so it might increase
the number of gimple instructions by 1.

Is that ok? Or should that still happen in expand only.

The cost issue between a != 0 vs bit_extraction (for the non-ter case)
is something which I will be solving next weekend.

>
> Richard.
>
> > --
> > 2.31.1
> >


Re: [PATCH] c-family: implement -ffp-contract=on

2023-05-22 Thread Alexander Monakov via Gcc-patches


On Mon, 22 May 2023, Richard Biener wrote:

> On Thu, May 18, 2023 at 11:04 PM Alexander Monakov via Gcc-patches
>  wrote:
> >
> > Implement -ffp-contract=on for C and C++ without changing default
> > behavior (=off for -std=cNN, =fast for C++ and -std=gnuNN).
> 
> The documentation changes mention the defaults are changed for
> standard modes, I suppose you want to remove that hunk.

No, the current documentation is incomplete, and that hunk extends it
to match the current GCC behavior. Should I break it out to a separate
patch? I see this drive-by fix could look confusing — sorry about that.

> it would be possible to do
> 
>   *expr_p = build_call_expr_internal (ifn, type, ops[0], ops[1], ops[2]);
>   return GS_OK;
> 
> and not worry about temporary creation and gimplifying of the operands.
> That would in theory also leave the possibility to do this during
> genericization instead (and avoid the guard against late invocation of
> the hook).

Ah, no, I deliberately decided against that, because that way we would go
via gimplify_arg, which would emit all side effects in *pre_p. That seems
wrong if arguments had side-effects that should go in *post_p.

Thanks.
Alexander

> Otherwise it looks OK, but I'll let frontend maintainers have a chance to look
> as well.
> 
> Thanks for tackling this long-standing issue.
> Richard.


[testsuite,committed] PR testsuite/52641

2023-05-22 Thread Georg-Johann Lay
Applied more annotations to reduce testsuite fallout for 16-bit int / 
pointer targets.


https://gcc.gnu.org/r14-1074

Most of the affected tests use constants not suitable for 16-bit int, 
bit-fields wider than 16 bits, etc.


Johann

--

commit 9f5065094c9632a50bea604d5896a139609e50cf
Author: Georg-Johann Lay 
Date:   Mon May 22 16:47:56 2023 +0200

testsuite/52641: Fix tests that fail for 16-bit int / pointer targets.

gcc/testsuite/
PR testsuite/52641
* c-c++-common/pr19807-2.c: Use __SIZEOF_INT__ instead of 4.
* gcc.c-torture/compile/pr103813.c: Require size32plus.
* gcc.c-torture/execute/pr108498-2.c: Same.
* gcc.c-torture/compile/pr96426.c: Condition on
__SIZEOF_LONG_LONG__ == __SIZEOF_DOUBLE__.
* gcc.c-torture/execute/pr103417.c: Require int32plus.
* gcc.dg/pr104198.c: Same.
* gcc.dg/pr21137.c: Same.
* gcc.dg/pr88905.c: Same.
* gcc.dg/pr90838.c: Same.
* gcc.dg/pr97317.c: Same.
* gcc.dg/pr100292.c: Require int32.
* gcc.dg/pr101008.c: Same.
* gcc.dg/pr96542.c: Same.
* gcc.dg/pr96674.c: Same.
* gcc.dg/pr97750.c: Require ptr_eq_long.

diff --git a/gcc/testsuite/c-c++-common/pr19807-2.c 
b/gcc/testsuite/c-c++-common/pr19807-2.c

index 529b9c97322..29a370304d3 100644
--- a/gcc/testsuite/c-c++-common/pr19807-2.c
+++ b/gcc/testsuite/c-c++-common/pr19807-2.c
@@ -6,7 +6,7 @@ int i;
 int main()
 {
   int a[4];
-  if ((char*)&a[1] + 4*i + 4 != (char*)&a[i+2])
+  if ((char*)&a[1] + __SIZEOF_INT__*i + __SIZEOF_INT__ != (char*)&a[i+2])
 link_error();
   return 0;
 }
diff --git a/gcc/testsuite/gcc.c-torture/compile/pr103813.c 
b/gcc/testsuite/gcc.c-torture/compile/pr103813.c

index b3fc066beed..0aa64fb3152 100644
--- a/gcc/testsuite/gcc.c-torture/compile/pr103813.c
+++ b/gcc/testsuite/gcc.c-torture/compile/pr103813.c
@@ -1,4 +1,5 @@
 /* PR middle-end/103813 */
+/* { dg-require-effective-target size32plus } */

 struct A { char b; char c[0x2100]; };
 struct A d;
diff --git a/gcc/testsuite/gcc.c-torture/compile/pr96426.c 
b/gcc/testsuite/gcc.c-torture/compile/pr96426.c

index bd573fe5366..fdb441efc10 100644
--- a/gcc/testsuite/gcc.c-torture/compile/pr96426.c
+++ b/gcc/testsuite/gcc.c-torture/compile/pr96426.c
@@ -1,5 +1,7 @@
 /* PR middle-end/96426 */

+#if __SIZEOF_LONG_LONG__ == __SIZEOF_DOUBLE__
+
 typedef long long V __attribute__((vector_size(16)));
 typedef double W __attribute__((vector_size(16)));

@@ -8,3 +10,5 @@ foo (V *v)
 {
   __builtin_convertvector (*v, W);
 }
+
+#endif
diff --git a/gcc/testsuite/gcc.c-torture/execute/pr103417.c 
b/gcc/testsuite/gcc.c-torture/execute/pr103417.c

index 0fef8908036..ea4b99030a5 100644
--- a/gcc/testsuite/gcc.c-torture/execute/pr103417.c
+++ b/gcc/testsuite/gcc.c-torture/execute/pr103417.c
@@ -1,4 +1,5 @@
 /* PR tree-optimization/103417 */
+/* { dg-require-effective-target int32plus } */

 struct { int a : 8; int b : 24; } c = { 0, 1 };

diff --git a/gcc/testsuite/gcc.c-torture/execute/pr108498-2.c 
b/gcc/testsuite/gcc.c-torture/execute/pr108498-2.c

index ad930488c33..fdd628cbc86 100644
--- a/gcc/testsuite/gcc.c-torture/execute/pr108498-2.c
+++ b/gcc/testsuite/gcc.c-torture/execute/pr108498-2.c
@@ -1,4 +1,5 @@
 /* PR tree-optimization/108498 */
+/* { dg-require-effective-target int32plus } */

 struct U { char c[16]; };
 struct V { char c[16]; };
diff --git a/gcc/testsuite/gcc.dg/pr100292.c 
b/gcc/testsuite/gcc.dg/pr100292.c

index 675a60c3412..147c9324d81 100644
--- a/gcc/testsuite/gcc.dg/pr100292.c
+++ b/gcc/testsuite/gcc.dg/pr100292.c
@@ -1,4 +1,5 @@
 /* { dg-do compile } */
+/* { dg-require-effective-target int32 } */

 typedef unsigned char __attribute__((__vector_size__ (4))) V;

diff --git a/gcc/testsuite/gcc.dg/pr101008.c 
b/gcc/testsuite/gcc.dg/pr101008.c

index c06208d3425..8229769c6ac 100644
--- a/gcc/testsuite/gcc.dg/pr101008.c
+++ b/gcc/testsuite/gcc.dg/pr101008.c
@@ -1,6 +1,7 @@
 /* PR rtl-optimization/101008 */
 /* { dg-do compile } */
 /* { dg-options "-O2 -g" } */
+/* { dg-require-effective-target int32 } */

 typedef unsigned __attribute__((__vector_size__(32))) U;
 typedef unsigned __attribute__((__vector_size__(16))) V;
diff --git a/gcc/testsuite/gcc.dg/pr104198.c 
b/gcc/testsuite/gcc.dg/pr104198.c

index bfc7a777184..de86f49c9dc 100644
--- a/gcc/testsuite/gcc.dg/pr104198.c
+++ b/gcc/testsuite/gcc.dg/pr104198.c
@@ -3,6 +3,7 @@

 /* { dg-do run } */
 /* { dg-options "-O2 -std=c99" } */
+/* { dg-require-effective-target int32plus } */

 #include 
 #include 
diff --git a/gcc/testsuite/gcc.dg/pr21137.c b/gcc/testsuite/gcc.dg/pr21137.c
index 6d73deaee6c..199555a5017 100644
--- a/gcc/testsuite/gcc.dg/pr21137.c
+++ b/gcc/testsuite/gcc.dg/pr21137.c
@@ -1,5 +1,6 @@
 /* { dg-do compile } */
 /* { dg-options "-O2 -fdump-tree-optimized" } */
+/* { dg-require-effective-target int32plus } */

 void foo();

diff --git a/gcc/testsuite/gcc.dg/pr88905.c 

[avr,testsuite,committed] Skip test that fail for avr for this or that reason.

2023-05-22 Thread Georg-Johann Lay

This annotates some tests that won't work for AVR like:

* asm goto with output reload (AVR is not lra).

* Using a program address as a ram address.

* Float related stuff: AVR double is 32-bit, and long double
  is incomplete (some functions missing, no signed zeros, etc.)

Applied as obvious.

Johann

--

Skip some tests that won't work for target AVR.

gcc/testsuite/
* lib/target-supports.exp (check_effective_target_lra) 
[avr]: Return 0.

* gcc.dg/pr19402-2.c: Skip for avr.
* gcc.dg/pr86124.c: Same.
* gcc.dg/pr94291.c: Same.
* gcc.dg/torture/builtin-complex-1.c: Same.
* gcc.dg/torture/fp-int-convert-float32x-timode.c: Same.
* gcc.dg/torture/fp-int-convert-float32x.c: Same.
* gcc.dg/torture/fp-int-convert-float64-timode.c: Same.
* gcc.dg/torture/fp-int-convert-float64.c: Same.
* gcc.dg/torture/fp-int-convert-long-double.c: Same.
* gcc.dg/torture/fp-int-convert-timode.c: Same.
* c-c++-common/torture/builtin-convertvector-1.c: Same.
* c-c++-common/torture/complex-sign-add.c: Same.
* c-c++-common/torture/complex-sign-mixed-add.c: Same.
* c-c++-common/torture/complex-sign-mixed-div.c: Same.
* c-c++-common/torture/complex-sign-mixed-mul.c: Same.
* c-c++-common/torture/complex-sign-mixed-sub.c: Same.
* c-c++-common/torture/complex-sign-mul-minus-one.c: Same.
* c-c++-common/torture/complex-sign-mul-one.c: Same.
* c-c++-common/torture/complex-sign-mul.c: Same.
* c-c++-common/torture/complex-sign-sub.c: Same.

diff --git 
a/gcc/testsuite/c-c++-common/torture/builtin-convertvector-1.c 
b/gcc/testsuite/c-c++-common/torture/builtin-convertvector-1.c

index 347dda7692d..fababf1a9eb 100644
--- a/gcc/testsuite/c-c++-common/torture/builtin-convertvector-1.c
+++ b/gcc/testsuite/c-c++-common/torture/builtin-convertvector-1.c
@@ -1,3 +1,5 @@
+/* { dg-skip-if "double support is incomplete" { "avr-*-*" } } */
+
 extern
 #ifdef __cplusplus
 "C"
diff --git a/gcc/testsuite/c-c++-common/torture/complex-sign-add.c 
b/gcc/testsuite/c-c++-common/torture/complex-sign-add.c

index e81223224dc..c1e7886a0df 100644
--- a/gcc/testsuite/c-c++-common/torture/complex-sign-add.c
+++ b/gcc/testsuite/c-c++-common/torture/complex-sign-add.c
@@ -2,6 +2,7 @@
addition.  */
 /* { dg-do run } */
 /* { dg-options "-std=gnu99" { target c } } */
+/* { dg-skip-if "double support is incomplete" { "avr-*-*" } } */

 #include "complex-sign.h"

diff --git a/gcc/testsuite/c-c++-common/torture/complex-sign-mixed-add.c 
b/gcc/testsuite/c-c++-common/torture/complex-sign-mixed-add.c

index a209161e157..36d305baf53 100644
--- a/gcc/testsuite/c-c++-common/torture/complex-sign-mixed-add.c
+++ b/gcc/testsuite/c-c++-common/torture/complex-sign-mixed-add.c
@@ -3,6 +3,7 @@
 /* { dg-do run } */
 /* { dg-options "-std=gnu99" { target c } } */
 /* { dg-skip-if "ptx can elide zero additions" { "nvptx-*-*" } { "-O0" 
} { "" } } */

+/* { dg-skip-if "double support is incomplete" { "avr-*-*" } } */

 #include "complex-sign.h"

diff --git a/gcc/testsuite/c-c++-common/torture/complex-sign-mixed-div.c 
b/gcc/testsuite/c-c++-common/torture/complex-sign-mixed-div.c

index f7ee48341c0..a37074bb3b9 100644
--- a/gcc/testsuite/c-c++-common/torture/complex-sign-mixed-div.c
+++ b/gcc/testsuite/c-c++-common/torture/complex-sign-mixed-div.c
@@ -2,6 +2,7 @@
division.  */
 /* { dg-do run } */
 /* { dg-options "-std=gnu99" { target c } } */
+/* { dg-skip-if "double support is incomplete" { "avr-*-*" } } */

 #include "complex-sign.h"

diff --git a/gcc/testsuite/c-c++-common/torture/complex-sign-mixed-mul.c 
b/gcc/testsuite/c-c++-common/torture/complex-sign-mixed-mul.c

index 02f936b75bd..1e528b986c5 100644
--- a/gcc/testsuite/c-c++-common/torture/complex-sign-mixed-mul.c
+++ b/gcc/testsuite/c-c++-common/torture/complex-sign-mixed-mul.c
@@ -2,6 +2,7 @@
multiplication.  */
 /* { dg-do run } */
 /* { dg-options "-std=gnu99" { target c } } */
+/* { dg-skip-if "double support is incomplete" { "avr-*-*" } } */

 #include "complex-sign.h"

diff --git a/gcc/testsuite/c-c++-common/torture/complex-sign-mixed-sub.c 
b/gcc/testsuite/c-c++-common/torture/complex-sign-mixed-sub.c

index 02ab4db247c..63c75dfdff2 100644
--- a/gcc/testsuite/c-c++-common/torture/complex-sign-mixed-sub.c
+++ b/gcc/testsuite/c-c++-common/torture/complex-sign-mixed-sub.c
@@ -3,6 +3,7 @@
 /* { dg-do run } */
 /* { dg-options "-std=gnu99" { target c } } */
 /* { dg-skip-if "ptx can elide zero additions" { "nvptx-*-*" } { "-O0" 
} { "" } } */

+/* { dg-skip-if "double support is incomplete" { "avr-*-*" } } */

 #include "complex-sign.h"

diff --git 
a/gcc/testsuite/c-c++-common/torture/complex-sign-mul-minus-one.c 
b/gcc/testsuite/c-c++-common/torture/complex-sign-mul-minus-one.c

index 05cc4fabea4..f8abdd00e2e 100644
--- 

[COMMITTED] i386: Account for the memory read in V*QImode multiplication sequences

2023-05-22 Thread Uros Bizjak via Gcc-patches
Add the cost of a memory read to the cost of V*QImode vector mult sequences.

gcc/ChangeLog:

* config/i386/i386.cc (ix86_multiplication_cost): Add
the cost of a memory read to the cost of V?QImode sequences.

Bootstrapped and regression tested on x86_64-linux-gnu {,-m32}.

Uros.
diff --git a/gcc/config/i386/i386.cc b/gcc/config/i386/i386.cc
index 6a4b3326219..a36e625342d 100644
--- a/gcc/config/i386/i386.cc
+++ b/gcc/config/i386/i386.cc
@@ -20463,27 +20463,42 @@ ix86_multiplication_cost (const struct 
processor_costs *cost,
   {
   case V4QImode:
   case V8QImode:
-   /* Partial V*QImode is emulated with 4-5 insns.  */
-   if ((TARGET_AVX512BW && TARGET_AVX512VL) || TARGET_XOP)
+   /* Partial V*QImode is emulated with 4-6 insns.  */
+   if (TARGET_AVX512BW && TARGET_AVX512VL)
  return ix86_vec_cost (mode, cost->mulss + cost->sse_op * 3);
+   else if (TARGET_AVX2)
+ return ix86_vec_cost (mode, cost->mulss + cost->sse_op * 5);
+   else if (TARGET_XOP)
+ return (ix86_vec_cost (mode, cost->mulss + cost->sse_op * 3)
+ + cost->sse_load[2]);
else
- return ix86_vec_cost (mode, cost->mulss + cost->sse_op * 4);
+ return (ix86_vec_cost (mode, cost->mulss + cost->sse_op * 4)
+ + cost->sse_load[2]);
 
   case V16QImode:
/* V*QImode is emulated with 4-11 insns.  */
if (TARGET_AVX512BW && TARGET_AVX512VL)
  return ix86_vec_cost (mode, cost->mulss + cost->sse_op * 3);
+   else if (TARGET_AVX2)
+ return ix86_vec_cost (mode, cost->mulss * 2 + cost->sse_op * 8);
else if (TARGET_XOP)
- return ix86_vec_cost (mode, cost->mulss * 2 + cost->sse_op * 5);
-   /* FALLTHRU */
+ return (ix86_vec_cost (mode, cost->mulss * 2 + cost->sse_op * 5)
+ + cost->sse_load[2]);
+   else
+ return (ix86_vec_cost (mode, cost->mulss * 2 + cost->sse_op * 7)
+ + cost->sse_load[2]);
+
   case V32QImode:
-   if (TARGET_AVX512BW && mode == V32QImode)
+   if (TARGET_AVX512BW)
  return ix86_vec_cost (mode, cost->mulss + cost->sse_op * 3);
else
- return ix86_vec_cost (mode, cost->mulss * 2 + cost->sse_op * 7);
+ return (ix86_vec_cost (mode, cost->mulss * 2 + cost->sse_op * 7)
+ + cost->sse_load[3] * 2);
 
   case V64QImode:
-   return ix86_vec_cost (mode, cost->mulss * 2 + cost->sse_op * 9);
+   return (ix86_vec_cost (mode, cost->mulss * 2 + cost->sse_op * 9)
+   + cost->sse_load[3] * 2
+   + cost->sse_load[4] * 2);
 
   case V4SImode:
/* pmulld is used in this case. No emulation is needed.  */


Re: [PATCH 0/7] openmp: OpenMP 5.1 loop transformation directives

2023-05-22 Thread Jakub Jelinek via Gcc-patches
On Wed, May 17, 2023 at 01:55:00PM +0200, Frederik Harwath wrote:
> Thanks for the explanation. But actually doing this would require a
> complete rewrite which would almost certainly imply that mainline GCC
> would not support the loop transformations for a long time.

I don't think it needs complete rewrite, the change to use
OMP_UNROLL/OMP_TILE should actually simplify stuff when you already have
some other extra construct to handle the clauses if it isn't nested into
something else, so I wouldn't expect it needs more than 2-3 hours of work.
It is true that doing the transformation on trees rather than high gimple
is something different, but again it doesn't require everything to be
rewritten and we have code to do code copying both on trees and high and low
gimple in tree-inline.cc, so the unrolling can just use different APIs
to perform it.

I'd still prefer to do it like that, I think it will pay back in
maintenance costs.

If you don't get to this within say 2 weeks, I'll try to do the conversion
myself.

Jakub



RE: [PATCH] RISC-V: Add "m_" prefix for private member

2023-05-22 Thread Li, Pan2 via Gcc-patches
Committed, thanks Kito.

Pan

-Original Message-
From: Gcc-patches  On Behalf 
Of Kito Cheng via Gcc-patches
Sent: Monday, May 22, 2023 9:49 PM
To: juzhe.zh...@rivai.ai
Cc: gcc-patches@gcc.gnu.org; kito.ch...@gmail.com; pal...@dabbelt.com; 
pal...@rivosinc.com; jeffreya...@gmail.com; rdapp@gmail.com
Subject: Re: [PATCH] RISC-V: Add "m_" prefix for private member

LGTM

On Mon, May 22, 2023 at 8:10 PM  wrote:
>
> From: Juzhe-Zhong 
>
> The current framework is hard to maintain and hard to use for
> possible future auto-vectorization patterns.
>
> We will need to keep adding more helpers and arguments while adding
> auto-vectorization support. We should refactor the framework now
> for future use, since we don't support many
> auto-vectorization patterns yet.
>
> Start with this simple patch, which adds the "m_" prefix to the
> private members.
>
> gcc/ChangeLog:
>
> * config/riscv/riscv-v.cc: Add "m_" prefix.
>
> ---
>  gcc/config/riscv/riscv-v.cc | 24 
>  1 file changed, 12 insertions(+), 12 deletions(-)
>
> diff --git a/gcc/config/riscv/riscv-v.cc b/gcc/config/riscv/riscv-v.cc 
> index d65e7300303..e0b19bc1754 100644
> --- a/gcc/config/riscv/riscv-v.cc
> +++ b/gcc/config/riscv/riscv-v.cc
> @@ -66,7 +66,7 @@ const_vlmax_p (machine_mode mode)  template  MAX_OPERANDS> class insn_expander  {
>  public:
> -  insn_expander () : m_opno (0), has_dest(false) {}
> +  insn_expander () : m_opno (0), m_has_dest_p(false) {}
>void add_output_operand (rtx x, machine_mode mode)
>{
>  create_output_operand (_ops[m_opno++], x, mode); @@ -99,41 
> +99,41 @@ public:
>
>void set_dest_and_mask (rtx mask, rtx dest, machine_mode mask_mode)
>{
> -dest_mode = GET_MODE (dest);
> -has_dest = true;
> +m_dest_mode = GET_MODE (dest);
> +m_has_dest_p = true;
>
> -add_output_operand (dest, dest_mode);
> +add_output_operand (dest, m_dest_mode);
>
>  if (mask)
>add_input_operand (mask, GET_MODE (mask));
>  else
>add_all_one_mask_operand (mask_mode);
>
> -add_vundef_operand (dest_mode);
> +add_vundef_operand (m_dest_mode);
>}
>
>void set_len_and_policy (rtx len, bool force_vlmax = false)
>  {
>bool vlmax_p = force_vlmax || !len;
> -  gcc_assert (has_dest);
> +  gcc_assert (m_has_dest_p);
>
> -  if (vlmax_p && const_vlmax_p (dest_mode))
> +  if (vlmax_p && const_vlmax_p (m_dest_mode))
> {
>   /* Optimize VLS-VLMAX code gen, we can use vsetivli instead of the
>  vsetvli to obtain the value of vlmax.  */
> - poly_uint64 nunits = GET_MODE_NUNITS (dest_mode);
> + poly_uint64 nunits = GET_MODE_NUNITS (m_dest_mode);
>   len = gen_int_mode (nunits, Pmode);
>   vlmax_p = false; /* It has became NONVLMAX now.  */
> }
>else if (!len)
> {
>   len = gen_reg_rtx (Pmode);
> - emit_vlmax_vsetvl (dest_mode, len);
> + emit_vlmax_vsetvl (m_dest_mode, len);
> }
>
>add_input_operand (len, Pmode);
>
> -  if (GET_MODE_CLASS (dest_mode) != MODE_VECTOR_BOOL)
> +  if (GET_MODE_CLASS (m_dest_mode) != MODE_VECTOR_BOOL)
> add_policy_operand (get_prefer_tail_policy (), 
> get_prefer_mask_policy ());
>
>add_avl_type_operand (vlmax_p ? avl_type::VLMAX : 
> avl_type::NONVLMAX); @@ -152,8 +152,8 @@ public:
>
>  private:
>int m_opno;
> -  bool has_dest;
> -  machine_mode dest_mode;
> +  bool m_has_dest_p;
> +  machine_mode m_dest_mode;
>expand_operand m_ops[MAX_OPERANDS];  };
>
> --
> 2.36.3
>


Re: [PATCH] add glibc-stdint.h to vax and lm32 linux target (PR target/105525)

2023-05-22 Thread Jan-Benedict Glaw
Hi!

On Mon, 2023-05-22 14:10:48 +0100, Maciej W. Rozycki  wrote:
> On Fri, 19 May 2023, Mikael Pettersson wrote:
> > The background is that I maintain a script to build GCC-based crosses to
> > as many targets as I can, currently it supports 78 distinct processors and
> > 82 triplets (four processors have multiple triplets). I only check that I 
> > can
> > build the toolchains (full linux-gnu ones where possible).
> 
>  Great work, thanks!

I'd be very much interested in running your script as one build
variant for my http://toolchain.lug-owl.de/ efforts. Is it available
somewhere? That would be nice!

MfG, JBG

-- 


signature.asc
Description: PGP signature


Re: [PATCH] RISC-V: Add "m_" prefix for private member

2023-05-22 Thread Kito Cheng via Gcc-patches
LGTM

On Mon, May 22, 2023 at 8:10 PM  wrote:
>
> From: Juzhe-Zhong 
>
> The current framework is hard to maintain and
> hard to use for possible future auto-vectorization patterns.
>
> We will need to keep adding more helpers and arguments while adding
> auto-vectorization support. We should refactor the framework
> now for future use, since we don't support many auto-vectorization
> patterns yet.
>
> Start with this simple patch, which adds the "m_" prefix to the
> private members.
>
> gcc/ChangeLog:
>
> * config/riscv/riscv-v.cc: Add "m_" prefix.
>
> ---
>  gcc/config/riscv/riscv-v.cc | 24 
>  1 file changed, 12 insertions(+), 12 deletions(-)
>
> diff --git a/gcc/config/riscv/riscv-v.cc b/gcc/config/riscv/riscv-v.cc
> index d65e7300303..e0b19bc1754 100644
> --- a/gcc/config/riscv/riscv-v.cc
> +++ b/gcc/config/riscv/riscv-v.cc
> @@ -66,7 +66,7 @@ const_vlmax_p (machine_mode mode)
>  template  class insn_expander
>  {
>  public:
> -  insn_expander () : m_opno (0), has_dest(false) {}
> +  insn_expander () : m_opno (0), m_has_dest_p(false) {}
>void add_output_operand (rtx x, machine_mode mode)
>{
>  create_output_operand (&m_ops[m_opno++], x, mode);
> @@ -99,41 +99,41 @@ public:
>
>void set_dest_and_mask (rtx mask, rtx dest, machine_mode mask_mode)
>{
> -dest_mode = GET_MODE (dest);
> -has_dest = true;
> +m_dest_mode = GET_MODE (dest);
> +m_has_dest_p = true;
>
> -add_output_operand (dest, dest_mode);
> +add_output_operand (dest, m_dest_mode);
>
>  if (mask)
>add_input_operand (mask, GET_MODE (mask));
>  else
>add_all_one_mask_operand (mask_mode);
>
> -add_vundef_operand (dest_mode);
> +add_vundef_operand (m_dest_mode);
>}
>
>void set_len_and_policy (rtx len, bool force_vlmax = false)
>  {
>bool vlmax_p = force_vlmax || !len;
> -  gcc_assert (has_dest);
> +  gcc_assert (m_has_dest_p);
>
> -  if (vlmax_p && const_vlmax_p (dest_mode))
> +  if (vlmax_p && const_vlmax_p (m_dest_mode))
> {
>   /* Optimize VLS-VLMAX code gen, we can use vsetivli instead of the
>  vsetvli to obtain the value of vlmax.  */
> - poly_uint64 nunits = GET_MODE_NUNITS (dest_mode);
> + poly_uint64 nunits = GET_MODE_NUNITS (m_dest_mode);
>   len = gen_int_mode (nunits, Pmode);
>   vlmax_p = false; /* It has became NONVLMAX now.  */
> }
>else if (!len)
> {
>   len = gen_reg_rtx (Pmode);
> - emit_vlmax_vsetvl (dest_mode, len);
> + emit_vlmax_vsetvl (m_dest_mode, len);
> }
>
>add_input_operand (len, Pmode);
>
> -  if (GET_MODE_CLASS (dest_mode) != MODE_VECTOR_BOOL)
> +  if (GET_MODE_CLASS (m_dest_mode) != MODE_VECTOR_BOOL)
> add_policy_operand (get_prefer_tail_policy (), get_prefer_mask_policy 
> ());
>
>add_avl_type_operand (vlmax_p ? avl_type::VLMAX : avl_type::NONVLMAX);
> @@ -152,8 +152,8 @@ public:
>
>  private:
>int m_opno;
> -  bool has_dest;
> -  machine_mode dest_mode;
> +  bool m_has_dest_p;
> +  machine_mode m_dest_mode;
>expand_operand m_ops[MAX_OPERANDS];
>  };
>
> --
> 2.36.3
>


Re: [PATCH 2/2] vect: Enhance cost evaluation in vect_transform_slp_perm_load_1

2023-05-22 Thread Richard Biener via Gcc-patches
On Wed, May 17, 2023 at 8:15 AM Kewen.Lin  wrote:
>
> Hi,
>
> Following Richi's suggestion in [1], I'm working on deferring
> cost evaluation next to the transformation, this patch is
> to enhance function vect_transform_slp_perm_load_1 which
> could under-cost for vector permutation, since the costing
> doesn't try to consider nvectors_per_build, it's inconsistent
> with the transformation part.
>
> Bootstrapped and regtested on x86_64-redhat-linux,
> aarch64-linux-gnu and powerpc64{,le}-linux-gnu.
>
> Is it ok for trunk?
>
> [1] https://gcc.gnu.org/pipermail/gcc-patches/2021-January/563624.html
>
> BR,
> Kewen
> -
> gcc/ChangeLog:
>
> * tree-vect-slp.cc (vect_transform_slp_perm_load_1): Adjust the
> calculation on n_perms by considering nvectors_per_build.
>
> gcc/testsuite/ChangeLog:
>
> * gcc.dg/vect/costmodel/ppc/costmodel-slp-perm.c: New test.
> ---
>  .../vect/costmodel/ppc/costmodel-slp-perm.c   | 23 +++
>  gcc/tree-vect-slp.cc  | 66 ++-
>  2 files changed, 57 insertions(+), 32 deletions(-)
>  create mode 100644 
> gcc/testsuite/gcc.dg/vect/costmodel/ppc/costmodel-slp-perm.c
>
> diff --git a/gcc/testsuite/gcc.dg/vect/costmodel/ppc/costmodel-slp-perm.c 
> b/gcc/testsuite/gcc.dg/vect/costmodel/ppc/costmodel-slp-perm.c
> new file mode 100644
> index 000..e5c4dceddfb
> --- /dev/null
> +++ b/gcc/testsuite/gcc.dg/vect/costmodel/ppc/costmodel-slp-perm.c
> @@ -0,0 +1,23 @@
> +/* { dg-do compile } */
> +/* { dg-require-effective-target vect_int } */
> +/* { dg-require-effective-target powerpc_p9vector_ok } */
> +/* Specify power9 to ensure the vectorization is profitable
> +   and test point stands, otherwise it could be not profitable
> +   to vectorize.  */
> +/* { dg-additional-options "-mdejagnu-cpu=power9 -mpower9-vector" } */
> +
> +/* Verify we cost the exact count for required vec_perm.  */
> +
> +int x[1024], y[1024];
> +
> +void
> +foo ()
> +{
> +  for (int i = 0; i < 512; ++i)
> +{
> +  x[2 * i] = y[1023 - (2 * i)];
> +  x[2 * i + 1] = y[1023 - (2 * i + 1)];
> +}
> +}
> +
> +/* { dg-final { scan-tree-dump-times "2 times vec_perm" 1 "vect" } } */
> diff --git a/gcc/tree-vect-slp.cc b/gcc/tree-vect-slp.cc
> index e5c9d7e766e..af9a6dd4fa9 100644
> --- a/gcc/tree-vect-slp.cc
> +++ b/gcc/tree-vect-slp.cc
> @@ -8115,12 +8115,12 @@ vect_transform_slp_perm_load_1 (vec_info *vinfo, 
> slp_tree node,
>
>mode = TYPE_MODE (vectype);
>poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
> +  unsigned int nstmts = SLP_TREE_NUMBER_OF_VEC_STMTS (node);
>
>/* Initialize the vect stmts of NODE to properly insert the generated
>   stmts later.  */
>if (! analyze_only)
> -for (unsigned i = SLP_TREE_VEC_STMTS (node).length ();
> -i < SLP_TREE_NUMBER_OF_VEC_STMTS (node); i++)
> +for (unsigned i = SLP_TREE_VEC_STMTS (node).length (); i < nstmts; i++)
>SLP_TREE_VEC_STMTS (node).quick_push (NULL);
>
>/* Generate permutation masks for every NODE. Number of masks for each NODE
> @@ -8161,7 +8161,10 @@ vect_transform_slp_perm_load_1 (vec_info *vinfo, 
> slp_tree node,
>  (b) the permutes only need a single vector input.  */
>mask.new_vector (nunits, group_size, 3);
>nelts_to_build = mask.encoded_nelts ();
> -  nvectors_per_build = SLP_TREE_VEC_STMTS (node).length ();
> +  /* It's possible to obtain zero nstmts during analyze_only, so make
> +it at least one to ensure the later computation for n_perms
> +proceed.  */
> +  nvectors_per_build = nstmts > 0 ? nstmts : 1;
>in_nlanes = DR_GROUP_SIZE (stmt_info) * 3;
>  }
>else
> @@ -8252,40 +8255,39 @@ vect_transform_slp_perm_load_1 (vec_info *vinfo, 
> slp_tree node,
>   return false;
> }
>
> - ++*n_perms;
> -
> + tree mask_vec = NULL_TREE;
>   if (!analyze_only)
> -   {
> - tree mask_vec = vect_gen_perm_mask_checked (vectype, 
> indices);
> +   mask_vec = vect_gen_perm_mask_checked (vectype, indices);
>
> - if (second_vec_index == -1)
> -   second_vec_index = first_vec_index;
> + if (second_vec_index == -1)
> +   second_vec_index = first_vec_index;
>
> - for (unsigned int ri = 0; ri < nvectors_per_build; ++ri)
> + for (unsigned int ri = 0; ri < nvectors_per_build; ++ri)
> +   {
> + ++*n_perms;

So the "real" change is doing

  *n_perms += nvectors_per_build;

and *n_perms was unused when !analyze_only?  And since at
analysis time we (sometimes?) have zero nvectors you have to
fix that up above?  Which cases are those?

In principle the patch looks good to me.

Richard.

> + if (analyze_only)
> +   continue;
> + /* Generate the permute statement if necessary.  */
> + tree first_vec = 

Re: [PATCH v2] tree-ssa-sink: Improve code sinking pass

2023-05-22 Thread Richard Biener via Gcc-patches
On Fri, May 19, 2023 at 11:43 AM Ajit Agarwal  wrote:
>
> Hello All:
>
> This patch improves code sinking pass to sink statements before call to reduce
> register pressure.
> Review comments are incorporated.
>
> For example :
>
> void bar();
> int j;
> void foo(int a, int b, int c, int d, int e, int f)
> {
>   int l;
>   l = a + b + c + d +e + f;
>   if (a != 5)
> {
>   bar();
>   j = l;
> }
> }
>
> Code Sinking does the following:
>
> void bar();
> int j;
> void foo(int a, int b, int c, int d, int e, int f)
> {
>   int l;
>
>   if (a != 5)
> {
>   l = a + b + c + d +e + f;
>   bar();
>   j = l;
> }
> }
>
> Bootstrapped regtested on powerpc64-linux-gnu.
>
> Thanks & Regards
> Ajit
>
>
> tree-ssa-sink: Improve code sinking pass
>
> Code Sinking sinks the blocks after call. This increases register pressure
> for callee-saved registers. Improves code sinking before call in the use
> blocks or immediate dominator of use blocks.

Saw this update too late but I think all comments still apply.

> 2023-05-18  Ajit Kumar Agarwal  
>
> gcc/ChangeLog:
>
> * tree-ssa-sink.cc (statement_sink_location): Move statements before
> calls.
> (block_call_p): New function.
> (def_use_same_block): New function.
> (select_best_block): Add heuristics to select the best blocks in the
> immediate post dominator.
>
> gcc/testsuite/ChangeLog:
>
> * gcc.dg/tree-ssa/ssa-sink-20.c: New testcase.
> * gcc.dg/tree-ssa/ssa-sink-21.c: New testcase.
> ---
>  gcc/testsuite/gcc.dg/tree-ssa/ssa-sink-20.c |  15 ++
>  gcc/testsuite/gcc.dg/tree-ssa/ssa-sink-21.c |  19 +++
>  gcc/tree-ssa-sink.cc| 160 ++--
>  3 files changed, 183 insertions(+), 11 deletions(-)
>  create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/ssa-sink-20.c
>  create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/ssa-sink-21.c
>
> diff --git a/gcc/testsuite/gcc.dg/tree-ssa/ssa-sink-20.c 
> b/gcc/testsuite/gcc.dg/tree-ssa/ssa-sink-20.c
> new file mode 100644
> index 000..69fa6d32e7c
> --- /dev/null
> +++ b/gcc/testsuite/gcc.dg/tree-ssa/ssa-sink-20.c
> @@ -0,0 +1,15 @@
> +/* { dg-options "-O2 -fdump-tree-optimized -fdump-tree-sink-stats" } */
> +
> +void bar();
> +int j;
> +void foo(int a, int b, int c, int d, int e, int f)
> +{
> +  int l;
> +  l = a + b + c + d +e + f;
> +  if (a != 5)
> +{
> +  bar();
> +  j = l;
> +}
> +}
> +/* { dg-final { scan-tree-dump-times "Sunk statements: 5" 1 "sink" } } */
> diff --git a/gcc/testsuite/gcc.dg/tree-ssa/ssa-sink-21.c 
> b/gcc/testsuite/gcc.dg/tree-ssa/ssa-sink-21.c
> new file mode 100644
> index 000..b34959c8a4d
> --- /dev/null
> +++ b/gcc/testsuite/gcc.dg/tree-ssa/ssa-sink-21.c
> @@ -0,0 +1,19 @@
> +/* { dg-options "-O2 -fdump-tree-sink-stats" } */
> +
> +void bar();
> +int j, x;
> +void foo(int a, int b, int c, int d, int e, int f)
> +{
> +  int l;
> +  l = a + b + c + d +e + f;
> +  if (a != 5)
> +{
> +  bar();
> +  if (b != 3)
> +x = 3;
> +  else
> +x = 5;
> +  j = l;
> +}
> +}
> +/* { dg-final { scan-tree-dump-times "Sunk statements: 5" 1 "sink" } } */
> diff --git a/gcc/tree-ssa-sink.cc b/gcc/tree-ssa-sink.cc
> index b1ba7a2ad6c..091aa90d289 100644
> --- a/gcc/tree-ssa-sink.cc
> +++ b/gcc/tree-ssa-sink.cc
> @@ -171,6 +171,71 @@ nearest_common_dominator_of_uses (def_operand_p def_p, 
> bool *debug_stmts)
>return commondom;
>  }
>
> +/* Return TRUE if immediate uses of the defs in
> +   STMT occur in the same block as STMT, FALSE otherwise.  */
> +
> +bool
> +def_use_same_block (gimple *stmt)
> +{
> +  use_operand_p use;
> +  def_operand_p def;
> +  imm_use_iterator imm_iter;
> +  ssa_op_iter iter;
> +
> +  FOR_EACH_SSA_DEF_OPERAND (def, stmt, iter, SSA_OP_DEF)
> +{
> +  FOR_EACH_IMM_USE_FAST (use, imm_iter, DEF_FROM_PTR (def))
> +   {
> + if (is_gimple_debug (USE_STMT (use)))
> +   continue;
> +
> + if (use && (gimple_bb (USE_STMT (use)) == gimple_bb (stmt)))
> +   return true;
> +   }
> + }
> +  return false;
> +}
> +
> +/* Return TRUE if the block has only one call statement, FALSE otherwise. */
> +
> +bool
> +block_call_p (basic_block bb)
> +{
> +  int i = 0;
> +  bool is_call = false;
> +  gimple_stmt_iterator gsi = gsi_last_bb (bb);
> +  gimple *last_stmt = gsi_stmt (gsi);
> +
> +  if (last_stmt && gimple_code (last_stmt) == GIMPLE_COND)
> +{
> +  if (!gsi_end_p (gsi))
> +   gsi_prev (&gsi);
> +
> +   for (; !gsi_end_p (gsi);)
> +{
> +  gimple *stmt = gsi_stmt (gsi);
> +
> +  /* We have already seen a call.  */
> +  if (is_call)
> +return false;
> +
> +  if (is_gimple_call (stmt))
> +is_call = true;
> +  else
> +return false;
> +
> +  if (!gsi_end_p (gsi))
> +gsi_prev (&gsi);
> +
> +   ++i;
> +   }
> + }
> +  if (is_call && i == 1)
> +return 

Re: [PATCH 1/4] Missed opportunity to use [SU]ABD

2023-05-22 Thread Richard Biener via Gcc-patches
On Thu, May 18, 2023 at 7:59 PM Richard Sandiford
 wrote:
>
> Thanks for the update.  Some of these comments would have applied
> to the first version, so sorry for not catching them first time.
>
>  writes:
> > From: oluade01 
> >
> > This adds a recognition pattern for the non-widening
> > absolute difference (ABD).
> >
> > gcc/ChangeLog:
> >
> >   * doc/md.texi (sabd, uabd): Document them.
> >   * internal-fn.def (ABD): Use new optab.
> >   * optabs.def (sabd_optab, uabd_optab): New optabs,
> >   * tree-vect-patterns.cc (vect_recog_absolute_difference):
> >   Recognize the following idiom abs (a - b).
> >   (vect_recog_sad_pattern): Refactor to use
> >   vect_recog_absolute_difference.
> >   (vect_recog_abd_pattern): Use patterns found by
> >   vect_recog_absolute_difference to build a new ABD
> >   internal call.
> > ---
> >  gcc/doc/md.texi   |  10 ++
> >  gcc/internal-fn.def   |   3 +
> >  gcc/optabs.def|   2 +
> >  gcc/tree-vect-patterns.cc | 255 +-
> >  4 files changed, 239 insertions(+), 31 deletions(-)
> >
> > diff --git a/gcc/doc/md.texi b/gcc/doc/md.texi
> > index 
> > 07bf8bdebffb2e523f25a41f2b57e43c0276b745..3e65584d7efcd301f2c96a40edd82d30b84462b8
> >  100644
> > --- a/gcc/doc/md.texi
> > +++ b/gcc/doc/md.texi
> > @@ -5778,6 +5778,16 @@ Other shift and rotate instructions, analogous to the
> >  Vector shift and rotate instructions that take vectors as operand 2
> >  instead of a scalar type.
> >
> > +@cindex @code{uabd@var{m}} instruction pattern
> > +@cindex @code{sabd@var{m}} instruction pattern
> > +@item @samp{uabd@var{m}}, @samp{sabd@var{m}}
> > +Signed and unsigned absolute difference instructions.  These
> > +instructions find the difference between operands 1 and 2
> > +then return the absolute value.  A C code equivalent would be:
> > +@smallexample
> > +op0 = op0 > op1 ? op0 - op1 : op1 - op0;
>
> Should be:
>
>   op0 = op1 > op2 ? op1 - op2 : op2 - op1;
>
> since op0 is the output.
>
> > +@end smallexample
> > +
> >  @cindex @code{avg@var{m}3_floor} instruction pattern
> >  @cindex @code{uavg@var{m}3_floor} instruction pattern
> >  @item @samp{avg@var{m}3_floor}
> > diff --git a/gcc/internal-fn.def b/gcc/internal-fn.def
> > index 
> > 7fe742c2ae713e7152ab05cfdfba86e4e0aa3456..0f1724ecf37a31c231572edf90b5577e2d82f468
> >  100644
> > --- a/gcc/internal-fn.def
> > +++ b/gcc/internal-fn.def
> > @@ -167,6 +167,9 @@ DEF_INTERNAL_OPTAB_FN (FMS, ECF_CONST, fms, ternary)
> >  DEF_INTERNAL_OPTAB_FN (FNMA, ECF_CONST, fnma, ternary)
> >  DEF_INTERNAL_OPTAB_FN (FNMS, ECF_CONST, fnms, ternary)
> >
> > +DEF_INTERNAL_SIGNED_OPTAB_FN (ABD, ECF_CONST | ECF_NOTHROW, first,
> > +   sabd, uabd, binary)
> > +
> >  DEF_INTERNAL_SIGNED_OPTAB_FN (AVG_FLOOR, ECF_CONST | ECF_NOTHROW, first,
> > savg_floor, uavg_floor, binary)
> >  DEF_INTERNAL_SIGNED_OPTAB_FN (AVG_CEIL, ECF_CONST | ECF_NOTHROW, first,
> > diff --git a/gcc/optabs.def b/gcc/optabs.def
> > index 
> > 695f5911b300c9ca5737de9be809fa01aabe5e01..29bc92281a2175f898634cbe6af63c18021e5268
> >  100644
> > --- a/gcc/optabs.def
> > +++ b/gcc/optabs.def
> > @@ -359,6 +359,8 @@ OPTAB_D (mask_fold_left_plus_optab, 
> > "mask_fold_left_plus_$a")
> >  OPTAB_D (extract_last_optab, "extract_last_$a")
> >  OPTAB_D (fold_extract_last_optab, "fold_extract_last_$a")
> >
> > +OPTAB_D (uabd_optab, "uabd$a3")
> > +OPTAB_D (sabd_optab, "sabd$a3")
> >  OPTAB_D (savg_floor_optab, "avg$a3_floor")
> >  OPTAB_D (uavg_floor_optab, "uavg$a3_floor")
> >  OPTAB_D (savg_ceil_optab, "avg$a3_ceil")
> > diff --git a/gcc/tree-vect-patterns.cc b/gcc/tree-vect-patterns.cc
> > index 
> > a49b09539776c0056e77f99b10365d0a8747fbc5..50f1822f220c023027f4b0f777965f3757842fa2
> >  100644
> > --- a/gcc/tree-vect-patterns.cc
> > +++ b/gcc/tree-vect-patterns.cc
> > @@ -770,6 +770,93 @@ vect_split_statement (vec_info *vinfo, stmt_vec_info 
> > stmt2_info, tree new_rhs,
> >  }
> >  }
> >
> > +/* Look for the following pattern
> > + X = x[i]
> > + Y = y[i]
> > + DIFF = X - Y
> > + DAD = ABS_EXPR
> > +
> > +   ABS_STMT should point to a statement of code ABS_EXPR or ABSU_EXPR.
> > +   If REJECT_UNSIGNED is true it aborts if the type of ABS_STMT is 
> > unsigned.
> > +   HALF_TYPE and UNPROM will be set should the statement be found to
> > +   be a widened operation.
> > +   DIFF_OPRNDS will be set to the two inputs of the MINUS_EXPR preceding
> > +   ABS_STMT, otherwise it will be set the operations found by
> > +   vect_widened_op_tree.
> > + */
> > +static bool
> > +vect_recog_absolute_difference (vec_info *vinfo, gassign *abs_stmt,
> > + tree *half_type, bool reject_unsigned,
> > + vect_unpromoted_value unprom[2],
> > + tree diff_oprnds[2])
> > +{
> > +  if (!abs_stmt)
> > +return false;
> > +
> > +  /* FORNOW.  Can continue analyzing the 

Re: [PATCH] PR gcc/98350:Handle FMA friendly in reassoc pass

2023-05-22 Thread Richard Biener via Gcc-patches
On Wed, May 17, 2023 at 3:02 PM Cui, Lili  wrote:
>
> From: Lili Cui 
>
> Make some changes in reassoc pass to make it more friendly to fma pass later.
> Using FMA instead of mult + add reduces register pressure and the number of
> instructions retired.
>
> There are mainly two changes
> 1. Put no-mult ops and mult ops alternately at the end of the queue, which is
> conducive to generating more fma and reducing the loss of FMA when breaking
> the chain.
> 2. Rewrite the rewrite_expr_tree_parallel function to try to build parallel
> chains according to the given correlation width, keeping the FMA chance as
> much as possible.
>
> TEST1:
>
> float
> foo (float a, float b, float c, float d, float *e)
> {
>return  *e  + a * b + c * d ;
> }
>
> For "-Ofast -mfpmath=sse -mfma" GCC generates:
> vmulss  %xmm3, %xmm2, %xmm2
> vfmadd132ss %xmm1, %xmm2, %xmm0
> vaddss  (%rdi), %xmm0, %xmm0
> ret
>
> With this patch GCC generates:
> vfmadd213ss   (%rdi), %xmm1, %xmm0
> vfmadd231ss   %xmm2, %xmm3, %xmm0
> ret
>
> TEST2:
>
> for (int i = 0; i < N; i++)
> {
>   a[i] += b[i]* c[i] + d[i] * e[i] + f[i] * g[i] + h[i] * j[i] + k[i] * l[i] 
> + m[i]* o[i] + p[i];
> }
>
> For "-Ofast -mfpmath=sse -mfma"  GCC generates:
> vmovapd e(%rax), %ymm4
> vmulpd  d(%rax), %ymm4, %ymm3
> addq$32, %rax
> vmovapd c-32(%rax), %ymm5
> vmovapd j-32(%rax), %ymm6
> vmulpd  h-32(%rax), %ymm6, %ymm2
> vmovapd a-32(%rax), %ymm6
> vaddpd  p-32(%rax), %ymm6, %ymm0
> vmovapd g-32(%rax), %ymm7
> vfmadd231pd b-32(%rax), %ymm5, %ymm3
> vmovapd o-32(%rax), %ymm4
> vmulpd  m-32(%rax), %ymm4, %ymm1
> vmovapd l-32(%rax), %ymm5
> vfmadd231pd f-32(%rax), %ymm7, %ymm2
> vfmadd231pd k-32(%rax), %ymm5, %ymm1
> vaddpd  %ymm3, %ymm0, %ymm0
> vaddpd  %ymm2, %ymm0, %ymm0
> vaddpd  %ymm1, %ymm0, %ymm0
> vmovapd %ymm0, a-32(%rax)
> cmpq$8192, %rax
> jne .L4
> vzeroupper
> ret
>
> with this patch applied GCC breaks the chain with width = 2 and generates 6 
> fma:
>
> vmovapd a(%rax), %ymm2
> vmovapd c(%rax), %ymm0
> addq$32, %rax
> vmovapd e-32(%rax), %ymm1
> vmovapd p-32(%rax), %ymm5
> vmovapd g-32(%rax), %ymm3
> vmovapd j-32(%rax), %ymm6
> vmovapd l-32(%rax), %ymm4
> vmovapd o-32(%rax), %ymm7
> vfmadd132pd b-32(%rax), %ymm2, %ymm0
> vfmadd132pd d-32(%rax), %ymm5, %ymm1
> vfmadd231pd f-32(%rax), %ymm3, %ymm0
> vfmadd231pd h-32(%rax), %ymm6, %ymm1
> vfmadd231pd k-32(%rax), %ymm4, %ymm0
> vfmadd231pd m-32(%rax), %ymm7, %ymm1
> vaddpd  %ymm1, %ymm0, %ymm0
> vmovapd %ymm0, a-32(%rax)
> cmpq$8192, %rax
> jne .L2
> vzeroupper
> ret
>
> gcc/ChangeLog:
>
> PR gcc/98350
> * tree-ssa-reassoc.cc
> (rewrite_expr_tree_parallel): Rewrite this function.
> (rank_ops_for_fma): New.
> (reassociate_bb): Handle new function.
>
> gcc/testsuite/ChangeLog:
>
> PR gcc/98350
> * gcc.dg/pr98350-1.c: New test.
> * gcc.dg/pr98350-2.c: Ditto.
> ---
>  gcc/testsuite/gcc.dg/pr98350-1.c |  31 
>  gcc/testsuite/gcc.dg/pr98350-2.c |  11 ++
>  gcc/tree-ssa-reassoc.cc  | 256 +--
>  3 files changed, 215 insertions(+), 83 deletions(-)
>  create mode 100644 gcc/testsuite/gcc.dg/pr98350-1.c
>  create mode 100644 gcc/testsuite/gcc.dg/pr98350-2.c
>
> diff --git a/gcc/testsuite/gcc.dg/pr98350-1.c 
> b/gcc/testsuite/gcc.dg/pr98350-1.c
> new file mode 100644
> index 000..185511c5e0a
> --- /dev/null
> +++ b/gcc/testsuite/gcc.dg/pr98350-1.c
> @@ -0,0 +1,31 @@
> +/* { dg-do compile } */
> +/* { dg-options "-Ofast -mfpmath=sse -mfma -Wno-attributes " } */
> +
> +/* Test that the compiler properly optimizes multiply and add
> +   to generate more FMA instructions.  */
> +#define N 1024
> +double a[N];
> +double b[N];
> +double c[N];
> +double d[N];
> +double e[N];
> +double f[N];
> +double g[N];
> +double h[N];
> +double j[N];
> +double k[N];
> +double l[N];
> +double m[N];
> +double o[N];
> +double p[N];
> +
> +
> +void
> +foo (void)
> +{
> +  for (int i = 0; i < N; i++)
> +  {
> +a[i] += b[i] * c[i] + d[i] * e[i] + f[i] * g[i] + h[i] * j[i] + k[i] * 
> l[i] + m[i]* o[i] + p[i];
> +  }
> +}
> +/* { dg-final { scan-assembler-times "vfm" 6  } } */
> diff --git a/gcc/testsuite/gcc.dg/pr98350-2.c 
> b/gcc/testsuite/gcc.dg/pr98350-2.c
> new file mode 100644
> index 000..b35d88aead9
> --- /dev/null
> +++ b/gcc/testsuite/gcc.dg/pr98350-2.c
> @@ -0,0 +1,11 @@
> +/* { dg-do compile } */
> +/* { dg-options "-Ofast -mfpmath=sse -mfma -Wno-attributes " } */
> +
> +/* Test that the compiler rearrange the ops to generate more FMA.  */
> +
> +float
> +foo1 

[PATCH] libiberty: On Windows pass a >32k cmdline through a response file.

2023-05-22 Thread Costas Argyris via Gcc-patches
Currently on Windows, when CreateProcess is called with a command-line
that exceeds the 32k Windows limit, we get a very bad error:

"CreateProcess: No such file or directory"

This patch detects the case where this would happen and writes the
long command-line to a temporary response file and calls CreateProcess
with @file instead.
From 5c7237c102cdaca34e5907cd25c31610bda51919 Mon Sep 17 00:00:00 2001
From: Costas Argyris 
Date: Mon, 22 May 2023 13:55:56 +0100
Subject: [PATCH] libiberty: On Windows, pass a >32k cmdline through a response
 file.

pex-win32.c (win32_spawn): If the command line for CreateProcess
exceeds the 32k Windows limit, try to store it in a temporary
response file and call CreateProcess with @file instead (PR71850).

Signed-off-by: Costas Argyris 
---
 libiberty/pex-win32.c | 57 +--
 1 file changed, 44 insertions(+), 13 deletions(-)

diff --git a/libiberty/pex-win32.c b/libiberty/pex-win32.c
index 23c6c190a2c..0fd8b38734c 100644
--- a/libiberty/pex-win32.c
+++ b/libiberty/pex-win32.c
@@ -569,7 +569,8 @@ env_compare (const void *a_ptr, const void *b_ptr)
  * target is not actually an executable, such as if it is a shell script. */
 
 static pid_t
-win32_spawn (const char *executable,
+win32_spawn (struct pex_obj *obj,
+ const char *executable,
 	 BOOL search,
 	 char *const *argv,
  char *const *env, /* array of strings of the form: VAR=VALUE */
@@ -624,8 +625,37 @@ win32_spawn (const char *executable,
   cmdline = argv_to_cmdline (argv);
   if (!cmdline)
 goto exit;
-
-  /* Create the child process.  */  
+  /* If cmdline is too large, CreateProcess will fail with a bad
+ 'No such file or directory' error. Try passing it through a
+ temporary response file instead.  */
+  if (strlen (cmdline) > 32767)
+{
+  char *response_file = make_temp_file ("");
+  /* Register the file for deletion by pex_free.  */
+  ++obj->remove_count;
+  obj->remove = XRESIZEVEC (char *, obj->remove, obj->remove_count);
+  obj->remove[obj->remove_count - 1] = response_file;
+  int fd = pex_win32_open_write (obj, response_file, 0, 0);
+  if (fd == -1)
+goto exit;
+  FILE *f = pex_win32_fdopenw (obj, fd, 0);
+  /* Don't write argv[0] (program name) to the response file.  */
+  if (writeargv (&argv[1], f))
+{
+  fclose (f);
+  goto exit;
+}
+  fclose (f); /* Also closes fd and the underlying OS handle.  */
+  char *response_arg = concat ("@", response_file, NULL);
+  char *response_argv[3] = {argv[0], response_arg, NULL};
+  free (cmdline);
+  cmdline = argv_to_cmdline (response_argv);
+  free (response_arg);
+  if (!cmdline)
+goto exit;
+}
+  
+  /* Create the child process.  */
   if (CreateProcess (full_executable, cmdline,
 		  /*lpProcessAttributes=*/NULL,
 		  /*lpThreadAttributes=*/NULL,
@@ -645,7 +675,7 @@ win32_spawn (const char *executable,
   free (env_block);
   free (cmdline);
   free (full_executable);
-
+  
   return pid;
 }
 
@@ -653,7 +683,8 @@ win32_spawn (const char *executable,
This function is called as a fallback if win32_spawn fails. */
 
 static pid_t
-spawn_script (const char *executable, char *const *argv,
+spawn_script (struct pex_obj *obj,
+  const char *executable, char *const *argv,
   char* const *env,
 	  DWORD dwCreationFlags,
 	  LPSTARTUPINFO si,
@@ -703,20 +734,20 @@ spawn_script (const char *executable, char *const *argv,
 	  executable = strrchr (executable1, '\\') + 1;
 	  if (!executable)
 		executable = executable1;
-	  pid = win32_spawn (executable, TRUE, argv, env,
+	  pid = win32_spawn (obj, executable, TRUE, argv, env,
  dwCreationFlags, si, pi);
 #else
 	  if (strchr (executable1, '\\') == NULL)
-		pid = win32_spawn (executable1, TRUE, argv, env,
+		pid = win32_spawn (obj, executable1, TRUE, argv, env,
    dwCreationFlags, si, pi);
 	  else if (executable1[0] != '\\')
-		pid = win32_spawn (executable1, FALSE, argv, env,
+		pid = win32_spawn (obj, executable1, FALSE, argv, env,
    dwCreationFlags, si, pi);
 	  else
 		{
 		  const char *newex = mingw_rootify (executable1);
 		  *avhere = newex;
-		  pid = win32_spawn (newex, FALSE, argv, env,
+		  pid = win32_spawn (obj, newex, FALSE, argv, env,
  dwCreationFlags, si, pi);
 		  if (executable1 != newex)
 		free ((char *) newex);
@@ -726,7 +757,7 @@ spawn_script (const char *executable, char *const *argv,
 		  if (newex != executable1)
 			{
 			  *avhere = newex;
-			  pid = win32_spawn (newex, FALSE, argv, env,
+			  pid = win32_spawn (obj, newex, FALSE, argv, env,
 	 dwCreationFlags, si, pi);
 			  free ((char *) newex);
 			}
@@ -745,7 +776,7 @@ spawn_script (const char *executable, char *const *argv,
 /* Execute a child.  */
 
 static pid_t
-pex_win32_exec_child (struct pex_obj *obj ATTRIBUTE_UNUSED, int 

Re: [PATCH 1/2] PR gcc/98350:Add a param to control the length of the chain with FMA in reassoc pass

2023-05-22 Thread Richard Biener via Gcc-patches
On Wed, May 17, 2023 at 3:05 PM Cui, Lili  wrote:
>
> > I think to make a difference you need to hit the number of parallel 
> > fadd/fmul
> > the pipeline can perform.  I don't think issue width is ever a problem for
> > chains w/o fma and throughput of fma vs fadd + fmul should be similar.
> >
>
> Yes, for the x86 backend, fadd, fmul and fma have the same throughput (TP),
> meaning they should have the same width.
> The current implementation is reasonable  /* reassoc int, fp, vec_int, 
> vec_fp.  */.
>
> > That said, I think iff then we should try to improve
> > rewrite_expr_tree_parallel rather than adding a new function.  For example
> > for the case with equal rank operands we can try to sort adds first.  I 
> > can't
> > convince myself that rewrite_expr_tree_parallel honors ranks properly
> > quickly.
> >
>
> I rewrote this patch; there are mainly two changes:
> 1. I made some changes to rewrite_expr_tree_parallel_for_fma and used it 
> instead of rewrite_expr_tree_parallel. The following example shows that the 
> sequence generated by the this patch is better.
> 2. Put no-mult ops and mult ops alternately at the end of the queue, which is 
> conducive to generating more fma and reducing the loss of FMA when breaking 
> the chain.
>
> With these two changes, GCC can break the chain with width = 2 and generates 
> 6 FMAs for https://gcc.gnu.org/bugzilla/show_bug.cgi?id=98350  without any 
> params.
>
> --
> Source code: g + h + j + s + m + n+a+b +e  (https://godbolt.org/z/G8sb86n84)
> Compile options: -Ofast -mfpmath=sse -mfma
> Width = 3 was chosen for reassociation
> -
> Old rewrite_expr_tree_parallel generates:
>   _6 = g_8(D) + h_9(D);   --> parallel 0
>   _3 = s_11(D) + m_12(D);  --> parallel 1
>   _5 = _3 + j_10(D);
>   _2 = n_13(D) + a_14(D);   --> parallel 2
>   _1 = b_15(D) + e_16(D);  -> Parallel 3, This is not necessary, and it 
> is not friendly to FMA.
>   _4 = _1 + _2;
>   _7 = _4 + _5;
>   _17 = _6 + _7;
>   return _17;
>
> When the width = 3,  we need 5 cycles here.
> -first 
> end-
> Rewrite the old rewrite_expr_tree_parallel (3 sets in parallel) generates:
>
>   _3 = s_11(D) + m_12(D);  --> parallel 0
>   _5 = _3 + j_10(D);
>   _2 = n_13(D) + a_14(D);   --> parallel 1
>   _1 = b_15(D) + e_16(D);   --> parallel 2
>   _4 = _1 + _2;
>   _6 = _4 + _5;
>   _7 = _6 + h_9(D);
>   _17 = _7 + g_8(D);
>   return _17;
>
> When the width = 3, we need 5 cycles here.
> -second 
> end---
> Use rewrite_expr_tree_parallel_for_fma instead of rewrite_expr_tree_parallel 
> generates:
>
>   _3 = s_11(D) + m_12(D);
>   _6 = _3 + g_8(D);
>   _2 = n_13(D) + a_14(D);
>   _5 = _2 + h_9(D);
>   _1 = b_15(D) + e_16(D);
>   _4 = _1 + j_10(D);
>   _7 = _4 + _5;
>   _17 = _7 + _6;
>   return _17;
>
> When the width = 3, we need 4 cycles here.
> third 
> end---

Yes, so what I was saying is that I doubt rewrite_expr_tree_parallel
is optimal - you show
that for the specific example rewrite_expr_tree_parallel_for_fma is
better.  I was arguing
we want a single function, whether we single out leaves with
multiplications or not.

And we want documentation that shows the strategy will result in optimal latency
(I think we should not sacrifice latency just for the sake of forming
more FMAs).

Richard.

>
> Thanks,
> Lili.
>


Re: [PATCH] add glibc-stdint.h to vax and lm32 linux target (PR target/105525)

2023-05-22 Thread Maciej W. Rozycki
On Fri, 19 May 2023, Mikael Pettersson wrote:

> >  Hmm, I find it quite interesting and indeed encouraging that someone
> > actually verifies our VAX/Linux target.
> >
> >  Mikael, how do you actually verify it however?
> 
> My vax builds are only cross-compilers without kernel headers or libc.

 Hmm, interesting, I wasn't aware you could actually build stage 1 GCC 
without target headers nowadays.

 When I tried it previously, it failed, and I had to come up with a hack 
to make glibc's `make install-headers' work, as ordinarily it requires a 
target compiler, making it a chicken-and-egg problem.

> The background is that I maintain a script to build GCC-based crosses to
> as many targets as I can, currently it supports 78 distinct processors and
> 82 triplets (four processors have multiple triplets). I only check that I can
> build the toolchains (full linux-gnu ones where possible).

 Great work, thanks!

  Maciej


Re: [PATCH 2/3] Refactor widen_plus as internal_fn

2023-05-22 Thread Richard Biener via Gcc-patches
On Thu, 18 May 2023, Andre Vieira (lists) wrote:

> How about this?
> 
> Not sure about the DEF_INTERNAL documentation I rewrote in internal-fn.def,
> was struggling to word these, so improvements welcome!

The even/odd variant optabs are also commutative_optab_p, so is
the vec_widen_sadd without hi/lo or even/odd.

+/* { dg-options "-O3 -save-temps -fdump-tree-vect-all" } */

do you really want -all?  I think you want -details

+  else if (widening_fn_p (ifn)
+  || narrowing_fn_p (ifn))
+   {
+ tree lhs = gimple_get_lhs (stmt);
+ if (!lhs)
+   {
+ error ("vector IFN call with no lhs");
+ debug_generic_stmt (fn);

that's an error because ...?  Maybe we want to verify this
for all ECF_CONST|ECF_NOTHROW (or pure instead of const) internal
function calls, but I wouldn't add any verification as part
of this patch (not special to widening/narrowing fns either).

if (gimple_call_internal_p (stmt))
- return 0;
+ {
+   internal_fn fn = gimple_call_internal_fn (stmt);
+   switch (fn)
+ {
+ case IFN_VEC_WIDEN_PLUS_HI:
+ case IFN_VEC_WIDEN_PLUS_LO:
+ case IFN_VEC_WIDEN_MINUS_HI:
+ case IFN_VEC_WIDEN_MINUS_LO:
+   return 1;

this now looks incomplete.  I think that we want instead to
have a default: returning 1 and then special-cases we want
to cost as zero.  Not sure which - maybe blame tells why
this was added?  I think we can deal with this as followup
(likewise the ranger additions).

Otherwise looks good to me.

Thanks,
Richard.

> gcc/ChangeLog:
> 
> 2023-04-25  Andre Vieira  
> Joel Hutton  
> Tamar Christina  
> 
> * config/aarch64/aarch64-simd.md (vec_widen_addl_lo_):
> Rename
> this ...
> (vec_widen_add_lo_): ... to this.
> (vec_widen_addl_hi_): Rename this ...
> (vec_widen_add_hi_): ... to this.
> (vec_widen_subl_lo_): Rename this ...
> (vec_widen_sub_lo_): ... to this.
> (vec_widen_subl_hi_): Rename this ...
> (vec_widen_sub_hi_): ...to this.
> * doc/generic.texi: Document new IFN codes.
>   * internal-fn.cc (ifn_cmp): Function to compare ifn's for
> sorting/searching.
>   (lookup_hilo_internal_fn): Add lookup function.
>   (commutative_binary_fn_p): Add widen_plus fn's.
>   (widening_fn_p): New function.
>   (narrowing_fn_p): New function.
>(direct_internal_fn_optab): Change visibility.
>   * internal-fn.def (DEF_INTERNAL_WIDENING_OPTAB_FN): Macro to define an
> internal_fn that expands into multiple internal_fns for widening.
> (DEF_INTERNAL_NARROWING_OPTAB_FN): Likewise but for narrowing.
> (IFN_VEC_WIDEN_PLUS, IFN_VEC_WIDEN_PLUS_HI, IFN_VEC_WIDEN_PLUS_LO,
>  IFN_VEC_WIDEN_PLUS_EVEN, IFN_VEC_WIDEN_PLUS_ODD,
>  IFN_VEC_WIDEN_MINUS, IFN_VEC_WIDEN_MINUS_HI, 
> IFN_VEC_WIDEN_MINUS_LO,
>  IFN_VEC_WIDEN_MINUS_ODD, IFN_VEC_WIDEN_MINUS_EVEN): Define widening
>plus,minus functions.
>   * internal-fn.h (direct_internal_fn_optab): Declare new prototype.
>   (lookup_hilo_internal_fn): Likewise.
>   (widening_fn_p): Likewise.
>   (narrowing_fn_p): Likewise.
>   * optabs.cc (commutative_optab_p): Add widening plus optabs.
>   * optabs.def (OPTAB_D): Define widen add, sub optabs.
> * tree-cfg.cc (verify_gimple_call): Add checks for widening ifns.
> * tree-inline.cc (estimate_num_insns): Return same
> cost for widen add and sub IFNs as previous tree_codes.
>   * tree-vect-patterns.cc (vect_recog_widen_op_pattern): Support
> patterns with a hi/lo or even/odd split.
> (vect_recog_sad_pattern): Refactor to use new IFN codes.
> (vect_recog_widen_plus_pattern): Likewise.
> (vect_recog_widen_minus_pattern): Likewise.
> (vect_recog_average_pattern): Likewise.
>   * tree-vect-stmts.cc (vectorizable_conversion): Add support for
>_HILO IFNs.
>   (supportable_widening_operation): Likewise.
> * tree.def (WIDEN_SUM_EXPR): Update example to use new IFNs.
> 
> gcc/testsuite/ChangeLog:
> 
>   * gcc.target/aarch64/vect-widen-add.c: Test that new
> IFN_VEC_WIDEN_PLUS is being used.
>   * gcc.target/aarch64/vect-widen-sub.c: Test that new
> IFN_VEC_WIDEN_MINUS is being used.
> 

-- 
Richard Biener 
SUSE Software Solutions Germany GmbH, Frankenstrasse 146, 90461 Nuernberg,
Germany; GF: Ivo Totev, Andrew Myers, Andrew McDonald, Boudien Moerman;
HRB 36809 (AG Nuernberg)


Re: [PATCH v1] tree-ssa-sink: Improve code sinking pass.

2023-05-22 Thread Richard Biener via Gcc-patches
On Thu, May 18, 2023 at 9:14 AM Ajit Agarwal  wrote:
>
> Hello All:
>
> This patch improves code sinking pass to sink statements before call to reduce
> register pressure.
> Review comments are incorporated.
>
> Bootstrapped and regtested on powerpc64-linux-gnu.
>
> Thanks & Regards
> Ajit
>
>
> tree-ssa-sink: Improve code sinking pass.
>
> Code Sinking sinks the blocks after call. This increases
> register pressure for callee-saved registers. Improves
> code sinking before call in the use blocks or immediate
> dominator of use blocks.
>
> 2023-05-18  Ajit Kumar Agarwal  
>
> gcc/ChangeLog:
>
> * tree-ssa-sink.cc (statement_sink_location): Modified to
> move statements before calls.
> (block_call_p): New function.
> (def_use_same_block): New function.
> (select_best_block): Add heuristics to select the best
> blocks in the immediate post dominator.
>
> gcc/testsuite/ChangeLog:
>
> * gcc.dg/tree-ssa/ssa-sink-20.c: New testcase.
> * gcc.dg/tree-ssa/ssa-sink-21.c: New testcase.
> ---
>  gcc/testsuite/gcc.dg/tree-ssa/ssa-sink-20.c |  16 ++
>  gcc/testsuite/gcc.dg/tree-ssa/ssa-sink-21.c |  20 +++
>  gcc/tree-ssa-sink.cc| 159 ++--
>  3 files changed, 185 insertions(+), 10 deletions(-)
>  create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/ssa-sink-20.c
>  create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/ssa-sink-21.c
>
> diff --git a/gcc/testsuite/gcc.dg/tree-ssa/ssa-sink-20.c 
> b/gcc/testsuite/gcc.dg/tree-ssa/ssa-sink-20.c
> new file mode 100644
> index 000..716bc1f9257
> --- /dev/null
> +++ b/gcc/testsuite/gcc.dg/tree-ssa/ssa-sink-20.c
> @@ -0,0 +1,16 @@
> +/* { dg-do compile } */
> +/* { dg-options "-O2 -fdump-tree-sink -fdump-tree-optimized 
> -fdump-tree-sink-stats" } */
> +
> +void bar();
> +int j;
> +void foo(int a, int b, int c, int d, int e, int f)
> +{
> +  int l;
> +  l = a + b + c + d +e + f;
> +  if (a != 5)
> +{
> +  bar();
> +  j = l;
> +}
> +}
> +/* { dg-final { scan-tree-dump-times "Sunk statements: 5" 1 "sink" } } */

this doesn't verify the place we sink to?

> diff --git a/gcc/testsuite/gcc.dg/tree-ssa/ssa-sink-21.c 
> b/gcc/testsuite/gcc.dg/tree-ssa/ssa-sink-21.c
> new file mode 100644
> index 000..ff41e2ea8ae
> --- /dev/null
> +++ b/gcc/testsuite/gcc.dg/tree-ssa/ssa-sink-21.c
> @@ -0,0 +1,20 @@
> +/* { dg-do compile } */
> +/* { dg-options "-O2 -fdump-tree-sink-stats -fdump-tree-sink-stats" } */
> +
> +void bar();
> +int j, x;
> +void foo(int a, int b, int c, int d, int e, int f)
> +{
> +  int l;
> +  l = a + b + c + d +e + f;
> +  if (a != 5)
> +{
> +  bar();
> +  if (b != 3)
> +x = 3;
> +  else
> +x = 5;
> +  j = l;
> +}
> +}
> +/* { dg-final { scan-tree-dump-times "Sunk statements: 5" 1 "sink" } } */

likewise.  So both tests already pass before the patch?

> diff --git a/gcc/tree-ssa-sink.cc b/gcc/tree-ssa-sink.cc
> index 87b1d40c174..76556e7795b 100644
> --- a/gcc/tree-ssa-sink.cc
> +++ b/gcc/tree-ssa-sink.cc
> @@ -171,6 +171,72 @@ nearest_common_dominator_of_uses (def_operand_p def_p, 
> bool *debug_stmts)
>return commondom;
>  }
>
> +/* Return TRUE if immediate uses of the defs in
> +   USE occur in the same block as USE, FALSE otherwise.  */
> +
> +bool
> +def_use_same_block (gimple *stmt)
> +{
> +  use_operand_p use_p;
> +  def_operand_p def_p;
> +  imm_use_iterator imm_iter;
> +  ssa_op_iter iter;
> +
> +  FOR_EACH_SSA_DEF_OPERAND (def_p, stmt, iter, SSA_OP_DEF)
> +{
> +  FOR_EACH_IMM_USE_FAST (use_p, imm_iter, DEF_FROM_PTR (def_p))
> +   {
> + if (is_gimple_debug (USE_STMT (use_p)))
> +   continue;
> +
> + if (use_p

use_p is never null

> + && (gimple_bb (USE_STMT (use_p)) == gimple_bb (stmt)))
> +   return true;

the function behavior is obviously odd ...

> +   }
> + }
> +  return false;
> +}
> +
> +/* Return TRUE if the block has only calls, FALSE otherwise. */
> +
> +bool
> +block_call_p (basic_block bb)
> +{
> +  int i = 0;
> +  bool is_call = false;
> +  gimple_stmt_iterator gsi = gsi_last_bb (bb);
> +  gimple *last_stmt = gsi_stmt (gsi);
> +
> +  if (last_stmt && gimple_code (last_stmt) == GIMPLE_COND)
> +{
> +  if (!gsi_end_p (gsi))
> +   gsi_prev (&gsi);
> +
> +   for (; !gsi_end_p (gsi);)
> +{
> +  gimple *stmt = gsi_stmt (gsi);
> +
> +  /* We have already seen a call.  */
> +  if (is_call)
> +return false;

Likewise.  Do you want to check whether a block has
a single stmt and that is a call and that is followed by
a condition?  It looks like a very convoluted way to write this.

> +
> +  if (is_gimple_call (stmt))
> +is_call = true;
> +  else
> +return false;
> +
> +  if (!gsi_end_p (gsi))
> +gsi_prev (&gsi);
> +
> +   ++i;
> +   }
> + }
> +  if (is_call && i == 1)
> +return true;
> +
> +  return 

[PATCH] libgomp: Fix build for -fshort-enums

2023-05-22 Thread Sebastian Huber
Make sure that the API enums have at least the size of int.  Otherwise the
following build error may occur:

In file included from gcc/libgomp/env.c:34:
./libgomp_f.h: In function 'omp_check_defines':
./libgomp_f.h:77:8: error: size of array 'test' is negative
   77 |   char test[(28 != sizeof (omp_lock_t)
  |^~~~

libgomp/ChangeLog:

* omp.h.in (omp_alloctrait_key_t):  Add __omp_alloctrait_key_t_max__
with a value of the int type maximum.
---
 libgomp/omp.h.in | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/libgomp/omp.h.in b/libgomp/omp.h.in
index bd1286c2a3f..3b1612fcb15 100644
--- a/libgomp/omp.h.in
+++ b/libgomp/omp.h.in
@@ -146,7 +146,8 @@ typedef enum omp_alloctrait_key_t
   omp_atk_fallback = 5,
   omp_atk_fb_data = 6,
   omp_atk_pinned = 7,
-  omp_atk_partition = 8
+  omp_atk_partition = 8,
+  __omp_alloctrait_key_t_max__ = __INT_MAX__
 } omp_alloctrait_key_t;
 
 typedef enum omp_alloctrait_value_t
-- 
2.35.3



Re: Re: [PATCH] RISC-V: Add RVV comparison autovectorization

2023-05-22 Thread juzhe.zh...@rivai.ai
I will send the refactoring patch first, and then the comparison patch.
The refactoring patch will be applicable to all future uses, and it should come
first: I have already implemented all of the RVV auto-vectorization patterns, so I
know
what we will need in the future.

Thanks.


juzhe.zh...@rivai.ai
 
From: Robin Dapp
Date: 2023-05-22 20:26
To: juzhe.zh...@rivai.ai; gcc-patches
CC: rdapp.gcc; Kito.cheng; palmer; jeffreyalaw; richard.sandiford
Subject: Re: [PATCH] RISC-V: Add RVV comparison autovectorization
> I do refactoring since we are going to have many different
> auto-vectorization patterns, for example: cond_add, etc.
> 
> I should make the current framework suitable for all of them to
> simplify the future work.
 
That's good in general but can't it wait until the respective
changes go in?  I don't know how much you intend to change but
it will be easier to review as well if we don't change parts now
that might be used differently in the future. On top, we won't
get everything right with the first shot anyway.
 
Regards
Robin
 


Re: [PATCH] RISC-V: Add RVV comparison autovectorization

2023-05-22 Thread Robin Dapp via Gcc-patches
> I do refactoring since we are going to have many different
> auto-vectorization patterns, for example: cond_add, etc.
> 
> I should make the current framework suitable for all of them to
> simplify the future work.

That's good in general but can't it wait until the respective
changes go in?  I don't know how much you intend to change but
it will be easier to review as well if we don't change parts now
that might be used differently in the future. On top, we won't
get everything right with the first shot anyway.

Regards
 Robin


Re: Re: [PATCH] RISC-V: Add RVV comparison autovectorization

2023-05-22 Thread juzhe.zh...@rivai.ai
Yes, I am working on it, but I noticed that the current framework is really 
ugly and bad.
I am gonna refactor it before I send comparison support.

I do refactoring since we are going to have many different auto-vectorization 
patterns,
for example: cond_add, etc.

I should make the current framework suitable for all of them to simplify the 
future work.

Thanks. 


juzhe.zh...@rivai.ai
 
From: Robin Dapp
Date: 2023-05-22 20:14
To: juzhe.zh...@rivai.ai; gcc-patches
CC: rdapp.gcc; Kito.cheng; palmer; jeffreyalaw; richard.sandiford
Subject: Re: [PATCH] RISC-V: Add RVV comparison autovectorization
> Thanks Robin. Address comment.
 
Did you intend to send an update here already or are you working
on it?  Just wondering because you just sent another refactoring
patch.
 
Regards
Robin
 


Re: [PATCH] Fix handling of non-integral bit-fields in native_encode_initializer

2023-05-22 Thread Richard Biener via Gcc-patches
On Mon, May 22, 2023 at 10:10 AM Eric Botcazou via Gcc-patches
 wrote:
>
> Hi,
>
> the encoder for CONSTRUCTORs assumes that all bit-fields (DECL_BIT_FIELD) have
> integral types, but that's not the case in Ada where they may have pretty much
> any type, resulting in a wrong encoding for them.
>
> The attached fix filters out non-integral bit-fields, except if they start and
> end on a byte boundary because they are correctly handled in this case.
>
> Bootstrapped/regtested on x86-64/Linux, OK for mainline and 13 branch?

OK.

Can we handle non-integer bitfields by recursing with a temporary buffer to
encode it byte-aligned and then apply shifting and masking to get it in place?
Or is that not worth it?

Thanks,
Richard.

>
>
> 2023-05-22  Eric Botcazou  
>
> * fold-const.cc (native_encode_initializer) <CONSTRUCTOR>: Apply the
> specific treatment for bit-fields only if they have an integral type
> and filter out non-integral bit-fields that do not start and end on
> a byte boundary.
>
>
> 2023-05-22  Eric Botcazou  
>
> * gnat.dg/opt101.adb: New test.
> * gnat.dg/opt101_pkg.ads: New helper.
>
> --
> Eric Botcazou


Re: [PATCH] RISC-V: Add RVV comparison autovectorization

2023-05-22 Thread Robin Dapp via Gcc-patches
> Thanks Robin. Address comment.

Did you intend to send an update here already or are you working
on it?  Just wondering because you just sent another refactoring
patch.

Regards
 Robin


[PATCH] RISC-V: Add "m_" prefix for private member

2023-05-22 Thread juzhe . zhong
From: Juzhe-Zhong 

The current framework is hard to maintain and would be hard to use for
possible future auto-vectorization patterns.

We will need to keep adding more helpers and arguments as
auto-vectorization support grows. We should refactor the framework
now, while we do not yet support many auto-vectorization
patterns.

To start with, this simple patch adds the "m_" prefix to the private
members.

gcc/ChangeLog:

* config/riscv/riscv-v.cc: Add "m_" prefix.

---
 gcc/config/riscv/riscv-v.cc | 24 
 1 file changed, 12 insertions(+), 12 deletions(-)

diff --git a/gcc/config/riscv/riscv-v.cc b/gcc/config/riscv/riscv-v.cc
index d65e7300303..e0b19bc1754 100644
--- a/gcc/config/riscv/riscv-v.cc
+++ b/gcc/config/riscv/riscv-v.cc
@@ -66,7 +66,7 @@ const_vlmax_p (machine_mode mode)
 template <int MAX_OPERANDS> class insn_expander
 {
 public:
-  insn_expander () : m_opno (0), has_dest(false) {}
+  insn_expander () : m_opno (0), m_has_dest_p(false) {}
   void add_output_operand (rtx x, machine_mode mode)
   {
 create_output_operand (&m_ops[m_opno++], x, mode);
@@ -99,41 +99,41 @@ public:
 
   void set_dest_and_mask (rtx mask, rtx dest, machine_mode mask_mode)
   {
-dest_mode = GET_MODE (dest);
-has_dest = true;
+m_dest_mode = GET_MODE (dest);
+m_has_dest_p = true;
 
-add_output_operand (dest, dest_mode);
+add_output_operand (dest, m_dest_mode);
 
 if (mask)
   add_input_operand (mask, GET_MODE (mask));
 else
   add_all_one_mask_operand (mask_mode);
 
-add_vundef_operand (dest_mode);
+add_vundef_operand (m_dest_mode);
   }
 
   void set_len_and_policy (rtx len, bool force_vlmax = false)
 {
   bool vlmax_p = force_vlmax || !len;
-  gcc_assert (has_dest);
+  gcc_assert (m_has_dest_p);
 
-  if (vlmax_p && const_vlmax_p (dest_mode))
+  if (vlmax_p && const_vlmax_p (m_dest_mode))
{
  /* Optimize VLS-VLMAX code gen, we can use vsetivli instead of the
 vsetvli to obtain the value of vlmax.  */
- poly_uint64 nunits = GET_MODE_NUNITS (dest_mode);
+ poly_uint64 nunits = GET_MODE_NUNITS (m_dest_mode);
  len = gen_int_mode (nunits, Pmode);
  vlmax_p = false; /* It has became NONVLMAX now.  */
}
   else if (!len)
{
  len = gen_reg_rtx (Pmode);
- emit_vlmax_vsetvl (dest_mode, len);
+ emit_vlmax_vsetvl (m_dest_mode, len);
}
 
   add_input_operand (len, Pmode);
 
-  if (GET_MODE_CLASS (dest_mode) != MODE_VECTOR_BOOL)
+  if (GET_MODE_CLASS (m_dest_mode) != MODE_VECTOR_BOOL)
add_policy_operand (get_prefer_tail_policy (), get_prefer_mask_policy 
());
 
   add_avl_type_operand (vlmax_p ? avl_type::VLMAX : avl_type::NONVLMAX);
@@ -152,8 +152,8 @@ public:
 
 private:
   int m_opno;
-  bool has_dest;
-  machine_mode dest_mode;
+  bool m_has_dest_p;
+  machine_mode m_dest_mode;
   expand_operand m_ops[MAX_OPERANDS];
 };
 
-- 
2.36.3



Re: [PATCH] avr: Set param_min_pagesize to 0 [PR105523]

2023-05-22 Thread Richard Biener via Gcc-patches
On Fri, May 19, 2023 at 7:58 AM  wrote:
>
> On 26/04/23, 5:51 PM, "Richard Biener"  > wrote:
> > On Wed, Apr 26, 2023 at 12:56 PM  > > wrote:
> > >
> > > On Wed, Apr 26, 2023 at 3:15 PM Richard Biener via Gcc-patches 
> > > mailto:gcc-patches@gcc.gnu.org>> wrote:
> > > >
> > > > On Wed, Apr 26, 2023 at 11:42 AM Richard Biener
> > > > mailto:richard.guent...@gmail.com>> wrote:
> > > > >
> > > > > On Wed, Apr 26, 2023 at 11:01 AM SenthilKumar.Selvaraj--- via
> > > > > Gcc-patches  > > > > > wrote:
> > > > > >
> > > > > > Hi,
> > > > > >
> > > > > > This patch fixes PR 105523 by setting param_min_pagesize to 0 for 
> > > > > > the
> > > > > > avr target. For this target, zero and offsets from zero are 
> > > > > > perfectly
> > > > > > valid addresses, and the default value of param_min_pagesize ends up
> > > > > > triggering warnings on valid memory accesses.
> > > > >
> > > > > I think the proper configuration is to have
> > > > > DEFAULT_ADDR_SPACE_ZERO_ADDRESS_VALID
> > > >
> > > > Err, TARGET_ADDR_SPACE_ZERO_ADDRESS_VALID
> > >
> > > That worked. Ok for trunk and backporting to 13 and 12 branches
> > > (pending regression testing)?
> >
> >
> > OK, but please let Denis time to comment.
>
> Didn't hear from Denis. When running regression tests with this patch,
> I found that some tests with -fdelete-null-pointer-checks were
> failing. Commit 19416210b37db0584cd0b3f3b3961324b8973d25 made
> -fdelete-null-pointer-checks false by default, while still allowing it
> to be overridden from the command line (it was previously
> unconditionally false).
>
> To keep the same behavior, I modified the hook to report zero
> addresses as valid only if -fdelete-null-pointer-checks is not set.
> With this change, all regression tests pass.
>
> Ok for trunk and backporting to 13 and 12 branches?

I think that's a bit backwards - this hook conveys more precise information
(it's address-space specific) and it is also more specific.  Instead I'd
suggest to set the flag to zero in the target like nios2 or msp430 do.
In fact we should probably initialize it using this hook (and using the
default address space).

Richard.

> Regards
> Senthil
>
> PR 105523
>
> gcc/ChangeLog:
>
> * config/avr/avr.cc (avr_addr_space_zero_address_valid):
> (TARGET_ADDR_SPACE_ZERO_ADDRESS_VALID): Return true if
> flag_delete_null_pointer_checks is not set.
>
> gcc/testsuite/ChangeLog:
>
> * gcc.target/avr/pr105523.c: New test.
>
>
> diff --git gcc/config/avr/avr.cc gcc/config/avr/avr.cc
> index d5af40f..4c9eb84 100644
> --- gcc/config/avr/avr.cc
> +++ gcc/config/avr/avr.cc
> @@ -9787,6 +9787,18 @@ avr_addr_space_diagnose_usage (addr_space_t as, 
> location_t loc)
>(void) avr_addr_space_supported_p (as, loc);
>  }
>
> +/* Implement `TARGET_ADDR_SPACE_ZERO_ADDRESS_VALID. Zero is a valid
> +   address in all address spaces. Even in ADDR_SPACE_FLASH1 etc..,
> +   a zero address is valid and means 0x, where RAMPZ is
> +   set to the appropriate segment value.
> +   If the user explicitly passes in -fdelete-null-pointer-checks though,
> +   assume zero addresses are invalid.*/
> +
> +static bool
> +avr_addr_space_zero_address_valid (addr_space_t as ATTRIBUTE_UNUSED)
> +{
> +  return flag_delete_null_pointer_checks == 0;
> +}
>
>  /* Look if DECL shall be placed in program memory space by
> means of attribute `progmem' or some address-space qualifier.
> @@ -14687,6 +14699,9 @@ avr_float_lib_compare_returns_bool (machine_mode 
> mode, enum rtx_code)
>  #undef  TARGET_ADDR_SPACE_DIAGNOSE_USAGE
>  #define TARGET_ADDR_SPACE_DIAGNOSE_USAGE avr_addr_space_diagnose_usage
>
> +#undef  TARGET_ADDR_SPACE_ZERO_ADDRESS_VALID
> +#define TARGET_ADDR_SPACE_ZERO_ADDRESS_VALID 
> avr_addr_space_zero_address_valid
> +
>  #undef  TARGET_MODE_DEPENDENT_ADDRESS_P
>  #define TARGET_MODE_DEPENDENT_ADDRESS_P avr_mode_dependent_address_p
>
> diff --git gcc/testsuite/gcc.target/avr/pr105523.c 
> gcc/testsuite/gcc.target/avr/pr105523.c
> new file mode 100644
> index 000..fbbf7bf
> --- /dev/null
> +++ gcc/testsuite/gcc.target/avr/pr105523.c
> @@ -0,0 +1,14 @@
> +/* { dg-do compile } */
> +/* { dg-options "-Os -Wall" } */
> +
> +/* Verify no "array subscript 0 is outside array bounds of" is generated
> +   for accessing memory addresses in the 0-4096 range. */
> +
> +typedef __UINT8_TYPE__ uint8_t;
> +
> +#define SREG (*(volatile uint8_t*) (0x3F + __AVR_SFR_OFFSET__ ))
> +
> +void bar (void)
> +{
> +SREG = 0;
> +}
>


Re: [PATCH 1/2] Improve do_store_flag for single bit comparison against 0

2023-05-22 Thread Richard Biener via Gcc-patches
On Fri, May 19, 2023 at 4:15 AM Andrew Pinski via Gcc-patches
 wrote:
>
> While working something else, I noticed we could improve
> the following function code generation:
> ```
> unsigned f(unsigned t)
> {
>   if (t & ~(1<<30)) __builtin_unreachable();
>   return t != 0;
> }
> ```
> Right now we just emit a comparison against 0 instead
> of just a shift right by 30.
> There is code in do_store_flag which already optimizes
> `(t & 1<<30) != 0` to `(t >> 30) & 1`. This patch
> extends it to handle the case where we know t has a
> nonzero of just one bit set.
>
> OK? Bootstrapped and tested on x86_64-linux-gnu with no regressions.
>
> gcc/ChangeLog:
>
> * expr.cc (do_store_flag): Extend the one bit checking case
> to handle the case where we don't have an and but rather still
> one bit is known to be non-zero.
> ---
>  gcc/expr.cc | 27 +--
>  1 file changed, 21 insertions(+), 6 deletions(-)
>
> diff --git a/gcc/expr.cc b/gcc/expr.cc
> index 5ede094e705..91528e734e7 100644
> --- a/gcc/expr.cc
> +++ b/gcc/expr.cc
> @@ -13083,15 +13083,30 @@ do_store_flag (sepops ops, rtx target, machine_mode 
> mode)
>&& integer_zerop (arg1)
>&& (TYPE_PRECISION (ops->type) != 1 || TYPE_UNSIGNED (ops->type)))
>  {
> -  gimple *srcstmt = get_def_for_expr (arg0, BIT_AND_EXPR);
> -  if (srcstmt
> - && integer_pow2p (gimple_assign_rhs2 (srcstmt)))
> +  wide_int nz = tree_nonzero_bits (arg0);
> +
> +  if (wi::popcount (nz) == 1)
> {
> + tree op0;
> + tree op1;
> + gimple *srcstmt = get_def_for_expr (arg0, BIT_AND_EXPR);
> + /* If the defining statement was (x & POW2), then remove the and
> +as we are going to add it back. */
> + if (srcstmt
> + && integer_pow2p (gimple_assign_rhs2 (srcstmt)))
> +   {
> + op0 = gimple_assign_rhs1 (srcstmt);
> + op1 = gimple_assign_rhs2 (srcstmt);
> +   }
> + else
> +   {
> + op0 = arg0;
> + op1 = wide_int_to_tree (TREE_TYPE (op0), nz);
> +   }
>   enum tree_code tcode = code == NE ? NE_EXPR : EQ_EXPR;
>   type = lang_hooks.types.type_for_mode (mode, unsignedp);
> - tree temp = fold_build2_loc (loc, BIT_AND_EXPR, TREE_TYPE (arg1),
> -  gimple_assign_rhs1 (srcstmt),
> -  gimple_assign_rhs2 (srcstmt));
> + tree temp = fold_build2_loc (loc, BIT_AND_EXPR, TREE_TYPE (op0),
> +  op0, op1);
>   temp = fold_single_bit_test (loc, tcode, temp, arg1, type);
>   if (temp)
> return expand_expr (temp, target, VOIDmode, EXPAND_NORMAL);

I wonder if, instead of extending expand with these kinds of tricks, we
would rather add to ISEL and use direct optab IFNs for the things we
matched?  In particular, I think we do want to get rid of TER, but the
above adds another use of get_def_for_expr.

As Jeff says the above doesn't look like it includes costing so that would be an
argument to make it a generic match.pd transform (it appears to be "simpler")?

Richard.

> --
> 2.31.1
>


RE: [PATCH] RISC-V: Fix typo of multiple_rgroup-2.h

2023-05-22 Thread Li, Pan2 via Gcc-patches
Committed, thanks Kito and Juzhe, and sorry for the inconvenience.

Pan

-Original Message-
From: Kito Cheng  
Sent: Monday, May 22, 2023 6:05 PM
To: juzhe.zh...@rivai.ai
Cc: gcc-patches@gcc.gnu.org; kito.ch...@sifive.com; pal...@dabbelt.com; 
pal...@rivosinc.com; jeffreya...@gmail.com; rdapp@gmail.com; Li, Pan2 

Subject: Re: [PATCH] RISC-V: Fix typo of multiple_rgroup-2.h

ok

On Mon, May 22, 2023 at 6:02 PM  wrote:
>
> From: Juzhe-Zhong 
>
> Just noticed the following failures in the regression run:
> FAIL: gcc.target/riscv/rvv/autovec/partial/multiple_rgroup-2.c (test for 
> excess errors)
> FAIL: gcc.target/riscv/rvv/autovec/partial/multiple_rgroup_run-2.c (test for 
> excess errors)
>
> gcc/testsuite/ChangeLog:
>
> * gcc.target/riscv/rvv/autovec/partial/multiple_rgroup-2.h: Fix typo
>
> ---
>  .../gcc.target/riscv/rvv/autovec/partial/multiple_rgroup-2.h| 2 +-
>  1 file changed, 1 insertion(+), 1 deletion(-)
>
> diff --git 
> a/gcc/testsuite/gcc.target/riscv/rvv/autovec/partial/multiple_rgroup-2.h 
> b/gcc/testsuite/gcc.target/riscv/rvv/autovec/partial/multiple_rgroup-2.h
> index 7b12c656779..045a76de45f 100644
> --- a/gcc/testsuite/gcc.target/riscv/rvv/autovec/partial/multiple_rgroup-2.h
> +++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/partial/multiple_rgroup-2.h
> @@ -487,7 +487,7 @@
> __builtin_abort ();   
>  \
>  }
>
> -#defitree-vect-loop.ccne run_10(TYPE1, TYPE2, TYPE3) 
> \
> +#define run_10(TYPE1, TYPE2, TYPE3)  
>\
>int n_10_##TYPE1_##TYPE2_##TYPE3 = 777;
>\
>TYPE1 x_10_##TYPE1 = 222;  
>\
>TYPE1 x2_10_##TYPE1 = 111; 
>\
> --
> 2.36.3
>


Re: Re: [PATCH V11] VECT: Add decrement IV support in Loop Vectorizer

2023-05-22 Thread juzhe.zh...@rivai.ai

>> Not sure if you've covered this already in another thread but IIRC
>> RVV uses "with-len" not only for loads and stores but for arithmetic
>> instructions as well which is where (3) fails.  Fortunately RVV uses
>> element counts(?)

Yes, RVV uses element counts. But I did discover that we have bugs for some
arithmetic operations.
For example, for division we definitely need len_div (...), like cond_div in ARM
SVE.
But that is another story. I have supported the full feature set of RVV in my
downstream GCC, and it has worked
well for a year (I think I have fixed all the potential issues for RVV).
So, as you can imagine, I will post more middle-end patches for RVV
auto-vectorization in the future.

Thanks. 


juzhe.zh...@rivai.ai
 
From: Richard Biener
Date: 2023-05-22 18:12
To: Richard Sandiford; juzhe.zh...@rivai.ai; gcc-patches; rguenther
Subject: Re: [PATCH V11] VECT: Add decrement IV support in Loop Vectorizer
On Fri, May 19, 2023 at 12:59 PM Richard Sandiford via Gcc-patches
 wrote:
>
> "juzhe.zh...@rivai.ai"  writes:
> >>> I don't think this is a property of decrementing IVs.  IIUC it's really
> >>> a property of rgl->factor == 1 && factor == 1, where factor would need
> >>> to be passed in by the caller.  Because of that, it should probably be
> >>> a separate patch.
> > Is it right that I just post this part of the code as a separate patch and then merge
> > it?
>
> No, not in its current form.  Like I say, the test should be based on
> factors rather than TYPE_VECTOR_SUBPARTS.  But a fix for this problem
> should come before the changes to IVs.
>
> >>> That is, current LOAD_LEN targets have two properties (IIRC):
> >>> (1) all vectors used in a given piece of vector code have the same byte 
> >>> size
> >>> (2) lengths are measured in bytes rather than elements
> >>> For all cases, including SVE, the number of controls needed for a scalar
> >>> statement is equal to the number of vectors needed for that scalar
> >>> statement.
> >>> Because of (1), on current LOAD_LEN targets, the number of controls
> >>> needed for a scalar statement is also proportional to the total number
> >>> of bytes occupied by the vectors generated for that scalar statement.
> >>> And because of (2), the total number of bytes is the only thing that
> >>> matters, so all users of a particular control can use the same control
> >>> value.
> >>> E.g. on current LOAD_LEN targets, 2xV16QI and 2xV8HI would use the same
> >>> control (with no adjustment).  2xV16QI means 32 elements, while 2xV8HI
> >>> means 16 elements.  V16QI's nscalars_per_iter would therefore be double
> >>> V8HI's, but V8HI's factor would be double V16QI's (2 vs 1), so things
> >>> even out.
> >>> The code structurally supports targets that count in elements rather
> >>> than bytes, so that factor==1 for all element types.  See the
> >>> "rgl->factor == 1 && factor == 1" case in:
> >>>   if (rgl->max_nscalars_per_iter < nscalars_per_iter)
> >>>     {
> >>>       /* For now, we only support cases in which all loads and stores
> >>>          fall back to VnQI or none do.  */
> >>>       gcc_assert (!rgl->max_nscalars_per_iter
> >>>                   || (rgl->factor == 1 && factor == 1)
> >>>                   || (rgl->max_nscalars_per_iter * rgl->factor
> >>>                       == nscalars_per_iter * factor));
> >>>       rgl->max_nscalars_per_iter = nscalars_per_iter;
> >>>       rgl->type = vectype;
> >>>       rgl->factor = factor;
> >>>     }
> >>> But it hasn't been tested, since no current target uses it.
> >>> I think the above part of the patch shows that the current "factor is
> >>> always 1" path is in fact broken, and the patch is a correctness fix on
> >>> targets that measure in elements rather than bytes.
> >>> So I think the above part of the patch should go in ahead of the IV 
> >>> changes.
> >>> But the test should be based on factor rather than TYPE_VECTOR_SUBPARTS.
> > Since the length control measured in bytes instead of elements is not
> > appropriate for RVV, you mean I can't support RVV auto-vectorization in
> > upstream GCC middle-end and I can only support it in my downstream, is
> > that right?
>
> No.  I haven't said in this or previous reviews that something cannot be
> supported in upstream GCC.
>
> I'm saying that the code in theory supports counting in bytes *or*
> counting in elements.  But only the first one has actually been tested.
> And so, perhaps not surprisingly, the support for counting elements
> needs a fix.
>
> The fix in your patch looks like it's on the right lines, but it should be
> based on factor rather than TYPE_VECTOR_SUBPARTS.
>
> See get_len_load_store_mode for how this selection happens:
>
> (1) IFN_LOAD_LEN itself always counts in elements rather than bytes.
>
> (2) If a target has instructions that count in elements, it should
> define load_len patterns for all vector modes that it supports.
>
> (3) If a target has instructions that count in bytes, it should define
> load_len patterns only for byte modes.  The vectoriser will then
> use byte loads for all vector 

Re: [committed] Enable LRA on several ports

2023-05-22 Thread Richard Biener via Gcc-patches
On Fri, May 19, 2023 at 1:45 PM Maciej W. Rozycki  wrote:
>
> On Tue, 2 May 2023, Jeff Law via Gcc-patches wrote:
>
> > Well, I'd say that my plan would be to deprecate any target that is not
> > converted by the end of this development cycle.  So the change keeps cris 
> > from
> > falling into that bucket.
>
>  As I noted in the other thread it is highly unlikely I will make it with
> the VAX target in this release cycle, owing to the catastrophic breakage
> of the exception unwinder, recently discovered, which I consider higher
> priority as a show-stopper for important software such as current GDB.  I
> will appreciate your taking this into consideration.

You might end up with VAX working fine with reload for GCC 14 but
marked as deprecated.  You then have the full next cycle to GCC 15
to improve the code quality with LRA - note that reload is likely removed
early in the development cycle.

>  That written the VAX target does build its target libraries with `-mlra',
> but there are ICE regressions in the test suite and overall code produced
> is brown paperbag quality.  And removing `-mno-lra' before that has been
> sorted will make making LRA match old reload quality much tougher.

You can always compare to GCC 14 then or even work based off the
release branch.

Richard.

>   Maciej


Re: [PATCH V11] VECT: Add decrement IV support in Loop Vectorizer

2023-05-22 Thread Richard Biener via Gcc-patches
On Fri, May 19, 2023 at 12:59 PM Richard Sandiford via Gcc-patches
 wrote:
>
> "juzhe.zh...@rivai.ai"  writes:
> >>> I don't think this is a property of decrementing IVs.  IIUC it's really
> >>> a property of rgl->factor == 1 && factor == 1, where factor would need
> >>> to be passed in by the caller.  Because of that, it should probably be
> >>> a separate patch.
> > Is it right that I just post this part of the code as a separate patch and then merge
> > it?
>
> No, not in its current form.  Like I say, the test should be based on
> factors rather than TYPE_VECTOR_SUBPARTS.  But a fix for this problem
> should come before the changes to IVs.
>
> >>> That is, current LOAD_LEN targets have two properties (IIRC):
> >>> (1) all vectors used in a given piece of vector code have the same byte 
> >>> size
> >>> (2) lengths are measured in bytes rather than elements
> >>> For all cases, including SVE, the number of controls needed for a scalar
> >>> statement is equal to the number of vectors needed for that scalar
> >>> statement.
> >>> Because of (1), on current LOAD_LEN targets, the number of controls
> >>> needed for a scalar statement is also proportional to the total number
> >>> of bytes occupied by the vectors generated for that scalar statement.
> >>> And because of (2), the total number of bytes is the only thing that
> >>> matters, so all users of a particular control can use the same control
> >>> value.
> >>> E.g. on current LOAD_LEN targets, 2xV16QI and 2xV8HI would use the same
> >>> control (with no adjustment).  2xV16QI means 32 elements, while 2xV8HI
> >>> means 16 elements.  V16QI's nscalars_per_iter would therefore be double
> >>> V8HI's, but V8HI's factor would be double V16QI's (2 vs 1), so things
> >>> even out.
> >>> The code structurally supports targets that count in elements rather
> >>> than bytes, so that factor==1 for all element types.  See the
> >>> "rgl->factor == 1 && factor == 1" case in:
> >>>   if (rgl->max_nscalars_per_iter < nscalars_per_iter)
> >>>     {
> >>>       /* For now, we only support cases in which all loads and stores
> >>>          fall back to VnQI or none do.  */
> >>>       gcc_assert (!rgl->max_nscalars_per_iter
> >>>                   || (rgl->factor == 1 && factor == 1)
> >>>                   || (rgl->max_nscalars_per_iter * rgl->factor
> >>>                       == nscalars_per_iter * factor));
> >>>       rgl->max_nscalars_per_iter = nscalars_per_iter;
> >>>       rgl->type = vectype;
> >>>       rgl->factor = factor;
> >>>     }
> >>> But it hasn't been tested, since no current target uses it.
> >>> I think the above part of the patch shows that the current "factor is
> >>> always 1" path is in fact broken, and the patch is a correctness fix on
> >>> targets that measure in elements rather than bytes.
> >>> So I think the above part of the patch should go in ahead of the IV 
> >>> changes.
> >>> But the test should be based on factor rather than TYPE_VECTOR_SUBPARTS.
> > Since the length control measured in bytes instead of elements is not
> > appropriate for RVV, you mean I can't support RVV auto-vectorization in
> > upstream GCC middle-end and I can only support it in my downstream, is
> > that right?
>
> No.  I haven't said in this or previous reviews that something cannot be
> supported in upstream GCC.
>
> I'm saying that the code in theory supports counting in bytes *or*
> counting in elements.  But only the first one has actually been tested.
> And so, perhaps not surprisingly, the support for counting elements
> needs a fix.
>
> The fix in your patch looks like it's on the right lines, but it should be
> based on factor rather than TYPE_VECTOR_SUBPARTS.
>
> See get_len_load_store_mode for how this selection happens:
>
> (1) IFN_LOAD_LEN itself always counts in elements rather than bytes.
>
> (2) If a target has instructions that count in elements, it should
> define load_len patterns for all vector modes that it supports.
>
> (3) If a target has instructions that count in bytes, it should define
> load_len patterns only for byte modes.  The vectoriser will then
> use byte loads for all vector types (even things like V8HI).

Not sure if you've covered this already in another thread but IIRC
RVV uses "with-len" not only for loads and stores but for arithmetic
instructions as well which is where (3) fails.  Fortunately RVV uses
element counts(?)

> For (2), the loop controls will always have a factor of 1.
> For (3), the loop controls will have a factor equal to the element
> size in bytes.  See:
>
>   machine_mode vmode;
>   if (get_len_load_store_mode (vecmode, is_load).exists (&vmode))
> {
>   nvectors = group_memory_nvectors (group_size * vf, nunits);
>   vec_loop_lens *lens = &LOOP_VINFO_LENS (loop_vinfo);
>   unsigned factor = (vecmode == vmode) ? 1 : GET_MODE_UNIT_SIZE (vecmode);
>   vect_record_loop_len (loop_vinfo, lens, nvectors, vectype, factor);
>   using_partial_vectors_p = true;
> }
>
> This part should work correctly for RVV and any 

Re: [PATCH] RISC-V: Fix typo of multiple_rgroup-2.h

2023-05-22 Thread Kito Cheng via Gcc-patches
ok

On Mon, May 22, 2023 at 6:02 PM  wrote:
>
> From: Juzhe-Zhong 
>
> Just noticed the following failures in the regression run:
> FAIL: gcc.target/riscv/rvv/autovec/partial/multiple_rgroup-2.c (test for 
> excess errors)
> FAIL: gcc.target/riscv/rvv/autovec/partial/multiple_rgroup_run-2.c (test for 
> excess errors)
>
> gcc/testsuite/ChangeLog:
>
> * gcc.target/riscv/rvv/autovec/partial/multiple_rgroup-2.h: Fix typo
>
> ---
>  .../gcc.target/riscv/rvv/autovec/partial/multiple_rgroup-2.h| 2 +-
>  1 file changed, 1 insertion(+), 1 deletion(-)
>
> diff --git 
> a/gcc/testsuite/gcc.target/riscv/rvv/autovec/partial/multiple_rgroup-2.h 
> b/gcc/testsuite/gcc.target/riscv/rvv/autovec/partial/multiple_rgroup-2.h
> index 7b12c656779..045a76de45f 100644
> --- a/gcc/testsuite/gcc.target/riscv/rvv/autovec/partial/multiple_rgroup-2.h
> +++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/partial/multiple_rgroup-2.h
> @@ -487,7 +487,7 @@
> __builtin_abort ();   
>  \
>  }
>
> -#defitree-vect-loop.ccne run_10(TYPE1, TYPE2, TYPE3) 
> \
> +#define run_10(TYPE1, TYPE2, TYPE3)  
>\
>int n_10_##TYPE1_##TYPE2_##TYPE3 = 777;
>\
>TYPE1 x_10_##TYPE1 = 222;  
>\
>TYPE1 x2_10_##TYPE1 = 111; 
>\
> --
> 2.36.3
>


Re: [PATCH] c-family: implement -ffp-contract=on

2023-05-22 Thread Richard Biener via Gcc-patches
On Thu, May 18, 2023 at 11:04 PM Alexander Monakov via Gcc-patches
 wrote:
>
> Implement -ffp-contract=on for C and C++ without changing default
> behavior (=off for -std=cNN, =fast for C++ and -std=gnuNN).

The documentation changes mention the defaults are changed for
standard modes, I suppose you want to remove that hunk.

> gcc/c-family/ChangeLog:
>
> * c-gimplify.cc (fma_supported_p): New helper.
> (c_gimplify_expr) [PLUS_EXPR, MINUS_EXPR]: Implement FMA
> contraction.
>
> gcc/ChangeLog:
>
> * common.opt (fp_contract_mode) [on]: Remove fallback.
> * config/sh/sh.md (*fmasf4): Correct flag_fp_contract_mode test.
> * doc/invoke.texi (-ffp-contract): Update.
> * trans-mem.cc (diagnose_tm_1): Skip internal function calls.
> ---
>  gcc/c-family/c-gimplify.cc | 78 ++
>  gcc/common.opt |  3 +-
>  gcc/config/sh/sh.md|  2 +-
>  gcc/doc/invoke.texi|  8 ++--
>  gcc/trans-mem.cc   |  3 ++
>  5 files changed, 88 insertions(+), 6 deletions(-)
>
> diff --git a/gcc/c-family/c-gimplify.cc b/gcc/c-family/c-gimplify.cc
> index ef5c7d919f..f7635d3b0c 100644
> --- a/gcc/c-family/c-gimplify.cc
> +++ b/gcc/c-family/c-gimplify.cc
> @@ -41,6 +41,8 @@ along with GCC; see the file COPYING3.  If not see
>  #include "c-ubsan.h"
>  #include "tree-nested.h"
>  #include "context.h"
> +#include "tree-pass.h"
> +#include "internal-fn.h"
>
>  /*  The gimplification pass converts the language-dependent trees
>  (ld-trees) emitted by the parser into language-independent trees
> @@ -686,6 +688,14 @@ c_build_bind_expr (location_t loc, tree block, tree body)
>return bind;
>  }
>
> +/* Helper for c_gimplify_expr: test if target supports fma-like FN.  */
> +
> +static bool
> +fma_supported_p (enum internal_fn fn, tree type)
> +{
> +  return direct_internal_fn_supported_p (fn, type, OPTIMIZE_FOR_BOTH);
> +}
> +
>  /* Gimplification of expression trees.  */
>
>  /* Do C-specific gimplification on *EXPR_P.  PRE_P and POST_P are as in
> @@ -739,6 +749,74 @@ c_gimplify_expr (tree *expr_p, gimple_seq *pre_p 
> ATTRIBUTE_UNUSED,
> break;
>}
>
> +case PLUS_EXPR:
> +case MINUS_EXPR:
> +  {
> +   tree type = TREE_TYPE (*expr_p);
> +   /* For -ffp-contract=on we need to attempt FMA contraction only
> +  during initial gimplification.  Late contraction across statement
> +  boundaries would violate language semantics.  */
> +   if (SCALAR_FLOAT_TYPE_P (type)
> +   && flag_fp_contract_mode == FP_CONTRACT_ON
> +   && cfun && !(cfun->curr_properties & PROP_gimple_any)
> +   && fma_supported_p (IFN_FMA, type))
> + {
> +   bool neg_mul = false, neg_add = code == MINUS_EXPR;
> +
> +   tree *op0_p = &TREE_OPERAND (*expr_p, 0);
> +   tree *op1_p = &TREE_OPERAND (*expr_p, 1);
> +
> +   /* Look for ±(x * y) ± z, swapping operands if necessary.  */
> +   if (TREE_CODE (*op0_p) == NEGATE_EXPR
> +   && TREE_CODE (TREE_OPERAND (*op0_p, 0)) == MULT_EXPR)
> + /* '*EXPR_P' is '-(x * y) ± z'.  This is fine.  */;
> +   else if (TREE_CODE (*op0_p) != MULT_EXPR)
> + {
> +   std::swap (op0_p, op1_p);
> +   std::swap (neg_mul, neg_add);
> + }
> +   if (TREE_CODE (*op0_p) == NEGATE_EXPR)
> + {
> +   op0_p = &TREE_OPERAND (*op0_p, 0);
> +   neg_mul = !neg_mul;
> + }
> +   if (TREE_CODE (*op0_p) != MULT_EXPR)
> + break;
> +   auto_vec<tree, 3> ops (3);
> +   ops.quick_push (TREE_OPERAND (*op0_p, 0));
> +   ops.quick_push (TREE_OPERAND (*op0_p, 1));
> +   ops.quick_push (*op1_p);
> +
> +   enum internal_fn ifn = IFN_FMA;
> +   if (neg_mul)
> + {
> +   if (fma_supported_p (IFN_FNMA, type))
> + ifn = IFN_FNMA;
> +   else
> + ops[0] = build1 (NEGATE_EXPR, type, ops[0]);
> + }
> +   if (neg_add)
> + {
> +   enum internal_fn ifn2 = ifn == IFN_FMA ? IFN_FMS : IFN_FNMS;
> +   if (fma_supported_p (ifn2, type))
> + ifn = ifn2;
> +   else
> + ops[2] = build1 (NEGATE_EXPR, type, ops[2]);
> + }
> +   for (auto &op : ops)
> + if (gimplify_expr (&op, pre_p, post_p, is_gimple_val, fb_rvalue)
> + == GS_ERROR)
> +   return GS_ERROR;
> +
> +   gcall *call = gimple_build_call_internal_vec (ifn, ops);
> +   gimple_seq_add_stmt_without_update (pre_p, call);
> +   *expr_p = create_tmp_var (type);
> +   gimple_call_set_lhs (call, *expr_p);

it would be possible to do

  *expr_p = build_call_expr_internal (ifn, type, ops[0], ops[1], ops[2]);
  return GS_OK;

and not worry about temporary creation and gimplifying of 

[PATCH] RISC-V: Fix typo of multiple_rgroup-2.h

2023-05-22 Thread juzhe . zhong
From: Juzhe-Zhong 

Just noticed the following failures in the regression run:
FAIL: gcc.target/riscv/rvv/autovec/partial/multiple_rgroup-2.c (test for excess 
errors)
FAIL: gcc.target/riscv/rvv/autovec/partial/multiple_rgroup_run-2.c (test for 
excess errors)

gcc/testsuite/ChangeLog:

* gcc.target/riscv/rvv/autovec/partial/multiple_rgroup-2.h: Fix typo

---
 .../gcc.target/riscv/rvv/autovec/partial/multiple_rgroup-2.h| 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git 
a/gcc/testsuite/gcc.target/riscv/rvv/autovec/partial/multiple_rgroup-2.h 
b/gcc/testsuite/gcc.target/riscv/rvv/autovec/partial/multiple_rgroup-2.h
index 7b12c656779..045a76de45f 100644
--- a/gcc/testsuite/gcc.target/riscv/rvv/autovec/partial/multiple_rgroup-2.h
+++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/partial/multiple_rgroup-2.h
@@ -487,7 +487,7 @@
__builtin_abort ();\
 }
 
-#defitree-vect-loop.ccne run_10(TYPE1, TYPE2, TYPE3)   
  \
+#define run_10(TYPE1, TYPE2, TYPE3)
 \
   int n_10_##TYPE1_##TYPE2_##TYPE3 = 777;  
 \
   TYPE1 x_10_##TYPE1 = 222;
 \
   TYPE1 x2_10_##TYPE1 = 111;   
 \
-- 
2.36.3



Re: [PATCH, rs6000] Split TImode for logical operations in expand pass [PR100694]

2023-05-22 Thread Kewen.Lin via Gcc-patches
Hi Haochen,

on 2023/2/8 13:08, HAO CHEN GUI wrote:
> Hi,
>   The logical operations for TImode are split after the reload pass right now.
> Some potential optimizations are missed because the split is too late. This
> patch removes TImode from the "AND", "IOR", "XOR" and "NOT" expanders so that
> these logical operations can be split at the expand pass. The new test case
> illustrates the optimization.
> 
>   Two test cases of pr92398 are merged into one as all sub-targets generate
> the same sequence of instructions with the patch.

IIUC, this can also help PR target/93123.  Add it to the PR marker too if so.

This patch aligns with what the other ports do, so I think it's good, but note
that it can regress some cases like:

```
vector unsigned __int128 test(unsigned __int128 *a, unsigned __int128 *b,
  unsigned __int128 *c, unsigned __int128 *d) {

  unsigned __int128 t1 = *a | *b;
  unsigned __int128 t2 = *c & *d;
  unsigned __int128 t3 = t1 ^ t2;

  return (vector unsigned __int128)t3;
}
```

w/o the proposed patch:

lxv 32,0(5)
lxv 0,0(6)
lxv 45,0(3)
lxv 33,0(4)
xxland 32,32,0
vor 2,1,13
vxor 2,2,0

vs.

w/ this patch:

ld 9,8(6)
ld 8,0(5)
ld 10,8(5)
ld 0,0(6)
ld 11,0(3)
ld 6,8(3)
ld 5,0(4)
ld 7,8(4)
and 8,8,0
and 10,10,9
or 9,5,11
xor 9,9,8
or 8,7,6
xor 8,8,10
mtvsrdd 34,8,9

We get the optimal insn sequence without the patch, but fail to with the
proposed patch.  Apparently, for now, we have no support for moving the
operation back to the vector unit when that is beneficial.

I guess cases like those in PR100694 and PR93123 are the dominant ones and the
regressed case is a corner case.  So we can probably install this patch first
and open a bug for further enhancement.

Segher, what do you think of this?

BR,
Kewen

> 
>   Bootstrapped and tested on powerpc64-linux BE and LE with no regressions.
> 
> Thanks
> Gui Haochen
> 
> 
> ChangeLog
> 2023-02-08  Haochen Gui 
> 
> gcc/
>   PR target/100694
>   * config/rs6000/rs6000.md (BOOL_128_V): New mode iterator for 128-bit
>   vector types.
>   (and3): Replace BOOL_128 with BOOL_128_V.
>   (ior3): Likewise.
>   (xor3): Likewise.
>   (one_cmpl2 expander): New expander with BOOL_128_V.
>   (one_cmpl2 insn_and_split): Rename to ...
>   (*one_cmpl2): ... this.
> 
> gcc/testsuite/
>   PR target/100694
>   * gcc.target/powerpc/pr100694.c: New.
>   * gcc.target/powerpc/pr92398.c: New.
>   * gcc.target/powerpc/pr92398.h: Remove.
>   * gcc.target/powerpc/pr92398.p9-.c: Remove.
>   * gcc.target/powerpc/pr92398.p9+.c: Remove.
> 
> 
> patch.diff
> diff --git a/gcc/config/rs6000/rs6000.md b/gcc/config/rs6000/rs6000.md
> index 4bd1dfd3da9..455b7329643 100644
> --- a/gcc/config/rs6000/rs6000.md
> +++ b/gcc/config/rs6000/rs6000.md
> @@ -743,6 +743,15 @@ (define_mode_iterator BOOL_128   [TI
>(V2DF  "TARGET_ALTIVEC")
>(V1TI  "TARGET_ALTIVEC")])
> 
> +;; Mode iterator for logical operations on 128-bit vector types
> +(define_mode_iterator BOOL_128_V [(V16QI "TARGET_ALTIVEC")
> +  (V8HI  "TARGET_ALTIVEC")
> +  (V4SI  "TARGET_ALTIVEC")
> +  (V4SF  "TARGET_ALTIVEC")
> +  (V2DI  "TARGET_ALTIVEC")
> +  (V2DF  "TARGET_ALTIVEC")
> +  (V1TI  "TARGET_ALTIVEC")])
> +
>  ;; For the GPRs we use 3 constraints for register outputs, two that are the
>  ;; same as the output register, and a third where the output register is an
>  ;; early clobber, so we don't have to deal with register overlaps.  For the
> @@ -7135,23 +7144,23 @@ (define_expand "subti3"
>  ;; 128-bit logical operations expanders
> 
>  (define_expand "and3"
> -  [(set (match_operand:BOOL_128 0 "vlogical_operand")
> - (and:BOOL_128 (match_operand:BOOL_128 1 "vlogical_operand")
> -   (match_operand:BOOL_128 2 "vlogical_operand")))]
> +  [(set (match_operand:BOOL_128_V 0 "vlogical_operand")
> + (and:BOOL_128_V (match_operand:BOOL_128_V 1 "vlogical_operand")
> + (match_operand:BOOL_128_V 2 "vlogical_operand")))]
>""
>"")
> 
>  (define_expand "ior3"
> -  [(set (match_operand:BOOL_128 0 "vlogical_operand")
> -(ior:BOOL_128 (match_operand:BOOL_128 1 "vlogical_operand")
> -   (match_operand:BOOL_128 2 "vlogical_operand")))]
> +  [(set (match_operand:BOOL_128_V 0 "vlogical_operand")
> + (ior:BOOL_128_V (match_operand:BOOL_128_V 1 "vlogical_operand")
> + (match_operand:BOOL_128_V 2 "vlogical_operand")))]
>""
>"")
> 
>  (define_expand "xor3"
> -  [(set (match_operand:BOOL_128 0 "vlogical_operand")
> -(xor:BOOL_128 

Re: [PATCH] RISC-V: Implement autovec abs, vneg, vnot.

2023-05-22 Thread Kito Cheng via Gcc-patches
> > So I expect you will also apply those refactor on Juzhe's new changes?
> > If so I would like to have a separated NFC refactor patch if possible.
>
> What's NFC? :)  Do you mean to just have the refactor part as a separate
> patch?  If yes, I agree.

NFC: non-functional change (i.e. no functional change); that's a term used in
LLVM, and I forgot that it's rarely used here.


Re: [PATCH] RISC-V: Add missing torture-init and torture-finish for rvv.exp

2023-05-22 Thread Kito Cheng via Gcc-patches
Oops, it seems there is still some issue around here, but I found something
that might be related to this issue:

https://github.com/gcc-mirror/gcc/commit/d6654a4be3ba44c0d57be7c8a51d76d9721345e1
https://github.com/gcc-mirror/gcc/commit/23c49bb8d09bc3bfce9a08be637cf32ac014de56

On Mon, May 22, 2023 at 2:42 PM Kito Cheng  wrote:
>
> Hi Vineet:
>
> Could you help to test this patch?  It resolves that issue on our
> machine, but I would like it to also work for other environments.
>
> Thanks :)
>
> ---
>
> We got a bunch of the following error messages for multi-lib runs:
>
> ERROR: torture-init: torture_without_loops is not empty as expected
> ERROR: tcl error code NONE
>
> It seems we need torture-init and torture-finish around the test
> loop.
>
> gcc/testsuite/ChangeLog:
>
> * gcc.target/riscv/rvv/rvv.exp: Add torture-init and
> torture-finish.
> ---
>  gcc/testsuite/gcc.target/riscv/rvv/rvv.exp | 3 +++
>  1 file changed, 3 insertions(+)
>
> diff --git a/gcc/testsuite/gcc.target/riscv/rvv/rvv.exp 
> b/gcc/testsuite/gcc.target/riscv/rvv/rvv.exp
> index bc99cc0c3cf4..19179564361a 100644
> --- a/gcc/testsuite/gcc.target/riscv/rvv/rvv.exp
> +++ b/gcc/testsuite/gcc.target/riscv/rvv/rvv.exp
> @@ -39,6 +39,7 @@ if [istarget riscv32-*-*] then {
>
>  # Initialize `dg'.
>  dg-init
> +torture-init
>
>  # Main loop.
>  set CFLAGS "$DEFAULT_CFLAGS -march=$gcc_march -mabi=$gcc_mabi -O3"
> @@ -69,5 +70,7 @@ foreach op $AUTOVEC_TEST_OPTS {
>  dg-runtest [lsort [glob -nocomplain 
> $srcdir/$subdir/autovec/vls-vlmax/*.\[cS\]]] \
> "-std=c99 -O3 -ftree-vectorize --param 
> riscv-autovec-preference=fixed-vlmax" $CFLAGS
>
> +torture-finish
> +
>  # All done.
>  dg-finish
> --
> 2.40.1
>


Re: Re: [PATCH] RISC-V: Implement autovec abs, vneg, vnot.

2023-05-22 Thread juzhe.zh...@rivai.ai
Yeah, I agree with Kito.
For example, I see you have renamed "get_prefer_***" to "get_preferred_**".
I think this NFC change should be a separate patch.

Thanks.


juzhe.zh...@rivai.ai
 
From: Kito Cheng
Date: 2023-05-22 17:05
To: Robin Dapp
CC: 钟居哲; gcc-patches; palmer; Michael Collison; Jeff Law
Subject: Re: [PATCH] RISC-V: Implement autovec abs, vneg, vnot.
So I expect you will also apply those refactor on Juzhe's new changes?
If so I would like to have a separated NFC refactor patch if possible.
 
e.g.
Juzhe's vec_cmp/vcond -> NFC refactor patch -> abs, vneg, vnot
 
On Mon, May 22, 2023 at 4:59 PM Robin Dapp  wrote:
>
> As discussed with Juzhe off-list, I will rebase this patch against
> Juzhe's vec_cmp/vcond patch once that hits the trunk.
>
> Regards
>  Robin
 


Re: [PATCH] RISC-V: Implement autovec abs, vneg, vnot.

2023-05-22 Thread Robin Dapp via Gcc-patches
> So I expect you will also apply those refactor on Juzhe's new changes?
> If so I would like to have a separated NFC refactor patch if possible.

What's NFC? :)  Do you mean to just have the refactor part as a separate
patch?  If yes, I agree.

> e.g.
> Juzhe's vec_cmp/vcond -> NFC refactor patch -> abs, vneg, vnot



[COMMITTED] ada: Reuse idiomatic procedure in CStand

2023-05-22 Thread Marc Poulhiès via Gcc-patches
From: Ronan Desplanques 

This change replaces a call to Set_Name_Entity_Id with a call to
the higher-level Set_Current_Entity.

gcc/ada/

* cstand.adb: Use more idiomatic procedure.

Tested on x86_64-pc-linux-gnu, committed on master.

---
 gcc/ada/cstand.adb | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/gcc/ada/cstand.adb b/gcc/ada/cstand.adb
index 3646003b330..fbd5888b198 100644
--- a/gcc/ada/cstand.adb
+++ b/gcc/ada/cstand.adb
@@ -1642,8 +1642,7 @@ package body CStand is
 
   for E in Standard_Entity_Type loop
  if Ekind (Standard_Entity (E)) /= E_Operator then
-Set_Name_Entity_Id
-  (Chars (Standard_Entity (E)), Standard_Entity (E));
+Set_Current_Entity (Standard_Entity (E));
 Set_Homonym (Standard_Entity (E), Empty);
  end if;
 
-- 
2.40.0



Re: [PATCH] RISC-V: Implement autovec abs, vneg, vnot.

2023-05-22 Thread Kito Cheng via Gcc-patches
So I expect you will also apply those refactor on Juzhe's new changes?
If so I would like to have a separated NFC refactor patch if possible.

e.g.
Juzhe's vec_cmp/vcond -> NFC refactor patch -> abs, vneg, vnot

On Mon, May 22, 2023 at 4:59 PM Robin Dapp  wrote:
>
> As discussed with Juzhe off-list, I will rebase this patch against
> Juzhe's vec_cmp/vcond patch once that hits the trunk.
>
> Regards
>  Robin


[COMMITTED] ada: Incorrect constant folding in postcondition involving 'Old

2023-05-22 Thread Marc Poulhiès via Gcc-patches
From: Justin Squirek 

The following patch fixes an issue in the compiler whereby certain flavors of
access comparisons may be incorrectly constant-folded out of contract
expressions - notably in postcondition expressions featuring a reference to
'Old.

gcc/ada/

* checks.adb (Install_Null_Excluding_Check): Avoid non-null
optimizations when assertions are enabled.

Tested on x86_64-pc-linux-gnu, committed on master.

---
 gcc/ada/checks.adb | 13 ++++++++++++-
 1 file changed, 12 insertions(+), 1 deletion(-)

diff --git a/gcc/ada/checks.adb b/gcc/ada/checks.adb
index 9f3c679ed7e..0d472964ff5 100644
--- a/gcc/ada/checks.adb
+++ b/gcc/ada/checks.adb
@@ -8437,7 +8437,18 @@ package body Checks is
   Right_Opnd => Make_Null (Loc)),
   Reason => CE_Access_Check_Failed));
 
-  Mark_Non_Null;
+  --  Mark the entity of N "non-null" except when assertions are enabled -
+  --  since expansion becomes much more complicated (especially when it
+  --  comes to contracts) due to the generation of wrappers and wholesale
+  --  moving of declarations and statements which may happen.
+
+  --  Additionally, it is assumed that extra checks will exist with
+  --  assertions enabled so some potentially redundant checks are
+  --  acceptable.
+
+  if not Assertions_Enabled then
+ Mark_Non_Null;
+  end if;
end Install_Null_Excluding_Check;
 
-
-- 
2.40.0



Re: [PATCH] rs6000: Fix __builtin_vec_xst_trunc definition

2023-05-22 Thread Kewen.Lin via Gcc-patches
Hi Carl,

on 2023/5/11 02:06, Carl Love via Gcc-patches wrote:
> GCC maintainers:
> 
> The following patch fixes errors in the arguments in the
> __builtin_altivec_tr_stxvrhx, __builtin_altivec_tr_stxvrwx builtin
> definitions.  Note, these builtins are used by the overloaded
> __builtin_vec_xst_trunc builtin.
> 
> The patch adds a new overloaded builtin definition for
> __builtin_vec_xst_trunc for the third argument to be unsigned and
> signed long int.
> 
> A new testcase is added for the various overloaded versions of
> __builtin_vec_xst_trunc.
> 
> The patch has been tested on Power 10 with no new regressions.
> 
> Please let me know if the patch is acceptable for mainline.  Thanks.
> 
> Carl
> 
> ---
> rs6000: Fix __builtin_vec_xst_trunc definition
> 
> Built-in __builtin_vec_xst_trunc calls __builtin_altivec_tr_stxvrhx
> and __builtin_altivec_tr_stxvrwx to handle the short and word cases.  The
> arguments for these two builtins are wrong.  This patch fixes the wrong
> arguments for the builtins.
> 
> Additionally, the patch adds a new __builtin_vec_xst_trunc overloaded
> version for the destination being signed or unsigned long int.
> 
> A runnable test case is added to test each of the overloaded definitions
> of __builtin_vec_xst_trunc.
> 
> gcc/
>   * config/rs6000/rs6000-builtins.def (__builtin_altivec_tr_stxvrhx,
>   __builtin_altivec_tr_stxvrwx): Fix type of third argument.
>   Add definition for third argument to be signed long.
>   * config/rs6000/rs6000-overload.def (__builtin_vec_xst_trunc):
>   Add definitions with third argument signed and unsigned long.
>   * doc/extend.texi (__builtin_vec_xst_trunc): Add documentation for
>   new unsigned long and signed long versions.
> 
> gcc/testsuite/
>   * gcc.target/powerpc/vsx-builtin-vec_xst_trunc.c: New test case
>   for __builtin_vec_xst_trunc builtin.
> ---
>  gcc/config/rs6000/rs6000-builtins.def |   7 +-
>  gcc/config/rs6000/rs6000-overload.def |   4 +
>  gcc/doc/extend.texi   |   2 +
>  .../powerpc/vsx-builtin-vec_xst_trunc.c   | 217 ++
>  4 files changed, 228 insertions(+), 2 deletions(-)
>  create mode 100644 
> gcc/testsuite/gcc.target/powerpc/vsx-builtin-vec_xst_trunc.c
> 
> diff --git a/gcc/config/rs6000/rs6000-builtins.def 
> b/gcc/config/rs6000/rs6000-builtins.def
> index 638d0bc72ca..a378491b358 100644
> --- a/gcc/config/rs6000/rs6000-builtins.def
> +++ b/gcc/config/rs6000/rs6000-builtins.def
> @@ -3161,12 +3161,15 @@
>void __builtin_altivec_tr_stxvrbx (vsq, signed long, signed char *);
>  TR_STXVRBX vsx_stxvrbx {stvec}
>  
> -  void __builtin_altivec_tr_stxvrhx (vsq, signed long, signed int *);
> +  void __builtin_altivec_tr_stxvrhx (vsq, signed long, signed short *);
>  TR_STXVRHX vsx_stxvrhx {stvec}
>  
> -  void __builtin_altivec_tr_stxvrwx (vsq, signed long, signed short *);
> +  void __builtin_altivec_tr_stxvrwx (vsq, signed long, signed int *);
>  TR_STXVRWX vsx_stxvrwx {stvec}

Good catch!

>  
> +  void __builtin_altivec_tr_stxvrlx (vsq, signed long, signed long *);
> +TR_STXVRLX vsx_stxvrdx {stvec}
> +

This is mapped to the pattern used for type long long; it's a hard mapping.
IMHO it's wrong and not consistent with what users expect: on Power
the size of type long int is 4 bytes at -m32 but 8 bytes at -m64, so this
implementation, which is bound to 8 bytes, can cause trouble in 32-bit mode.
I wonder whether it's a good idea to add an overloaded version for type long
int at all; for now openxl also emits an error message for a long int pointer
(see its doc [1]), and users can cast to one of the accepted pointer types
(long long or int, depending on its size).

[1] 
https://www.ibm.com/docs/en/openxl-c-and-cpp-lop/17.1.1?topic=functions-vec-xst-trunc


>void __builtin_altivec_tr_stxvrdx (vsq, signed long, signed long long *);
>  TR_STXVRDX vsx_stxvrdx {stvec}
>  
> diff --git a/gcc/config/rs6000/rs6000-overload.def 
> b/gcc/config/rs6000/rs6000-overload.def
> index c582490c084..54b7ae5e51b 100644
> --- a/gcc/config/rs6000/rs6000-overload.def
> +++ b/gcc/config/rs6000/rs6000-overload.def
> @@ -4872,6 +4872,10 @@
>  TR_STXVRWX  TR_STXVRWX_S
>void __builtin_vec_xst_trunc (vuq, signed long long, unsigned int *);
>  TR_STXVRWX  TR_STXVRWX_U
> +  void __builtin_vec_xst_trunc (vsq, signed long long, signed long *);
> +TR_STXVRLX  TR_STXVRLX_S
> +  void __builtin_vec_xst_trunc (vuq, signed long long, unsigned long *);
> +TR_STXVRLX  TR_STXVRLX_U
>void __builtin_vec_xst_trunc (vsq, signed long long, signed long long *);
>  TR_STXVRDX  TR_STXVRDX_S
>void __builtin_vec_xst_trunc (vuq, signed long long, unsigned long long *);
> diff --git a/gcc/doc/extend.texi b/gcc/doc/extend.texi
> index e426a2eb7d8..7e2ae790ab3 100644
> --- a/gcc/doc/extend.texi
> +++ b/gcc/doc/extend.texi
> @@ -18570,10 +18570,12 @@ instructions.
>  @defbuiltin{{void} 

[COMMITTED] ada: Fix crash caused by incorrect expansion of iterated component

2023-05-22 Thread Marc Poulhiès via Gcc-patches
The way iterated components are expanded could lead to an inconsistent tree.

This change fixes 2 issues:

- in an early step during Pre_Analyze, the loop variable still has
Any_Type and the compiler must not emit an error. A later full Analyze
is supposed to correctly set the Etype, and only then should the
compiler emit an error if Any_Type is still used.

- when expanding into a loop with assignment statements, the expression
is analyzed in an early context (where the loop variable still has
Any_Type as its Etype) and then copied. The compiler would crash because this
Any_Type is never changed, since the expression node has its Analyzed
flag set. Resetting the flag ensures the later Analyze call also
analyzes these nodes and sets Etype correctly.

gcc/ada/

* exp_aggr.adb (Process_Transient_Component): Reset Analyzed flag
for the copy of the initialization expression.
* sem_attr.adb (Validate_Non_Static_Attribute_Function_Call): Skip
error emission during Pre_Analyze.

Tested on x86_64-pc-linux-gnu, committed on master.

---
 gcc/ada/exp_aggr.adb | 11 ++++++++++-
 gcc/ada/sem_attr.adb |  4 +++-
 2 files changed, 13 insertions(+), 2 deletions(-)

diff --git a/gcc/ada/exp_aggr.adb b/gcc/ada/exp_aggr.adb
index 40dd1c4d41b..f3ad8a9e1ae 100644
--- a/gcc/ada/exp_aggr.adb
+++ b/gcc/ada/exp_aggr.adb
@@ -9840,6 +9840,7 @@ package body Exp_Aggr is
   Res_Decl: Node_Id;
   Res_Id  : Entity_Id;
   Res_Typ : Entity_Id;
+  Copy_Init_Expr : constant Node_Id := New_Copy_Tree (Init_Expr);
 
--  Start of processing for Process_Transient_Component
 
@@ -9890,7 +9891,15 @@ package body Exp_Aggr is
   Constant_Present=> True,
   Object_Definition   => New_Occurrence_Of (Res_Typ, Loc),
   Expression  =>
-Make_Reference (Loc, New_Copy_Tree (Init_Expr)));
+Make_Reference (Loc, Copy_Init_Expr));
+
+  --  In some cases, like iterated component, the Init_Expr may have been
+  --  analyzed in a context where all the Etype fields are not correct yet
+  --  and a later call to Analyze is expected to set them.
+  --  Resetting the Analyzed flag ensures this later call doesn't skip this
+  --  node.
+
+  Reset_Analyzed_Flags (Copy_Init_Expr);
 
   Add_Item (Res_Decl);
 
diff --git a/gcc/ada/sem_attr.adb b/gcc/ada/sem_attr.adb
index a07e91b839d..bc4e3cf019e 100644
--- a/gcc/ada/sem_attr.adb
+++ b/gcc/ada/sem_attr.adb
@@ -3319,7 +3319,9 @@ package body Sem_Attr is
 
 --  Check for missing/bad expression (result of previous error)
 
-if No (E1) or else Etype (E1) = Any_Type then
+if No (E1)
+  or else (Etype (E1) = Any_Type and then Full_Analysis)
+then
Check_Error_Detected;
raise Bad_Attribute;
 end if;
-- 
2.40.0



Re: Re: [PATCH] RISC-V: Add RVV comparison autovectorization

2023-05-22 Thread juzhe.zh...@rivai.ai
Thanks Robin. I will address the comments.



juzhe.zh...@rivai.ai
 
From: Robin Dapp
Date: 2023-05-22 16:07
To: juzhe.zhong; gcc-patches
CC: rdapp.gcc; kito.cheng; palmer; jeffreyalaw; Richard Sandiford
Subject: Re: [PATCH] RISC-V: Add RVV comparison autovectorization
Hi Juzhe,
 
thanks.  Some remarks inline.
 
> +;; Integer (signed) vcond.  Don't enforce an immediate range here, since it
> +;; depends on the comparison; leave it to riscv_vector::expand_vcond instead.
> +(define_expand "vcond"
> +  [(set (match_operand:V 0 "register_operand")
> + (if_then_else:V
> +   (match_operator 3 "comparison_operator"
> + [(match_operand:VI 4 "register_operand")
> +  (match_operand:VI 5 "nonmemory_operand")])
> +   (match_operand:V 1 "nonmemory_operand")
> +   (match_operand:V 2 "nonmemory_operand")))]
> +  "TARGET_VECTOR && known_eq (GET_MODE_NUNITS (mode),
> +  GET_MODE_NUNITS (mode))"
> +  {
> +riscv_vector::expand_vcond (mode, operands);
> +DONE;
> +  }
> +)
> +
> +;; Integer vcondu.  Don't enforce an immediate range here, since it
> +;; depends on the comparison; leave it to riscv_vector::expand_vcond instead.
> +(define_expand "vcondu"
> +  [(set (match_operand:V 0 "register_operand")
> + (if_then_else:V
> +   (match_operator 3 "comparison_operator"
> + [(match_operand:VI 4 "register_operand")
> +  (match_operand:VI 5 "nonmemory_operand")])
> +   (match_operand:V 1 "nonmemory_operand")
> +   (match_operand:V 2 "nonmemory_operand")))]
> +  "TARGET_VECTOR && known_eq (GET_MODE_NUNITS (mode),
> +  GET_MODE_NUNITS (mode))"
> +  {
> +riscv_vector::expand_vcond (mode, operands);
> +DONE;
> +  }
> +)
 
These do exactly the same (as do their aarch64 heirs).  As you are a friend
of iterators usually I guess you didn't use one for clarity here?  Also, I
didn't see that we do much of immediate-range enforcement in expand_vcond.
 
> +
> +;; Floating-point vcond.  Don't enforce an immediate range here, since it
> +;; depends on the comparison; leave it to riscv_vector::expand_vcond instead.
> +(define_expand "vcond"
> +  [(set (match_operand:V 0 "register_operand")
> + (if_then_else:V
> +   (match_operator 3 "comparison_operator"
> + [(match_operand:VF 4 "register_operand")
> +  (match_operand:VF 5 "nonmemory_operand")])
> +   (match_operand:V 1 "nonmemory_operand")
> +   (match_operand:V 2 "nonmemory_operand")))]
> +  "TARGET_VECTOR && known_eq (GET_MODE_NUNITS (mode),
> +  GET_MODE_NUNITS (mode))"
> +  {
> +riscv_vector::expand_vcond (mode, operands);
> +DONE;
> +  }
> +)
 
It comes a bit as a surprise to add float comparisons before any other
float autovec patterns are in.  I'm not against it but would wait for
other comments here.  If the tests are sourced from aarch64 they have
been reviewed often enough that we can be fairly sure to do the right
thing though.  I haven't checked the expander and inversion things
closely now though.
 
> +
> +;; -
> +;;  [INT,FP] Select based on masks
> +;; -
> +;; Includes merging patterns for:
> +;; - vmerge.vv
> +;; - vmerge.vx
> +;; - vfmerge.vf
> +;; -
> +
> +(define_expand "vcond_mask_"
> +  [(match_operand:V 0 "register_operand")
> +   (match_operand: 3 "register_operand")
> +   (match_operand:V 1 "nonmemory_operand")
> +   (match_operand:V 2 "register_operand")]
> +  "TARGET_VECTOR"
> +  {
> +riscv_vector::emit_merge_op (operands[0], operands[2],
> +operands[1], operands[3]);
> +DONE;
> +  }
> +)
 
Order of operands is a bit surprising, see below.
 
> +  void add_fixed_operand (rtx x)
> +  {
> +create_fixed_operand (&m_ops[m_opno++], x);
> +gcc_assert (m_opno <= MAX_OPERANDS);
> +  }
> +  void add_integer_operand (rtx x)
> +  {
> +create_integer_operand (&m_ops[m_opno++], INTVAL (x));
> +gcc_assert (m_opno <= MAX_OPERANDS);
> +  }
>void add_all_one_mask_operand (machine_mode mode)
>{
>  add_input_operand (CONSTM1_RTX (mode), mode);
> @@ -85,11 +95,14 @@ public:
>{
>  add_input_operand (RVV_VUNDEF (mode), mode);
>}
> -  void add_policy_operand (enum tail_policy vta, enum mask_policy vma)
> +  void add_policy_operand (enum tail_policy vta)
>{
>  rtx tail_policy_rtx = gen_int_mode (vta, Pmode);
> -rtx mask_policy_rtx = gen_int_mode (vma, Pmode);
>  add_input_operand (tail_policy_rtx, Pmode);
> +  }
> +  void add_policy_operand (enum mask_policy vma)
> +  {
> +rtx mask_policy_rtx = gen_int_mode (vma, Pmode);
>  add_input_operand (mask_policy_rtx, Pmode);
>}
>void add_avl_type_operand (avl_type type)
> @@ -97,7 +110,8 @@ public:
>  add_input_operand (gen_int_mode (type, Pmode), Pmode);
>}
 
My idea would be to have the policy operands hidden a bit more as
in my last patch.  It comes down to a matter of taste.  We can discuss
once this is in and I 

[COMMITTED] ada: Rename Is_Past_Self_Hiding_Point flag to be Is_Not_Self_Hidden

2023-05-22 Thread Marc Poulhiès via Gcc-patches
From: Bob Duff 

...which seems clearer.

Still work in progress.

gcc/ada/

* cstand.adb (Is_Past_Self_Hiding_Point): Rename to be
Is_Not_Self_Hidden.
* einfo.ads: Likewise.
* exp_aggr.adb: Likewise.
* gen_il-fields.ads: Likewise.
* gen_il-gen-gen_entities.adb: Likewise.
* sem.adb: Likewise.
* sem_aggr.adb: Likewise.
* sem_ch11.adb: Likewise.
* sem_ch12.adb: Likewise.
* sem_ch5.adb: Likewise.
* sem_ch6.adb: Likewise.
* sem_ch7.adb: Likewise.
* sem_prag.adb: Likewise.

Tested on x86_64-pc-linux-gnu, committed on master.

---
 gcc/ada/cstand.adb  | 4 ++--
 gcc/ada/einfo.ads   | 4 ++--
 gcc/ada/exp_aggr.adb| 2 +-
 gcc/ada/gen_il-fields.ads   | 2 +-
 gcc/ada/gen_il-gen-gen_entities.adb | 2 +-
 gcc/ada/sem.adb | 6 +++---
 gcc/ada/sem_aggr.adb| 6 +++---
 gcc/ada/sem_ch11.adb| 2 +-
 gcc/ada/sem_ch12.adb| 8 ++++----
 gcc/ada/sem_ch5.adb | 8 ++++----
 gcc/ada/sem_ch6.adb | 4 ++--
 gcc/ada/sem_ch7.adb | 4 ++--
 gcc/ada/sem_prag.adb| 2 +-
 13 files changed, 27 insertions(+), 27 deletions(-)

diff --git a/gcc/ada/cstand.adb b/gcc/ada/cstand.adb
index f53015d1e0c..3646003b330 100644
--- a/gcc/ada/cstand.adb
+++ b/gcc/ada/cstand.adb
@@ -1784,7 +1784,7 @@ package body CStand is
 
   Set_Is_Immediately_Visible  (Ident_Node, True);
   Set_Is_Intrinsic_Subprogram (Ident_Node, True);
-  Set_Is_Past_Self_Hiding_Point (Ident_Node);
+  Set_Is_Not_Self_Hidden (Ident_Node);
 
   Set_Name_Entity_Id (Op, Ident_Node);
   Append_Entity (Ident_Node, Standard_Standard);
@@ -1810,7 +1810,7 @@ package body CStand is
   --  frozen and not self-hidden as soon as they are created.
 
   Set_Is_Frozen (E);
-  Set_Is_Past_Self_Hiding_Point (E);
+  Set_Is_Not_Self_Hidden (E);
 
   --  Set debug information required for all standard types
 
diff --git a/gcc/ada/einfo.ads b/gcc/ada/einfo.ads
index c67731c1298..0cc4b495bd9 100644
--- a/gcc/ada/einfo.ads
+++ b/gcc/ada/einfo.ads
@@ -3104,7 +3104,7 @@ package Einfo is
 --   procedure which verifies the invariants of the partial view of a
 --   private type or private extension.
 
---Is_Past_Self_Hiding_Point
+--Is_Not_Self_Hidden
 --   Defined in all entities. Roughly speaking, this is False if the
 --   declaration of the entity is hidden from all visibility because
 --   we are within its declaration, as defined by 8.3(16-18). When
@@ -4957,7 +4957,7 @@ package Einfo is
--Is_Obsolescent
--Is_Package_Body_Entity
--Is_Packed_Array_Impl_Type
-   --Is_Past_Self_Hiding_Point
+   --Is_Not_Self_Hidden
--Is_Potentially_Use_Visible
--Is_Preelaborated
--Is_Primitive_Wrapper
diff --git a/gcc/ada/exp_aggr.adb b/gcc/ada/exp_aggr.adb
index e2f0ccdb34a..40dd1c4d41b 100644
--- a/gcc/ada/exp_aggr.adb
+++ b/gcc/ada/exp_aggr.adb
@@ -2057,7 +2057,7 @@ package body Exp_Aggr is
 Set_Etype (L_J, Any_Type);
 
 Mutate_Ekind (L_J, E_Variable);
-Set_Is_Past_Self_Hiding_Point (L_J);
+Set_Is_Not_Self_Hidden (L_J);
 Set_Scope (L_J, Ent);
  else
 L_J := Make_Temporary (Loc, 'J', L);
diff --git a/gcc/ada/gen_il-fields.ads b/gcc/ada/gen_il-fields.ads
index 19ebf6744d0..fd89fac869d 100644
--- a/gcc/ada/gen_il-fields.ads
+++ b/gcc/ada/gen_il-fields.ads
@@ -752,7 +752,7 @@ package Gen_IL.Fields is
   Is_Package_Body_Entity,
   Is_Packed,
   Is_Packed_Array_Impl_Type,
-  Is_Past_Self_Hiding_Point,
+  Is_Not_Self_Hidden,
   Is_Param_Block_Component_Type,
   Is_Partial_Invariant_Procedure,
   Is_Potentially_Use_Visible,
diff --git a/gcc/ada/gen_il-gen-gen_entities.adb 
b/gcc/ada/gen_il-gen-gen_entities.adb
index 6356de0ee2e..d531e4a8efa 100644
--- a/gcc/ada/gen_il-gen-gen_entities.adb
+++ b/gcc/ada/gen_il-gen-gen_entities.adb
@@ -177,7 +177,7 @@ begin -- Gen_IL.Gen.Gen_Entities
 Sm (Is_Package_Body_Entity, Flag),
 Sm (Is_Packed, Flag, Impl_Base_Type_Only),
 Sm (Is_Packed_Array_Impl_Type, Flag),
-Sm (Is_Past_Self_Hiding_Point, Flag),
+Sm (Is_Not_Self_Hidden, Flag),
 Sm (Is_Potentially_Use_Visible, Flag),
 Sm (Is_Preelaborated, Flag),
 Sm (Is_Private_Descendant, Flag),
diff --git a/gcc/ada/sem.adb b/gcc/ada/sem.adb
index b0b492b0099..3bff8d26a0d 100644
--- a/gcc/ada/sem.adb
+++ b/gcc/ada/sem.adb
@@ -760,7 +760,7 @@ package body Sem is
 
   Debug_A_Exit ("analyzing  ", N, "  (done)");
 
-  --  Set Is_Past_Self_Hiding_Point flag. RM-8.3(16) says a declaration
+  --  Set Is_Not_Self_Hidden flag. RM-8.3(16) says a declaration
   --  is no longer hidden from all visibility after "the end of the
   --  declaration", so we set the 

[COMMITTED] ada: Small cleanup in support for protected subprograms

2023-05-22 Thread Marc Poulhiès via Gcc-patches
From: Eric Botcazou 

This moves the propagation of the Uses_Sec_Stack flag, from the original to
the rewritten subprogram, to the point where the latter is expanded, along
with the propagation of the Has_Nested_Subprogram flag, as well as addresses
a ??? comment in the same block of code.  No functional changes.

gcc/ada/

* inline.adb (Cleanup_Scopes): Do not propagate the Uses_Sec_Stack
flag from original to rewritten protected subprograms here...
* exp_ch9.adb (Expand_N_Protected_Body) :
...but here instead. Add local variables and remove a useless
test.

Tested on x86_64-pc-linux-gnu, committed on master.

---
 gcc/ada/exp_ch9.adb | 97 +++--
 gcc/ada/inline.adb  | 11 -
 2 files changed, 49 insertions(+), 59 deletions(-)

diff --git a/gcc/ada/exp_ch9.adb b/gcc/ada/exp_ch9.adb
index 50b9d072d84..b51c60ea506 100644
--- a/gcc/ada/exp_ch9.adb
+++ b/gcc/ada/exp_ch9.adb
@@ -8393,9 +8393,11 @@ package body Exp_Ch9 is
   Current_Node : Node_Id;
   Disp_Op_Body : Node_Id;
   New_Op_Body  : Node_Id;
+  New_Op_Spec  : Node_Id;
   Op_Body  : Node_Id;
   Op_Decl  : Node_Id;
   Op_Id: Entity_Id;
+  Op_Spec  : Entity_Id;
 
   function Build_Dispatching_Subprogram_Body
 (N: Node_Id;
@@ -8512,11 +8514,12 @@ package body Exp_Ch9 is
null;
 
 when N_Subprogram_Body =>
+   Op_Spec := Corresponding_Spec (Op_Body);
 
--  Do not create bodies for eliminated operations
 
if not Is_Eliminated (Defining_Entity (Op_Body))
- and then not Is_Eliminated (Corresponding_Spec (Op_Body))
+ and then not Is_Eliminated (Op_Spec)
then
   if Lock_Free_Active then
  New_Op_Body :=
@@ -8531,7 +8534,9 @@ package body Exp_Ch9 is
   Current_Node := New_Op_Body;
   Analyze (New_Op_Body);
 
-  --  When the original protected body has nested subprograms,
+  New_Op_Spec := Corresponding_Spec (New_Op_Body);
+
+  --  When the original subprogram body has nested subprograms,
   --  the new body also has them, so set the flag accordingly
   --  and reset the scopes of the top-level nested subprograms
   --  and other declaration entities so that they now refer to
@@ -8541,58 +8546,54 @@ package body Exp_Ch9 is
   --  subprogram entity isn't available via Corresponding_Spec
   --  until after the above Analyze call.)
 
-  if Has_Nested_Subprogram (Corresponding_Spec (Op_Body)) then
- Set_Has_Nested_Subprogram
-   (Corresponding_Spec (New_Op_Body));
-
- Reset_Scopes_To
-   (New_Op_Body, Corresponding_Spec (New_Op_Body));
+  if Has_Nested_Subprogram (Op_Spec) then
+ Set_Has_Nested_Subprogram (New_Op_Spec);
+ Reset_Scopes_To (New_Op_Body, New_Op_Spec);
   end if;
 
+  --  Similarly, when the original subprogram body uses the
+  --  secondary stack, the new body also does. This is needed
+  --  when the cleanup actions of the subprogram are delayed
+  --  because it contains a package instance with a body.
+
+  Set_Uses_Sec_Stack (New_Op_Spec, Uses_Sec_Stack (Op_Spec));
+
   --  Build the corresponding protected operation. This is
   --  needed only if this is a public or private operation of
   --  the type.
 
-  --  Why do we need to test for Corresponding_Spec being
-  --  present here when it's assumed to be set further above
-  --  in the Is_Eliminated test???
-
-  if Present (Corresponding_Spec (Op_Body)) then
- Op_Decl :=
-   Unit_Declaration_Node (Corresponding_Spec (Op_Body));
-
- if Nkind (Parent (Op_Decl)) = N_Protected_Definition then
-if Lock_Free_Active then
-   New_Op_Body :=
- Build_Lock_Free_Protected_Subprogram_Body
-   (Op_Body, Pid, Specification (New_Op_Body));
-else
-   New_Op_Body :=
- Build_Protected_Subprogram_Body (
-   Op_Body, Pid, Specification (New_Op_Body));
-end if;
-
-Insert_After (Current_Node, New_Op_Body);
-Analyze (New_Op_Body);
-Current_Node := New_Op_Body;
-
---  Generate an overriding primitive operation body for
- 

[COMMITTED] ada: Use idiomatic construct in Expand_N_Package_Body

2023-05-22 Thread Marc Poulhiès via Gcc-patches
From: Eric Botcazou 

gcc/ada/

* exp_ch7.adb (Expand_N_Package_Body): Call Defining_Entity to get
the entity of the body.

Tested on x86_64-pc-linux-gnu, committed on master.

---
 gcc/ada/exp_ch7.adb | 11 +--
 1 file changed, 1 insertion(+), 10 deletions(-)

diff --git a/gcc/ada/exp_ch7.adb b/gcc/ada/exp_ch7.adb
index 9ec03b7e4cd..db2644fb287 100644
--- a/gcc/ada/exp_ch7.adb
+++ b/gcc/ada/exp_ch7.adb
@@ -5262,16 +5262,7 @@ package body Exp_Ch7 is
 Fin_Id  => Fin_Id);
 
  if Present (Fin_Id) then
-declare
-   Body_Ent : Node_Id := Defining_Unit_Name (N);
-
-begin
-   if Nkind (Body_Ent) = N_Defining_Program_Unit_Name then
-  Body_Ent := Defining_Identifier (Body_Ent);
-   end if;
-
-   Set_Finalizer (Body_Ent, Fin_Id);
-end;
+Set_Finalizer (Defining_Entity (N), Fin_Id);
  end if;
   end if;
end Expand_N_Package_Body;
-- 
2.40.0



[COMMITTED] ada: Further fixes to GNATprove and CodePeer expression pretty-printer

2023-05-22 Thread Marc Poulhiès via Gcc-patches
From: Piotr Trojanek 

The expression pretty-printer still crashes on several tests, but
already gives much better outputs for many previously unsupported
constructs.

gcc/ada/

* pprint.adb (Expression_Image): Handle several previously unsupported
constructs.

Tested on x86_64-pc-linux-gnu, committed on master.

---
 gcc/ada/pprint.adb | 326 +++--
 1 file changed, 198 insertions(+), 128 deletions(-)

diff --git a/gcc/ada/pprint.adb b/gcc/ada/pprint.adb
index 8fdb5d6916e..1b97630179b 100644
--- a/gcc/ada/pprint.adb
+++ b/gcc/ada/pprint.adb
@@ -27,6 +27,7 @@ with Atree;  use Atree;
 with Einfo;  use Einfo;
 with Einfo.Entities; use Einfo.Entities;
 with Einfo.Utils;use Einfo.Utils;
+with Errout; use Errout;
 with Namet;  use Namet;
 with Nlists; use Nlists;
 with Opt;use Opt;
@@ -63,8 +64,11 @@ package body Pprint is
   --  Expand_Type is True and Expr is a type, try to expand Expr (an
   --  internally generated type) into a user understandable name.
 
-  Max_List : constant := 3;
-  --  Limit number of list elements to dump
+  Max_List_Depth : constant := 3;
+  --  Limit number of nested lists to print
+
+  Max_List_Length : constant := 3;
+  --  Limit number of list elements to print
 
   Max_Expr_Elements : constant := 24;
   --  Limit number of elements in an expression for use by Expr_Name
@@ -72,94 +76,82 @@ package body Pprint is
   Num_Elements : Natural := 0;
   --  Current number of elements processed by Expr_Name
 
-  function List_Name
-(List  : Node_Id;
- Add_Space : Boolean := True;
- Add_Paren : Boolean := True) return String;
+  function List_Name (List : List_Id) return String;
   --  Return a string corresponding to List
 
   ---
   -- List_Name --
   ---
 
-  function List_Name
-(List  : Node_Id;
- Add_Space : Boolean := True;
- Add_Paren : Boolean := True) return String
-  is
- function Internal_List_Name
-   (List  : Node_Id;
-First : Boolean := True;
-Add_Space : Boolean := True;
-Add_Paren : Boolean := True;
-Num   : Natural := 1) return String;
- --  Created for purposes of recursing on embedded lists
-
- 
- -- Internal_List_Name --
- 
-
- function Internal_List_Name
-   (List  : Node_Id;
-First : Boolean := True;
-Add_Space : Boolean := True;
-Add_Paren : Boolean := True;
-Num   : Natural := 1) return String
- is
- begin
-if No (List) then
-   if First or else not Add_Paren then
-  return "";
-   else
-  return ")";
-   end if;
-elsif Num > Max_List then
-   if Add_Paren then
-  return ", ...)";
-   else
-  return ", ...";
-   end if;
-end if;
+  function List_Name (List : List_Id) return String is
+ Buf  : Bounded_String;
+ Elmt : Node_Id;
 
---  Continue recursing on the list - handling the first element
---  in a special way.
-
-return
-  (if First then
-  (if Add_Space and Add_Paren then " ("
-   elsif Add_Paren then "("
-   elsif Add_Space then " "
-   else "")
-   else ", ")
-   & Expr_Name (List)
-   & Internal_List_Name
-   (List  => Next (List),
-First => False,
-Add_Paren => Add_Paren,
-Num   => Num + 1);
- end Internal_List_Name;
-
-  --  Start of processing for List_Name
+ Printed_Elmts : Natural := 0;
 
   begin
- --  Prevent infinite recursion by limiting depth to 3
+ --  Give up if the printed list is too deep
 
- if List_Name_Count > 3 then
+ if List_Name_Count > Max_List_Depth then
 return "...";
  end if;
 
  List_Name_Count := List_Name_Count + 1;
 
- declare
-Result : constant String :=
-   Internal_List_Name
- (List  => List,
-  Add_Space => Add_Space,
-  Add_Paren => Add_Paren);
- begin
-List_Name_Count := List_Name_Count - 1;
-return Result;
- end;
+ Elmt := First (List);
+ while Present (Elmt) loop
+
+--  Print component_association as "x | y | z => 12345"
+
+if Nkind (Elmt) = N_Component_Association then
+   declare
+  

[COMMITTED] ada: Avoid repeated calls when looking for first/last slocs of a node

2023-05-22 Thread Marc Poulhiès via Gcc-patches
From: Piotr Trojanek 

gcc/ada/

* errout.adb (First_Sloc): Avoid repeated calls.
(Last_Sloc): Likewise.

Tested on x86_64-pc-linux-gnu, committed on master.

---
 gcc/ada/errout.adb | 34 ++
 1 file changed, 18 insertions(+), 16 deletions(-)

diff --git a/gcc/ada/errout.adb b/gcc/ada/errout.adb
index 49281fdb05f..a82aff5266b 100644
--- a/gcc/ada/errout.adb
+++ b/gcc/ada/errout.adb
@@ -1845,11 +1845,12 @@ package body Errout is

 
function First_Sloc (N : Node_Id) return Source_Ptr is
-  SI : constant Source_File_Index := Source_Index (Get_Source_Unit (N));
-  SF : constant Source_Ptr:= Source_First (SI);
-  SL : constant Source_Ptr:= Source_Last (SI);
-  F  : Node_Id;
-  S  : Source_Ptr;
+  SI  : constant Source_File_Index := Source_Index (Get_Source_Unit (N));
+  SF  : constant Source_Ptr:= Source_First (SI);
+  SL  : constant Source_Ptr:= Source_Last (SI);
+  Src : constant Source_Buffer_Ptr := Source_Text (SI);
+  F   : Node_Id;
+  S   : Source_Ptr;
 
begin
   F := First_Node (N);
@@ -1876,11 +1877,11 @@ package body Errout is
 Search_Loop : for K in 1 .. 12 loop
exit Search_Loop when S = SF;
 
-   if Source_Text (SI) (S - 1) = '(' then
+   if Src (S - 1) = '(' then
   S := S - 1;
   exit Search_Loop;
 
-   elsif Source_Text (SI) (S - 1) <= ' ' then
+   elsif Src (S - 1) <= ' ' then
   S := S - 1;
 
else
@@ -1963,11 +1964,12 @@ package body Errout is
---
 
function Last_Sloc (N : Node_Id) return Source_Ptr is
-  SI : constant Source_File_Index := Source_Index (Get_Source_Unit (N));
-  SF : constant Source_Ptr:= Source_First (SI);
-  SL : constant Source_Ptr:= Source_Last (SI);
-  F  : Node_Id;
-  S  : Source_Ptr;
+  SI  : constant Source_File_Index := Source_Index (Get_Source_Unit (N));
+  SF  : constant Source_Ptr:= Source_First (SI);
+  SL  : constant Source_Ptr:= Source_Last (SI);
+  Src : constant Source_Buffer_Ptr := Source_Text (SI);
+  F   : Node_Id;
+  S   : Source_Ptr;
 
begin
   F := Last_Node (N);
@@ -1980,7 +1982,7 @@ package body Errout is
   --  Skip past an identifier
 
   while S in SF .. SL - 1
-and then Source_Text (SI) (S + 1)
+and then Src (S + 1)
   in
 '0' .. '9' | 'a' .. 'z' | 'A' .. 'Z' | '.' | '_'
   loop
@@ -2000,11 +2002,11 @@ package body Errout is
 Search_Loop : for K in 1 .. 12 loop
exit Node_Loop when S = SL;
 
-   if Source_Text (SI) (S + 1) = ')' then
+   if Src (S + 1) = ')' then
   S := S + 1;
   exit Search_Loop;
 
-   elsif Source_Text (SI) (S + 1) <= ' ' then
+   elsif Src (S + 1) <= ' ' then
   S := S + 1;
 
else
@@ -2021,7 +2023,7 @@ package body Errout is
   --  Remove any trailing space
 
   while S in SF + 1 .. SL
-and then Source_Text (SI) (S) = ' '
+and then Src (S) = ' '
   loop
  S := S - 1;
   end loop;
-- 
2.40.0



[COMMITTED] ada: Fix missing finalization in separate package body

2023-05-22 Thread Marc Poulhiès via Gcc-patches
From: Eric Botcazou 

This directly comes from a loophole in the implementation.

gcc/ada/

* exp_ch7.adb (Process_Package_Body): New procedure taken from...
(Build_Finalizer.Process_Declarations): ...here.  Call the above
procedure to deal with both package bodies and package body stubs.

Tested on x86_64-pc-linux-gnu, committed on master.

---
 gcc/ada/exp_ch7.adb | 59 -
 1 file changed, 37 insertions(+), 22 deletions(-)

diff --git a/gcc/ada/exp_ch7.adb b/gcc/ada/exp_ch7.adb
index a02e28e4b34..9ec03b7e4cd 100644
--- a/gcc/ada/exp_ch7.adb
+++ b/gcc/ada/exp_ch7.adb
@@ -2138,6 +2138,9 @@ package body Exp_Ch7 is
  --  This variable is used to determine whether a nested package or
  --  instance contains at least one controlled object.
 
+ procedure Process_Package_Body (Decl : Node_Id);
+ --  Process an N_Package_Body node
+
  procedure Processing_Actions
(Has_No_Init  : Boolean := False;
 Is_Protected : Boolean := False);
@@ -2149,6 +2152,35 @@ package body Exp_Ch7 is
  --  Is_Protected should be set when the current declaration denotes a
  --  simple protected object.
 
+ --
+ -- Process_Package_Body --
+ --
+
+ procedure Process_Package_Body (Decl : Node_Id) is
+ begin
+--  Do not inspect an ignored Ghost package body because all
+--  code found within will not appear in the final tree.
+
+if Is_Ignored_Ghost_Entity (Defining_Entity (Decl)) then
+   null;
+
+elsif Ekind (Corresponding_Spec (Decl)) /= E_Generic_Package then
+   Old_Counter_Val := Counter_Val;
+   Process_Declarations (Declarations (Decl), Preprocess);
+
+   --  The nested package body is the last construct to contain
+   --  a controlled object.
+
+   if Preprocess
+ and then Top_Level
+ and then No (Last_Top_Level_Ctrl_Construct)
+ and then Counter_Val > Old_Counter_Val
+   then
+  Last_Top_Level_Ctrl_Construct := Decl;
+   end if;
+end if;
+ end Process_Package_Body;
+
  
  -- Processing_Actions --
  
@@ -2536,29 +2568,12 @@ package body Exp_Ch7 is
 --  Nested package bodies, avoid generics
 
 elsif Nkind (Decl) = N_Package_Body then
+   Process_Package_Body (Decl);
 
-   --  Do not inspect an ignored Ghost package body because all
-   --  code found within will not appear in the final tree.
-
-   if Is_Ignored_Ghost_Entity (Defining_Entity (Decl)) then
-  null;
-
-   elsif Ekind (Corresponding_Spec (Decl)) /= E_Generic_Package
-   then
-  Old_Counter_Val := Counter_Val;
-  Process_Declarations (Declarations (Decl), Preprocess);
-
-  --  The nested package body is the last construct to contain
-  --  a controlled object.
-
-  if Preprocess
-and then Top_Level
-and then No (Last_Top_Level_Ctrl_Construct)
-and then Counter_Val > Old_Counter_Val
-  then
- Last_Top_Level_Ctrl_Construct := Decl;
-  end if;
-   end if;
+elsif Nkind (Decl) = N_Package_Body_Stub
+  and then Present (Library_Unit (Decl))
+then
+   Process_Package_Body (Proper_Body (Unit (Library_Unit (Decl))));
 
 --  Handle a rare case caused by a controlled transient object
 --  created as part of a record init proc. The variable is wrapped
-- 
2.40.0



[COMMITTED] ada: Cleanup redundant condition in resolution of entity names

2023-05-22 Thread Marc Poulhiès via Gcc-patches
From: Piotr Trojanek 

Code cleanup related to new contract for SPARK; semantics is unaffected.

gcc/ada/

* sem_res.adb (Resolve_Entity_Name): Combine two IF statements that
execute code only for references that come from source.

Tested on x86_64-pc-linux-gnu, committed on master.

---
 gcc/ada/sem_res.adb | 10 --
 1 file changed, 4 insertions(+), 6 deletions(-)

diff --git a/gcc/ada/sem_res.adb b/gcc/ada/sem_res.adb
index 3eb13de38df..365c75041a9 100644
--- a/gcc/ada/sem_res.adb
+++ b/gcc/ada/sem_res.adb
@@ -8022,7 +8022,7 @@ package body Sem_Res is
 
   if Comes_From_Source (N) then
 
- --  The following checks are only relevant when SPARK_Mode is on as
+ --  The following checks are only relevant when SPARK_Mode is On as
  --  they are not standard Ada legality rules.
 
  if SPARK_Mode = On then
@@ -8067,13 +8067,11 @@ package body Sem_Res is
  if Is_Ghost_Entity (E) then
 Check_Ghost_Context (E, N);
  end if;
-  end if;
 
-  --  We may be resolving an entity within expanded code, so a reference to
-  --  an entity should be ignored when calculating effective use clauses to
-  --  avoid inappropriate marking.
+ --  We may be resolving an entity within expanded code, so a reference
+ --  to an entity should be ignored when calculating effective use
+ --  clauses to avoid inappropriate marking.
 
-  if Comes_From_Source (N) then
  Mark_Use_Clauses (E);
   end if;
end Resolve_Entity_Name;
-- 
2.40.0



[COMMITTED] ada: Add missing word in comment

2023-05-22 Thread Marc Poulhiès via Gcc-patches
From: Ronan Desplanques 

gcc/ada/

* par-ch3.adb: Add missing word in comment.

Tested on x86_64-pc-linux-gnu, committed on master.

---
 gcc/ada/par-ch3.adb | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/gcc/ada/par-ch3.adb b/gcc/ada/par-ch3.adb
index 7126afbfbeb..a71056b20a0 100644
--- a/gcc/ada/par-ch3.adb
+++ b/gcc/ada/par-ch3.adb
@@ -1466,7 +1466,7 @@ package body Ch3 is
  Save_Scan_State (Scan_State); -- at colon
  T_Colon;
 
-  --  If we have identifier followed by := then we assume that what is
+  --  If we have an identifier followed by := then we assume that what is
   --  really meant is an assignment statement. The assignment statement
   --  is scanned out and added to the list of declarations. An exception
   --  occurs if the := is followed by the keyword constant, in which case
-- 
2.40.0



Re: [PATCH] RISC-V: Implement autovec abs, vneg, vnot.

2023-05-22 Thread Robin Dapp via Gcc-patches
As discussed with Juzhe off-list, I will rebase this patch against
Juzhe's vec_cmp/vcond patch once that hits the trunk.

Regards
 Robin


[COMMITTED] ada: Add Is_Past_Self_Hiding_Point flag

2023-05-22 Thread Marc Poulhiès via Gcc-patches
From: Bob Duff 

This patch adds a flag Is_Past_Self_Hiding_Point. When False,
this will replace E_Void as the indicator for a premature use of
a declaration within itself -- for example, "X : T := X;".

One might think this flag should be called something like
Is_Hidden_From_All_Visibility, reversing the sense of
Is_Past_Self_Hiding_Point. We don't do that because we want
Is_Past_Self_Hiding_Point to be initially False by default (and we have
no mechanism for defaulting to True), and because it doesn't exactly
match the RM definition of "hidden from all visibility" (for
example, for record components).

This is work in progress; more changes are needed before we
can remove all Mutate_Ekind(..., E_Void).

gcc/ada/

* einfo.ads (Is_Past_Self_Hiding_Point): Document.
* gen_il-fields.ads (Is_Past_Self_Hiding_Point): Add to list of
fields.
* gen_il-gen-gen_entities.adb (Is_Past_Self_Hiding_Point): Declare
in all entities.
* exp_aggr.adb: Set Is_Past_Self_Hiding_Point as appropriate.
* sem.adb: Likewise.
* sem_aggr.adb: Likewise.
* sem_ch11.adb: Likewise.
* sem_ch12.adb: Likewise.
* sem_ch5.adb: Likewise.
* sem_ch7.adb: Likewise.
* sem_prag.adb: Likewise.
* sem_ch6.adb: Likewise.
(Set_Formal_Mode): Minor cleanup: Move from spec.
* sem_ch6.ads:
(Set_Formal_Mode): Minor cleanup: Move to body.
* cstand.adb: Call Set_Is_Past_Self_Hiding_Point on all entities
as soon as they are created.
* comperr.adb (Compiler_Abort): Minor cleanup -- use 'in' instead
of 'or else'.
* debug.adb: Minor comment cleanups.

Tested on x86_64-pc-linux-gnu, committed on master.

---
 gcc/ada/comperr.adb |  6 ++
 gcc/ada/cstand.adb  |  4 +++-
 gcc/ada/debug.adb   | 23 +--
 gcc/ada/einfo.ads   | 13 +
 gcc/ada/exp_aggr.adb|  1 +
 gcc/ada/gen_il-fields.ads   |  1 +
 gcc/ada/gen_il-gen-gen_entities.adb |  1 +
 gcc/ada/sem.adb | 23 +++
 gcc/ada/sem_aggr.adb|  3 +++
 gcc/ada/sem_ch11.adb|  1 +
 gcc/ada/sem_ch12.adb|  5 +
 gcc/ada/sem_ch5.adb |  4 
 gcc/ada/sem_ch6.adb |  8 
 gcc/ada/sem_ch6.ads |  3 ---
 gcc/ada/sem_ch7.adb |  9 ++---
 gcc/ada/sem_prag.adb|  9 +
 16 files changed, 89 insertions(+), 25 deletions(-)

diff --git a/gcc/ada/comperr.adb b/gcc/ada/comperr.adb
index 4fc0e5d3baa..c52db7b0c23 100644
--- a/gcc/ada/comperr.adb
+++ b/gcc/ada/comperr.adb
@@ -177,10 +177,8 @@ package body Comperr is
 
  --  Output target name, deleting junk final reverse slash
 
- if Target_Name.all (Target_Name.all'Last) = '\'
-   or else Target_Name.all (Target_Name.all'Last) = '/'
- then
-Write_Str (Target_Name.all (1 .. Target_Name.all'Last - 1));
+ if Target_Name (Target_Name'Last) in '/' | '\' then
+Write_Str (Target_Name (1 .. Target_Name'Last - 1));
  else
 Write_Str (Target_Name.all);
  end if;
diff --git a/gcc/ada/cstand.adb b/gcc/ada/cstand.adb
index 72c287a8739..f53015d1e0c 100644
--- a/gcc/ada/cstand.adb
+++ b/gcc/ada/cstand.adb
@@ -1784,6 +1784,7 @@ package body CStand is
 
   Set_Is_Immediately_Visible  (Ident_Node, True);
   Set_Is_Intrinsic_Subprogram (Ident_Node, True);
+  Set_Is_Past_Self_Hiding_Point (Ident_Node);
 
   Set_Name_Entity_Id (Op, Ident_Node);
   Append_Entity (Ident_Node, Standard_Standard);
@@ -1806,9 +1807,10 @@ package body CStand is
   Set_Is_Public (E);
 
   --  All standard entity names are analyzed manually, and are thus
-  --  frozen as soon as they are created.
+  --  frozen and not self-hidden as soon as they are created.
 
   Set_Is_Frozen (E);
+  Set_Is_Past_Self_Hiding_Point (E);
 
   --  Set debug information required for all standard types
 
diff --git a/gcc/ada/debug.adb b/gcc/ada/debug.adb
index 7497fa04076..9566e095d1a 100644
--- a/gcc/ada/debug.adb
+++ b/gcc/ada/debug.adb
@@ -41,7 +41,7 @@ package body Debug is
--  dh   Generate listing showing loading of name table hash chains
--  di   Generate messages for visibility linking/delinking
--  dj   Suppress "junk null check" for access parameter values
-   --  dk   Generate GNATBUG message on abort, even if previous errors
+   --  dk   Generate "GNAT BUG" message on abort, even if previous errors
--  dl   Generate unit load trace messages
--  dm   Prevent special frontend inlining in GNATprove mode
--  dn   Generate messages for node/list allocation
@@ -113,7 +113,7 @@ package body Debug is
--  d.z  Restore previous support for frontend handling of Inline_Always
 
--  d.A  Enable statistics printing in 

[COMMITTED] ada: Add warning on frontend inlining of Subprogram_Variant

2023-05-22 Thread Marc Poulhiès via Gcc-patches
From: Piotr Trojanek 

We already warned when contracts like pre/postcondition appear together
with pragma Inline_Always and they are ignored by the frontend inlining.

For consistency we now also warn for Subprogram_Variant, which is
similarly ignored even though this contract is only meaningful for
recursive subprograms and those can't be inlined anyway (but an error about
this might only be emitted when full compilation is done).

gcc/ada/

* sem_prag.adb
(Check_Postcondition_Use_In_Inlined_Subprogram): Mention
Subprogram_Variant in the comment.
(Analyze_Subprogram_Variant_In_Decl_Part): Warn when contract is
ignored because of pragma Inline_Always and frontend inlining.

Tested on x86_64-pc-linux-gnu, committed on master.

---
 gcc/ada/sem_prag.adb | 12 +---
 1 file changed, 9 insertions(+), 3 deletions(-)

diff --git a/gcc/ada/sem_prag.adb b/gcc/ada/sem_prag.adb
index dbc8584e211..feaf486c348 100644
--- a/gcc/ada/sem_prag.adb
+++ b/gcc/ada/sem_prag.adb
@@ -208,9 +208,10 @@ package body Sem_Prag is
  (Prag: Node_Id;
   Spec_Id : Entity_Id);
--  Subsidiary to the analysis of pragmas Contract_Cases, Postcondition,
-   --  Precondition, Refined_Post, and Test_Case. Emit a warning when pragma
-   --  Prag is associated with subprogram Spec_Id subject to Inline_Always,
-   --  assertions are enabled and inling is done in the frontend.
+   --  Precondition, Refined_Post, Subprogram_Variant, and Test_Case. Emit a
+   --  warning when pragma Prag is associated with subprogram Spec_Id subject
+   --  to Inline_Always, assertions are enabled and inling is done in the
+   --  frontend.
 
procedure Check_State_And_Constituent_Use
  (States   : Elist_Id;
@@ -29627,6 +29628,11 @@ package body Sem_Prag is
 End_Scope;
  end if;
 
+ --  Currently it is not possible to inline Subprogram_Variant on a
+ --  subprogram subject to pragma Inline_Always.
+
+ Check_Postcondition_Use_In_Inlined_Subprogram (N, Spec_Id);
+
   --  Otherwise the pragma is illegal
 
   else
-- 
2.40.0



[COMMITTED] ada: Fix missing finalization in library-unit instance spec

2023-05-22 Thread Marc Poulhiès via Gcc-patches
From: Eric Botcazou 

This fixes the missing finalization of objects declared in the spec of
package instances that are library units (and only them, i.e. not all
library-level package instances) when the instances have a package body.

The finalization is done when there is no package body, and supporting
this case precisely broke the other case because of a thinko or a typo.

This also requires a small adjustment to the routine writing ALI files.

gcc/ada/

* exp_ch7.adb (Build_Finalizer): Reverse the test comparing the
instantiation and declaration nodes of a package instance, and
therefore bail out only when they are equal.  Adjust comments.
(Expand_N_Package_Declaration): Do not clear the Finalizer field.
* lib-writ.adb: Add with and use clauses for Sem_Util.
(Write_Unit_Information): Look at unit nodes to find finalizers.
* sem_ch12.adb (Analyze_Package_Instantiation): Beef up the comment
about the rewriting of the instantiation node into a declaration.

Tested on x86_64-pc-linux-gnu, committed on master.

---
 gcc/ada/exp_ch7.adb  | 18 +-
 gcc/ada/lib-writ.adb | 19 +++
 gcc/ada/sem_ch12.adb | 10 ++
 3 files changed, 34 insertions(+), 13 deletions(-)

diff --git a/gcc/ada/exp_ch7.adb b/gcc/ada/exp_ch7.adb
index 7ea39f7ba16..a02e28e4b34 100644
--- a/gcc/ada/exp_ch7.adb
+++ b/gcc/ada/exp_ch7.adb
@@ -3534,15 +3534,21 @@ package body Exp_Ch7 is
 and then
   (not Is_Library_Level_Entity (Spec_Id)
 
---  Nested packages are library level entities, but do not need to
+--  Nested packages are library-level entities, but do not need to
 --  be processed separately.
 
 or else Scope_Depth (Spec_Id) /= Uint_1
+
+--  Do not build two finalizers for an instance without body that
+--  is a library unit (see Analyze_Package_Instantiation).
+
 or else (Is_Generic_Instance (Spec_Id)
-  and then Package_Instantiation (Spec_Id) /= N))
+  and then Package_Instantiation (Spec_Id) = N))
 
- --  Still need to process package body instantiations which may
- --  contain objects requiring finalization.
+ --  Still need to process library-level package body instances, whose
+ --  instantiation was deferred and thus could not be seen during the
+ --  processing of the enclosing scope, and which may contain objects
+ --  requiring finalization.
 
 and then not
   (For_Package_Body
@@ -5376,7 +5382,9 @@ package body Exp_Ch7 is
 Defer_Abort => False,
 Fin_Id  => Fin_Id);
 
- Set_Finalizer (Id, Fin_Id);
+ if Present (Fin_Id) then
+Set_Finalizer (Id, Fin_Id);
+ end if;
   end if;
 
   --  If this is a library-level package and unnesting is enabled,
diff --git a/gcc/ada/lib-writ.adb b/gcc/ada/lib-writ.adb
index deecfc067c5..23b6266bb41 100644
--- a/gcc/ada/lib-writ.adb
+++ b/gcc/ada/lib-writ.adb
@@ -50,6 +50,7 @@ with Rident; use Rident;
 with Stand;  use Stand;
 with Scn;use Scn;
 with Sem_Eval;   use Sem_Eval;
+with Sem_Util;   use Sem_Util;
 with Sinfo;  use Sinfo;
 with Sinfo.Nodes;use Sinfo.Nodes;
 with Sinfo.Utils;use Sinfo.Utils;
@@ -524,10 +525,20 @@ package body Lib.Writ is
  Write_Info_Str (" O");
  Write_Info_Char (OA_Setting (Unit_Num));
 
- if Ekind (Uent) in E_Package | E_Package_Body
-   and then Present (Finalizer (Uent))
- then
-Write_Info_Str (" PF");
+ --  For a package instance with a body that is a library unit, the two
+ --  compilation units share Cunit_Entity so we cannot rely on Uent.
+
+ if Ukind in N_Package_Declaration | N_Package_Body then
+declare
+   E : constant Entity_Id := Defining_Entity (Unit (Unode));
+
+begin
+   if Ekind (E) in E_Package | E_Package_Body
+ and then Present (Finalizer (E))
+   then
+  Write_Info_Str (" PF");
+   end if;
+end;
  end if;
 
  if Is_Preelaborated (Uent) then
diff --git a/gcc/ada/sem_ch12.adb b/gcc/ada/sem_ch12.adb
index 181392c2132..c31d0c62faa 100644
--- a/gcc/ada/sem_ch12.adb
+++ b/gcc/ada/sem_ch12.adb
@@ -5007,10 +5007,12 @@ package body Sem_Ch12 is
  Set_First_Private_Entity (Defining_Unit_Name (Unit_Renaming),
First_Private_Entity (Act_Decl_Id));
 
- --  If the instantiation will receive a body, the unit will be
- --  transformed into a package body, and receive its own elaboration
- --  entity. Otherwise, the nature of the unit is now a package
- --  declaration.
+ --  If the instantiation needs a body, the unit will be turned into
+ --  a package body and receive 

[COMMITTED] ada: Fix spurious warning on Inline_Always and contracts

2023-05-22 Thread Marc Poulhiès via Gcc-patches
From: Piotr Trojanek 

Warnings about pre/postconditions being ignored with Inline_Always were
only true for the obsolete frontend inlining. With the current backend
inlining, pre/postconditions work fine with Inline_Always.

gcc/ada/

* sem_prag.adb (Check_Postcondition_Use_In_Inlined_Subprogram): Only
emit warning when frontend inlining is enabled.

Tested on x86_64-pc-linux-gnu, committed on master.

---
 gcc/ada/sem_prag.adb | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/gcc/ada/sem_prag.adb b/gcc/ada/sem_prag.adb
index b6c78dbd559..dbc8584e211 100644
--- a/gcc/ada/sem_prag.adb
+++ b/gcc/ada/sem_prag.adb
@@ -210,7 +210,7 @@ package body Sem_Prag is
--  Subsidiary to the analysis of pragmas Contract_Cases, Postcondition,
--  Precondition, Refined_Post, and Test_Case. Emit a warning when pragma
--  Prag is associated with subprogram Spec_Id subject to Inline_Always,
-   --  and assertions are enabled.
+   --  assertions are enabled and inling is done in the frontend.
 
procedure Check_State_And_Constituent_Use
  (States   : Elist_Id;
@@ -30304,6 +30304,7 @@ package body Sem_Prag is
   if Warn_On_Redundant_Constructs
 and then Has_Pragma_Inline_Always (Spec_Id)
 and then Assertions_Enabled
+and then not Back_End_Inlining
   then
  Error_Msg_Name_1 := Original_Aspect_Pragma_Name (Prag);
 
-- 
2.40.0



[COMMITTED] ada: Fix spurious freezing error on nonabstract null extension

2023-05-22 Thread Marc Poulhiès via Gcc-patches
From: Eric Botcazou 

This prevents the wrapper function created for each nonoverridden inherited
function with a controlling result of nonabstract null extensions of tagged
types from causing premature freezing of types referenced in its profile.

gcc/ada/

* exp_ch3.adb (Make_Controlling_Function_Wrappers): Create the body
as the expanded body of an expression function.

Tested on x86_64-pc-linux-gnu, committed on master.

---
 gcc/ada/exp_ch3.adb | 8 +---
 1 file changed, 5 insertions(+), 3 deletions(-)

diff --git a/gcc/ada/exp_ch3.adb b/gcc/ada/exp_ch3.adb
index b8ab549c0fc..3a023092532 100644
--- a/gcc/ada/exp_ch3.adb
+++ b/gcc/ada/exp_ch3.adb
@@ -11109,9 +11109,10 @@ package body Exp_Ch3 is
 Null_Record_Present => True);
 
 --  GNATprove will use expression of an expression function as an
---  implicit postcondition. GNAT will not benefit from expression
---  function (and would struggle if we add an expression function
---  to freezing actions).
+--  implicit postcondition. GNAT will also benefit from expression
+--  function to avoid premature freezing, but would struggle if we
+--  added an expression function to freezing actions, so we create
+--  the expanded form directly.
 
 if GNATprove_Mode then
Func_Body :=
@@ -11130,6 +11131,7 @@ package body Exp_Ch3 is
Statements => New_List (
  Make_Simple_Return_Statement (Loc,
Expression => Ext_Aggr;
+   Set_Was_Expression_Function (Func_Body);
 end if;
 
 Append_To (Body_List, Func_Body);
-- 
2.40.0



[COMMITTED] ada: Remove redundant protection against empty lists

2023-05-22 Thread Marc Poulhiès via Gcc-patches
From: Piotr Trojanek 

Calls to List_Length on No_List intentionally return 0 (and likewise
calls to First on No_List intentionally return Empty), so explicit guards
against No_List are unnecessary. Code cleanup; semantics is unaffected.

gcc/ada/

* exp_aggr.adb (Aggregate_Size): Remove redundant calls to
Present.
* exp_ch5.adb (Expand_N_If_Statement): Likewise.
* sem_prag.adb (Analyze_Pragma): Likewise.
* sem_warn.adb (Find_Var): Likewise.

Tested on x86_64-pc-linux-gnu, committed on master.

---
 gcc/ada/exp_aggr.adb |  8 +++-
 gcc/ada/exp_ch5.adb  |  1 -
 gcc/ada/sem_prag.adb | 25 ++---
 gcc/ada/sem_warn.adb |  2 +-
 4 files changed, 14 insertions(+), 22 deletions(-)

diff --git a/gcc/ada/exp_aggr.adb b/gcc/ada/exp_aggr.adb
index 58831bd51ca..e4b1991f410 100644
--- a/gcc/ada/exp_aggr.adb
+++ b/gcc/ada/exp_aggr.adb
@@ -7397,7 +7397,7 @@ package body Exp_Aggr is
  Comp   : Node_Id;
  Choice : Node_Id;
  Lo, Hi : Node_Id;
- Siz : Int := 0;
+ Siz: Int;
 
  procedure Add_Range_Size;
  --  Compute number of components specified by a component association
@@ -7422,11 +7422,9 @@ package body Exp_Aggr is
  end Add_Range_Size;
 
   begin
- --  Aggregate is either all positional or all named.
+ --  Aggregate is either all positional or all named
 
- if Present (Expressions (N)) then
-Siz := List_Length (Expressions (N));
- end if;
+ Siz := List_Length (Expressions (N));
 
  if Present (Component_Associations (N)) then
 Comp := First (Component_Associations (N));
diff --git a/gcc/ada/exp_ch5.adb b/gcc/ada/exp_ch5.adb
index 0c89856b58b..dfe1112f341 100644
--- a/gcc/ada/exp_ch5.adb
+++ b/gcc/ada/exp_ch5.adb
@@ -4743,7 +4743,6 @@ package body Exp_Ch5 is
 and then not Opt.Suppress_Control_Flow_Optimizations
 and then Nkind (N) = N_If_Statement
 and then No (Elsif_Parts (N))
-and then Present (Else_Statements (N))
 and then List_Length (Then_Statements (N)) = 1
 and then List_Length (Else_Statements (N)) = 1
   then
diff --git a/gcc/ada/sem_prag.adb b/gcc/ada/sem_prag.adb
index 36c1add5ea4..5fe5d6a2d0f 100644
--- a/gcc/ada/sem_prag.adb
+++ b/gcc/ada/sem_prag.adb
@@ -11699,29 +11699,24 @@ package body Sem_Prag is
 
   --  Preset arguments
 
-  Arg_Count := 0;
-  Arg1  := Empty;
+  Arg_Count := List_Length (Pragma_Argument_Associations (N));
+  Arg1  := First (Pragma_Argument_Associations (N));
   Arg2  := Empty;
   Arg3  := Empty;
   Arg4  := Empty;
   Arg5  := Empty;
 
-  if Present (Pragma_Argument_Associations (N)) then
- Arg_Count := List_Length (Pragma_Argument_Associations (N));
- Arg1 := First (Pragma_Argument_Associations (N));
-
- if Present (Arg1) then
-Arg2 := Next (Arg1);
+  if Present (Arg1) then
+ Arg2 := Next (Arg1);
 
-if Present (Arg2) then
-   Arg3 := Next (Arg2);
+ if Present (Arg2) then
+Arg3 := Next (Arg2);
 
-   if Present (Arg3) then
-  Arg4 := Next (Arg3);
+if Present (Arg3) then
+   Arg4 := Next (Arg3);
 
-  if Present (Arg4) then
- Arg5 := Next (Arg4);
-  end if;
+   if Present (Arg4) then
+  Arg5 := Next (Arg4);
end if;
 end if;
  end if;
diff --git a/gcc/ada/sem_warn.adb b/gcc/ada/sem_warn.adb
index 834d48d311c..5dd7c17d4e2 100644
--- a/gcc/ada/sem_warn.adb
+++ b/gcc/ada/sem_warn.adb
@@ -353,7 +353,7 @@ package body Sem_Warn is
 begin
--  One argument, so check the argument
 
-   if Present (PA) and then List_Length (PA) = 1 then
+   if List_Length (PA) = 1 then
   if Nkind (First (PA)) = N_Parameter_Association then
  Find_Var (Explicit_Actual_Parameter (First (PA)));
   else
-- 
2.40.0



[COMMITTED] ada: Improve -gnatyx style check

2023-05-22 Thread Marc Poulhiès via Gcc-patches
From: Arnaud Charlet 

Check redundant parentheses in many more places, for now only under
-gnatdQ, while pending violations are fixed.

gcc/ada/

* par-ch3.adb, sem_ch4.adb (P_Discrete_Range, Analyze_Logical_Op,
Analyze_Short_Circuit): Add calls to Check_Xtra_Parens.
* par-ch5.adb (P_Condition): Move logic to Check_Xtra_Parens.
* style.ads, styleg.adb, styleg.ads (Check_Xtra_Parens): Move logic
related to expressions requiring parentheses here.

Tested on x86_64-pc-linux-gnu, committed on master.

---
 gcc/ada/par-ch3.adb | 10 ++
 gcc/ada/par-ch5.adb | 17 +++--
 gcc/ada/sem_ch4.adb | 27 +++
 gcc/ada/style.ads   |  7 ---
 gcc/ada/styleg.adb  | 20 +---
 gcc/ada/styleg.ads  |  7 ---
 6 files changed, 65 insertions(+), 23 deletions(-)

diff --git a/gcc/ada/par-ch3.adb b/gcc/ada/par-ch3.adb
index b763d414763..7126afbfbeb 100644
--- a/gcc/ada/par-ch3.adb
+++ b/gcc/ada/par-ch3.adb
@@ -3064,10 +3064,20 @@ package body Ch3 is
   elsif Token = Tok_Dot_Dot then
  Range_Node := New_Node (N_Range, Token_Ptr);
  Set_Low_Bound (Range_Node, Expr_Node);
+
+ if Style_Check then
+Style.Check_Xtra_Parens (Expr_Node);
+ end if;
+
  Scan; -- past ..
  Expr_Node := P_Expression;
  Check_Simple_Expression (Expr_Node);
  Set_High_Bound (Range_Node, Expr_Node);
+
+ if Style_Check then
+Style.Check_Xtra_Parens (Expr_Node);
+ end if;
+
  return Range_Node;
 
   --  Otherwise we must have a subtype mark, or an Ada 2012 iterator
diff --git a/gcc/ada/par-ch5.adb b/gcc/ada/par-ch5.adb
index 8f7224517bc..6099a78effb 100644
--- a/gcc/ada/par-ch5.adb
+++ b/gcc/ada/par-ch5.adb
@@ -1355,22 +1355,11 @@ package body Ch5 is
 
  return Cond;
 
-  --  Otherwise check for redundant parentheses but do not emit messages
-  --  about expressions that require parentheses (e.g. conditional,
-  --  quantified or declaration expressions).
+  --  Otherwise check for redundant parentheses
 
   else
- if Style_Check
-   and then
- Paren_Count (Cond) >
-   (if Nkind (Cond) in N_Case_Expression
- | N_Expression_With_Actions
- | N_If_Expression
- | N_Quantified_Expression
-then 1
-else 0)
- then
-Style.Check_Xtra_Parens (First_Sloc (Cond));
+ if Style_Check then
+Style.Check_Xtra_Parens (Cond, Enable => True);
  end if;
 
  --  And return the result
diff --git a/gcc/ada/sem_ch4.adb b/gcc/ada/sem_ch4.adb
index 03737db90d4..e9c5b9f8a33 100644
--- a/gcc/ada/sem_ch4.adb
+++ b/gcc/ada/sem_ch4.adb
@@ -65,6 +65,7 @@ with Sinfo;  use Sinfo;
 with Sinfo.Nodes;use Sinfo.Nodes;
 with Sinfo.Utils;use Sinfo.Utils;
 with Snames; use Snames;
+with Style;  use Style;
 with Tbuild; use Tbuild;
 with Uintp;  use Uintp;
 with Warnsw; use Warnsw;
@@ -3134,6 +3135,20 @@ package body Sem_Ch4 is
 
   Operator_Check (N);
   Check_Function_Writable_Actuals (N);
+
+  if Style_Check then
+ if Nkind (L) not in N_Short_Circuit | N_Op_And | N_Op_Or | N_Op_Xor
+   and then Is_Boolean_Type (Etype (L))
+ then
+Check_Xtra_Parens (L);
+ end if;
+
+ if Nkind (R) not in N_Short_Circuit | N_Op_And | N_Op_Or | N_Op_Xor
+   and then Is_Boolean_Type (Etype (R))
+ then
+Check_Xtra_Parens (R);
+ end if;
+  end if;
end Analyze_Logical_Op;
 
---
@@ -6006,6 +6021,18 @@ package body Sem_Ch4 is
  Resolve (R, Standard_Boolean);
  Set_Etype (N, Standard_Boolean);
   end if;
+
+  if Style_Check then
+ if Nkind (L) not in N_Short_Circuit | N_Op_And | N_Op_Or | N_Op_Xor
+ then
+Check_Xtra_Parens (L);
+ end if;
+
+ if Nkind (R) not in N_Short_Circuit | N_Op_And | N_Op_Or | N_Op_Xor
+ then
+Check_Xtra_Parens (R);
+ end if;
+  end if;
end Analyze_Short_Circuit;
 
---
diff --git a/gcc/ada/style.ads b/gcc/ada/style.ads
index 35118f4d094..4a7faff31e3 100644
--- a/gcc/ada/style.ads
+++ b/gcc/ada/style.ads
@@ -28,6 +28,7 @@
 --  gathered in a separate package so that they can more easily be customized.
 --  Calls to these subprograms are only made if Opt.Style_Check is set True.
 
+with Debug; use Debug;
 with Errout;
 with Styleg;
 with Types;use Types;
@@ -192,10 +193,10 @@ package Style is
  renames Style_Inst.Check_Vertical_Bar;
--  Called after scanning a vertical bar to check spacing
 
-   procedure Check_Xtra_Parens (Loc : Source_Ptr)
+   procedure Check_Xtra_Parens (N : Node_Id; Enable : Boolean := 

[COMMITTED] ada: Accept Assert pragmas in expression functions

2023-05-22 Thread Marc Poulhiès via Gcc-patches
From: Steve Baird 

gcc/ada/

* sem_ch4.adb (Analyze_Expression_With_Actions.Check_Action_Ok):
Accept an executable pragma occurring in a declare expression as
per AI22-0045. This means Assert and Inspection_Point pragmas as
well as any implementation-defined pragmas that the implementation
chooses to categorize as executable. Currently Assume and Debug
are the only such pragmas.

Tested on x86_64-pc-linux-gnu, committed on master.

---
 gcc/ada/sem_ch4.adb | 19 ++-
 1 file changed, 18 insertions(+), 1 deletion(-)

diff --git a/gcc/ada/sem_ch4.adb b/gcc/ada/sem_ch4.adb
index e9c5b9f8a33..5b013dfb63d 100644
--- a/gcc/ada/sem_ch4.adb
+++ b/gcc/ada/sem_ch4.adb
@@ -2411,10 +2411,27 @@ package body Sem_Ch4 is
   return; -- ???For now; the RM rule is a bit more complicated
end if;
 
+when N_Pragma =>
+   declare
+  --  See AI22-0045 pragma categorization.
+  subtype Executable_Pragma_Id is Pragma_Id
+with Predicate => Executable_Pragma_Id in
+--  language-defined executable pragmas
+  Pragma_Assert | Pragma_Inspection_Point
+
+--  GNAT-defined executable pragmas
+| Pragma_Assume | Pragma_Debug;
+   begin
+  if Get_Pragma_Id (A) in Executable_Pragma_Id then
+ return;
+  end if;
+   end;
+
 when others =>
-   null; -- Nothing else allowed, not even pragmas
+   null; -- Nothing else allowed
  end case;
 
+ --  We could mention pragmas in the message text; let's not.
  Error_Msg_N ("object renaming or constant declaration expected", A);
   end Check_Action_OK;
 
-- 
2.40.0



[COMMITTED] ada: Support calls through dereferences in Find_Actual

2023-05-22 Thread Marc Poulhiès via Gcc-patches
From: Claire Dross 

Return the corresponding formal in the designated subprogram profile in
that case.

gcc/ada/

* sem_util.adb (Find_Actual): On calls through dereferences,
return the corresponding formal in the designated subprogram
profile.

Tested on x86_64-pc-linux-gnu, committed on master.

---
 gcc/ada/sem_util.adb | 46 
 1 file changed, 38 insertions(+), 8 deletions(-)

diff --git a/gcc/ada/sem_util.adb b/gcc/ada/sem_util.adb
index ef591c935eb..3ea7ef506df 100644
--- a/gcc/ada/sem_util.adb
+++ b/gcc/ada/sem_util.adb
@@ -8604,6 +8604,7 @@ package body Sem_Util is
   Context  : constant Node_Id := Parent (N);
   Actual   : Node_Id;
   Call_Nam : Node_Id;
+  Call_Ent : Node_Id := Empty;
 
begin
   if Nkind (Context) in N_Indexed_Component | N_Selected_Component
@@ -8652,13 +8653,42 @@ package body Sem_Util is
 Call_Nam := Selector_Name (Call_Nam);
  end if;
 
- if Is_Entity_Name (Call_Nam)
-   and then Present (Entity (Call_Nam))
-   and then (Is_Generic_Subprogram (Entity (Call_Nam))
-  or else Is_Overloadable (Entity (Call_Nam))
-  or else Ekind (Entity (Call_Nam)) in E_Entry_Family
- | E_Subprogram_Body
- | E_Subprogram_Type)
+ --  If Call_Nam is an entity name, get its entity
+
+ if Is_Entity_Name (Call_Nam) then
+Call_Ent := Entity (Call_Nam);
+
+ --  If it is a dereference, get the designated subprogram type
+
+ elsif Nkind (Call_Nam) = N_Explicit_Dereference then
+declare
+   Typ : Entity_Id := Etype (Prefix (Call_Nam));
+begin
+   if Present (Full_View (Typ)) then
+  Typ := Full_View (Typ);
+   elsif Is_Private_Type (Typ)
+ and then Present (Underlying_Full_View (Typ))
+   then
+  Typ := Underlying_Full_View (Typ);
+   end if;
+
+   if Is_Access_Type (Typ) then
+  Call_Ent := Directly_Designated_Type (Typ);
+   else
+  pragma Assert (Has_Implicit_Dereference (Typ));
+  Formal := Empty;
+  Call   := Empty;
+  return;
+   end if;
+end;
+ end if;
+
+ if Present (Call_Ent)
+   and then (Is_Generic_Subprogram (Call_Ent)
+  or else Is_Overloadable (Call_Ent)
+  or else Ekind (Call_Ent) in E_Entry_Family
+| E_Subprogram_Body
+| E_Subprogram_Type)
and then not Is_Overloaded (Call_Nam)
  then
 --  If node is name in call it is not an actual
@@ -8672,7 +8702,7 @@ package body Sem_Util is
 --  Fall here if we are definitely a parameter
 
 Actual := First_Actual (Call);
-Formal := First_Formal (Entity (Call_Nam));
+Formal := First_Formal (Call_Ent);
 while Present (Formal) and then Present (Actual) loop
if Actual = N then
   return;
-- 
2.40.0



  1   2   >