RE: [PATCH] RISC-V: Fix out of range memory access of machine mode table

2023-06-20 Thread Li, Pan2 via Gcc-patches
Thanks Jakub for the useful comments. I went through the mailing list and have a
refined version below. However, I am not sure I understood correctly about
adding a new field named mode_bits to struct lto_file_decl_data; it looks
unnecessary up to a point.

Thanks again for your patient coaching.

diff --git a/gcc/lto-streamer-in.cc b/gcc/lto-streamer-in.cc
index 2cb83406db5..2a0720b4e6f 100644
--- a/gcc/lto-streamer-in.cc
+++ b/gcc/lto-streamer-in.cc
@@ -1985,8 +1985,6 @@ lto_input_mode_table (struct lto_file_decl_data 
*file_data)
 internal_error ("cannot read LTO mode table from %s",
file_data->file_name);
 
-  unsigned char *table = ggc_cleared_vec_alloc (1 << 8);
-  file_data->mode_table = table;
   const struct lto_simple_header_with_strings *header
 = (const struct lto_simple_header_with_strings *) data;
   int string_offset;
@@ -1998,16 +1996,22 @@ lto_input_mode_table (struct lto_file_decl_data 
*file_data)
header->string_size, vNULL);
   bitpack_d bp = streamer_read_bitpack (&ib);
 
+  unsigned mode_bits = bp_unpack_value (&bp, 5);
+  unsigned char *table = ggc_cleared_vec_alloc (1 << mode_bits);
+
+  file_data->mode_table = table;
+  file_data->mode_bits = mode_bits;
+
   table[VOIDmode] = VOIDmode;
   table[BLKmode] = BLKmode;
   unsigned int m;
-  while ((m = bp_unpack_value (&bp, 8)) != VOIDmode)
+  while ((m = bp_unpack_value (&bp, mode_bits)) != VOIDmode)
 {
   enum mode_class mclass
= bp_unpack_enum (&bp, mode_class, MAX_MODE_CLASS);
   poly_uint16 size = bp_unpack_poly_value (&bp, 16);
   poly_uint16 prec = bp_unpack_poly_value (&bp, 16);
-  machine_mode inner = (machine_mode) bp_unpack_value (&bp, 8);
+  machine_mode inner = (machine_mode) bp_unpack_value (&bp, mode_bits);
   poly_uint16 nunits = bp_unpack_poly_value (&bp, 16);
   unsigned int ibit = 0, fbit = 0;
   unsigned int real_fmt_len = 0;
diff --git a/gcc/lto-streamer-out.cc b/gcc/lto-streamer-out.cc
index 5ab2eb4301e..77250ee2385 100644
--- a/gcc/lto-streamer-out.cc
+++ b/gcc/lto-streamer-out.cc
@@ -3196,6 +3196,11 @@ lto_write_mode_table (void)
if (inner_m != m)
  streamer_mode_table[(int) inner_m] = 1;
   }
+
+  /* Pack the mode_bits value within 5 bits (up to 31) of the beginning.  */
+  unsigned mode_bits = ceil_log2 (MAX_MACHINE_MODE);
+  bp_pack_value (&bp, mode_bits, 5);
+
   /* First stream modes that have GET_MODE_INNER (m) == m,
  so that we can refer to them afterwards.  */
   for (int pass = 0; pass < 2; pass++)
@@ -3205,11 +3210,11 @@ lto_write_mode_table (void)
  machine_mode m = (machine_mode) i;
  if ((GET_MODE_INNER (m) == m) ^ (pass == 0))
continue;
- bp_pack_value (&bp, m, 8);
+ bp_pack_value (&bp, m, mode_bits);
  bp_pack_enum (&bp, mode_class, MAX_MODE_CLASS, GET_MODE_CLASS (m));
  bp_pack_poly_value (&bp, GET_MODE_SIZE (m), 16);
  bp_pack_poly_value (&bp, GET_MODE_PRECISION (m), 16);
- bp_pack_value (&bp, GET_MODE_INNER (m), 8);
+ bp_pack_value (&bp, GET_MODE_INNER (m), mode_bits);
  bp_pack_poly_value (&bp, GET_MODE_NUNITS (m), 16);
  switch (GET_MODE_CLASS (m))
{
@@ -3229,7 +3234,7 @@ lto_write_mode_table (void)
}
  bp_pack_string (ob, &bp, GET_MODE_NAME (m), true);
}
-  bp_pack_value (&bp, VOIDmode, 8);
+  bp_pack_value (&bp, VOIDmode, mode_bits);
 
   streamer_write_bitpack (&bp);
 
diff --git a/gcc/lto-streamer.h b/gcc/lto-streamer.h
index fc7133d07ba..443f0cd616e 100644
--- a/gcc/lto-streamer.h
+++ b/gcc/lto-streamer.h
@@ -604,6 +604,8 @@ struct GTY(()) lto_file_decl_data
   int order_base;
 
   int unit_base;
+
+  unsigned mode_bits;
 };
 
 typedef struct lto_file_decl_data *lto_file_decl_data_ptr;
diff --git a/gcc/tree-streamer.cc b/gcc/tree-streamer.cc
index ed65a7692e3..a28ef9c7920 100644
--- a/gcc/tree-streamer.cc
+++ b/gcc/tree-streamer.cc
@@ -35,7 +35,7 @@ along with GCC; see the file COPYING3.  If not see
During streaming in, we translate the on the disk mode using this
table.  For normal LTO it is set to identity, for ACCEL_COMPILER
depending on the mode_table content.  */
-unsigned char streamer_mode_table[1 << 8];
+unsigned char streamer_mode_table[MAX_MACHINE_MODE];
 
 /* Check that all the TS_* structures handled by the streamer_write_* and
streamer_read_* routines are exactly ALL the structures defined in
diff --git a/gcc/tree-streamer.h b/gcc/tree-streamer.h
index 170d61cf20b..10718b03640 100644
--- a/gcc/tree-streamer.h
+++ b/gcc/tree-streamer.h
@@ -75,7 +75,7 @@ void streamer_write_tree_body (struct output_block *, tree);
 void streamer_write_integer_cst (struct output_block *, tree);
 
 /* In tree-streamer.cc.  */
-extern unsigned char streamer_mode_table[1 << 8];
+extern unsigned char streamer_mode_table[MAX_MACHINE_MODE];
 void streamer_check_handled_ts_structures (void);
 bool streamer_tree_cache_insert (struc

RE: [PATCH] RISC-V: convert the mulh with 0 to mov 0 to the reg.

2023-06-20 Thread Wang, Yanzhang via Gcc-patches
Thanks, you are right. I have not considered the iterator much. I picked it
from one of pred_mulh directly. It should be able to work with VFULL_I.

Yanzhang

From: juzhe.zh...@rivai.ai 
Sent: Wednesday, June 21, 2023 2:21 PM
To: Wang, Yanzhang ; gcc-patches 

Cc: Kito.cheng ; Li, Pan2 ; Wang, 
Yanzhang ; Robin Dapp ; 
jeffreyalaw 
Subject: Re: [PATCH] RISC-V: convert the mulh with 0 to mov 0 to the reg.

Good catch!
vmulh.vx v24,v24,zero -> vmv.v.i v1,0
can eliminate use of v24 and reduce register pressure.

But I wonder why you pick only VI_QHS?


+  [(set (match_operand:VI_QHS 0 "register_operand")

SEW = 64 should always have such optimization.

Thanks.

juzhe.zh...@rivai.ai

From: yanzhang.wang
Date: 2023-06-21 14:08
To: gcc-patches
CC: juzhe.zhong; 
kito.cheng; pan2.li; 
yanzhang.wang
Subject: [PATCH] RISC-V: convert the mulh with 0 to mov 0 to the reg.
From: Yanzhang Wang <yanzhang.w...@intel.com>

This patch will optimize the below mulh example,

vint32m1_t shortcut_for_riscv_vmulh_case_0(vint32m1_t v1, size_t vl) {
  return __riscv_vmulh_vx_i32m1(v1, 0, vl);
}

from mulh pattern

vsetvli   zero, a2, e32, m1, ta, ma
vmulh.vx  v24, v24, zero
vs1r.v    v24, 0(a0)

to below vmv.

vsetvli zero,a2,e32,m1,ta,ma
vmv.v.i v1,0
vs1r.v  v1,0(a0)

It will replace the mul-by-constant-0 instruction with a simple mov
instruction.

Signed-off-by: Yanzhang Wang <yanzhang.w...@intel.com>

gcc/ChangeLog:

* config/riscv/autovec-opt.md: Add a split pattern.

gcc/testsuite/ChangeLog:

* gcc.target/riscv/rvv/base/binop_vx_constraint-121.c: The mul
  with 0 will be simplified to vmv.v.i.
* gcc.target/riscv/rvv/autovec/vmulh-with-zero.cc: New test.
---
gcc/config/riscv/autovec-opt.md   | 30 +++
.../riscv/rvv/autovec/vmulh-with-zero.cc  | 19 
.../riscv/rvv/base/binop_vx_constraint-121.c  |  3 +-
3 files changed, 51 insertions(+), 1 deletion(-)
create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/autovec/vmulh-with-zero.cc

diff --git a/gcc/config/riscv/autovec-opt.md b/gcc/config/riscv/autovec-opt.md
index 28040805b23..9c14be964b5 100644
--- a/gcc/config/riscv/autovec-opt.md
+++ b/gcc/config/riscv/autovec-opt.md
@@ -405,3 +405,33 @@
   "vmv.x.s\t%0,%1"
   [(set_attr "type" "vimovvx")
(set_attr "mode" "")])
+
+;; Simplify VMULH (V, 0) Instructions to vmv.v.i.
+(define_split
+  [(set (match_operand:VI_QHS 0 "register_operand")
+ (if_then_else:VI_QHS
+   (unspec:
+ [(match_operand: 1 "vector_all_trues_mask_operand")
+   (match_operand 5 "vector_length_operand")
+   (match_operand 6 "const_int_operand")
+   (match_operand 7 "const_int_operand")
+   (match_operand 8 "const_int_operand")
+   (reg:SI VL_REGNUM)
+   (reg:SI VTYPE_REGNUM)] UNSPEC_VPREDICATE)
+   (unspec:VI_QHS
+ [(vec_duplicate:VI_QHS
+(match_operand: 4 "reg_or_0_operand"))
+   (match_operand:VI_QHS 3 "register_operand")] VMULH)
+   (match_operand:VI_QHS 2 "vector_merge_operand")))]
+  "TARGET_VECTOR
+ && rtx_equal_p (operands[4], CONST0_RTX (GET_MODE (operands[4])))"
+  [(const_int 0)]
+  {
+machine_mode mask_mode = riscv_vector::get_mask_mode (mode)
+  .require ();
+emit_insn (gen_pred_mov (mode, operands[0], CONST1_RTX (mask_mode),
+   RVV_VUNDEF (mode), CONST0_RTX (GET_MODE (operands[0])),
+   operands[5], operands[6], operands[7], operands[8]));
+DONE;
+  }
+)
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/vmulh-with-zero.cc 
b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vmulh-with-zero.cc
new file mode 100644
index 000..6e4a3d62bc0
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vmulh-with-zero.cc
@@ -0,0 +1,19 @@
+/* { dg-do compile } */
+/* { dg-options "-march=rv64gcv -mabi=lp64 -O3 -Wno-psabi" } */
+
+#include "riscv_vector.h"
+
+#define VMULH_WITH_LMUL(X) \
+  vint32m##X##_t shortcut_for_riscv_vmulh_case_##X (vint32m##X##_t v1,\
+  size_t vl) {  \
+return __riscv_vmulh_vx_i32m ##X (v1, 0, vl); \
+  }
+
+
+VMULH_WITH_LMUL (1)
+VMULH_WITH_LMUL (2)
+VMULH_WITH_LMUL (4)
+VMULH_WITH_LMUL (8)
+VMULH_WITH_LMUL (f2)
+
+/* { dg-final { scan-assembler-times {vmv\.v\.i\sv[0-9]+,0} 5} */
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/base/binop_vx_constraint-121.c 
b/gcc/testsuite/gcc.target/riscv/rvv/base/binop_vx_constraint-121.c
index 4d2de91bc14..d1473274137 100644
--- a/gcc/testsuite/gcc.target/riscv/rvv/base/binop_vx_constraint-121.c
+++ b/gcc/testsuite/gcc.target/riscv/rvv/base/binop_vx_constraint-121.c
@@ -50,6 +50,7 @@ void f6 (void * in, void *out, int32_t x)
 __riscv_vse64_v_i64m1 (out, v3, 4);
}
-/* { dg-final { scan-assembler-times {vmulh\.vx\s+v[0-9]+,\s*v[0-9]+,zero} 2 } 
} */
+/* { dg-final { scan-assembler-times {vmv\.v\

Re: [PATCH] Improve DSE to handle stores before __builtin_unreachable ()

2023-06-20 Thread Richard Biener via Gcc-patches
On Tue, 20 Jun 2023, Jeff Law wrote:

> 
> 
> On 6/20/23 00:59, Richard Biener via Gcc-patches wrote:
> > DSE isn't good at identifying program points that end lifetime
> > of variables that are not associated with virtual operands.  But
> > at least for those that end basic-blocks we can handle the simple
> > case where this ending is in the same basic-block as the definition
> > we want to elide.  That should catch quite some common cases already.
> > 
> > Bootstrapped and tested on x86_64-unknown-linux-gnu.
> > 
> > As you can see from the testcase I had to adjust this possibly can
> > lead to more severe issues when one forgets a return (the C++ frontend
> > places builtin_unreachable () there).  I'm still planning to push
> > this improvement unless I hear objections.
> > 
> > Thanks,
> > Richard.
> > 
> >  * tree-ssa-dse.cc (dse_classify_store): When we found
> >  no defs and the basic-block with the original definition
> >  ends in __builtin_unreachable[_trap] the store is dead.
> > 
> >  * gcc.dg/tree-ssa/ssa-dse-47.c: New testcase.
> >  * c-c++-common/asan/pr106558.c: Avoid undefined behavior
> >  due to missing return.
> I thought during the introduction of erroneous path isolation that we
> concluded stores, calls and such had observable side effects that must be
> preserved, even when we hit a block that leads to __builtin_unreachable.

Indeed, I remember we repeatedly hit this in the past.  But 
double-checking I see that we instrument

  if (x)
*(int *)0 = 0;

as

   [local count: 1073741824]:
  if (x_2(D) != 0)
goto ; [50.00%]
  else
goto ; [50.00%]

   [local count: 536870913]:
  MEM[(int *)0B] ={v} 0;
  __builtin_trap ();

path isolation doesn't seem to use __builtin_unreachable ().  I did
not add __builtin_trap () as possible sink (but I did want to treat
__builtin_unreachable () and __builtin_unreachable_trap () the same
way).  The pass also marks the offending store as volatile.

We do have testsuite coverage that this happens (dump-scanning,
not runtime it seems).

So yes, I think preserving the original trap kind (if there is any)
is important and it still seems to work.  I don't remember whether
we have any test coverage for that though.  I'll also note that
__builtin_trap () has virtual operands (def and use) while
__builtin_unreachable[_trap] () are 'const'.  Honza correctly
says they should probably be novops instead of 'const' preserving
the fact that they have side-effects.

> Don't get me wrong, I'm all for removing the memory references if it's safe to
> do so.

I think it's desirable for assertions.  Since we elide plain
__builtin_unreachable () and fall thru wherever it leads that
shouldn't be an issue.

If I manually add a __builtin_unreachable () to the above case
I see the *(int *)0 = 0; store DSEd.  Maybe we should avoid
removing stores that might trap here?  POSIX wise such a trap
could be a way to jump out of the path leading to unreachable ()
via siglongjmp ...

Thanks,
Richard.


Re: Re: [PATCH] RISC-V: Implement autovec copysign.

2023-06-20 Thread juzhe.zh...@rivai.ai
LGTM as long as you remove all stuff related to UNSPEC_VNCOPYSIGN

Thanks.


juzhe.zh...@rivai.ai
 
From: Robin Dapp
Date: 2023-06-21 14:36
To: 钟居哲; gcc-patches; palmer; kito.cheng; Jeff Law
CC: rdapp.gcc
Subject: Re: [PATCH] RISC-V: Implement autovec copysign.
> You should remove all "unspec" related of "n" ncopysign including 
> riscv-vector-builtins-bases.cc
> vector.md/ vector-iterators.md 
 
Ah, there was indeed one stray UNSPEC_VNCOPYSIGN in the iterators, thanks.  Any
other comments before I send V2?
 
Regards
Robin
 


Re: [PATCH] RISC-V: Implement autovec copysign.

2023-06-20 Thread Robin Dapp via Gcc-patches
> You should remove all "unspec" related of "n" ncopysign including 
> riscv-vector-builtins-bases.cc
> vector.md/ vector-iterators.md 

Ah, there was indeed one stray UNSPEC_VNCOPYSIGN in the iterators, thanks.  Any
other comments before I send V2?

Regards
 Robin


Re: Re: [PATCH] RISC-V: convert the mulh with 0 to mov 0 to the reg.

2023-06-20 Thread juzhe.zh...@rivai.ai
Oh, yes. Thanks to Robin for pointing this out.

@yanzhang, could you refine this patch further to cover more optimizations?

Thanks.


juzhe.zh...@rivai.ai
 
From: Robin Dapp
Date: 2023-06-21 14:27
To: yanzhang.wang; gcc-patches
CC: rdapp.gcc; juzhe.zhong; kito.cheng; pan2.li
Subject: Re: [PATCH] RISC-V: convert the mulh with 0 to mov 0 to the reg.
Hi Yanzhang,
 
while I appreciate the optimization, I'm a bit wary about just adding a special
case for "0".  Is that so common? Wouldn't we also like to have
  * pow2_p (val) == << val and others?
 
* 1 should also be covered.
 
Regards
Robin
 


[PATCH 5/5] x86: yet more PR target/100711-like splitting

2023-06-20 Thread Jan Beulich via Gcc-patches
Following two-operand bitwise operations, add another splitter to also
deal with not followed by broadcast all on its own, which can be
expressed as simple embedded broadcast instead once a broadcast operand
is actually permitted in the respective insn. While there also permit
a broadcast operand in the corresponding expander.

gcc/

* config/i386/sse.md: New splitters to simplify
not;vec_duplicate as a singular vpternlog.
(one_cmpl2): Allow broadcast for operand 1.
(one_cmpl2): Likewise.

gcc/testsuite/

* gcc.target/i386/pr100711-6.c: New test.
---
For the purpose here (and elsewhere) bcst_vector_operand() (really:
bcst_mem_operand()) isn't permissive enough: We'd want it to allow
128-bit and 256-bit types as well irrespective of AVX512VL being
enabled. This would likely require a new predicate
(bcst_intvec_operand()?) and a new constraint (BR? Bi?). (Yet for name
selection it will want considering that this is applicable to certain
non-calculational FP operations as well.)

--- a/gcc/config/i386/sse.md
+++ b/gcc/config/i386/sse.md
@@ -17156,7 +17156,7 @@
 
 (define_expand "one_cmpl2"
   [(set (match_operand:VI 0 "register_operand")
-   (xor:VI (match_operand:VI 1 "vector_operand")
+   (xor:VI (match_operand:VI 1 "bcst_vector_operand")
(match_dup 2)))]
   "TARGET_SSE"
 {
@@ -17168,7 +17168,7 @@
 
 (define_insn "one_cmpl2"
   [(set (match_operand:VI 0 "register_operand" "=v,v")
-   (xor:VI (match_operand:VI 1 "nonimmediate_operand" "v,m")
+   (xor:VI (match_operand:VI 1 "bcst_vector_operand" "vBr,m")
(match_operand:VI 2 "vector_all_ones_operand" "BC,BC")))]
   "TARGET_AVX512F
&& (!
@@ -17191,6 +17191,19 @@
  (symbol_ref " == 64 || TARGET_AVX512VL")
  (const_int 1)))])
 
+(define_split
+  [(set (match_operand:VI48_AVX512F 0 "register_operand")
+   (vec_duplicate:VI48_AVX512F
+ (not:
+   (match_operand: 1 "nonimmediate_operand"]
+  " == 64 || TARGET_AVX512VL
+   || (TARGET_AVX512F && !TARGET_PREFER_AVX256)"
+  [(set (match_dup 0)
+   (xor:VI48_AVX512F
+ (vec_duplicate:VI48_AVX512F (match_dup 1))
+ (match_dup 2)))]
+  "operands[2] = CONSTM1_RTX (mode);")
+
 (define_expand "_andnot3"
   [(set (match_operand:VI_AVX2 0 "register_operand")
(and:VI_AVX2
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr100711-6.c
@@ -0,0 +1,18 @@
+/* { dg-do compile } */
+/* { dg-options "-mavx512f -mno-avx512vl -mprefer-vector-width=512 -O2" } */
+
+typedef int v16si __attribute__ ((vector_size (64)));
+typedef long long v8di __attribute__((vector_size (64)));
+
+v16si foo_v16si (const int *a)
+{
+return (__extension__ (v16si) {~*a, ~*a, ~*a, ~*a, ~*a, ~*a, ~*a, ~*a,
+  ~*a, ~*a, ~*a, ~*a, ~*a, ~*a, ~*a, ~*a});
+}
+
+v8di foo_v8di (const long long *a)
+{
+return (__extension__ (v8di) {~*a, ~*a, ~*a, ~*a, ~*a, ~*a, ~*a, ~*a});
+}
+
+/* { dg-final { scan-assembler-times "vpternlog\[dq\]\[ \\t\]+\\\$0x55, 
\\(%(?:eax|rdi|edi)\\)\\\{1to\[1-8\]+\\\}" 2 } } */



Re: [PATCH] RISC-V: convert the mulh with 0 to mov 0 to the reg.

2023-06-20 Thread Robin Dapp via Gcc-patches
Hi Yanzhang,

while I appreciate the optimization, I'm a bit wary about just adding a special
case for "0".  Is that so common? Wouldn't we also like to have
  * pow2_p (val) == << val and others?

* 1 should also be covered.

Regards
 Robin


[PATCH 4/5] x86: further PR target/100711-like splitting

2023-06-20 Thread Jan Beulich via Gcc-patches
With respective two-operand bitwise operations now expressible by a
single VPTERNLOG, add splitters to also deal with ior and xor
counterparts of the original and-only case. Note that the splitters need
to be separate, as the placement of "not" differs in the final insns
(*iornot3, *xnor3) which are intended to pick up one half of
the result.

gcc/

* config/i386/sse.md: New splitters to simplify
not;vec_duplicate;{ior,xor} as vec_duplicate;{iornot,xnor}.

gcc/testsuite/

* gcc.target/i386/pr100711-4.c: New test.
* gcc.target/i386/pr100711-5.c: New test.

--- a/gcc/config/i386/sse.md
+++ b/gcc/config/i386/sse.md
@@ -17366,6 +17366,36 @@
(match_dup 2)))]
   "operands[3] = gen_reg_rtx (mode);")
 
+(define_split
+  [(set (match_operand:VI 0 "register_operand")
+   (ior:VI
+ (vec_duplicate:VI
+   (not:
+ (match_operand: 1 "nonimmediate_operand")))
+ (match_operand:VI 2 "vector_operand")))]
+  " == 64 || TARGET_AVX512VL
+   || (TARGET_AVX512F && !TARGET_PREFER_AVX256)"
+  [(set (match_dup 3)
+   (vec_duplicate:VI (match_dup 1)))
+   (set (match_dup 0)
+   (ior:VI (not:VI (match_dup 3)) (match_dup 2)))]
+  "operands[3] = gen_reg_rtx (mode);")
+
+(define_split
+  [(set (match_operand:VI 0 "register_operand")
+   (xor:VI
+ (vec_duplicate:VI
+   (not:
+ (match_operand: 1 "nonimmediate_operand")))
+ (match_operand:VI 2 "vector_operand")))]
+  " == 64 || TARGET_AVX512VL
+   || (TARGET_AVX512F && !TARGET_PREFER_AVX256)"
+  [(set (match_dup 3)
+   (vec_duplicate:VI (match_dup 1)))
+   (set (match_dup 0)
+   (not:VI (xor:VI (match_dup 3) (match_dup 2]
+  "operands[3] = gen_reg_rtx (mode);")
+
 (define_insn "*andnot3_mask"
   [(set (match_operand:VI48_AVX512VL 0 "register_operand" "=v")
(vec_merge:VI48_AVX512VL
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr100711-4.c
@@ -0,0 +1,42 @@
+/* { dg-do compile } */
+/* { dg-options "-mavx512bw -mno-avx512vl -mprefer-vector-width=512 -O2" } */
+
+typedef char v64qi __attribute__ ((vector_size (64)));
+typedef short v32hi __attribute__ ((vector_size (64)));
+typedef int v16si __attribute__ ((vector_size (64)));
+typedef long long v8di __attribute__((vector_size (64)));
+
+v64qi foo_v64qi (char a, v64qi b)
+{
+return (__extension__ (v64qi) {~a, ~a, ~a, ~a, ~a, ~a, ~a, ~a,
+   ~a, ~a, ~a, ~a, ~a, ~a, ~a, ~a,
+   ~a, ~a, ~a, ~a, ~a, ~a, ~a, ~a,
+   ~a, ~a, ~a, ~a, ~a, ~a, ~a, ~a,
+  ~a, ~a, ~a, ~a, ~a, ~a, ~a, ~a,
+  ~a, ~a, ~a, ~a, ~a, ~a, ~a, ~a,
+  ~a, ~a, ~a, ~a, ~a, ~a, ~a, ~a,
+  ~a, ~a, ~a, ~a, ~a, ~a, ~a, ~a}) | b;
+}
+
+v32hi foo_v32hi (short a, v32hi b)
+{
+return (__extension__ (v32hi) {~a, ~a, ~a, ~a, ~a, ~a, ~a, ~a,
+   ~a, ~a, ~a, ~a, ~a, ~a, ~a, ~a,
+   ~a, ~a, ~a, ~a, ~a, ~a, ~a, ~a,
+  ~a, ~a, ~a, ~a, ~a, ~a, ~a, ~a}) | b;
+}
+
+v16si foo_v16si (int a, v16si b)
+{
+return (__extension__ (v16si) {~a, ~a, ~a, ~a, ~a, ~a, ~a, ~a,
+  ~a, ~a, ~a, ~a, ~a, ~a, ~a, ~a}) | b;
+}
+
+v8di foo_v8di (long long a, v8di b)
+{
+return (__extension__ (v8di) {~a, ~a, ~a, ~a, ~a, ~a, ~a, ~a}) | b;
+}
+
+/* { dg-final { scan-assembler-times "vpternlog\[dq\]\[ \\t\]+\\\$0xbb" 4 { 
target { ! ia32 } } } } */
+/* { dg-final { scan-assembler-times "vpternlog\[dq\]\[ \\t\]+\\\$0xbb" 2 { 
target { ia32 } } } } */
+/* { dg-final { scan-assembler-times "vpternlog\[dq\]\[ \\t\]+\\\$0xdd" 2 { 
target { ia32 } } } } */
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr100711-5.c
@@ -0,0 +1,40 @@
+/* { dg-do compile } */
+/* { dg-options "-mavx512bw -mno-avx512vl -mprefer-vector-width=512 -O2" } */
+
+typedef char v64qi __attribute__ ((vector_size (64)));
+typedef short v32hi __attribute__ ((vector_size (64)));
+typedef int v16si __attribute__ ((vector_size (64)));
+typedef long long v8di __attribute__((vector_size (64)));
+
+v64qi foo_v64qi (char a, v64qi b)
+{
+return (__extension__ (v64qi) {~a, ~a, ~a, ~a, ~a, ~a, ~a, ~a,
+   ~a, ~a, ~a, ~a, ~a, ~a, ~a, ~a,
+   ~a, ~a, ~a, ~a, ~a, ~a, ~a, ~a,
+   ~a, ~a, ~a, ~a, ~a, ~a, ~a, ~a,
+  ~a, ~a, ~a, ~a, ~a, ~a, ~a, ~a,
+  ~a, ~a, ~a, ~a, ~a, ~a, ~a, ~a,
+  ~a, ~a, ~a, ~a, ~a, ~a, ~a, ~a,
+  ~a, ~a, ~a, ~a, ~a, ~a, ~a, ~a}) ^ b;
+}
+
+v32hi foo_v32hi (short a, v32hi b)
+{
+return (__extension__ (v32hi) {~a, ~a, ~a, ~a, ~a, ~a, ~a, ~a,
+   ~a, ~a, ~a, ~a, ~a, ~a, ~

[PATCH 3/5] x86: allow memory operand for AVX2 splitter for PR target/100711

2023-06-20 Thread Jan Beulich via Gcc-patches
The intended broadcast (with AVX512) can very well be done right from
memory.

gcc/

* config/i386/sse.md: Permit non-immediate operand 1 in AVX2
form of splitter for PR target/100711.

--- a/gcc/config/i386/sse.md
+++ b/gcc/config/i386/sse.md
@@ -17356,7 +17356,7 @@
(and:VI_AVX2
  (vec_duplicate:VI_AVX2
(not:
- (match_operand: 1 "register_operand")))
+ (match_operand: 1 "nonimmediate_operand")))
  (match_operand:VI_AVX2 2 "vector_operand")))]
   "TARGET_AVX2"
   [(set (match_dup 3)



[PATCH 2/5] x86: use VPTERNLOG also for certain andnot forms

2023-06-20 Thread Jan Beulich via Gcc-patches
When it's the memory operand which is to be inverted, using VPANDN*
requires a further load instruction. The same can be achieved by a
single VPTERNLOG*. Add two new alternatives (for plain memory and
embedded broadcast), adjusting the predicate for the first operand
accordingly.

Two pre-existing testcases actually end up being affected (improved) by
the change, which is reflected in updated expectations there.

gcc/

PR target/93768
* config/i386/sse.md (*andnot3): Add new alternatives
for memory form operand 1.

gcc/testsuite/

PR target/93768
* gcc.target/i386/avx512f-andn-di-zmm-2.c: New test.
* gcc.target/i386/avx512f-andn-si-zmm-2.c: Adjust expectations
towards generated code.
* gcc.target/i386/pr100711-3.c: Adjust expectations for 32-bit
code.

--- a/gcc/config/i386/sse.md
+++ b/gcc/config/i386/sse.md
@@ -17210,11 +17210,13 @@
   "TARGET_AVX512F")
 
 (define_insn "*andnot3"
-  [(set (match_operand:VI 0 "register_operand" "=x,x,v")
+  [(set (match_operand:VI 0 "register_operand" "=x,x,v,v,v")
(and:VI
- (not:VI (match_operand:VI 1 "vector_operand" "0,x,v"))
- (match_operand:VI 2 "bcst_vector_operand" "xBm,xm,vmBr")))]
-  "TARGET_SSE"
+ (not:VI (match_operand:VI 1 "bcst_vector_operand" "0,x,v,m,Br"))
+ (match_operand:VI 2 "bcst_vector_operand" "xBm,xm,vmBr,v,v")))]
+  "TARGET_SSE
+   && (register_operand (operands[1], mode)
+   || register_operand (operands[2], mode))"
 {
   char buf[64];
   const char *ops;
@@ -17281,6 +17283,15 @@
 case 2:
   ops = "v%s%s\t{%%2, %%1, %%0|%%0, %%1, %%2}";
   break;
+case 3:
+case 4:
+  tmp = "pternlog";
+  ssesuffix = "";
+  if (which_alternative != 4 || TARGET_AVX512VL)
+   ops = "v%s%s\t{$0x44, %%1, %%2, %%0|%%0, %%2, %%1, $0x44}";
+  else
+   ops = "v%s%s\t{$0x44, %%g1, %%g2, %%g0|%%g0, %%g2, %%g1, $0x44}";
+  break;
 default:
   gcc_unreachable ();
 }
@@ -17289,7 +17300,7 @@
   output_asm_insn (buf, operands);
   return "";
 }
-  [(set_attr "isa" "noavx,avx,avx")
+  [(set_attr "isa" "noavx,avx,avx,*,*")
(set_attr "type" "sselog")
(set (attr "prefix_data16")
  (if_then_else
@@ -17297,9 +17308,12 @@
(eq_attr "mode" "TI"))
(const_string "1")
(const_string "*")))
-   (set_attr "prefix" "orig,vex,evex")
+   (set_attr "prefix" "orig,vex,evex,evex,evex")
(set (attr "mode")
-   (cond [(match_test "TARGET_AVX2")
+   (cond [(and (eq_attr "alternative" "3,4")
+   (match_test " < 64 && !TARGET_AVX512VL"))
+(const_string "XI")
+  (match_test "TARGET_AVX2")
 (const_string "")
   (match_test "TARGET_AVX")
 (if_then_else
@@ -17310,7 +17324,15 @@
(match_test "optimize_function_for_size_p (cfun)"))
 (const_string "V4SF")
  ]
- (const_string "")))])
+ (const_string "")))
+   (set (attr "enabled")
+   (cond [(eq_attr "alternative" "3")
+(symbol_ref " == 64 || TARGET_AVX512VL")
+  (eq_attr "alternative" "4")
+(symbol_ref " == 64 || TARGET_AVX512VL
+ || (TARGET_AVX512F && !TARGET_PREFER_AVX256)")
+ ]
+ (const_string "*")))])
 
 ;; PR target/100711: Split notl; vpbroadcastd; vpand as vpbroadcastd; vpandn
 (define_split
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512f-andn-di-zmm-2.c
@@ -0,0 +1,12 @@
+/* { dg-do compile } */
+/* { dg-options "-mavx512f -mno-avx512vl -mprefer-vector-width=512 -O2" } */
+/* { dg-final { scan-assembler-times "vpternlogq\[ \\t\]+\\\$0x44, 
\\(%(?:eax|rdi|edi)\\)\\\{1to\[1-8\]+\\\}, %zmm\[0-9\]+, %zmm0" 1 } } */
+/* { dg-final { scan-assembler-not "vpbroadcast" } } */
+
+#define type __m512i
+#define vec 512
+#define op andnot
+#define suffix epi64
+#define SCALAR long long
+
+#include "avx512-binop-2.h"
--- a/gcc/testsuite/gcc.target/i386/avx512f-andn-si-zmm-2.c
+++ b/gcc/testsuite/gcc.target/i386/avx512f-andn-si-zmm-2.c
@@ -1,7 +1,7 @@
 /* { dg-do compile } */
 /* { dg-options "-mavx512f -O2" } */
-/* { dg-final { scan-assembler-times "vpbroadcastd\[^\n\]*%zmm\[0-9\]+" 1 } } 
*/
-/* { dg-final { scan-assembler-times "vpandnd\[^\n\]*%zmm\[0-9\]+" 1 } } */
+/* { dg-final { scan-assembler-times "vpternlogd\[ \\t\]+\\\$0x44, 
\\(%(?:eax|rdi|edi)\\)\\\{1to\[1-8\]+\\\}, %zmm\[0-9\]+, %zmm0" 1 } } */
+/* { dg-final { scan-assembler-not "vpbroadcast" } } */
 
 #define type __m512i
 #define vec 512
--- a/gcc/testsuite/gcc.target/i386/pr100711-3.c
+++ b/gcc/testsuite/gcc.target/i386/pr100711-3.c
@@ -37,4 +37,6 @@ v8di foo_v8di (long long a, v8di b)
 return (__extension__ (v8di) {~a, ~a, ~a, ~a, ~a, ~a, ~a, ~a}) & b;
 }
 
-/* { dg-final { scan-assembler-times "vpandn" 4 } } */
+/* { dg-final { scan-assembler-times "vpandn" 4 { target { ! ia32 } } } } */
+/* { dg-final { scan-a

[PATCH 1/5] x86: use VPTERNLOG for further bitwise two-vector operations

2023-06-20 Thread Jan Beulich via Gcc-patches
All combinations of and, ior, xor, and not involving two operands can be
expressed that way in a single insn.

gcc/

PR target/93768
* config/i386/i386.cc (ix86_rtx_costs): Further special-case
bitwise vector operations.
* config/i386/sse.md (*iornot3): New insn.
(*xnor3): Likewise.
(*3): Likewise.
(andor): New code iterator.
(nlogic): New code attribute.
(ternlog_nlogic): Likewise.

gcc/testsuite/

PR target/93768
gcc.target/i386/avx512-binop-not-1.h: New.
gcc.target/i386/avx512-binop-not-2.h: New.
gcc.target/i386/avx512f-orn-si-zmm-1.c: New test.
gcc.target/i386/avx512f-orn-si-zmm-2.c: New test.
---
The use of VI matches that in e.g. one_cmpl2 /
one_cmpl2 and *andnot3, despite
(here and there)
- V64QI and V32HI being needlessly excluded when AVX512BW isn't enabled,
- VTI not being covered,
- vector modes more narrow than 16 bytes not being covered.

--- a/gcc/config/i386/i386.cc
+++ b/gcc/config/i386/i386.cc
@@ -21178,6 +21178,32 @@ ix86_rtx_costs (rtx x, machine_mode mode
   return false;
 
 case IOR:
+  if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
+   {
+ /* (ior (not ...) ...) can be a single insn in AVX512.  */
+ if (GET_CODE (XEXP (x, 0)) == NOT && TARGET_AVX512F
+ && (GET_MODE_SIZE (mode) == 64
+ || (TARGET_AVX512VL
+ && (GET_MODE_SIZE (mode) == 32
+ || GET_MODE_SIZE (mode) == 16
+   {
+ rtx right = GET_CODE (XEXP (x, 1)) != NOT
+ ? XEXP (x, 1) : XEXP (XEXP (x, 1), 0);
+
+ *total = ix86_vec_cost (mode, cost->sse_op)
+  + rtx_cost (XEXP (XEXP (x, 0), 0), mode,
+  outer_code, opno, speed)
+  + rtx_cost (right, mode, outer_code, opno, speed);
+ return true;
+   }
+ *total = ix86_vec_cost (mode, cost->sse_op);
+   }
+  else if (GET_MODE_SIZE (mode) > UNITS_PER_WORD)
+   *total = cost->add * 2;
+  else
+   *total = cost->add;
+  return false;
+
 case XOR:
   if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
*total = ix86_vec_cost (mode, cost->sse_op);
@@ -21198,11 +21224,20 @@ ix86_rtx_costs (rtx x, machine_mode mode
  /* pandn is a single instruction.  */
  if (GET_CODE (XEXP (x, 0)) == NOT)
{
+ rtx right = XEXP (x, 1);
+
+ /* (and (not ...) (not ...)) can be a single insn in AVX512.  */
+ if (GET_CODE (right) == NOT && TARGET_AVX512F
+ && (GET_MODE_SIZE (mode) == 64
+ || (TARGET_AVX512VL
+ && (GET_MODE_SIZE (mode) == 32
+ || GET_MODE_SIZE (mode) == 16
+   right = XEXP (right, 0);
+
  *total = ix86_vec_cost (mode, cost->sse_op)
   + rtx_cost (XEXP (XEXP (x, 0), 0), mode,
   outer_code, opno, speed)
-  + rtx_cost (XEXP (x, 1), mode,
-  outer_code, opno, speed);
+  + rtx_cost (right, mode, outer_code, opno, speed);
  return true;
}
  else if (GET_CODE (XEXP (x, 1)) == NOT)
@@ -21260,8 +21295,25 @@ ix86_rtx_costs (rtx x, machine_mode mode
 
 case NOT:
   if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
-   // vnot is pxor -1.
-   *total = ix86_vec_cost (mode, cost->sse_op) + 1;
+   {
+ /* (not (xor ...)) can be a single insn in AVX512.  */
+ if (GET_CODE (XEXP (x, 0)) == XOR && TARGET_AVX512F
+ && (GET_MODE_SIZE (mode) == 64
+ || (TARGET_AVX512VL
+ && (GET_MODE_SIZE (mode) == 32
+ || GET_MODE_SIZE (mode) == 16
+   {
+ *total = ix86_vec_cost (mode, cost->sse_op)
+  + rtx_cost (XEXP (XEXP (x, 0), 0), mode,
+  outer_code, opno, speed)
+  + rtx_cost (XEXP (XEXP (x, 0), 1), mode,
+  outer_code, opno, speed);
+ return true;
+   }
+
+ // vnot is pxor -1.
+ *total = ix86_vec_cost (mode, cost->sse_op) + 1;
+   }
   else if (GET_MODE_SIZE (mode) > UNITS_PER_WORD)
*total = cost->add * 2;
   else
--- a/gcc/config/i386/sse.md
+++ b/gcc/config/i386/sse.md
@@ -17616,6 +17616,98 @@
   operands[2] = force_reg (V1TImode, CONSTM1_RTX (V1TImode));
 })
 
+(define_insn "*iornot3"
+  [(set (match_operand:VI 0 "register_operand" "=v,v,v,v")
+   (ior:VI
+ (not:VI
+   (match_operand:VI 1 "bcst_vector_operand" "v,Br,v,m"))
+ (match_operand:VI 2 "bcst_vector_operand" "vBr,v,m,v")))]
+  "( == 64 || TARGET_AVX512VL
+|| (TARGET_AVX512F && !TARGET_PREFER_AVX256))
+   && (register_operand

[PATCH 0/5] x86: make better use of VPTERNLOG{D,Q}

2023-06-20 Thread Jan Beulich via Gcc-patches
While there are some quite sophisticated 4-operand expanders,
2-operand binary logic which can't be expressed by just VPAND,
VPANDN, VPOR, or VPXOR doesn't utilize this insn to carry out
such operations in a single insn. Therefore the first two
patches address one of the sub-aspects of PR target/93768 (which
imo was closed prematurely), while the latter three ones extend
what was done for PR target/100711.

1: use VPTERNLOG for further bitwise two-vector operations
2: use VPTERNLOG also for certain andnot forms
3: allow memory operand for AVX2 splitter for PR target/100711
4: further PR target/100711-like splitting
5: yet more PR target/100711-like splitting

Jan


Re: [PATCH] RISC-V: convert the mulh with 0 to mov 0 to the reg.

2023-06-20 Thread juzhe.zh...@rivai.ai
+machine_mode mask_mode = riscv_vector::get_mask_mode (mode)
+  .require ();
+emit_insn (gen_pred_mov (mode, operands[0], CONST1_RTX (mask_mode),
+   RVV_VUNDEF (mode), CONST0_RTX (GET_MODE (operands[0])),
+   operands[5], operands[6], operands[7], operands[8]));

I don't think you need get_mask_mode; instead, you can simplify the code as
follows:
emit_insn (gen_pred_mov (<MODE>mode, operands[0], CONST1_RTX (<VM>mode),
+   RVV_VUNDEF (<MODE>mode), CONST0_RTX (GET_MODE (operands[0])),
+   operands[5], operands[6], operands[7], operands[8]));
Use <VM>mode to get the mask mode.


juzhe.zh...@rivai.ai
 
From: yanzhang.wang
Date: 2023-06-21 14:08
To: gcc-patches
CC: juzhe.zhong; kito.cheng; pan2.li; yanzhang.wang
Subject: [PATCH] RISC-V: convert the mulh with 0 to mov 0 to the reg.
From: Yanzhang Wang 
 
This patch will optimize the below mulh example,
 
vint32m1_t shortcut_for_riscv_vmulh_case_0(vint32m1_t v1, size_t vl) {
  return __riscv_vmulh_vx_i32m1(v1, 0, vl);
}
 
from mulh pattern
 
vsetvli   zero, a2, e32, m1, ta, ma
vmulh.vx  v24, v24, zero
vs1r.v    v24, 0(a0)
 
to below vmv.
 
vsetvli zero,a2,e32,m1,ta,ma
vmv.v.i v1,0
vs1r.v  v1,0(a0)
 
It will eliminate the mul with const 0 instruction, replacing it with a simple
mov instruction.
 
Signed-off-by: Yanzhang Wang 
 
gcc/ChangeLog:
 
* config/riscv/autovec-opt.md: Add a split pattern.
 
gcc/testsuite/ChangeLog:
 
* gcc.target/riscv/rvv/base/binop_vx_constraint-121.c: The mul
  with 0 will be simplified to vmv.v.i.
* gcc.target/riscv/rvv/autovec/vmulh-with-zero.cc: New test.
---
gcc/config/riscv/autovec-opt.md   | 30 +++
.../riscv/rvv/autovec/vmulh-with-zero.cc  | 19 
.../riscv/rvv/base/binop_vx_constraint-121.c  |  3 +-
3 files changed, 51 insertions(+), 1 deletion(-)
create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/autovec/vmulh-with-zero.cc
 
diff --git a/gcc/config/riscv/autovec-opt.md b/gcc/config/riscv/autovec-opt.md
index 28040805b23..9c14be964b5 100644
--- a/gcc/config/riscv/autovec-opt.md
+++ b/gcc/config/riscv/autovec-opt.md
@@ -405,3 +405,33 @@
   "vmv.x.s\t%0,%1"
   [(set_attr "type" "vimovvx")
(set_attr "mode" "")])
+
+;; Simplify VMULH (V, 0) Instructions to vmv.v.i.
+(define_split
+  [(set (match_operand:VI_QHS 0 "register_operand")
+ (if_then_else:VI_QHS
+   (unspec:
+ [(match_operand: 1 "vector_all_trues_mask_operand")
+   (match_operand 5 "vector_length_operand")
+   (match_operand 6 "const_int_operand")
+   (match_operand 7 "const_int_operand")
+   (match_operand 8 "const_int_operand")
+   (reg:SI VL_REGNUM)
+   (reg:SI VTYPE_REGNUM)] UNSPEC_VPREDICATE)
+   (unspec:VI_QHS
+ [(vec_duplicate:VI_QHS
+(match_operand: 4 "reg_or_0_operand"))
+   (match_operand:VI_QHS 3 "register_operand")] VMULH)
+   (match_operand:VI_QHS 2 "vector_merge_operand")))]
+  "TARGET_VECTOR
+ && rtx_equal_p (operands[4], CONST0_RTX (GET_MODE (operands[4])))"
+  [(const_int 0)]
+  {
+machine_mode mask_mode = riscv_vector::get_mask_mode (mode)
+  .require ();
+emit_insn (gen_pred_mov (mode, operands[0], CONST1_RTX (mask_mode),
+   RVV_VUNDEF (mode), CONST0_RTX (GET_MODE (operands[0])),
+   operands[5], operands[6], operands[7], operands[8]));
+DONE;
+  }
+)
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/vmulh-with-zero.cc 
b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vmulh-with-zero.cc
new file mode 100644
index 000..6e4a3d62bc0
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vmulh-with-zero.cc
@@ -0,0 +1,19 @@
+/* { dg-do compile } */
+/* { dg-options "-march=rv64gcv -mabi=lp64 -O3 -Wno-psabi" } */
+
+#include "riscv_vector.h"
+
+#define VMULH_WITH_LMUL(X) \
+  vint32m##X##_t shortcut_for_riscv_vmulh_case_##X (vint32m##X##_t v1,\
+  size_t vl) {  \
+return __riscv_vmulh_vx_i32m ##X (v1, 0, vl); \
+  }
+
+
+VMULH_WITH_LMUL (1)
+VMULH_WITH_LMUL (2)
+VMULH_WITH_LMUL (4)
+VMULH_WITH_LMUL (8)
+VMULH_WITH_LMUL (f2)
+
+/* { dg-final { scan-assembler-times {vmv\.v\.i\sv[0-9]+,0} 5} */
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/base/binop_vx_constraint-121.c 
b/gcc/testsuite/gcc.target/riscv/rvv/base/binop_vx_constraint-121.c
index 4d2de91bc14..d1473274137 100644
--- a/gcc/testsuite/gcc.target/riscv/rvv/base/binop_vx_constraint-121.c
+++ b/gcc/testsuite/gcc.target/riscv/rvv/base/binop_vx_constraint-121.c
@@ -50,6 +50,7 @@ void f6 (void * in, void *out, int32_t x)
 __riscv_vse64_v_i64m1 (out, v3, 4);
}
-/* { dg-final { scan-assembler-times {vmulh\.vx\s+v[0-9]+,\s*v[0-9]+,zero} 2 } 
} */
+/* { dg-final { scan-assembler-times {vmv\.v\.i\sv[0-9]+,0} 1 } } */
+/* { dg-final { scan-assembler-times {vmulh\.vx\s+v[0-9]+,\s*v[0-9]+,zero} 1 } 
} */
/* { dg-final { scan-assembler-times {vdiv\.vx\s+v[0-9]+,\s*v[0-9]+,zero} 2 } } 
*/
/* { dg-final { scan-assembler-times {vrem\.vx\s+v[0-9]+,\s*v[0-9]+,zero} 2 } } 
*/
-- 
2.40.1
 
 


Re: [PATCH] RISC-V: convert the mulh with 0 to mov 0 to the reg.

2023-06-20 Thread juzhe.zh...@rivai.ai
Good catch!
vmulh.vx v24,v24,zero -> vmv.v.i v1,0
can eliminate use of v24 and reduce register pressure.
 
But I wonder why you pick only VI_QHS?

+  [(set (match_operand:VI_QHS 0 "register_operand")

SEW = 64 should always have such optimization.

Thanks.


juzhe.zh...@rivai.ai
 
From: yanzhang.wang
Date: 2023-06-21 14:08
To: gcc-patches
CC: juzhe.zhong; kito.cheng; pan2.li; yanzhang.wang
Subject: [PATCH] RISC-V: convert the mulh with 0 to mov 0 to the reg.
From: Yanzhang Wang 
 
This patch will optimize the below mulh example,
 
vint32m1_t shortcut_for_riscv_vmulh_case_0(vint32m1_t v1, size_t vl) {
  return __riscv_vmulh_vx_i32m1(v1, 0, vl);
}
 
from mulh pattern
 
vsetvli   zero, a2, e32, m1, ta, ma
vmulh.vx  v24, v24, zero
vs1r.v    v24, 0(a0)
 
to below vmv.
 
vsetvli zero,a2,e32,m1,ta,ma
vmv.v.i v1,0
vs1r.v  v1,0(a0)
 
It will eliminate the mul with const 0 instruction, replacing it with a simple
mov instruction.
 
Signed-off-by: Yanzhang Wang 
 
gcc/ChangeLog:
 
* config/riscv/autovec-opt.md: Add a split pattern.
 
gcc/testsuite/ChangeLog:
 
* gcc.target/riscv/rvv/base/binop_vx_constraint-121.c: The mul
  with 0 will be simplified to vmv.v.i.
* gcc.target/riscv/rvv/autovec/vmulh-with-zero.cc: New test.
---
gcc/config/riscv/autovec-opt.md   | 30 +++
.../riscv/rvv/autovec/vmulh-with-zero.cc  | 19 
.../riscv/rvv/base/binop_vx_constraint-121.c  |  3 +-
3 files changed, 51 insertions(+), 1 deletion(-)
create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/autovec/vmulh-with-zero.cc
 
diff --git a/gcc/config/riscv/autovec-opt.md b/gcc/config/riscv/autovec-opt.md
index 28040805b23..9c14be964b5 100644
--- a/gcc/config/riscv/autovec-opt.md
+++ b/gcc/config/riscv/autovec-opt.md
@@ -405,3 +405,33 @@
   "vmv.x.s\t%0,%1"
   [(set_attr "type" "vimovvx")
(set_attr "mode" "")])
+
+;; Simplify VMULH (V, 0) Instructions to vmv.v.i.
+(define_split
+  [(set (match_operand:VI_QHS 0 "register_operand")
+ (if_then_else:VI_QHS
+   (unspec:
+ [(match_operand: 1 "vector_all_trues_mask_operand")
+   (match_operand 5 "vector_length_operand")
+   (match_operand 6 "const_int_operand")
+   (match_operand 7 "const_int_operand")
+   (match_operand 8 "const_int_operand")
+   (reg:SI VL_REGNUM)
+   (reg:SI VTYPE_REGNUM)] UNSPEC_VPREDICATE)
+   (unspec:VI_QHS
+ [(vec_duplicate:VI_QHS
+(match_operand: 4 "reg_or_0_operand"))
+   (match_operand:VI_QHS 3 "register_operand")] VMULH)
+   (match_operand:VI_QHS 2 "vector_merge_operand")))]
+  "TARGET_VECTOR
+ && rtx_equal_p (operands[4], CONST0_RTX (GET_MODE (operands[4])))"
+  [(const_int 0)]
+  {
+machine_mode mask_mode = riscv_vector::get_mask_mode (mode)
+  .require ();
+emit_insn (gen_pred_mov (mode, operands[0], CONST1_RTX (mask_mode),
+   RVV_VUNDEF (mode), CONST0_RTX (GET_MODE (operands[0])),
+   operands[5], operands[6], operands[7], operands[8]));
+DONE;
+  }
+)
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/vmulh-with-zero.cc 
b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vmulh-with-zero.cc
new file mode 100644
index 000..6e4a3d62bc0
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vmulh-with-zero.cc
@@ -0,0 +1,19 @@
+/* { dg-do compile } */
+/* { dg-options "-march=rv64gcv -mabi=lp64 -O3 -Wno-psabi" } */
+
+#include "riscv_vector.h"
+
+#define VMULH_WITH_LMUL(X) \
+  vint32m##X##_t shortcut_for_riscv_vmulh_case_##X (vint32m##X##_t v1,\
+  size_t vl) {  \
+return __riscv_vmulh_vx_i32m ##X (v1, 0, vl); \
+  }
+
+
+VMULH_WITH_LMUL (1)
+VMULH_WITH_LMUL (2)
+VMULH_WITH_LMUL (4)
+VMULH_WITH_LMUL (8)
+VMULH_WITH_LMUL (f2)
+
+/* { dg-final { scan-assembler-times {vmv\.v\.i\sv[0-9]+,0} 5} */
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/base/binop_vx_constraint-121.c 
b/gcc/testsuite/gcc.target/riscv/rvv/base/binop_vx_constraint-121.c
index 4d2de91bc14..d1473274137 100644
--- a/gcc/testsuite/gcc.target/riscv/rvv/base/binop_vx_constraint-121.c
+++ b/gcc/testsuite/gcc.target/riscv/rvv/base/binop_vx_constraint-121.c
@@ -50,6 +50,7 @@ void f6 (void * in, void *out, int32_t x)
 __riscv_vse64_v_i64m1 (out, v3, 4);
}
-/* { dg-final { scan-assembler-times {vmulh\.vx\s+v[0-9]+,\s*v[0-9]+,zero} 2 } 
} */
+/* { dg-final { scan-assembler-times {vmv\.v\.i\sv[0-9]+,0} 1 } } */
+/* { dg-final { scan-assembler-times {vmulh\.vx\s+v[0-9]+,\s*v[0-9]+,zero} 1 } 
} */
/* { dg-final { scan-assembler-times {vdiv\.vx\s+v[0-9]+,\s*v[0-9]+,zero} 2 } } 
*/
/* { dg-final { scan-assembler-times {vrem\.vx\s+v[0-9]+,\s*v[0-9]+,zero} 2 } } 
*/
-- 
2.40.1
 
 


[PATCH] RISC-V: convert the mulh with 0 to mov 0 to the reg.

2023-06-20 Thread yanzhang.wang--- via Gcc-patches
From: Yanzhang Wang 

This patch will optimize the below mulh example,

vint32m1_t shortcut_for_riscv_vmulh_case_0(vint32m1_t v1, size_t vl) {
  return __riscv_vmulh_vx_i32m1(v1, 0, vl);
}

from mulh pattern

vsetvli   zero, a2, e32, m1, ta, ma
vmulh.vx  v24, v24, zero
vs1r.v    v24, 0(a0)

to below vmv.

vsetvli zero,a2,e32,m1,ta,ma
vmv.v.i v1,0
vs1r.v  v1,0(a0)

It will eliminate the mul with const 0 instruction, replacing it with a simple
mov instruction.

Signed-off-by: Yanzhang Wang 

gcc/ChangeLog:

* config/riscv/autovec-opt.md: Add a split pattern.

gcc/testsuite/ChangeLog:

* gcc.target/riscv/rvv/base/binop_vx_constraint-121.c: The mul
  with 0 will be simplified to vmv.v.i.
* gcc.target/riscv/rvv/autovec/vmulh-with-zero.cc: New test.
---
 gcc/config/riscv/autovec-opt.md   | 30 +++
 .../riscv/rvv/autovec/vmulh-with-zero.cc  | 19 
 .../riscv/rvv/base/binop_vx_constraint-121.c  |  3 +-
 3 files changed, 51 insertions(+), 1 deletion(-)
 create mode 100644 
gcc/testsuite/gcc.target/riscv/rvv/autovec/vmulh-with-zero.cc

diff --git a/gcc/config/riscv/autovec-opt.md b/gcc/config/riscv/autovec-opt.md
index 28040805b23..9c14be964b5 100644
--- a/gcc/config/riscv/autovec-opt.md
+++ b/gcc/config/riscv/autovec-opt.md
@@ -405,3 +405,33 @@
   "vmv.x.s\t%0,%1"
   [(set_attr "type" "vimovvx")
(set_attr "mode" "")])
+
+;; Simplify VMULH (V, 0) Instructions to vmv.v.i.
+(define_split
+  [(set (match_operand:VI_QHS 0 "register_operand")
+   (if_then_else:VI_QHS
+ (unspec:
+   [(match_operand: 1 "vector_all_trues_mask_operand")
+ (match_operand 5 "vector_length_operand")
+ (match_operand 6 "const_int_operand")
+ (match_operand 7 "const_int_operand")
+ (match_operand 8 "const_int_operand")
+ (reg:SI VL_REGNUM)
+ (reg:SI VTYPE_REGNUM)] UNSPEC_VPREDICATE)
+ (unspec:VI_QHS
+   [(vec_duplicate:VI_QHS
+  (match_operand: 4 "reg_or_0_operand"))
+ (match_operand:VI_QHS 3 "register_operand")] VMULH)
+ (match_operand:VI_QHS 2 "vector_merge_operand")))]
+  "TARGET_VECTOR
+ && rtx_equal_p (operands[4], CONST0_RTX (GET_MODE (operands[4])))"
+  [(const_int 0)]
+  {
+machine_mode mask_mode = riscv_vector::get_mask_mode (mode)
+  .require ();
+emit_insn (gen_pred_mov (mode, operands[0], CONST1_RTX (mask_mode),
+ RVV_VUNDEF (mode), CONST0_RTX (GET_MODE (operands[0])),
+ operands[5], operands[6], operands[7], operands[8]));
+DONE;
+  }
+)
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/vmulh-with-zero.cc 
b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vmulh-with-zero.cc
new file mode 100644
index 000..6e4a3d62bc0
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vmulh-with-zero.cc
@@ -0,0 +1,19 @@
+/* { dg-do compile } */
+/* { dg-options "-march=rv64gcv -mabi=lp64 -O3 -Wno-psabi" } */
+
+#include "riscv_vector.h"
+
+#define VMULH_WITH_LMUL(X) \
+  vint32m##X##_t shortcut_for_riscv_vmulh_case_##X (vint32m##X##_t v1,\
+size_t vl) {  \
+return __riscv_vmulh_vx_i32m ##X (v1, 0, vl); \
+  }
+
+
+VMULH_WITH_LMUL (1)
+VMULH_WITH_LMUL (2)
+VMULH_WITH_LMUL (4)
+VMULH_WITH_LMUL (8)
+VMULH_WITH_LMUL (f2)
+
+/* { dg-final { scan-assembler-times {vmv\.v\.i\sv[0-9]+,0} 5} */
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/base/binop_vx_constraint-121.c 
b/gcc/testsuite/gcc.target/riscv/rvv/base/binop_vx_constraint-121.c
index 4d2de91bc14..d1473274137 100644
--- a/gcc/testsuite/gcc.target/riscv/rvv/base/binop_vx_constraint-121.c
+++ b/gcc/testsuite/gcc.target/riscv/rvv/base/binop_vx_constraint-121.c
@@ -50,6 +50,7 @@ void f6 (void * in, void *out, int32_t x)
 __riscv_vse64_v_i64m1 (out, v3, 4);
 }
 
-/* { dg-final { scan-assembler-times {vmulh\.vx\s+v[0-9]+,\s*v[0-9]+,zero} 2 } 
} */
+/* { dg-final { scan-assembler-times {vmv\.v\.i\sv[0-9]+,0} 1 } } */
+/* { dg-final { scan-assembler-times {vmulh\.vx\s+v[0-9]+,\s*v[0-9]+,zero} 1 } 
} */
 /* { dg-final { scan-assembler-times {vdiv\.vx\s+v[0-9]+,\s*v[0-9]+,zero} 2 } 
} */
 /* { dg-final { scan-assembler-times {vrem\.vx\s+v[0-9]+,\s*v[0-9]+,zero} 2 } 
} */
-- 
2.40.1



[PATCH v2] x86: make better use of VBROADCASTSS / VPBROADCASTD

2023-06-20 Thread Jan Beulich via Gcc-patches
... in vec_dupv4sf / *vec_dupv4si. The respective broadcast insns are
never longer (yet sometimes shorter) than the corresponding VSHUFPS /
VPSHUFD, due to the immediate operand of the shuffle insns balancing the
possible need for VEX3 in the broadcast ones. When EVEX encoding is
required the broadcast insns are always shorter.

Add new alternatives to cover the AVX2 and AVX512 cases as appropriate.

gcc/

* config/i386/sse.md (vec_dupv4sf): Make first alternative use
vbroadcastss for AVX2. New AVX512F alternative.
(*vec_dupv4si): New AVX2 and AVX512F alternatives using
vpbroadcastd.
---
Especially with the added "enabled" attribute I didn't really see how to
(further) fold alternatives 0 and 1. Instead *vec_dupv4si might benefit
from using sse2_noavx2 instead of sse2 for alternative 2, except that
there is no sse2_noavx2, only sse2_noavx.

Is there a reason why vec_dupv4sf uses sseshuf1 for its shuffle
alternatives, but *vec_dupv4si uses sselog1? I'd be happy to correct
this in whichever is the appropriate direction, while touching this
anyway.

I'm working from the assumption that the isa attributes to the original
1st and 2nd alternatives don't need further restricting (to sse2_noavx2
or avx_noavx2 as applicable), as the new earlier alternatives cover all
operand forms already when at least AVX2 is enabled.

Isn't prefix_extra use bogus here? What extra prefix does vbroadcastss
use? (Same further down in *vec_dupv4si and avx2_vbroadcasti128_
and elsewhere.)
---
v2: Correct operand constraints. Respect -mprefer-vector-width=. Fold
two alternatives of vec_dupv4sf.

--- a/gcc/config/i386/sse.md
+++ b/gcc/config/i386/sse.md
@@ -26141,41 +26141,64 @@
(const_int 1)))])
 
 (define_insn "vec_dupv4sf"
-  [(set (match_operand:V4SF 0 "register_operand" "=v,v,x")
+  [(set (match_operand:V4SF 0 "register_operand" "=v,v,v,x")
(vec_duplicate:V4SF
- (match_operand:SF 1 "nonimmediate_operand" "Yv,m,0")))]
+ (match_operand:SF 1 "nonimmediate_operand" "Yv,v,m,0")))]
   "TARGET_SSE"
   "@
-   vshufps\t{$0, %1, %1, %0|%0, %1, %1, 0}
+   * return TARGET_AVX2 ? \"vbroadcastss\t{%1, %0|%0, %1}\" : \"vshufps\t{$0, 
%d1, %0|%0, %d1, 0}\";
+   vbroadcastss\t{%1, %g0|%g0, %1}
vbroadcastss\t{%1, %0|%0, %1}
shufps\t{$0, %0, %0|%0, %0, 0}"
-  [(set_attr "isa" "avx,avx,noavx")
-   (set_attr "type" "sseshuf1,ssemov,sseshuf1")
-   (set_attr "length_immediate" "1,0,1")
-   (set_attr "prefix_extra" "0,1,*")
-   (set_attr "prefix" "maybe_evex,maybe_evex,orig")
-   (set_attr "mode" "V4SF")])
+  [(set_attr "isa" "avx,*,avx,noavx")
+   (set (attr "type")
+   (cond [(and (eq_attr "alternative" "0")
+   (match_test "!TARGET_AVX2"))
+(const_string "sseshuf1")
+  (eq_attr "alternative" "3")
+(const_string "sseshuf1")
+ ]
+ (const_string "ssemov")))
+   (set (attr "length_immediate")
+   (if_then_else (eq_attr "type" "sseshuf1")
+ (const_string "1")
+ (const_string "0")))
+   (set_attr "prefix_extra" "0,0,1,*")
+   (set_attr "prefix" "maybe_evex,evex,maybe_evex,orig")
+   (set_attr "mode" "V4SF,V16SF,V4SF,V4SF")
+   (set (attr "enabled")
+   (if_then_else (eq_attr "alternative" "1")
+ (symbol_ref "TARGET_AVX512F && !TARGET_AVX512VL
+  && !TARGET_PREFER_AVX256")
+ (const_string "*")))])
 
 (define_insn "*vec_dupv4si"
-  [(set (match_operand:V4SI 0 "register_operand" "=v,v,x")
+  [(set (match_operand:V4SI 0 "register_operand" "=v,v,v,v,x")
(vec_duplicate:V4SI
- (match_operand:SI 1 "nonimmediate_operand" "Yv,m,0")))]
+ (match_operand:SI 1 "nonimmediate_operand" "Yvm,v,Yv,m,0")))]
   "TARGET_SSE"
   "@
+   vpbroadcastd\t{%1, %0|%0, %1}
+   vpbroadcastd\t{%1, %g0|%g0, %1}
%vpshufd\t{$0, %1, %0|%0, %1, 0}
vbroadcastss\t{%1, %0|%0, %1}
shufps\t{$0, %0, %0|%0, %0, 0}"
-  [(set_attr "isa" "sse2,avx,noavx")
-   (set_attr "type" "sselog1,ssemov,sselog1")
-   (set_attr "length_immediate" "1,0,1")
-   (set_attr "prefix_extra" "0,1,*")
-   (set_attr "prefix" "maybe_vex,maybe_evex,orig")
-   (set_attr "mode" "TI,V4SF,V4SF")
+  [(set_attr "isa" "avx2,*,sse2,avx,noavx")
+   (set_attr "type" "ssemov,ssemov,sselog1,ssemov,sselog1")
+   (set_attr "length_immediate" "0,0,1,0,1")
+   (set_attr "prefix_extra" "0,0,0,1,*")
+   (set_attr "prefix" "maybe_evex,evex,maybe_vex,maybe_evex,orig")
+   (set_attr "mode" "TI,XI,TI,V4SF,V4SF")
(set (attr "preferred_for_speed")
- (cond [(eq_attr "alternative" "1")
+ (cond [(eq_attr "alternative" "3")
  (symbol_ref "!TARGET_INTER_UNIT_MOVES_TO_VEC")
   ]
-  (symbol_ref "true")))])
+  (symbol_ref "true")))
+   (set (attr "enabled")
+   (if_then_else (eq_attr "alternative" "1")
+ (symbol_ref "TARGET_AVX512F && !TARGET_AVX512VL
+

[PATCH] x86: add -mprefer-vector-width=512 to new avx512f-dupv2di.c testcase

2023-06-20 Thread Jan Beulich via Gcc-patches
This is to cover testing also being done with -march=cascadelake.
---
Committing as obvious.

--- a/gcc/testsuite/gcc.target/i386/avx512f-dupv2di.c
+++ b/gcc/testsuite/gcc.target/i386/avx512f-dupv2di.c
@@ -1,5 +1,5 @@
 /* { dg-do compile { target { ! ia32 } } } */
-/* { dg-options "-mavx512f -mno-avx512vl -O2" } */
+/* { dg-options "-mavx512f -mno-avx512vl -mprefer-vector-width=512 -O2" } */
 /* { dg-final { scan-assembler-not "vmovddup\[^\n\]*%xmm16" } } */
 
 typedef long long __attribute__ ((vector_size (16))) v2di;



[PATCH] Update array address space in c_build_qualified_type

2023-06-20 Thread SenthilKumar.Selvaraj--- via Gcc-patches
Hi,

  When c-typeck.cc:c_build_qualified_type builds an array type
  from its element type, it does not copy the address space of
  the element type to the array type itself. This is unlike
  tree.cc:build_array_type_1, which explicitly does

TYPE_ADDR_SPACE (t) = TYPE_ADDR_SPACE (elt_type);

  This causes the ICE described in
  https://gcc.gnu.org/bugzilla/show_bug.cgi?id=86869.

struct S {
  char y[2];
};

extern const __memx  struct S *s;

extern void bar(const __memx void*);

void foo(void) {
  bar(&s->y);
}

  build_component_ref calls c_build_qualified_type, passing in the
  array type and quals including the address space (ADDR_SPACE_MEMX
  in this case). Because of this missing address space copy, the
  returned array type remains in the generic address space.  Later
  down the line, expand_expr_addr_expr detects the mismatch in
  address space/mode and tries to convert, and that leads to the
  ICE described in the bug.

  This patch sets the address space of the array type to that of the
  element type.

  Regression tests for avr look ok. Ok for trunk?

Regards
Senthil

PR 86869

gcc/c/ChangeLog:

* c-typeck.cc (c_build_qualified_type): Set
TYPE_ADDR_SPACE for ARRAY_TYPE.

gcc/testsuite/ChangeLog:

* gcc.target/avr/pr86869.c: New test.

diff --git a/gcc/c/c-typeck.cc b/gcc/c/c-typeck.cc
index 22e240a3c2a..d4ab1d1bd46 100644
--- a/gcc/c/c-typeck.cc
+++ b/gcc/c/c-typeck.cc
@@ -16284,6 +16284,7 @@ c_build_qualified_type (tree type, int type_quals, tree 
orig_qual_type,
 
  t = build_variant_type_copy (type);
  TREE_TYPE (t) = element_type;
+ TYPE_ADDR_SPACE (t) = TYPE_ADDR_SPACE (element_type);
 
   if (TYPE_STRUCTURAL_EQUALITY_P (element_type)
   || (domain && TYPE_STRUCTURAL_EQUALITY_P (domain)))
diff --git a/gcc/testsuite/gcc.target/avr/pr86869.c 
b/gcc/testsuite/gcc.target/avr/pr86869.c
new file mode 100644
index 000..54cd984276e
--- /dev/null
+++ b/gcc/testsuite/gcc.target/avr/pr86869.c
@@ -0,0 +1,13 @@
+/* { dg-do compile } */
+/* { dg-options "-std=gnu99" } */
+
+extern void bar(const __memx void* p);
+
+struct S {
+  char y[2];
+};
+extern const __memx struct S *s;
+
+void foo(void) {
+  bar(&s->y);
+}


[PATCH] Refine maskloadmn pattern with UNSPEC_MASKLOAD.

2023-06-20 Thread liuhongt via Gcc-patches
If mem_addr points to a memory region with fewer than a whole vector's
worth of accessible bytes, and k is a mask that would prevent reading
the inaccessible bytes from mem_addr, add UNSPEC_MASKLOAD to prevent
the load from being transformed into vpblendd.

Bootstrapped and regtested on x86_64-pc-linux-gnu{-m32,}.
Ready to push to master.

gcc/ChangeLog:

PR target/110309
* config/i386/sse.md (maskload):
Refine pattern with UNSPEC_MASKLOAD.
(maskload): Ditto.
(*_load_mask): Extend mode iterator to
VI12HFBF_AVX512VL.
(*_load): Ditto.

gcc/testsuite/ChangeLog:

* gcc.target/i386/pr110309.c: New test.
---
 gcc/config/i386/sse.md   | 32 +---
 gcc/testsuite/gcc.target/i386/pr110309.c | 10 
 2 files changed, 28 insertions(+), 14 deletions(-)
 create mode 100644 gcc/testsuite/gcc.target/i386/pr110309.c

diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md
index 87570357db6..4d1f7ac8d7e 100644
--- a/gcc/config/i386/sse.md
+++ b/gcc/config/i386/sse.md
@@ -1465,12 +1465,12 @@ (define_expand "_load_mask"
 })
 
 (define_insn "*_load_mask"
-  [(set (match_operand:VI12_AVX512VL 0 "register_operand" "=v")
-   (vec_merge:VI12_AVX512VL
- (unspec:VI12_AVX512VL
-   [(match_operand:VI12_AVX512VL 1 "memory_operand" "m")]
+  [(set (match_operand:VI12HFBF_AVX512VL 0 "register_operand" "=v")
+   (vec_merge:VI12HFBF_AVX512VL
+ (unspec:VI12HFBF_AVX512VL
+   [(match_operand:VI12HFBF_AVX512VL 1 "memory_operand" "m")]
UNSPEC_MASKLOAD)
- (match_operand:VI12_AVX512VL 2 "nonimm_or_0_operand" "0C")
+ (match_operand:VI12HFBF_AVX512VL 2 "nonimm_or_0_operand" "0C")
  (match_operand: 3 "register_operand" "Yk")))]
   "TARGET_AVX512BW"
   "vmovdqu\t{%1, %0%{%3%}%N2|%0%{%3%}%N2, %1}"
@@ -1479,9 +1479,9 @@ (define_insn "*_load_mask"
(set_attr "mode" "")])
 
 (define_insn_and_split "*_load"
-  [(set (match_operand:VI12_AVX512VL 0 "register_operand" "=v")
-   (unspec:VI12_AVX512VL
- [(match_operand:VI12_AVX512VL 1 "memory_operand" "m")]
+  [(set (match_operand:VI12HFBF_AVX512VL 0 "register_operand" "=v")
+   (unspec:VI12HFBF_AVX512VL
+ [(match_operand:VI12HFBF_AVX512VL 1 "memory_operand" "m")]
  UNSPEC_MASKLOAD))]
   "TARGET_AVX512BW"
   "#"
@@ -26883,17 +26883,21 @@ (define_expand "maskload"
   "TARGET_AVX")
 
 (define_expand "maskload"
-  [(set (match_operand:V48H_AVX512VL 0 "register_operand")
-   (vec_merge:V48H_AVX512VL
- (match_operand:V48H_AVX512VL 1 "memory_operand")
+  [(set (match_operand:V48_AVX512VL 0 "register_operand")
+   (vec_merge:V48_AVX512VL
+ (unspec:V48_AVX512VL
+   [(match_operand:V48_AVX512VL 1 "memory_operand")]
+   UNSPEC_MASKLOAD)
  (match_dup 0)
  (match_operand: 2 "register_operand")))]
   "TARGET_AVX512F")
 
 (define_expand "maskload"
-  [(set (match_operand:VI12_AVX512VL 0 "register_operand")
-   (vec_merge:VI12_AVX512VL
- (match_operand:VI12_AVX512VL 1 "memory_operand")
+  [(set (match_operand:VI12HFBF_AVX512VL 0 "register_operand")
+   (vec_merge:VI12HFBF_AVX512VL
+ (unspec:VI12HFBF_AVX512VL
+   [(match_operand:VI12HFBF_AVX512VL 1 "memory_operand")]
+   UNSPEC_MASKLOAD)
  (match_dup 0)
  (match_operand: 2 "register_operand")))]
   "TARGET_AVX512BW")
diff --git a/gcc/testsuite/gcc.target/i386/pr110309.c 
b/gcc/testsuite/gcc.target/i386/pr110309.c
new file mode 100644
index 000..f6e9e9c3c61
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr110309.c
@@ -0,0 +1,10 @@
+/* { dg-do compile } */
+/* { dg-options "-O3 --param vect-partial-vector-usage=1 -march=znver4 
-mprefer-vector-width=256" } */
+/* { dg-final { scan-assembler-not {(?n)vpblendd.*ymm} } } */
+
+
+void foo (int * __restrict a, int *b)
+{
+  for (int i = 0; i < 6; ++i)
+a[i] = b[i] + 42;
+}
-- 
2.39.1.388.g2fc9e9ca3c



Re: [pushed] aarch64: Robustify stack tie handling

2023-06-20 Thread Jeff Law via Gcc-patches




On 6/20/23 14:49, Richard Sandiford via Gcc-patches wrote:

The SVE handling of stack clash protection copied the stack
pointer to X11 before the probe and set up X11 as the CFA
for unwind purposes:

 /* This is done to provide unwinding information for the stack
adjustments we're about to do, however to prevent the optimizers
from removing the R11 move and leaving the CFA note (which would be
very wrong) we tie the old and new stack pointer together.
The tie will expand to nothing but the optimizers will not touch
the instruction.  */
 rtx stack_ptr_copy = gen_rtx_REG (Pmode, STACK_CLASH_SVE_CFA_REGNUM);
 emit_move_insn (stack_ptr_copy, stack_pointer_rtx);
 emit_insn (gen_stack_tie (stack_ptr_copy, stack_pointer_rtx));

 /* We want the CFA independent of the stack pointer for the
duration of the loop.  */
 add_reg_note (insn, REG_CFA_DEF_CFA, stack_ptr_copy);
 RTX_FRAME_RELATED_P (insn) = 1;

-fcprop-registers is now smart enough to realise that X11 = SP,
replace X11 with SP in the stack tie, and delete the instruction
created above.

This patch tries to prevent that by making stack_tie fussy about
the register numbers.  It fixes failures in
gcc.target/aarch64/sve/pcs/stack_clash*.c.

Tested on aarch64-linux-gnu & pushed.
Thanks for taking care of this.  It was a bit surprising that we ran 
into these problems given that we were allowing regcprop to propagate 
sp->gpr copies a few years back and we're just re-enabling that 
capability in the safe cases.  Presumably this stack tie went in after 
we'd disabled propagating away sp->gpr copies.


Jeff


Re: [gofrontend-dev] Re: libgo patch committed: Use a C function to call mmap

2023-06-20 Thread Cherry Mui via Gcc-patches
On Tue, Jun 20, 2023 at 3:37 PM Ian Lance Taylor  wrote:

> On Tue, Jun 20, 2023 at 11:35 AM Andreas Schwab 
> wrote:
> >
> > On Jun 20 2023, Ian Lance Taylor via Gcc-patches wrote:
> >
> > > This libgo patch changes the runtime package to use a C function to
> > > call mmap.
> > >
> > > The final argument to mmap, of type off_t, varies. In
> > > https://go.dev/cl/445375
> > > (https://gcc.gnu.org/pipermail/gcc-patches/2022-October/604158.html)
> > > we changed it to always use the C off_t type, but that broke 32-bit
> > > big-endian Linux systems.
> >
> > This has nothing to do with big-endian, armv7 isn't big-endian.
>
> OK, but I think that it does have something to do with big-endian.
> The bug was that on some 32-bit systems it was passing a 64-bit value
> to a function that expected a 32-bit value.  The problem didn't show
> up on 32-bit x86 because it is little-endian, and did show up on
> 32-bit PPC because it is big-endian.  I guess the armv7 case was
> failing for a different reason.


I think there is a calling convention issue. On 32-bit ARM, for the case of
mmap, if the last argument is 32-bit, it is passed as 4 bytes at sp+4. If it
is 64-bit, the offset is aligned and it is stored as 8 bytes at sp+8. So if
the callee tries to read at sp+4, it gets the wrong value, even on a
little-endian system. 32-bit x86 doesn't seem to have that alignment padding.


Re: [PATCH] RISC-V: Implement autovec copysign.

2023-06-20 Thread 钟居哲
You should remove all the "unspec"-related code for the "n" (ncopysign)
variant, including in riscv-vector-builtins-bases.cc, vector.md and
vector-iterators.md.



juzhe.zh...@rivai.ai
 
From: Robin Dapp
Date: 2023-06-20 20:47
To: gcc-patches; palmer; Kito Cheng; juzhe.zh...@rivai.ai; jeffreyalaw
CC: rdapp.gcc
Subject: [PATCH] RISC-V: Implement autovec copysign.
Hi,
 
this adds vector copysign, ncopysign and xorsign as well as the
accompanying tests.
 
In order to easily match the ncopysign patterns I changed the
builtin implementation slightly.  Juzhe might want to comment
on that.  For now I kept the <nx> attribute's name even though
it doesn't emit an "n" anymore.
 
Regards
Robin
 
 
gcc/ChangeLog:
 
* config/riscv/autovec.md (copysign3): Add expander.
(xorsign3): Ditto.
* config/riscv/riscv-vector-builtins-bases.cc (class vfsgnjn):
New class.
* config/riscv/vector-iterators.md (copysign): Remove ncopysign.
(xorsign): Ditto.
(n): Ditto.
(x): Ditto.
* config/riscv/vector.md (@pred_ncopysign): Split off.
(@pred_ncopysign_scalar): Ditto.
 
gcc/testsuite/ChangeLog:
 
* gcc.target/riscv/rvv/autovec/binop/copysign-run.c: New test.
* gcc.target/riscv/rvv/autovec/binop/copysign-rv64gcv.c: New test.
* gcc.target/riscv/rvv/autovec/binop/copysign-rv32gcv.c: New test.
* gcc.target/riscv/rvv/autovec/binop/copysign-template.h: New test.
* gcc.target/riscv/rvv/autovec/binop/copysign-zvfh-run.c: New test.
---
gcc/config/riscv/autovec.md   | 43 +
.../riscv/riscv-vector-builtins-bases.cc  | 18 +++-
gcc/config/riscv/vector-iterators.md  |  9 +-
gcc/config/riscv/vector.md| 43 +
.../riscv/rvv/autovec/binop/copysign-run.c| 89 +++
.../rvv/autovec/binop/copysign-rv32gcv.c  |  8 ++
.../rvv/autovec/binop/copysign-rv64gcv.c  |  8 ++
.../rvv/autovec/binop/copysign-template.h | 78 
.../rvv/autovec/binop/copysign-zvfh-run.c | 83 +
9 files changed, 371 insertions(+), 8 deletions(-)
create mode 100644 
gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/copysign-run.c
create mode 100644 
gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/copysign-rv32gcv.c
create mode 100644 
gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/copysign-rv64gcv.c
create mode 100644 
gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/copysign-template.h
create mode 100644 
gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/copysign-zvfh-run.c
 
diff --git a/gcc/config/riscv/autovec.md b/gcc/config/riscv/autovec.md
index f1641d7e1ea..f2e69aaf102 100644
--- a/gcc/config/riscv/autovec.md
+++ b/gcc/config/riscv/autovec.md
@@ -804,3 +804,46 @@ (define_expand "3"
riscv_vector::RVV_BINOP, operands);
   DONE;
})
+
+;; 
---
+;;  [FP] Sign copying
+;; 
---
+;; Includes:
+;; - vfsgnj.vv/vfsgnjn.vv
+;; - vfsgnj.vf/vfsgnjn.vf
+;; 
---
+
+;; Leave the pattern like this as to still allow combine to match
+;; a negated copysign (see vector.md) before adding the UNSPEC_VPREDICATE 
later.
+(define_insn_and_split "copysign3"
+  [(set (match_operand:VF 0 "register_operand"  "=vd, vd, vr, vr")
+(unspec:VF
+ [(match_operand:VF 1 "register_operand"" vr, vr, vr, vr")
+ (match_operand:VF 2 "register_operand" " vr, vr, vr, vr")] 
UNSPEC_VCOPYSIGN))]
+  "TARGET_VECTOR && can_create_pseudo_p ()"
+  "#"
+  "&& 1"
+  [(const_int 0)]
+{
+  riscv_vector::emit_vlmax_insn (code_for_pred (UNSPEC_VCOPYSIGN, mode),
+ riscv_vector::RVV_BINOP, operands);
+  DONE;
+}
+  [(set_attr "type" "vfsgnj")
+   (set_attr "mode" "")])
+
+;; 
---
+;; Includes:
+;; - vfsgnjx.vv
+;; - vfsgnjx.vf
+;; 
---
+(define_expand "xorsign3"
+  [(match_operand:VF_AUTO 0 "register_operand")
+(match_operand:VF_AUTO 1 "register_operand")
+(match_operand:VF_AUTO 2 "register_operand")]
+  "TARGET_VECTOR"
+{
+  riscv_vector::emit_vlmax_insn (code_for_pred (UNSPEC_VXORSIGN, mode),
+ riscv_vector::RVV_BINOP, operands);
+  DONE;
+})
diff --git a/gcc/config/riscv/riscv-vector-builtins-bases.cc 
b/gcc/config/riscv/riscv-vector-builtins-bases.cc
index c6c53dc13a5..0313986f6b9 100644
--- a/gcc/config/riscv/riscv-vector-builtins-bases.cc
+++ b/gcc/config/riscv/riscv-vector-builtins-bases.cc
@@ -1212,7 +1212,7 @@ public:
   }
};
-/* Implements vfsqrt7/vfrec7/vfclass/vfsgnj/vfsgnjn/vfsgnjx.  */
+/* Implements vfsqrt7/vfrec7/vfclass/vfsgnj/vfsgnjx.  */
template
class float_misc : public function_base
{
@@ -1227,6 +1227,20 @@ public:
   }
};
+/* Implements vfsgnjn.  */
+class vfsgnjn : public function_base
+{
+public:
+  rtx expand (function_expander &e) const override
+  {
+if (e.op_info->op == OP_TYPE_vf)
+  return e.use_exact_ins

Re: [Patch, fortran] PR107900 Select type with intrinsic type inside associate causes ICE / Segmentation fault

2023-06-20 Thread Harald Anlauf via Gcc-patches

Hi Paul,

On 6/20/23 12:54, Paul Richard Thomas via Gcc-patches wrote:

Hi Harald,

Fixing the original testcase in this PR turned out to be slightly more
involved than I expected. However, it resulted in an open door to fix
some other PRs and the attached much larger patch.

This time, I did remember to include the testcases in the .diff :-)


indeed! :-)

I've only had a superficial look so far although it looks very good.
(I have to trust your experience with unlimited polymorphism.)

However, I was wondering about the following helper function:

+bool
+gfc_is_ptr_fcn (gfc_expr *e)
+{
+  return e != NULL && e->expr_type == EXPR_FUNCTION
+ && (gfc_expr_attr (e).pointer
+ || (e->ts.type == BT_CLASS
+ && CLASS_DATA (e)->attr.class_pointer));
+}
+
+
 /* Copy a shape array.  */

Is there a case where gfc_expr_attr (e).pointer returns false
and you really need the || part?  Looking at gfc_expr_attr
and the present context, it might just not be necessary.


I believe that, between the ChangeLogs and the comments, it is
reasonably self-explanatory.

OK for trunk?


OK from my side.

Thanks for the patch!

Harald


Regards

Paul

Fortran: Fix some bugs in associate [PR87477]

2023-06-20  Paul Thomas  

gcc/fortran
PR fortran/87477
PR fortran/88688
PR fortran/94380
PR fortran/107900
PR fortran/110224
* decl.cc (char_len_param_value): Fix memory leak.
(resolve_block_construct): Remove unnecessary static decls.
* expr.cc (gfc_is_ptr_fcn): New function.
(gfc_check_vardef_context): Use it to permit pointer function
result selectors to be used for associate names in variable
definition context.
* gfortran.h: Prototype for gfc_is_ptr_fcn.
* match.cc (build_associate_name): New function.
(gfc_match_select_type): Use the new function to replace inline
version and to build a new associate name for the case where
the supplied associate name is already used for that purpose.
* resolve.cc (resolve_assoc_var): Call gfc_is_ptr_fcn to allow
associate names with pointer function targets to be used in
variable definition context.
* trans-decl.cc (gfc_get_symbol_decl): Unlimited polymorphic
variables need deferred initialisation of the vptr.
(gfc_trans_deferred_vars): Do the vptr initialisation.
* trans-stmt.cc (trans_associate_var): Ensure that a pointer
associate name points to the target of the selector and not
the selector itself.

gcc/testsuite/
PR fortran/87477
PR fortran/107900
* gfortran.dg/pr107900.f90 : New test

PR fortran/110224
* gfortran.dg/pr110224.f90 : New test

PR fortran/88688
* gfortran.dg/pr88688.f90 : New test

PR fortran/94380
* gfortran.dg/pr94380.f90 : New test

PR fortran/95398
* gfortran.dg/pr95398.f90 : Set -std=f2008, bump the line
numbers in the error tests by two and change the text in two.





Re: libgo patch committed: Use a C function to call mmap

2023-06-20 Thread Andreas Schwab
On Jun 20 2023, Ian Lance Taylor wrote:

> OK, but I think that it does have something to do with big-endian.
> The bug was that on some 32-bit systems it was passing a 64-bit value
> to a function that expected a 32-bit value.  The problem didn't show
> up on 32-bit x86 because it is little-endian, and did show up on
> 32-bit PPC because it is big-endian.  I guess the armv7 case was
> failing for a different reason.

Not failing is no proof for correctness.  It fails everywhere for the
same reason.

-- 
Andreas Schwab, sch...@linux-m68k.org
GPG Key fingerprint = 7578 EB47 D4E5 4D69 2510  2552 DF73 E780 A9DA AEC1
"And now for something completely different."


[pushed] aarch64: Fix gcc.target/aarch64/sve/pcs failures

2023-06-20 Thread Richard Sandiford via Gcc-patches
Several gcc.target/aarch64/sve/pcs tests started failing after
6a2e8dcbbd4, because the tests weren't robust against whether
an indirect argument register or the stack pointer was used as
the base for stores.

The patch allows either base register when there is only one
indirect argument.  It disables -fcprop-registers in cases where
there are sometimes multiple indirect arguments, since the name
of the argument register is then an important part of the test.

Disabling -fcprop-registers gives poor final register allocation,
since:

* combine's make_more_copies hack adds extra redundant moves
* code with those moves is not allocated as well as moves without them
* we often rely on -fcprop-registers to clean up the allocation later

The patch therefore disables combine in the same tests as
cprop-registers.

Tested on aarch64-linux-gnu & pushed.

Richard


gcc/testsuite/
* gcc.target/aarch64/sve/pcs/args_1.c: Match moves from the stack
pointer to indirect argument registers and allow either to be used
as the base register in subsequent stores.
* gcc.target/aarch64/sve/pcs/args_8.c: Likewise.
* gcc.target/aarch64/sve/pcs/args_2.c: Allow the store of the
indirect argument to happen via the argument register or the
stack pointer.
* gcc.target/aarch64/sve/pcs/args_3.c: Likewise.
* gcc.target/aarch64/sve/pcs/args_4.c: Likewise.
* gcc.target/aarch64/sve/pcs/args_5_be_bf16.c: Likewise.
* gcc.target/aarch64/sve/pcs/args_5_be_f16.c: Likewise.
* gcc.target/aarch64/sve/pcs/args_5_be_f32.c: Likewise.
* gcc.target/aarch64/sve/pcs/args_5_be_f64.c: Likewise.
* gcc.target/aarch64/sve/pcs/args_5_be_s16.c: Likewise.
* gcc.target/aarch64/sve/pcs/args_5_be_s32.c: Likewise.
* gcc.target/aarch64/sve/pcs/args_5_be_s64.c: Likewise.
* gcc.target/aarch64/sve/pcs/args_5_be_s8.c: Likewise.
* gcc.target/aarch64/sve/pcs/args_5_be_u16.c: Likewise.
* gcc.target/aarch64/sve/pcs/args_5_be_u32.c: Likewise.
* gcc.target/aarch64/sve/pcs/args_5_be_u64.c: Likewise.
* gcc.target/aarch64/sve/pcs/args_5_be_u8.c: Likewise.
* gcc.target/aarch64/sve/pcs/args_5_le_bf16.c: Likewise.
* gcc.target/aarch64/sve/pcs/args_5_le_f16.c: Likewise.
* gcc.target/aarch64/sve/pcs/args_5_le_f32.c: Likewise.
* gcc.target/aarch64/sve/pcs/args_5_le_f64.c: Likewise.
* gcc.target/aarch64/sve/pcs/args_5_le_s16.c: Likewise.
* gcc.target/aarch64/sve/pcs/args_5_le_s32.c: Likewise.
* gcc.target/aarch64/sve/pcs/args_5_le_s64.c: Likewise.
* gcc.target/aarch64/sve/pcs/args_5_le_s8.c: Likewise.
* gcc.target/aarch64/sve/pcs/args_5_le_u16.c: Likewise.
* gcc.target/aarch64/sve/pcs/args_5_le_u32.c: Likewise.
* gcc.target/aarch64/sve/pcs/args_5_le_u64.c: Likewise.
* gcc.target/aarch64/sve/pcs/args_5_le_u8.c: Likewise.
* gcc.target/aarch64/sve/pcs/args_6_be_bf16.c: Disable
-fcprop-registers and combine.
* gcc.target/aarch64/sve/pcs/args_6_be_f16.c: Likewise.
* gcc.target/aarch64/sve/pcs/args_6_be_f32.c: Likewise.
* gcc.target/aarch64/sve/pcs/args_6_be_f64.c: Likewise.
* gcc.target/aarch64/sve/pcs/args_6_be_s16.c: Likewise.
* gcc.target/aarch64/sve/pcs/args_6_be_s32.c: Likewise.
* gcc.target/aarch64/sve/pcs/args_6_be_s64.c: Likewise.
* gcc.target/aarch64/sve/pcs/args_6_be_s8.c: Likewise.
* gcc.target/aarch64/sve/pcs/args_6_be_u16.c: Likewise.
* gcc.target/aarch64/sve/pcs/args_6_be_u32.c: Likewise.
* gcc.target/aarch64/sve/pcs/args_6_be_u64.c: Likewise.
* gcc.target/aarch64/sve/pcs/args_6_be_u8.c: Likewise.
* gcc.target/aarch64/sve/pcs/args_6_le_bf16.c: Likewise.
* gcc.target/aarch64/sve/pcs/args_6_le_f16.c: Likewise.
* gcc.target/aarch64/sve/pcs/args_6_le_f32.c: Likewise.
* gcc.target/aarch64/sve/pcs/args_6_le_f64.c: Likewise.
* gcc.target/aarch64/sve/pcs/args_6_le_s16.c: Likewise.
* gcc.target/aarch64/sve/pcs/args_6_le_s32.c: Likewise.
* gcc.target/aarch64/sve/pcs/args_6_le_s64.c: Likewise.
* gcc.target/aarch64/sve/pcs/args_6_le_s8.c: Likewise.
* gcc.target/aarch64/sve/pcs/args_6_le_u16.c: Likewise.
* gcc.target/aarch64/sve/pcs/args_6_le_u32.c: Likewise.
* gcc.target/aarch64/sve/pcs/args_6_le_u64.c: Likewise.
* gcc.target/aarch64/sve/pcs/args_6_le_u8.c: Likewise.
* gcc.target/aarch64/sve/pcs/varargs_1.c: Likewise.
* gcc.target/aarch64/sve/pcs/varargs_2_f16.c: Likewise.
* gcc.target/aarch64/sve/pcs/varargs_2_f32.c: Likewise.
* gcc.target/aarch64/sve/pcs/varargs_2_f64.c: Likewise.
* gcc.target/aarch64/sve/pcs/varargs_2_s16.c: Likewise.
* gcc.target/aarch64/sve/pcs/varargs_2_s32.c: Likewise.
* gcc.target/aarch64/sve/pcs/varargs_2_s64.c: Likewise.
* gcc.target/aarch64/sve/pc

[pushed] aarch64: Robustify stack tie handling

2023-06-20 Thread Richard Sandiford via Gcc-patches
The SVE handling of stack clash protection copied the stack
pointer to X11 before the probe and set up X11 as the CFA
for unwind purposes:

/* This is done to provide unwinding information for the stack
   adjustments we're about to do, however to prevent the optimizers
   from removing the R11 move and leaving the CFA note (which would be
   very wrong) we tie the old and new stack pointer together.
   The tie will expand to nothing but the optimizers will not touch
   the instruction.  */
rtx stack_ptr_copy = gen_rtx_REG (Pmode, STACK_CLASH_SVE_CFA_REGNUM);
emit_move_insn (stack_ptr_copy, stack_pointer_rtx);
emit_insn (gen_stack_tie (stack_ptr_copy, stack_pointer_rtx));

/* We want the CFA independent of the stack pointer for the
   duration of the loop.  */
add_reg_note (insn, REG_CFA_DEF_CFA, stack_ptr_copy);
RTX_FRAME_RELATED_P (insn) = 1;

-fcprop-registers is now smart enough to realise that X11 = SP,
replace X11 with SP in the stack tie, and delete the instruction
created above.

This patch tries to prevent that by making stack_tie fussy about
the register numbers.  It fixes failures in
gcc.target/aarch64/sve/pcs/stack_clash*.c.

Tested on aarch64-linux-gnu & pushed.

Richard


gcc/
* config/aarch64/aarch64.md (stack_tie): Hard-code the first
register operand to the stack pointer.  Require the second register
operand to have the number specified in a separate const_int operand.
* config/aarch64/aarch64.cc (aarch64_emit_stack_tie): New function.
(aarch64_allocate_and_probe_stack_space): Use it.
(aarch64_expand_prologue, aarch64_expand_epilogue): Likewise.
(aarch64_expand_epilogue): Likewise.
---
 gcc/config/aarch64/aarch64.cc | 18 ++
 gcc/config/aarch64/aarch64.md |  7 ---
 2 files changed, 18 insertions(+), 7 deletions(-)

diff --git a/gcc/config/aarch64/aarch64.cc b/gcc/config/aarch64/aarch64.cc
index ee37ceaa255..b99f12c99e9 100644
--- a/gcc/config/aarch64/aarch64.cc
+++ b/gcc/config/aarch64/aarch64.cc
@@ -9664,6 +9664,16 @@ aarch64_stack_clash_protection_alloca_probe_range (void)
   return STACK_CLASH_CALLER_GUARD;
 }
 
+/* Emit a stack tie that acts as a scheduling barrier for all previous and
+   subsequent memory accesses and that requires the stack pointer and REG
+   to have their current values.  REG can be stack_pointer_rtx if no
+   other register's value needs to be fixed.  */
+
+static void
+aarch64_emit_stack_tie (rtx reg)
+{
+  emit_insn (gen_stack_tie (reg, gen_int_mode (REGNO (reg), DImode)));
+}
 
 /* Allocate POLY_SIZE bytes of stack space using TEMP1 and TEMP2 as scratch
registers.  If POLY_SIZE is not large enough to require a probe this 
function
@@ -9776,7 +9786,7 @@ aarch64_allocate_and_probe_stack_space (rtx temp1, rtx 
temp2,
 the instruction.  */
  rtx stack_ptr_copy = gen_rtx_REG (Pmode, STACK_CLASH_SVE_CFA_REGNUM);
  emit_move_insn (stack_ptr_copy, stack_pointer_rtx);
- emit_insn (gen_stack_tie (stack_ptr_copy, stack_pointer_rtx));
+ aarch64_emit_stack_tie (stack_ptr_copy);
 
  /* We want the CFA independent of the stack pointer for the
 duration of the loop.  */
@@ -10145,7 +10155,7 @@ aarch64_expand_prologue (void)
  aarch64_add_cfa_expression (insn, regno_reg_rtx[reg1],
  hard_frame_pointer_rtx, 0);
}
-  emit_insn (gen_stack_tie (stack_pointer_rtx, hard_frame_pointer_rtx));
+  aarch64_emit_stack_tie (hard_frame_pointer_rtx);
 }
 
   aarch64_save_callee_saves (saved_regs_offset, R0_REGNUM, R30_REGNUM,
@@ -10248,7 +10258,7 @@ aarch64_expand_epilogue (bool for_sibcall)
   || cfun->calls_alloca
   || crtl->calls_eh_return)
 {
-  emit_insn (gen_stack_tie (stack_pointer_rtx, stack_pointer_rtx));
+  aarch64_emit_stack_tie (stack_pointer_rtx);
   need_barrier_p = false;
 }
 
@@ -10287,7 +10297,7 @@ aarch64_expand_epilogue (bool for_sibcall)
callee_adjust != 0, &cfi_ops);
 
   if (need_barrier_p)
-emit_insn (gen_stack_tie (stack_pointer_rtx, stack_pointer_rtx));
+aarch64_emit_stack_tie (stack_pointer_rtx);
 
   if (callee_adjust != 0)
 aarch64_pop_regs (reg1, reg2, callee_adjust, &cfi_ops);
diff --git a/gcc/config/aarch64/aarch64.md b/gcc/config/aarch64/aarch64.md
index 25f7905c6a0..01cf989641f 100644
--- a/gcc/config/aarch64/aarch64.md
+++ b/gcc/config/aarch64/aarch64.md
@@ -7325,10 +7325,11 @@ (define_insn "tlsdesc_small_sve_"
 
 (define_insn "stack_tie"
   [(set (mem:BLK (scratch))
-   (unspec:BLK [(match_operand:DI 0 "register_operand" "rk")
-(match_operand:DI 1 "register_operand" "rk")]
+   (unspec:BLK [(reg:DI SP_REGNUM)
+(match_operand:DI 0 "register_operand" "rk")
+(match_operand:DI 1 "const_int_operand")]
UNSPEC_PRLG_STK))]
-  ""
+  "REGNO (operands[0]) =

Re: [PATCH] tree-optimization/110243 - kill off IVOPTs split_offset

2023-06-20 Thread Richard Sandiford via Gcc-patches
Richard Biener  writes:
> On Mon, 19 Jun 2023, Richard Sandiford wrote:
>
>> Jeff Law  writes:
>> > On 6/16/23 06:34, Richard Biener via Gcc-patches wrote:
>> >> IVOPTs has strip_offset which suffers from the same issues regarding
>> >> integer overflow that split_constant_offset did but the latter was
>> >> fixed quite some time ago.  The following implements strip_offset
>> >> in terms of split_constant_offset, removing the redundant and
>> >> incorrect implementation.
>> >> 
>> >> The implementations are not exactly the same, strip_offset relies
>> >> on ptrdiff_tree_p to fend off too large offsets while 
>> >> split_constant_offset
>> >> simply assumes those do not happen and truncates them.  By
>> >> the same means strip_offset also handles POLY_INT_CSTs but
>> >> split_constant_offset does not.  Massaging the latter to
>> >> behave like strip_offset in those cases might be the way to go?
>> >> 
>> >> Bootstrapped and tested on x86_64-unknown-linux-gnu.
>> >> 
>> >> Comments?
>> >> 
>> >> Thanks,
>> >> Richard.
>> >> 
>> >>   PR tree-optimization/110243
>> >>   * tree-ssa-loop-ivopts.cc (strip_offset_1): Remove.
>> >>   (strip_offset): Make it a wrapper around split_constant_offset.
>> >> 
>> >>   * gcc.dg/torture/pr110243.c: New testcase.
>> > Your call -- IMHO you know this code far better than I.
>> 
>> +1, but LGTM FWIW.  I couldn't see anything obvious (and valid)
>> that strip_offset_1 handles and split_constant_offset doesn't.
>
> I think it's only the INTEGER_CST vs. ptrdiff_tree_p where the
> latter (used in strip_offset_1) handles POLY_INT_CSTs.  strip_offset
> also computes the offset in poly_int64 and checks it fits
> (to some extent) while split_constant_offset simply converts all
> INTEGER_CSTs to ssizetype because it knows it starts from addresses
> only.
>
> An alternative fix would have been to rewrite signed arithmetic
> to unsigned in strip_offset_1.
>
> I wonder if we want to change split_constant_offset to record the
> offset in a poly_int64 and have a wrapper converting it back to
> a tree for data-ref analysis.

Sounds a good idea if it's easily doable.

> Then we can at least put cst_and_fits_in_hwi checks in the code?

What would they be protecting against, if we're dealing with
address arithmetic?

> The code also tracks a range so it doesn't look like handling
> POLY_INT_CSTs is easy there - do you remember whether that was
> important for IVOPTs?

Got to admit that:

tree
strip_offset (tree expr, poly_uint64_pod *offset)
{
  poly_int64 off;
  tree core = strip_offset_1 (expr, false, false, &off);
  if (!off.is_constant ())
{
  core = expr;
  off = 0;
}
  *offset = off;
  return core;
}

doesn't seem to trigger any testsuite failures from a quick test
(but not a full regtest).

Thanks,
Richard


Re: [committed] amdgcn: minimal V64TImode vector support

2023-06-20 Thread Thomas Schwinge
Hi!

On 2023-06-19T12:37:52+0100, Andrew Stubbs  wrote:
> This patch adds just enough TImode vector support to use them for moving
> data about.

Andrew tells me this need not be worried about, but -- for my future self
searching email archives for FAILs/ICEs -- I'd like to at least document
here that commit 8aeabd9f63d8a54a5fa0b038ad4425a999e1cc75
"amdgcn: minimal V64TImode vector support" in '-march=gfx90a' testing
does regress:

[-PASS:-]{+FAIL: gcc.dg/pr78526.c (internal compiler error: in 
extract_insn, at recog.cc:2791)+}
{+FAIL:+} gcc.dg/pr78526.c (test for excess errors)

[...]/gcc/testsuite/gcc.dg/pr78526.c: In function 'foo':
[...]/gcc/testsuite/gcc.dg/pr78526.c:21:1: error: unrecognizable insn:
(insn 41 40 42 8 (set (reg:V4TI 443)
(vec_merge:V4TI (vec_duplicate:V4TI (reg:TI 433))
(reg:V4TI 443)
(ashift (const_int 1 [0x1])
(const_int 0 [0] 
"[...]/gcc/testsuite/gcc.dg/pr78526.c":13:11 -1
 (nil))
during RTL pass: vregs
[...]/gcc/testsuite/gcc.dg/pr78526.c:21:1: internal compiler error: in 
extract_insn, at recog.cc:2791
0x73d9f9 _fatal_insn(char const*, rtx_def const*, char const*, int, char 
const*)
[...]/gcc/rtl-error.cc:108
0x73da7a _fatal_insn_not_found(rtx_def const*, char const*, int, char 
const*)
[...]/gcc/rtl-error.cc:116
0xeb019e extract_insn(rtx_insn*)
[...]/gcc/recog.cc:2791
0xb2683c instantiate_virtual_regs_in_insn
[...]/gcc/function.cc:1611
0xb2683c instantiate_virtual_regs
[...]/gcc/function.cc:1984
0xb2683c execute
[...]/gcc/function.cc:2033

Similarly:

[-PASS:-]{+FAIL: gcc.dg/pr78540.c (internal compiler error: in 
extract_insn, at recog.cc:2791)+}
{+FAIL:+} gcc.dg/pr78540.c (test for excess errors)

[...]/gcc/testsuite/gcc.dg/pr78540.c: In function 'bar':
[...]/gcc/testsuite/gcc.dg/pr78540.c:27:1: error: unrecognizable insn:
(insn 68 67 69 2 (set (reg:V4TI 472)
(vec_merge:V4TI (vec_duplicate:V4TI (reg:TI 464))
(reg:V4TI 472)
(ashift (const_int 1 [0x1])
(reg:SI 474 
"[...]/gcc/testsuite/gcc.dg/pr78540.c":25:21 discrim 1 -1
 (nil))
during RTL pass: vregs
[...]/gcc/testsuite/gcc.dg/pr78540.c:27:1: internal compiler error: in 
extract_insn, at recog.cc:2791
0x73d9f9 _fatal_insn(char const*, rtx_def const*, char const*, int, char 
const*)
[...]/gcc/rtl-error.cc:108
0x73da7a _fatal_insn_not_found(rtx_def const*, char const*, int, char 
const*)
[...]/gcc/rtl-error.cc:116
0xeb019e extract_insn(rtx_insn*)
[...]/gcc/recog.cc:2791
0xb2683c instantiate_virtual_regs_in_insn
[...]/gcc/function.cc:1611
0xb2683c instantiate_virtual_regs
[...]/gcc/function.cc:1984
0xb2683c execute
[...]/gcc/function.cc:2033

Differently:

[-PASS:-]{+FAIL: gcc.dg/pr78575.c (internal compiler error: in 
gen_ds_bpermutevNm, at config/gcn/gcn.cc:1377)+}
{+FAIL:+} gcc.dg/pr78575.c (test for excess errors)

during RTL pass: expand
[...]/gcc/testsuite/gcc.dg/pr78575.c: In function 'foo':
[...]/gcc/testsuite/gcc.dg/pr78575.c:10:1: internal compiler error: in 
gen_ds_bpermutevNm, at config/gcn/gcn.cc:1377
0x1390c33 gen_ds_bpermutevNm
[...]/gcc/config/gcn/gcn.cc:1376
0x13a0f3a gcn_vectorize_vec_perm_const
[...]/gcc/config/gcn/gcn.cc:4867
0xded44b expand_vec_perm_const(machine_mode, rtx_def*, rtx_def*, 
int_vector_builder > const&, machine_mode, rtx_def*)
[...]/gcc/optabs.cc:6456
0xaae98d expand_expr_real_2(separate_ops*, rtx_def*, machine_mode, 
expand_modifier)
[...]/gcc/expr.cc:10446
0x941803 expand_gimple_stmt_1
[...]/gcc/cfgexpand.cc:3984
0x941803 expand_gimple_stmt
[...]/gcc/cfgexpand.cc:4044
0x942eba expand_gimple_basic_block
[...]/gcc/cfgexpand.cc:6096
0x9453d3 execute
[...]/gcc/cfgexpand.cc:6831

That's all.  ;-)


Grüße
 Thomas


> This is primarily for the use of divmodv64di4, which will
> use TImode to return a pair of DImode values.
>
> The TImode vectors have no other operators defined, and there are no
> hardware instructions to support this mode, beyond load and store.
>
> Committed to mainline, and OG13 will follow shortly.
>
> Andrew

> amdgcn: minimal V64TImode vector support
>
> Just enough support for TImode vectors to exist, load, store, move,
> without any real instructions available.
>
> This is primarily for the use of divmodv64di4, which uses TImode to
> return a pair of DImode values.
>
> gcc/ChangeLog:
>
>   * config/gcn/gcn-protos.h (vgpr_4reg_mode_p): New function.
>   * config/gcn/gcn-valu.md (V_4REG, V_4REG_ALT): New iterators.
>   (V_MOV, V_MOV_ALT): Likewise.
>   (scalar_mode, SCALAR_MODE): Add TImode.
>   (vnsi, VnSI, vndi, VnDI)

Re: [Patch, fortran] PR108961 - Segfault when associating to pointer from C_F_POINTER

2023-06-20 Thread Mikael Morin

Le 20/06/2023 à 18:30, Tobias Burnus a écrit :

On 20.06.23 18:19, Paul Richard Thomas via Fortran wrote:


Is there a better way to detect a type(c_ptr) formal argument?

u.derived->intmod_sym_id == ISOCBINDING_PTR ?

&& u.derived->from_intmod == INTMOD_ISO_C_BINDING ?



Re: [PATCH] Add scalar_storage_order support to C++

2023-06-20 Thread Andrew Pinski via Gcc-patches
On Thu, May 25, 2023 at 2:32 AM naveenh--- via Gcc-patches
 wrote:
>
> From: Naveen H S 
>
> This patch adds support for the scalar_storage_order attribute to the C++
> front-end.  It treats opposite-order fields the same way packed fields are
> treated, so that they will not bind to references.
> For arrays, the attribute applies to the inner type rather than the array
> type. The code is similar to how it is handled in the C front-end.
>
> 2021-04-03  Andrew Pinski   
>
> Co-authored-by: Naveen H S 

First off, sorry this was sent multiple times to the list; Naveen
didn't know Marvell's internal SMTP server was delaying the mail
so much, and he didn't realize it had gone through, so he sent it again.
I've asked him, next time, to send it once and check back in an hour to
see whether it made it through.

Ping?

Thanks,
Andrew


>
> gcc/ChangeLog:
>
> * c-family/c-attribs.cc (handle_scalar_storage_order_attribute):
> Do not reject the C++ cases.
> * cp/class.cc (layout_nonempty_base_or_field): Fix the type of
> arrays in C++.
> * cp/call.cc (reference_binding): Treat reversed field similar as
> packed fields.
> (build_temp): Likewise.
> (convert_like_internal): Emit error code for non binding reversed
> endian field.
> * cp/cp-tree.h (clk_implicit_rval) : Add clk_reversed.
> * cp/cp-tree.c (lvalue_kind) : Handle reverse storage ordered 
> operands.
>
> gcc/testsuite/ChangeLog:
>
> * c-c++-common/sso/dump.h: Move from gcc.dg/sso to c-c++-common/sso.
> * c-c++-common/sso/init1.h: Likewise.
> * c-c++-common/sso/init13.h: Likewise.
> * c-c++-common/sso/init2.h: Likewise.
> * c-c++-common/sso/init3.h: Likewise.
> * c-c++-common/sso/init4.h: Likewise.
> * c-c++-common/sso/init5.h: Likewise.
> * c-c++-common/sso/init6.h: Likewise.
> * c-c++-common/sso/init7.h: Likewise.
> * c-c++-common/sso/init8.h: Likewise.
> * c-c++-common/sso/init9.h: Likewise.
> * c-c++-common/sso/p1.c: Likewise.
> * c-c++-common/sso/p13.c: Likewise.
> * c-c++-common/sso/p2.c: Likewise.
> * c-c++-common/sso/p3.c: Likewise.
> * c-c++-common/sso/p4.c: Likewise.
> * c-c++-common/sso/p5.c: Likewise.
> * c-c++-common/sso/p6.c: Likewise.
> * c-c++-common/sso/p7.c: Likewise.
> * c-c++-common/sso/p8.c: Likewise.
> * c-c++-common/sso/p9.c: Likewise.
> * c-c++-common/sso/q1.c: Likewise.
> * c-c++-common/sso/q13.c: Likewise.
> * c-c++-common/sso/q2.c: Likewise.
> * c-c++-common/sso/q3.c: Likewise.
> * c-c++-common/sso/q4.c: Likewise.
> * c-c++-common/sso/q5.c: Likewise.
> * c-c++-common/sso/q6.c: Likewise.
> * c-c++-common/sso/q7.c: Likewise.
> * c-c++-common/sso/q8.c: Likewise.
> * c-c++-common/sso/q9.c: Likewise.
> * c-c++-common/sso/r3.c: Likewise.
> * c-c++-common/sso/r5.c: Likewise.
> * c-c++-common/sso/r6.c: Likewise.
> * c-c++-common/sso/r7.c: Likewise.
> * c-c++-common/sso/r8.c: Likewise.
> * c-c++-common/sso/s3.c: Likewise.
> * c-c++-common/sso/s5.c: Likewise.
> * c-c++-common/sso/s6.c: Likewise.
> * c-c++-common/sso/s7.c: Likewise.
> * c-c++-common/sso/s8.c: Likewise.
> * c-c++-common/sso/t1.c: Likewise.
> * c-c++-common/sso/t13.c: Likewise.
> * c-c++-common/sso/t2.c: Likewise.
> * c-c++-common/sso/t3.c: Likewise.
> * c-c++-common/sso/t4.c: Likewise.
> * c-c++-common/sso/t5.c: Likewise.
> * c-c++-common/sso/t6.c: Likewise.
> * c-c++-common/sso/t7.c: Likewise.
> * c-c++-common/sso/t8.c: Likewise.
> * c-c++-common/sso/t9.c: Likewise.
> * c-c++-common/sso/u5.c: Likewise.
> * c-c++-common/sso/t6.c: Likewise.
> * g++.dg/sso/sso.exp: New file.
> * g++.dg/sso/auto-1.C: New file.
> * g++.dg/sso/auto-2.C: New file.
> * g++.dg/sso/auto-3.C: New file.
> * g++.dg/sso/template-reference-1.C: New file.
> * g++.dg/sso/template-reference-2.C: New file.
> * g++.dg/sso/template-reference-3.C: New file.
> * g++.dg/sso/template-reference-4.C: New file.
> * g++.dg/sso-1.C: Modified.
> ---
>  gcc/c-family/c-attribs.cc |  2 +-
>  gcc/cp/call.cc| 17 ++-
>  gcc/cp/class.cc   | 22 ++
>  gcc/cp/cp-tree.h  |  3 +-
>  gcc/cp/tree.cc|  5 ++-
>  .../{gcc.dg => c-c++-common}/sso/dump.h   |  0
>  .../{gcc.dg => c-c++-common}/sso/init1.h  |  0
>  .../{gcc.dg => c-c++-common}/sso/init13.h |  0
>  .../{gcc.dg => c-c++-common}/sso/init2.h  |  0
>  .../{gcc.dg => c-c++-common}/sso/init3.h  |  0
>  .../{gcc.dg => c-c++-common}/sso/init4.h

Re: [PATCH][gensupport] drop support for define_cond_exec from compact syntax

2023-06-20 Thread Richard Sandiford via Gcc-patches
Tamar Christina  writes:
> Hi All,
>
> define_cond_exec does not support the special @@ syntax
> and so can't support {@.  As such just remove support
> for it.
>
> Bootstrapped and no issues.
>
> Ok for master?
>
> Thanks,
> Tamar
>
> gcc/ChangeLog:
>
>   PR bootstrap/110324
>   * gensupport.cc (convert_syntax): Explicitly check for RTX code.

OK, thanks.

Richard

> --- inline copy of patch -- 
> diff --git a/gcc/gensupport.cc b/gcc/gensupport.cc
> index 
> 980b49cd4814c9f92cae5876a1bae936338df071..e39e6dacce25009df1ef83a0ab9ed309704ca74b
>  100644
> --- a/gcc/gensupport.cc
> +++ b/gcc/gensupport.cc
> @@ -878,7 +878,8 @@ convert_syntax (rtx x, file_location loc)
>const char *templ;
>vec_conlist tconvec, convec, attrvec;
>  
> -  templ_index = GET_CODE (x) == DEFINE_INSN ? 3 : 2;
> +  templ_index = 3;
> +  gcc_assert (GET_CODE (x) == DEFINE_INSN);
>  
>templ = XTMPL (x, templ_index);
>  
> @@ -1053,7 +1054,6 @@ process_rtx (rtx desc, file_location loc)
>break;
>  
>  case DEFINE_COND_EXEC:
> -  convert_syntax (desc, loc);
>queue_pattern (desc, &define_cond_exec_tail, loc);
>break;


Re: [COMMITTED] ada: Add CHERI intrinsic bindings and helper functions.

2023-06-20 Thread Alex Coplan via Gcc-patches
Hi Marc,

On 20/06/2023 15:47, Marc Poulhiès wrote:
> Hi,
> 
> >> The package Interfaces.CHERI provides intrinsic bindings and
> >> helper functions to allow software to query, create, and
> >> manipulate CHERI capabilities.
> >
> > I'm curious what the motivation for these intrinsic wrappers is, given that
> > GCC trunk doesn't currently support them. Out of interest, can you share 
> > what
> > the use case for these is?
> 
> We share the same Ada frontend with different GCC compilers and
> contribute it in GCC's master branch.
> 
> You're correct that this particular change is not useful (yet) with
> master, but we are testing/using it with a CHERI-aware GCC.

Interesting, I was only curious because we (Arm) are maintaining a branch with 
CHERI
and Morello support in the ARM/morello vendor branch:
https://gcc.gnu.org/git/?p=gcc.git;a=shortlog;h=refs/vendors/ARM/heads/morello

is this a different CHERI GCC port that you're referring to?

Thanks,
Alex

> 
> Does that answer your question?
> 
> Marc


Re: [PATCH, V6] Fix power10 fusion and -fstack-protector, PR target/105325

2023-06-20 Thread Segher Boessenkool
Hi!

The patch looks great now, thank you!

But the commit message needs some work:

First off, the subject, which is a short (50 character max!) summary of
what the patch is about.
Fix power10 fusion and -fstack-protector, PR target/105325

There is absolutely nothing to do with stack protector, it does not
belong in the commit message at all, and certainly not in the subject!

On Tue, Jun 13, 2023 at 10:14:02PM -0400, Michael Meissner wrote:
> This patch fixes an issue where if you use the -fstack-protector and
> -mcpu=power10 options and you have a large stack frame, the GCC compiler will
> generate a LWA instruction with a large offset.

That is not the core issue, it is just one example where things went
wrong.  That prompted this patch, sure, so you can talk about that ten
or so lines down if you think it is important (I don't fwiw), but not at
the start here.  You should just say what was wrong, so people with a
short attention span can just skip this patch when looking through git
log (and even earlier if the subject would be good).

Commit messages are for *future* users.  Not for getting your patch
approved.

> Here is the initial fused initial insn that was created.  It refers to the
> stack location based off of the virtual frame pointer:

The soft frame pointer, not a virtual one.  For PowerPC this is not a
real register and LRA will eventually replace it, sure.  "Virtual" here
in GCC has a very specific meaning; virtual things are replaced very
soon after expand.

> When the split2 pass is run after reload has finished the ds_form_mem_operand
> predicate that was used for lwa and ld no longer returns true.

Yes.  It is the wrong predicate to use here.  *That* is the problem.

> 2)Delete ds_form_mem_operand since it is no longer used.

... and we don't expect to use it any time soon.

> 3)Use the "YZ" constraints for ld/lwa instead of "m".

Yes, constraints and predicates.

>   * config/rs6000/genfusion.pl (gen_ld_cmpi_p10_one): Fix problems that
>   allowed prefixed lwa to be generated.

You should not say what the *old* code did, in the changelog!

> --- a/gcc/config/rs6000/genfusion.pl
> +++ b/gcc/config/rs6000/genfusion.pl
> @@ -61,20 +61,30 @@ sub gen_ld_cmpi_p10_one
>my $mempred = "non_update_memory_operand";
>my $extend;
>  
> +  # We need to special case lwa.  The prefixed_load_p function in rs6000.cc
> +  # (which determines if a load instruction is prefixed) uses the fact that 
> the
> +  # register mode is different from the memory mode, and that the sign_extend
> +  # attribute is set to use DS-form rules for the address instead of D-form.
> +  # If the register size is the same, prefixed_load_p assumes we are doing a
> +  # lwz.  We change to use an lwz and word compare if we don't need to sign
> +  # extend the SImode value.  Otherwise if we need the value, we need to
> +  # make sure the insn is marked as ds-form.
> +  my $lwa_insn = ($lmode eq "SI" && $ccmode eq "CC");

That is a pretty bad name, the variable does not hold an "insn" in any
way, shape, or form.  It is hardish to give it a good name because it
mixes two questions into one variable?  You can just repeat the tiny
conditions wherever you use them, and the code would be more readable
(and less cryptic!)

> +  if ($lwa_insn && $cmp_size eq "d") {

Name it "cmp_size_char" maybe?  "cmp_size" suggests a number.

> --- /dev/null
> +++ b/gcc/testsuite/g++.target/powerpc/pr105325.C
> @@ -0,0 +1,26 @@
> +/* { dg-do assemble } */
> +/* { dg-require-effective-target lp64 } */
> +/* { dg-require-effective-target power10_ok } */
> +/* { dg-require-effective-target powerpc_prefixed_addr } */
> +/* { dg-options "-O2 -mdejagnu-cpu=power10 -fstack-protector" } */
> +
> +/* Test that power10 fusion does not generate an LWA/CMPDI instruction pair
> +   instead of PLWZ/CMPWI.  Ultimately the code was dying because the fusion
> +   load + compare -1/0/1 patterns did not handle the possibility that the 
> load
> +   might be prefixed.  The -fstack-protector option is needed to show the
> +   bug.  */

Mention the PR number somewhere in the text as well?  For grep etc.

Okay for trunk, with some more reasonable commit message.  Thank you!
Also okay for all backports.


Segher


Re: [PATCH v5 3/5] p1689r5: initial support

2023-06-20 Thread Ben Boeckel via Gcc-patches
On Tue, Feb 14, 2023 at 16:50:27 -0500, Jason Merrill wrote:
> On 1/25/23 13:06, Ben Boeckel wrote:
> > - header-unit information fields
> > 
> > Header units (including the standard library headers) are 100%
> > unsupported right now because the `-E` mechanism wants to import their
> > BMIs. A new mode (i.e., something more workable than existing `-E`
> > behavior) that mocks up header units as if they were imported purely
> > from their path and content would be required.
> 
> I notice that the cpp dependency generation tries (in open_file_failed) 
> to continue after encountering a missing file, is that not sufficient 
> for header units?  Or adjustable to be sufficient?

No. Header units can introduce macros which can be used to modify the
set of modules that are imported. Included headers are "discovered"
dependencies and don't modify the build graph (just add more files that
trigger a rebuild) and can be collected during compilation. Module
dependencies are needed to get the build correct in the first place in
order to:

- order module compilations in the build graph so that imported modules
  are ready before anything using them; and
- compute the set of flags needed for telling the compiler where
  imported modules' CMI files should be located.

> > - non-utf8 paths
> > 
> > The current standard says that paths that are not unambiguously
> > represented using UTF-8 are not supported (because these cases are rare
> > and the extra complication is not worth it at this time). Future
> > versions of the format might have ways of encoding non-UTF-8 paths. For
> > now, this patch just doesn't support non-UTF-8 paths (ignoring the
> > "unambiguously represetable in UTF-8" case).
> 
> typo "representable"

Fixed.

> > diff --git a/gcc/c-family/c-opts.cc b/gcc/c-family/c-opts.cc
> > index c68a2a27469..1c14ce3fe8e 100644
> > --- a/gcc/c-family/c-opts.cc
> > +++ b/gcc/c-family/c-opts.cc
> > @@ -77,6 +77,9 @@ static bool verbose;
> >   /* Dependency output file.  */
> >   static const char *deps_file;
> >   
> > +/* Enhanced dependency output file.  */
> 
> Maybe "structured", as in the docs?  It isn't really a direct 
> enhancement of the makefile dependencies.

Agreed. I'll also add a link to p1689r5 as a comment for what
"structured" means where it is parsed out.

> > +  if (cpp_opts->deps.format != DEPS_FMT_NONE)
> > +{
> > +  if (!fdeps_file)
> > +   fdeps_stream = out_stream;
> > +  else if (fdeps_file[0] == '-' && fdeps_file[1] == '\0')
> > +   fdeps_stream = stdout;
> 
> You probably want to check that deps_stream and fdeps_stream don't end 
> up as the same stream.

Hmm. But `stdout` is probably fine to use for both though. Basically:

if (fdeps_stream == out_stream && fdeps_stream != stdout)
  make_diagnostic_noise ();

> > @@ -1374,6 +1410,8 @@ handle_deferred_opts (void)
> >   
> > if (opt->code == OPT_MT || opt->code == OPT_MQ)
> >   deps_add_target (deps, opt->arg, opt->code == OPT_MQ);
> > +   else if (opt->code == OPT_fdep_output_)
> > + deps_add_output (deps, opt->arg, true);
> 
> How about fdeps_add_target?

Renamed.

> > diff --git a/gcc/c-family/c.opt b/gcc/c-family/c.opt
> > index ef371ca8c26..630781fdf8a 100644
> > --- a/gcc/c-family/c.opt
> > +++ b/gcc/c-family/c.opt
> > @@ -256,6 +256,18 @@ MT
> >   C ObjC C++ ObjC++ Joined Separate MissingArgError(missing makefile target 
> > after %qs)
> >   -MT   Add a target that does not require quoting.
> >   
> > +fdep-format=
> > +C ObjC C++ ObjC++ NoDriverArg Joined MissingArgError(missing format after 
> > %qs)
> > +Format for output dependency information.  Supported (\"p1689r5\").
> 
> I think we want "structured" here, as well.

Fixed.

> > diff --git a/gcc/doc/invoke.texi b/gcc/doc/invoke.texi
> > index 06d77983e30..b61c3ebd3ec 100644
> > --- a/gcc/doc/invoke.texi
> > +++ b/gcc/doc/invoke.texi
> > @@ -2791,6 +2791,21 @@ is @option{-fpermitted-flt-eval-methods=c11}.  The 
> > default when in a GNU
> >   dialect (@option{-std=gnu11} or similar) is
> >   @option{-fpermitted-flt-eval-methods=ts-18661-3}.
> >   
> > +@item -fdep-file=@var{file}
> > +@opindex fdep-file
> > +Where to write structured dependency information.
> > +
> > +@item -fdep-format=@var{format}
> > +@opindex fdep-format
> > +The format to use for structured dependency information. @samp{p1689r5} is 
> > the
> > +only supported format right now.  Note that when this argument is 
> > specified, the
> > +output of @samp{-MF} is stripped of some information (namely C++ modules) 
> > so
> > +that it does not use extended makefile syntax not understood by most tools.
> > +
> > +@item -fdep-output=@var{file}
> > +@opindex fdep-output
> > +Analogous to @option{-MT} but for structured dependency information.
> 
> Please add more detail about how these are intended to be used.

Will do.

> > diff --git a/gcc/testsuite/g++.dg/modules/p1689-1.C 
> > b/gcc/testsuite/g++.dg/modules/p1689-1.C
> > new file mode 100644
> > index 000..245e30d09ce

Re: [V1][PATCH 1/3] Provide element_count attribute to flexible array member field (PR108896)

2023-06-20 Thread Qing Zhao via Gcc-patches


> On Jun 16, 2023, at 5:35 PM, Joseph Myers  wrote:
> 
> On Fri, 16 Jun 2023, Qing Zhao via Gcc-patches wrote:
> 
>>> So for 
>>> 
>>> struct foo { int c; int buf[(struct { int d; }){ .d = .c }]; };
>>> 
>>> one knows during parsing that the .d is a designator
>>> and that .c is not.
>> 
>> Therefore, the above should be invalid based on this rule since .c is 
>> not a member in the current structure.
> 
> What do you mean by "current structure"?  I think two different concepts 
> are being conflated: the structure *being initialized* (what the C 
> standard calls the "current object" for a brace-enclosed initializer 
> list),

I think the concept of “current structure” should stick to this.

> and the structure *being defined*.
Not this.

(Forgive me about my poor English -:)).

Then it will be cleaner? 

What’s your opinion?


>  The former is what's relevant 
> for designators.  The latter is what's relevant for the suggested new 
> syntax.  And .c *is* a member of the structure being defined in this 
> example.
> 
> Those two structure types are always different, except for corner cases 
> with C2x tag compatibility (where an object of structure type might be 
> initialized in the middle of a redefinition of that type).

Can you give an example of this?  Thanks.

Qing
> 
> -- 
> Joseph S. Myers
> jos...@codesourcery.com



Re: libgo patch committed: Use a C function to call mmap

2023-06-20 Thread Ian Lance Taylor via Gcc-patches
On Tue, Jun 20, 2023 at 11:35 AM Andreas Schwab  wrote:
>
> On Jun 20 2023, Ian Lance Taylor via Gcc-patches wrote:
>
> > This libgo patches changes the runtime pacakge to use a C function to call 
> > mmap.
> >
> > The final argument to mmap, of type off_t, varies. In
> > https://go.dev/cl/445375
> > (https://gcc.gnu.org/pipermail/gcc-patches/2022-October/604158.html)
> > we changed it to always use the C off_t type, but that broke 32-bit
> > big-endian Linux systems.
>
> This has nothing to do with big-endian, armv7 isn't big-endian.

OK, but I think that it does have something to do with big-endian.
The bug was that on some 32-bit systems it was passing a 64-bit value
to a function that expected a 32-bit value.  The problem didn't show
up on 32-bit x86 because it is little-endian, and did show up on
32-bit PPC because it is big-endian.  I guess the armv7 case was
failing for a different reason.

Ian


Re: libgo patch committed: Use a C function to call mmap

2023-06-20 Thread Andreas Schwab
On Jun 20 2023, Ian Lance Taylor via Gcc-patches wrote:

> This libgo patches changes the runtime pacakge to use a C function to call 
> mmap.
>
> The final argument to mmap, of type off_t, varies. In
> https://go.dev/cl/445375
> (https://gcc.gnu.org/pipermail/gcc-patches/2022-October/604158.html)
> we changed it to always use the C off_t type, but that broke 32-bit
> big-endian Linux systems.

This has nothing to do with big-endian, armv7 isn't big-endian.

-- 
Andreas Schwab, sch...@linux-m68k.org
GPG Key fingerprint = 7578 EB47 D4E5 4D69 2510  2552 DF73 E780 A9DA AEC1
"And now for something completely different."


Re: [PATCH] tree-ssa-math-opts: Small uaddc/usubc pattern matching improvement [PR79173]

2023-06-20 Thread Richard Biener via Gcc-patches



> Am 20.06.2023 um 18:46 schrieb Jakub Jelinek via Gcc-patches 
> :
> 
> Hi!
> 
> In the following testcase we fail to pattern recognize the least significant
> .UADDC call.  The reason is that arg3 in that case is
>  _3 = .ADD_OVERFLOW (...);
>  _2 = __imag__ _3;
>  _1 = _2 != 0;
>  arg3 = (unsigned long) _1;
> and while before the changes arg3 has a single use in some .ADD_OVERFLOW
> later on, we add a .UADDC call next to it (and gsi_remove/gsi_replace only
> what is strictly necessary and leave quite a few dead stmts around which
> next DCE cleans up) and so it all of sudden isn't used just once, but twice
> (.ADD_OVERFLOW and .UADDC) and so uaddc_cast fails.  While we could tweak
> uaddc_cast and not require has_single_use in these uses, there is also
> no vrp that would figure out that because __imag__ _3 is in [0, 1] range,
> it can just use arg3 = __imag__ _3; and drop the comparison and cast.
> 
> We already search if either arg2 or arg3 is ultimately set from __imag__
> of .{{ADD,SUB}_OVERFLOW,U{ADD,SUB}C} call, so the following patch just
> remembers the lhs of __imag__ from that case and uses it later.
> 
> Bootstrapped/regtested on x86_64-linux and i686-linux, ok for trunk?

Ok

Richard 

> 2023-06-20  Jakub Jelinek  
> 
>PR middle-end/79173
>* tree-ssa-math-opts.cc (match_uaddc_usubc): Remember lhs of
>IMAGPART_EXPR of arg2/arg3 and use that as arg3 if it has the right
>type.
> 
>* g++.target/i386/pr79173-1.C: New test.
> 
> --- gcc/tree-ssa-math-opts.cc.jj2023-06-20 08:57:38.0 +0200
> +++ gcc/tree-ssa-math-opts.cc2023-06-20 10:33:52.969805538 +0200
> @@ -4728,6 +4728,7 @@ match_uaddc_usubc (gimple_stmt_iterator
>   if (!types_compatible_p (type, TREE_TYPE (arg1)))
> return false;
>   int kind[2] = { 0, 0 };
> +  tree arg_im[2] = { NULL_TREE, NULL_TREE };
>   /* At least one of arg2 and arg3 should have type compatible
>  with arg1/rhs[0], and the other one should have value in [0, 1]
>  range.  If both are in [0, 1] range and type compatible with
> @@ -4758,6 +4759,7 @@ match_uaddc_usubc (gimple_stmt_iterator
>  g = uaddc_ne0 (g);
>  if (!uaddc_is_cplxpart (g, IMAGPART_EXPR))
>continue;
> +  arg_im[i] = gimple_assign_lhs (g);
>  g = SSA_NAME_DEF_STMT (TREE_OPERAND (gimple_assign_rhs1 (g), 0));
>  if (!is_gimple_call (g) || !gimple_call_internal_p (g))
>continue;
> @@ -4781,6 +4783,7 @@ match_uaddc_usubc (gimple_stmt_iterator
> {
>   std::swap (arg2, arg3);
>   std::swap (kind[0], kind[1]);
> +  std::swap (arg_im[0], arg_im[1]);
> }
>   if ((kind[0] & 1) == 0 || (kind[1] & 6) == 0)
> return false;
> @@ -4810,6 +4813,8 @@ match_uaddc_usubc (gimple_stmt_iterator
>   /* Build .UADDC/.USUBC call which will be placed before the stmt.  */
>   gimple_stmt_iterator gsi2 = gsi_for_stmt (ovf2);
>   gimple *g;
> +  if ((kind[1] & 4) != 0 && types_compatible_p (type, TREE_TYPE (arg_im[1])))
> +arg3 = arg_im[1];
>   if ((kind[1] & 1) == 0)
> {
>   if (TREE_CODE (arg3) == INTEGER_CST)
> --- gcc/testsuite/g++.target/i386/pr79173-1.C.jj2023-06-20 
> 09:44:37.515578731 +0200
> +++ gcc/testsuite/g++.target/i386/pr79173-1.C2023-06-20 
> 10:35:33.650418101 +0200
> @@ -0,0 +1,33 @@
> +// PR middle-end/79173
> +// { dg-do compile { target c++11 } }
> +// { dg-options "-O2 -fno-stack-protector -masm=att" }
> +// { dg-final { scan-assembler-times "addq\t%r\[^\n\r]*, \\\(%rdi\\\)" 1 { 
> target lp64 } } }
> +// { dg-final { scan-assembler-times "adcq\t%r\[^\n\r]*, 8\\\(%rdi\\\)" 1 { 
> target lp64 } } }
> +// { dg-final { scan-assembler-times "adcq\t%r\[^\n\r]*, 16\\\(%rdi\\\)" 1 { 
> target lp64 } } }
> +// { dg-final { scan-assembler-times "adcq\t%r\[^\n\r]*, 24\\\(%rdi\\\)" 1 { 
> target lp64 } } }
> +// { dg-final { scan-assembler-times "addl\t%e\[^\n\r]*, 
> \\\(%e\[^\n\r]*\\\)" 1 { target ia32 } } }
> +// { dg-final { scan-assembler-times "adcl\t%e\[^\n\r]*, 
> 4\\\(%e\[^\n\r]*\\\)" 1 { target ia32 } } }
> +// { dg-final { scan-assembler-times "adcl\t%e\[^\n\r]*, 
> 8\\\(%e\[^\n\r]*\\\)" 1 { target ia32 } } }
> +// { dg-final { scan-assembler-times "adcl\t%e\[^\n\r]*, 
> 12\\\(%e\[^\n\r]*\\\)" 1 { target ia32 } } }
> +
> +template <typename T>
> +inline constexpr T
> +uaddc (T x, T y, T carry_in, T &carry_out) noexcept
> +{
> +  [[gnu::assume (carry_in <= 1)]];
> +  x += y;
> +  carry_out = x < y;
> +  x += carry_in;
> +  carry_out += x < carry_in;
> +  return x;
> +}
> +
> +void
> +foo (unsigned long *p, unsigned long *q)
> +{
> +  unsigned long c;
> +  p[0] = uaddc (p[0], q[0], 0UL, c);
> +  p[1] = uaddc (p[1], q[1], c, c);
> +  p[2] = uaddc (p[2], q[2], c, c);
> +  p[3] = uaddc (p[3], q[3], c, c);
> +}
> 
>Jakub
> 


[committed] calls: Change return type of predicate function from int to bool

2023-06-20 Thread Uros Bizjak via Gcc-patches
Also change some internal variables and some function arguments to bool.

gcc/ChangeLog:

* calls.h (setjmp_call_p): Change return type from int to bool.
* calls.cc (struct arg_data): Change "pass_on_stack" to bool.
(store_one_arg): Change return type from int to bool
and adjust function body accordingly.  Change "sibcall_failure"
variable to bool.
(finalize_must_preallocate): Ditto.  Change *must_preallocate pointer
argument  to bool.  Change "partial_seen" variable to bool.
(load_register_parameters):  Change *sibcall_failure
pointer argument to bool.
(check_sibcall_argument_overlap_1): Change return type from int to bool
and adjust function body accordingly.
(check_sibcall_argument_overlap):  Ditto.  Change
"mark_stored_args_map" argument to bool.
(emit_call_1): Change "already_popped" variable to bool.
(setjmp_call_p): Change return type from int to bool
and adjust function body accordingly.
(initialize_argument_information): Change *must_preallocate
pointer argument to bool.
(expand_call): Change "pcc_struct_value", "must_preallocate"
and "sibcall_failure" variables to bool.
(emit_library_call_value_1): Change "pcc_struct_value"
variable to bool.

Bootstrapped and regression tested on x86_64-linux-gnu {,-m32}.

Uros.
diff --git a/gcc/calls.cc b/gcc/calls.cc
index 1c9abccef68..1f3a6d5c450 100644
--- a/gcc/calls.cc
+++ b/gcc/calls.cc
@@ -94,11 +94,11 @@ struct arg_data
   /* Number of bytes to put in registers.  0 means put the whole arg
  in registers.  Also 0 if not passed in registers.  */
   int partial;
-  /* Nonzero if argument must be passed on stack.
+  /* True if argument must be passed on stack.
  Note that some arguments may be passed on the stack
- even though pass_on_stack is zero, just because FUNCTION_ARG says so.
+ even though pass_on_stack is false, just because FUNCTION_ARG says so.
  pass_on_stack identifies arguments that *cannot* go in registers.  */
-  int pass_on_stack;
+  bool pass_on_stack;
   /* Some fields packaged up for locate_and_pad_parm.  */
   struct locate_and_pad_arg_data locate;
   /* Location on the stack at which parameter should be stored.  The store
@@ -150,19 +150,19 @@ static unsigned HOST_WIDE_INT stored_args_watermark;
 static int stack_arg_under_construction;
 
 static void precompute_register_parameters (int, struct arg_data *, int *);
-static int store_one_arg (struct arg_data *, rtx, int, int, int);
+static bool store_one_arg (struct arg_data *, rtx, int, int, int);
 static void store_unaligned_arguments_into_pseudos (struct arg_data *, int);
-static int finalize_must_preallocate (int, int, struct arg_data *,
- struct args_size *);
+static bool finalize_must_preallocate (bool, int, struct arg_data *,
+  struct args_size *);
 static void precompute_arguments (int, struct arg_data *);
 static void compute_argument_addresses (struct arg_data *, rtx, int);
 static rtx rtx_for_function_call (tree, tree);
 static void load_register_parameters (struct arg_data *, int, rtx *, int,
- int, int *);
+ int, bool *);
 static int special_function_p (const_tree, int);
-static int check_sibcall_argument_overlap_1 (rtx);
-static int check_sibcall_argument_overlap (rtx_insn *, struct arg_data *, int);
-
+static bool check_sibcall_argument_overlap_1 (rtx);
+static bool check_sibcall_argument_overlap (rtx_insn *, struct arg_data *,
+   bool);
 static tree split_complex_types (tree);
 
 #ifdef REG_PARM_STACK_SPACE
@@ -383,7 +383,7 @@ emit_call_1 (rtx funexp, tree fntree ATTRIBUTE_UNUSED, tree 
fndecl ATTRIBUTE_UNU
 {
   rtx rounded_stack_size_rtx = gen_int_mode (rounded_stack_size, Pmode);
   rtx call, funmem, pat;
-  int already_popped = 0;
+  bool already_popped = false;
   poly_int64 n_popped = 0;
 
   /* Sibling call patterns never pop arguments (no sibcall(_value)_pop
@@ -461,7 +461,7 @@ emit_call_1 (rtx funexp, tree fntree ATTRIBUTE_UNUSED, tree 
fndecl ATTRIBUTE_UNU
pat = targetm.gen_call_pop (funmem, rounded_stack_size_rtx,
next_arg_reg, n_pop);
 
-  already_popped = 1;
+  already_popped = true;
 }
   else
 {
@@ -666,14 +666,17 @@ decl_return_flags (tree fndecl)
   return 0;
 }
 
-/* Return nonzero when FNDECL represents a call to setjmp.  */
+/* Return true when FNDECL represents a call to setjmp.  */
 
-int
+bool
 setjmp_call_p (const_tree fndecl)
 {
   if (DECL_IS_RETURNS_TWICE (fndecl))
-return ECF_RETURNS_TWICE;
-  return special_function_p (fndecl, 0) & ECF_RETURNS_TWICE;
+return true;
+  if (special_function_p (fndecl, 0) & ECF_RETURNS_TWICE)
+return true;
+
+  return false;
 }
 
 
@@ -1266,8 +1269,11 @@ maybe_complain_about_tail_call (tree call_expr, const 
char *reason)
OLD_STACK_LEVEL is a pointer t

PING^2: Re: [PATCH 1/3] testsuite: move handle-multiline-outputs to before check for blank lines

2023-06-20 Thread David Malcolm via Gcc-patches
Does this testsuite patch look OK?

  https://gcc.gnu.org/pipermail/gcc-patches/2023-May/620275.html

Thanks
David

On Mon, 2023-06-12 at 19:11 -0400, David Malcolm wrote:
> Please can someone review this testsuite patch:
>   https://gcc.gnu.org/pipermail/gcc-patches/2023-May/620275.html
> 
> Thanks
> Dave
> 
> On Wed, 2023-05-31 at 14:06 -0400, David Malcolm wrote:
> > I have followup patches that require checking for multiline
> > patterns
> > that have blank lines within them, so this moves the handling of
> > multiline patterns before the check for blank lines, allowing for
> > such
> > multiline patterns.
> > 
> > Doing so uncovers some issues with existing multiline directives,
> > which
> > the patch fixes.
> > 
> > gcc/testsuite/ChangeLog:
> > * c-c++-common/Wlogical-not-parentheses-2.c: Split up the
> > multiline directive.
> > * gcc.dg/analyzer/malloc-macro-inline-events.c: Remove
> > redundant
> > dg-regexp directives.
> > * gcc.dg/missing-header-fixit-5.c: Split up the multiline
> > directives.
> > * lib/gcc-dg.exp (gcc-dg-prune): Move call to
> > handle-multiline-outputs from prune_gcc_output to here.
> > * lib/multiline.exp (dg-end-multiline-output): Move call to
> > maybe-handle-nn-line-numbers from prune_gcc_output to here.
> > * lib/prune.exp (prune_gcc_output): Move calls to
> > maybe-handle-nn-line-numbers and handle-multiline-outputs
> > from
> > here to the above.
> > ---
> >  .../c-c++-common/Wlogical-not-parentheses-2.c  |  2 ++
> >  .../gcc.dg/analyzer/malloc-macro-inline-events.c   |  5 -
> >  gcc/testsuite/gcc.dg/missing-header-fixit-5.c  | 10
> > --
> >  gcc/testsuite/lib/gcc-dg.exp   |  5 +
> >  gcc/testsuite/lib/multiline.exp    |  7
> > ++-
> >  gcc/testsuite/lib/prune.exp    |  7 --
> > -
> >  6 files changed, 21 insertions(+), 15 deletions(-)
> > 
> > diff --git a/gcc/testsuite/c-c++-common/Wlogical-not-parentheses-
> > 2.c
> > b/gcc/testsuite/c-c++-common/Wlogical-not-parentheses-2.c
> > index ba8dce84f5d..2d9382014c4 100644
> > --- a/gcc/testsuite/c-c++-common/Wlogical-not-parentheses-2.c
> > +++ b/gcc/testsuite/c-c++-common/Wlogical-not-parentheses-2.c
> > @@ -12,6 +12,8 @@ foo (int aaa, int bbb)
> >  /* { dg-begin-multiline-output "" }
> >     r += !aaa == bbb;
> >   ^~
> > +   { dg-end-multiline-output "" } */
> > +/* { dg-begin-multiline-output "" }
> >     r += !aaa == bbb;
> >  ^~~~
> >  (   )
> > diff --git a/gcc/testsuite/gcc.dg/analyzer/malloc-macro-inline-
> > events.c b/gcc/testsuite/gcc.dg/analyzer/malloc-macro-inline-
> > events.c
> > index f08aee626a5..9134bb4781e 100644
> > --- a/gcc/testsuite/gcc.dg/analyzer/malloc-macro-inline-events.c
> > +++ b/gcc/testsuite/gcc.dg/analyzer/malloc-macro-inline-events.c
> > @@ -12,11 +12,6 @@ int test (void *ptr)
> >    WRAPPED_FREE (ptr); /* { dg-message "in expansion of macro
> > 'WRAPPED_FREE'" } */
> >    WRAPPED_FREE (ptr); /* { dg-message "in expansion of macro
> > 'WRAPPED_FREE'" } */
> >  
> > -  /* Erase the spans indicating the header file
> > - (to avoid embedding path assumptions).  */
> > -  /* { dg-regexp "\[^|\]+/malloc-macro.h:\[0-9\]+:\[0-9\]+:" } */
> > -  /* { dg-regexp "\[^|\]+/malloc-macro.h:\[0-9\]+:\[0-9\]+:" } */
> > -
> >    /* { dg-begin-multiline-output "" }
> >     NN | #define WRAPPED_FREE(PTR) free(PTR)
> >    |   ^
> > diff --git a/gcc/testsuite/gcc.dg/missing-header-fixit-5.c
> > b/gcc/testsuite/gcc.dg/missing-header-fixit-5.c
> > index 916033c689c..bf44feb24a9 100644
> > --- a/gcc/testsuite/gcc.dg/missing-header-fixit-5.c
> > +++ b/gcc/testsuite/gcc.dg/missing-header-fixit-5.c
> > @@ -12,14 +12,18 @@ foo (char *m, int i)
> >    /* { dg-begin-multiline-output "" }
> >     11 |   if (isdigit (m[0]))
> >    |   ^~~
> > + { dg-end-multiline-output "" } */
> > +  /* { dg-begin-multiline-output "" }
> >    +++ |+#include 
> >  1 | 
> >   { dg-end-multiline-output "" } */
> >  {
> >    return abs (i); /* { dg-warning "implicit declaration of
> > function" } */
> >    /* { dg-begin-multiline-output "" }
> > -   19 |   return abs (i);
> > +   21 |   return abs (i);
> >    |  ^~~
> > + { dg-end-multiline-output "" } */
> > +  /* { dg-begin-multiline-output "" }
> >    +++ |+#include 
> >  1 | 
> >   { dg-end-multiline-output "" } */
> > @@ -27,8 +31,10 @@ foo (char *m, int i)
> >    else
> >  putchar (m[0]); /* { dg-warning "implicit declaration of
> > function" } */
> >    /* { dg-begin-multiline-output "" }
> > -   28 | putchar (m[0]);
> > +   32 | putchar (m[0]);
> >    | ^~~
> > + { dg-end-multiline-output "" } */
> > +  /* { dg-begin-multiline-output "" }
> >    +++ |+#include 
> >  1 | 
> >   { dg-end-multiline-ou

[PATCH][gensupport] drop support for define_cond_exec from compact syntax

2023-06-20 Thread Tamar Christina via Gcc-patches
Hi All,

define_cond_exec does not support the special @@ syntax
and so can't support {@.  As such just remove support
for it.

Bootstrapped and no issues.

Ok for master?

Thanks,
Tamar

gcc/ChangeLog:

PR bootstrap/110324
* gensupport.cc (convert_syntax): Explicitly check for RTX code.

--- inline copy of patch -- 
diff --git a/gcc/gensupport.cc b/gcc/gensupport.cc
index 
980b49cd4814c9f92cae5876a1bae936338df071..e39e6dacce25009df1ef83a0ab9ed309704ca74b
 100644
--- a/gcc/gensupport.cc
+++ b/gcc/gensupport.cc
@@ -878,7 +878,8 @@ convert_syntax (rtx x, file_location loc)
   const char *templ;
   vec_conlist tconvec, convec, attrvec;
 
-  templ_index = GET_CODE (x) == DEFINE_INSN ? 3 : 2;
+  templ_index = 3;
+  gcc_assert (GET_CODE (x) == DEFINE_INSN);
 
   templ = XTMPL (x, templ_index);
 
@@ -1053,7 +1054,6 @@ process_rtx (rtx desc, file_location loc)
   break;
 
 case DEFINE_COND_EXEC:
-  convert_syntax (desc, loc);
   queue_pattern (desc, &define_cond_exec_tail, loc);
   break;
 




-- 
diff --git a/gcc/gensupport.cc b/gcc/gensupport.cc
index 
980b49cd4814c9f92cae5876a1bae936338df071..e39e6dacce25009df1ef83a0ab9ed309704ca74b
 100644
--- a/gcc/gensupport.cc
+++ b/gcc/gensupport.cc
@@ -878,7 +878,8 @@ convert_syntax (rtx x, file_location loc)
   const char *templ;
   vec_conlist tconvec, convec, attrvec;
 
-  templ_index = GET_CODE (x) == DEFINE_INSN ? 3 : 2;
+  templ_index = 3;
+  gcc_assert (GET_CODE (x) == DEFINE_INSN);
 
   templ = XTMPL (x, templ_index);
 
@@ -1053,7 +1054,6 @@ process_rtx (rtx desc, file_location loc)
   break;
 
 case DEFINE_COND_EXEC:
-  convert_syntax (desc, loc);
   queue_pattern (desc, &define_cond_exec_tail, loc);
   break;
 





libgo patch committed: Use a C function to call mmap

2023-06-20 Thread Ian Lance Taylor via Gcc-patches
This libgo patch changes the runtime package to use a C function to call mmap.

The final argument to mmap, of type off_t, varies. In
https://go.dev/cl/445375
(https://gcc.gnu.org/pipermail/gcc-patches/2022-October/604158.html)
we changed it to always use the C off_t type, but that broke 32-bit
big-endian Linux systems.  On those systems, using the C off_t type
requires calling the mmap64 function.  In C this is automatically
handled by the <sys/mman.h> file.  In Go, we would have to change the
magic //extern comment to call mmap64 when appropriate.  Rather than
try to get that right, we instead go through a C function that uses C
implicit type conversions to pick the right type.

This fixes https://gcc.gnu.org/PR110297.

Bootstrapped and tested on x86_64-pc-linux-gnu and
powerpc-pc-linux-gnu (32-bit and 64-bit).  Committed to trunk and GCC
13 branch.

Ian
7f5a6c8a27190daf9daadf5e9f14ef5f4ece
diff --git a/gcc/go/gofrontend/MERGE b/gcc/go/gofrontend/MERGE
index 1191a8d663d..dbb2d68f909 100644
--- a/gcc/go/gofrontend/MERGE
+++ b/gcc/go/gofrontend/MERGE
@@ -1,4 +1,4 @@
-a3a3c3a2d1bc6a8ca51b302d08c94ef27cdd8f0f
+6a1d165c2218cd127ee937a1f45599075762f716
 
 The first line of this file holds the git revision number of the last
 merge done from the gofrontend repository.
diff --git a/libgo/Makefile.am b/libgo/Makefile.am
index 207d5a98127..920f8cc7071 100644
--- a/libgo/Makefile.am
+++ b/libgo/Makefile.am
@@ -462,6 +462,7 @@ runtime_files = \
runtime/go-memclr.c \
runtime/go-memmove.c \
runtime/go-memequal.c \
+   runtime/go-mmap.c \
runtime/go-nanotime.c \
runtime/go-now.c \
runtime/go-nosys.c \
diff --git a/libgo/go/runtime/mem_gccgo.go b/libgo/go/runtime/mem_gccgo.go
index 1e84f4f5c56..e7b51ff37cc 100644
--- a/libgo/go/runtime/mem_gccgo.go
+++ b/libgo/go/runtime/mem_gccgo.go
@@ -14,8 +14,8 @@ import (
 //go:linkname sysAlloc
 //go:linkname sysFree
 
-//extern mmap
-func sysMmap(addr unsafe.Pointer, n uintptr, prot, flags, fd int32, off 
_libgo_off_t_type) unsafe.Pointer
+//extern __go_mmap
+func sysMmap(addr unsafe.Pointer, n uintptr, prot, flags, fd int32, off 
uintptr) unsafe.Pointer
 
 //extern munmap
 func munmap(addr unsafe.Pointer, length uintptr) int32
@@ -38,7 +38,7 @@ func init() {
 }
 
 func mmap(addr unsafe.Pointer, n uintptr, prot, flags, fd int32, off uintptr) 
(unsafe.Pointer, int) {
-   p := sysMmap(addr, n, prot, flags, fd, _libgo_off_t_type(off))
+   p := sysMmap(addr, n, prot, flags, fd, off)
if uintptr(p) == _MAP_FAILED {
return nil, errno()
}
diff --git a/libgo/runtime/go-mmap.c b/libgo/runtime/go-mmap.c
new file mode 100644
index 000..b2327ba68f5
--- /dev/null
+++ b/libgo/runtime/go-mmap.c
@@ -0,0 +1,21 @@
+/* go-mmap.c -- functions for calling C mmap functions.
+
+   Copyright 2023 The Go Authors. All rights reserved.
+   Use of this source code is governed by a BSD-style
+   license that can be found in the LICENSE file.  */
+
+#include "config.h"
+
+#include <stdint.h>
+#include <sys/mman.h>
+
+/* The exact C function to call varies between mmap and mmap64, and
+   the size of the off_t argument also varies.  Here we provide a
+   function that Go code can call with consistent types.  */
+
+void *
+__go_mmap(void *addr, uintptr_t length, int32_t prot, int32_t flags,
+ int32_t fd, uintptr_t offset)
+{
+  return mmap(addr, length, prot, flags, fd, offset);
+}
diff --git a/libgo/runtime/runtime.h b/libgo/runtime/runtime.h
index b3dc4fd2414..699770d53ad 100644
--- a/libgo/runtime/runtime.h
+++ b/libgo/runtime/runtime.h
@@ -355,9 +355,6 @@ boolruntime_notetsleepg(Note*, int64)  // false - 
timeout
 /*
  * low level C-called
  */
-#define runtime_mmap mmap
-#define runtime_munmap munmap
-#define runtime_madvise madvise
 #define runtime_memclr(buf, size) __builtin_memset((buf), 0, (size))
 #define runtime_getcallerpc() __builtin_return_address(0)
 


Re: [PATCH v5 3/5] p1689r5: initial support

2023-06-20 Thread Ben Boeckel via Gcc-patches
On Mon, Jun 19, 2023 at 17:33:58 -0400, Jason Merrill wrote:
> On 5/12/23 10:24, Ben Boeckel wrote:
> > `file` can be omitted (the `output_stream` will be used then). I *think*
> > I see that adding:
> > 
> >  %{fdeps_file:-fdeps-file=%{!o:%b.ddi}%{o*:%.ddi%*}}
> 
> %{!fdeps-file: but yes.
> 
> > would at least do for `-fdeps-file` defaults? I don't know if there's a
> > reasonable default for `-fdeps-target=` though given that this command
> > line has no information about the object file that will be used (`-o` is
> > used for preprocessor output since we're leaning on `-E` here).
> 
> I would think it could default to %b.o?

I suppose that could work, yes.

> I had quite a few more comments on the v5 patch that you didn't respond 
> to here or address in the v6 patch; did your mail client hide them from you?

Oof. Sorry, I saw large chunks of quoting and apparently assumed the
rest was fine (I usually do aggressive trimming when doing that style of
review). I see them now. Will go through and include in v7.

--Ben


[PATCH] tree-ssa-math-opts: Small uaddc/usubc pattern matching improvement [PR79173]

2023-06-20 Thread Jakub Jelinek via Gcc-patches
Hi!

In the following testcase we fail to pattern recognize the least significant
.UADDC call.  The reason is that arg3 in that case is
  _3 = .ADD_OVERFLOW (...);
  _2 = __imag__ _3;
  _1 = _2 != 0;
  arg3 = (unsigned long) _1;
and while before the changes arg3 has a single use in some .ADD_OVERFLOW
later on, we add a .UADDC call next to it (and gsi_remove/gsi_replace only
what is strictly necessary and leave quite a few dead stmts around which
next DCE cleans up) and so it all of a sudden isn't used just once, but twice
(.ADD_OVERFLOW and .UADDC) and so uaddc_cast fails.  While we could tweak
uaddc_cast and not require has_single_use in these uses, there is also
no vrp that would figure out that because __imag__ _3 is in [0, 1] range,
it can just use arg3 = __imag__ _3; and drop the comparison and cast.

We already search if either arg2 or arg3 is ultimately set from __imag__
of .{{ADD,SUB}_OVERFLOW,U{ADD,SUB}C} call, so the following patch just
remembers the lhs of __imag__ from that case and uses it later.

Bootstrapped/regtested on x86_64-linux and i686-linux, ok for trunk?

2023-06-20  Jakub Jelinek  

PR middle-end/79173
* tree-ssa-math-opts.cc (match_uaddc_usubc): Remember lhs of
IMAGPART_EXPR of arg2/arg3 and use that as arg3 if it has the right
type.

* g++.target/i386/pr79173-1.C: New test.

--- gcc/tree-ssa-math-opts.cc.jj2023-06-20 08:57:38.0 +0200
+++ gcc/tree-ssa-math-opts.cc   2023-06-20 10:33:52.969805538 +0200
@@ -4728,6 +4728,7 @@ match_uaddc_usubc (gimple_stmt_iterator
   if (!types_compatible_p (type, TREE_TYPE (arg1)))
 return false;
   int kind[2] = { 0, 0 };
+  tree arg_im[2] = { NULL_TREE, NULL_TREE };
   /* At least one of arg2 and arg3 should have type compatible
  with arg1/rhs[0], and the other one should have value in [0, 1]
  range.  If both are in [0, 1] range and type compatible with
@@ -4758,6 +4759,7 @@ match_uaddc_usubc (gimple_stmt_iterator
  g = uaddc_ne0 (g);
  if (!uaddc_is_cplxpart (g, IMAGPART_EXPR))
continue;
+ arg_im[i] = gimple_assign_lhs (g);
  g = SSA_NAME_DEF_STMT (TREE_OPERAND (gimple_assign_rhs1 (g), 0));
  if (!is_gimple_call (g) || !gimple_call_internal_p (g))
continue;
@@ -4781,6 +4783,7 @@ match_uaddc_usubc (gimple_stmt_iterator
 {
   std::swap (arg2, arg3);
   std::swap (kind[0], kind[1]);
+  std::swap (arg_im[0], arg_im[1]);
 }
   if ((kind[0] & 1) == 0 || (kind[1] & 6) == 0)
 return false;
@@ -4810,6 +4813,8 @@ match_uaddc_usubc (gimple_stmt_iterator
   /* Build .UADDC/.USUBC call which will be placed before the stmt.  */
   gimple_stmt_iterator gsi2 = gsi_for_stmt (ovf2);
   gimple *g;
+  if ((kind[1] & 4) != 0 && types_compatible_p (type, TREE_TYPE (arg_im[1])))
+arg3 = arg_im[1];
   if ((kind[1] & 1) == 0)
 {
   if (TREE_CODE (arg3) == INTEGER_CST)
--- gcc/testsuite/g++.target/i386/pr79173-1.C.jj2023-06-20 
09:44:37.515578731 +0200
+++ gcc/testsuite/g++.target/i386/pr79173-1.C   2023-06-20 10:35:33.650418101 
+0200
@@ -0,0 +1,33 @@
+// PR middle-end/79173
+// { dg-do compile { target c++11 } }
+// { dg-options "-O2 -fno-stack-protector -masm=att" }
+// { dg-final { scan-assembler-times "addq\t%r\[^\n\r]*, \\\(%rdi\\\)" 1 { 
target lp64 } } }
+// { dg-final { scan-assembler-times "adcq\t%r\[^\n\r]*, 8\\\(%rdi\\\)" 1 { 
target lp64 } } }
+// { dg-final { scan-assembler-times "adcq\t%r\[^\n\r]*, 16\\\(%rdi\\\)" 1 { 
target lp64 } } }
+// { dg-final { scan-assembler-times "adcq\t%r\[^\n\r]*, 24\\\(%rdi\\\)" 1 { 
target lp64 } } }
+// { dg-final { scan-assembler-times "addl\t%e\[^\n\r]*, \\\(%e\[^\n\r]*\\\)" 
1 { target ia32 } } }
+// { dg-final { scan-assembler-times "adcl\t%e\[^\n\r]*, 4\\\(%e\[^\n\r]*\\\)" 
1 { target ia32 } } }
+// { dg-final { scan-assembler-times "adcl\t%e\[^\n\r]*, 8\\\(%e\[^\n\r]*\\\)" 
1 { target ia32 } } }
+// { dg-final { scan-assembler-times "adcl\t%e\[^\n\r]*, 
12\\\(%e\[^\n\r]*\\\)" 1 { target ia32 } } }
+
+template 
+inline constexpr T
+uaddc (T x, T y, T carry_in, T &carry_out) noexcept
+{
+  [[gnu::assume (carry_in <= 1)]];
+  x += y;
+  carry_out = x < y;
+  x += carry_in;
+  carry_out += x < carry_in;
+  return x;
+}
+
+void
+foo (unsigned long *p, unsigned long *q)
+{
+  unsigned long c;
+  p[0] = uaddc (p[0], q[0], 0UL, c);
+  p[1] = uaddc (p[1], q[1], c, c);
+  p[2] = uaddc (p[2], q[2], c, c);
+  p[3] = uaddc (p[3], q[3], c, c);
+}

Jakub



Re: [pushed] wwwdocs: Add GCC Code of Conduct

2023-06-20 Thread Xi Ruoyao via Gcc-patches
On Tue, 2023-06-20 at 12:22 -0400, Jason Merrill via Gcc-patches wrote:
> diff --git a/htdocs/bugs/index.html b/htdocs/bugs/index.html
> index aaef8915..6dbe5d45 100644
> --- a/htdocs/bugs/index.html
> +++ b/htdocs/bugs/index.html
> @@ -122,6 +122,9 @@ three of which can be obtained from the output of 
> gcc -v:
>    Questions about the correctness or the expected behavior of
>    certain constructs that are not GCC extensions.  Ask them in forums
>    dedicated to the discussion of the programming language.
> +
> +  Violations of the Code of Conduct.

The link should be "../conduct.html" :).

> +
>  

-- 
Xi Ruoyao 
School of Aerospace Science and Technology, Xidian University


Re: [Patch, fortran] PR108961 - Segfault when associating to pointer from C_F_POINTER

2023-06-20 Thread Tobias Burnus

On 20.06.23 18:19, Paul Richard Thomas via Fortran wrote:


Is there a better way to detect a type(c_ptr) formal argument?

u.derived->intmod_sym_id == ISOCBINDING_PTR ?

Tobias

-
Siemens Electronic Design Automation GmbH; Anschrift: Arnulfstraße 201, 80634 
München; Gesellschaft mit beschränkter Haftung; Geschäftsführer: Thomas 
Heurung, Frank Thürauf; Sitz der Gesellschaft: München; Registergericht 
München, HRB 106955


[pushed] wwwdocs: Add GCC Code of Conduct

2023-06-20 Thread Jason Merrill via Gcc-patches
As announced on gcc@.

---
 htdocs/bugs/index.html   |   3 +
 htdocs/conduct-faq.html  |  66 
 htdocs/conduct-report.html   | 122 ++
 htdocs/conduct-response.html | 141 +++
 htdocs/conduct.html  | 118 +
 htdocs/index.html|   3 +
 htdocs/lists.html|   2 +
 7 files changed, 455 insertions(+)
 create mode 100644 htdocs/conduct-faq.html
 create mode 100644 htdocs/conduct-report.html
 create mode 100644 htdocs/conduct-response.html
 create mode 100644 htdocs/conduct.html

diff --git a/htdocs/bugs/index.html b/htdocs/bugs/index.html
index aaef8915..6dbe5d45 100644
--- a/htdocs/bugs/index.html
+++ b/htdocs/bugs/index.html
@@ -122,6 +122,9 @@ three of which can be obtained from the output of gcc 
-v:
   Questions about the correctness or the expected behavior of
   certain constructs that are not GCC extensions.  Ask them in forums
   dedicated to the discussion of the programming language.
+
+  Violations of the Code of Conduct.
+
 
 
 Where to post it
diff --git a/htdocs/conduct-faq.html b/htdocs/conduct-faq.html
new file mode 100644
index ..867527e8
--- /dev/null
+++ b/htdocs/conduct-faq.html
@@ -0,0 +1,66 @@
+
+
+
+
+
+
+
+GCC Code of Conduct FAQ
+https://gcc.gnu.org/gcc.css";>
+
+
+
+GCC Code of Conduct FAQ
+
+Why adopt a Code of Conduct?
+The vast majority of the time, the GCC community is a very civil,
+cooperative space. On the rare occasions that it isn't, it's helpful to have
+something to point to to remind people of our expectations.  It's also good for
+newcomers to have something to refer to, for both how they are expected to
+conduct themselves and how they can expect to be treated.
+
+More importantly, if there is offensive behavior that isn't addressed
+immediately, it's important for there to be a way to report that to the project
+leadership so that we can intervene.
+
+Why not just refer to the GNU Kind Communication Guidelines?
+The Guidelines are helpful for establishing the kind of behavior we want to
+see, but it's also important to have a reporting mechanism to help people feel
+safe and supported in the community, and to help leadership to hear about
+problems that might otherwise have escaped their notice.
+
+Shouldn't people try to work problems out between themselves first?
+Certainly, in many cases.  And we hope referring to the CoC might be helpful
+then, as well.  If the problem is successfully resolved, no report is
+necessary, though individuals might still want to let the CoC committee know
+about the incident just for their information.
+
+What about the rights of the reportee?
+The CoC committee will get their perspective, and any other available
+information, before taking any action.
+
+Besides which, we expect the response to the vast majority of incidents to
+be email asking those involved to moderate their behavior. That has been the
+experience of other free software projects after adopting a code of conduct:
+see the https://www.kernel.org/code-of-conduct.html";>Linux Kernel
+CoC reports for an example.
+
+Is this going to be used to drive out people with "wrong" opinions?
+No, this is a code of conduct, not a code of
+philosophy. And it only deals with behavior within the context of the GCC
+project; for instance, harassment in private email in response to a public
+discussion is covered, a social media post about politics is not.
+
+Can I report incidents from before the adoption of the CoC?
+Yes. We may take no action if the issue seems to have been resolved, but it
+can be helpful to have context for future discussions.
+
+My question isn't answered here!
+Please also see the Reporting Guidelines
+and Response Guide.  If they don't answer
+your question either,
+email mailto:cond...@gcc.gnu.org";>cond...@gcc.gnu.org with any
+additional questions or feedback.
+
+
diff --git a/htdocs/conduct-report.html b/htdocs/conduct-report.html
new file mode 100644
index ..13be57ce
--- /dev/null
+++ b/htdocs/conduct-report.html
@@ -0,0 +1,122 @@
+
+
+
+
+
+
+
+GCC Code of Conduct Reporting Guide
+https://gcc.gnu.org/gcc.css";>
+
+
+
+GCC Code of Conduct Reporting Guide
+
+NOTE: The Code
+of Conduct Committee, and the formal reporting and response procedures, are not
+yet fully established.  The below are the currently planned procedures for when
+the committee is in place.
+
+If you believe someone is violating the code of conduct we ask that you
+report it to the CoC committee by
+emailing mailto:cond...@gcc.gnu.org";>cond...@gcc.gnu.org. 
All
+reports will be kept confidential to the extent permitted by applicable
+law. In some cases we may determine that a public statement will need
+to be made. If that's the case, the identities of all reporters will remain
+confidential unless they instruct us otherwise.
+
+If you are unsure whether the incident is a violation, or whether the space
+where it happened is covered by this

[Patch, fortran] PR108961 - Segfault when associating to pointer from C_F_POINTER

2023-06-20 Thread Paul Richard Thomas via Gcc-patches
Dear All,

This patch is verging on obvious. The PR was originally, incorrectly
blocking PR87477 and the testcase has remained in my 'associate'
directory. I thought that it is time to get shot of it!

Is there a better way to detect a type(c_ptr) formal argument?

Subject to advice on the question, OK for trunk?

Paul
diff --git a/gcc/fortran/trans-expr.cc b/gcc/fortran/trans-expr.cc
index 45a984b6bdb..0823efd5abc 100644
--- a/gcc/fortran/trans-expr.cc
+++ b/gcc/fortran/trans-expr.cc
@@ -7353,6 +7353,8 @@ gfc_conv_procedure_call (gfc_se * se, gfc_symbol * sym,
 	 need the length.  */
   if (parmse.string_length != NULL_TREE
 	  && !sym->attr.is_bind_c
+	  && !(fsym && fsym->ts.type == BT_DERIVED
+	   && !strcmp (fsym->ts.u.derived->name, "c_ptr"))
 	  && !(fsym && UNLIMITED_POLY (fsym)))
 	vec_safe_push (stringargs, parmse.string_length);
 


Change.Logs
Description: Binary data
! { dg-do run }
!
! Contributed by Jeffrey Hill  
!
! Helper module for the test: wraps C_F_POINTER plus a pointer assignment
! to a deferred-length character pointer.
module associate_ptr
use iso_c_binding
contains
! Associate CPTR with a fixed-length (len=4) character pointer, verify its
! contents, then point the deferred-length PTR2 at the same target.
!   cptr - C pointer to the character data (intent(in)).
!   ptr2 - deferred-length character pointer set by this routine (intent(out)).
! Stops with code 1 if the data seen through ptr1 is not 'abcd'.
subroutine c_f_strpointer(cptr, ptr2)
type(c_ptr), target, intent(in) :: cptr
character(kind=c_char,len=4), pointer :: ptr1
character(kind=c_char,len=:), pointer, intent(out) :: ptr2
call c_f_pointer(cptr, ptr1)
if (ptr1 .ne. 'abcd') stop 1
ptr2 => ptr1  ! Failed here
end subroutine
end module

! Driver: passes the C address of a character array to c_f_strpointer and
! checks the string seen through the returned pointer.
program test_associate_ptr
use associate_ptr
character(kind=c_char, len=1), target :: char_array(7)
character(kind=c_char,len=:), pointer :: ptr2
! 'abcd' followed by a C null character and two further characters.
char_array = ['a', 'b', 'c', 'd', c_null_char, 'e', 'f']
! The first argument was providing a constant hidden string length => segfault
call c_f_strpointer(c_loc(char_array), ptr2)
! ptr2 is expected to compare equal to 'abcd'; otherwise stop with code 2.
if (ptr2 .ne. 'abcd') stop 2
end program


Re: [PATCH 2/2] libstdc++: use new built-in trait __is_const

2023-06-20 Thread Patrick Palka via Gcc-patches
On Tue, 21 Mar 2023, Ken Matsui wrote:

> This patch lets libstdc++ use new built-in trait __is_const.
> 
> libstdc++-v3/ChangeLog:
> 
>   * include/std/type_traits (is_const): Use __is_const built-in trait.

We should also use it in is_const_v (likewise for the __is_array and
__is_volatile patches).

> ---
>  libstdc++-v3/include/std/type_traits | 7 +++
>  1 file changed, 7 insertions(+)
> 
> diff --git a/libstdc++-v3/include/std/type_traits 
> b/libstdc++-v3/include/std/type_traits
> index 2bd607a8b8f..e77de828501 100644
> --- a/libstdc++-v3/include/std/type_traits
> +++ b/libstdc++-v3/include/std/type_traits
> @@ -764,6 +764,12 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
>// Type properties.
>  
>/// is_const
> +#if __has_builtin(__is_const)
> +  template
> +struct is_const
> +: public __bool_constant<__is_const(_Tp)>
> +{ };
> +#else
>template
>  struct is_const
>  : public false_type { };
> @@ -771,6 +777,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
>template
>  struct is_const<_Tp const>
>  : public true_type { };
> +#endif
>  
>/// is_volatile
>template
> -- 
> 2.40.0
> 
> 



[PATCH] [vect]Use intermiediate integer type for float_expr/fix_trunc_expr when direct optab is not existed.

2023-06-20 Thread liuhongt via Gcc-patches
I noticed there has been some refactoring in vectorizable_conversion
for code_helper, so I've adjusted my patch accordingly.
Here's the patch I'm going to commit.

We already use an intermediate type in the WIDEN case, but not for NONE;
this patch extends that to the NONE case.

gcc/ChangeLog:

PR target/110018
* tree-vect-stmts.cc (vectorizable_conversion): Use
intermediate integer type for float_expr/fix_trunc_expr when
direct optab does not exist.

gcc/testsuite/ChangeLog:

* gcc.target/i386/pr110018-1.c: New test.
---
 gcc/testsuite/gcc.target/i386/pr110018-1.c | 94 ++
 gcc/tree-vect-stmts.cc | 66 ++-
 2 files changed, 158 insertions(+), 2 deletions(-)
 create mode 100644 gcc/testsuite/gcc.target/i386/pr110018-1.c

diff --git a/gcc/testsuite/gcc.target/i386/pr110018-1.c 
b/gcc/testsuite/gcc.target/i386/pr110018-1.c
new file mode 100644
index 000..b1baffd7af1
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr110018-1.c
@@ -0,0 +1,94 @@
+/* { dg-do compile } */
+/* { dg-options "-mavx512fp16 -mavx512vl -O2 -mavx512dq" } */
+/* { dg-final { scan-assembler-times {(?n)vcvttp[dsh]2[dqw]} 5 } } */
+/* { dg-final { scan-assembler-times {(?n)vcvt[dqw]*2p[dsh]} 5 } } */
+
+void
+foo (double* __restrict a, char* b)
+{
+  a[0] = b[0];
+  a[1] = b[1];
+}
+
+void
+foo1 (float* __restrict a, char* b)
+{
+  a[0] = b[0];
+  a[1] = b[1];
+  a[2] = b[2];
+  a[3] = b[3];
+}
+
+void
+foo2 (_Float16* __restrict a, char* b)
+{
+  a[0] = b[0];
+  a[1] = b[1];
+  a[2] = b[2];
+  a[3] = b[3];
+  a[4] = b[4];
+  a[5] = b[5];
+  a[6] = b[6];
+  a[7] = b[7];
+}
+
+void
+foo3 (double* __restrict a, short* b)
+{
+  a[0] = b[0];
+  a[1] = b[1];
+}
+
+void
+foo4 (float* __restrict a, char* b)
+{
+  a[0] = b[0];
+  a[1] = b[1];
+  a[2] = b[2];
+  a[3] = b[3];
+}
+
+void
+foo5 (double* __restrict b, char* a)
+{
+  a[0] = b[0];
+  a[1] = b[1];
+}
+
+void
+foo6 (float* __restrict b, char* a)
+{
+  a[0] = b[0];
+  a[1] = b[1];
+  a[2] = b[2];
+  a[3] = b[3];
+}
+
+void
+foo7 (_Float16* __restrict b, char* a)
+{
+  a[0] = b[0];
+  a[1] = b[1];
+  a[2] = b[2];
+  a[3] = b[3];
+  a[4] = b[4];
+  a[5] = b[5];
+  a[6] = b[6];
+  a[7] = b[7];
+}
+
+void
+foo8 (double* __restrict b, short* a)
+{
+  a[0] = b[0];
+  a[1] = b[1];
+}
+
+void
+foo9 (float* __restrict b, char* a)
+{
+  a[0] = b[0];
+  a[1] = b[1];
+  a[2] = b[2];
+  a[3] = b[3];
+}
diff --git a/gcc/tree-vect-stmts.cc b/gcc/tree-vect-stmts.cc
index 056a0ecb2be..ae24f3e66e6 100644
--- a/gcc/tree-vect-stmts.cc
+++ b/gcc/tree-vect-stmts.cc
@@ -5041,7 +5041,7 @@ vectorizable_conversion (vec_info *vinfo,
   tree scalar_dest;
   tree op0, op1 = NULL_TREE;
   loop_vec_info loop_vinfo = dyn_cast  (vinfo);
-  tree_code tc1;
+  tree_code tc1, tc2;
   code_helper code, code1, code2;
   code_helper codecvt1 = ERROR_MARK, codecvt2 = ERROR_MARK;
   tree new_temp;
@@ -5249,6 +5249,57 @@ vectorizable_conversion (vec_info *vinfo,
code1 = tc1;
break;
   }
+
+  /* For conversions between float and smaller integer types try whether we
+can use intermediate signed integer types to support the
+conversion.  */
+  if ((code == FLOAT_EXPR
+  && GET_MODE_SIZE (lhs_mode) > GET_MODE_SIZE (rhs_mode))
+ || (code == FIX_TRUNC_EXPR
+ && GET_MODE_SIZE (rhs_mode) > GET_MODE_SIZE (lhs_mode)))
+   {
+ bool float_expr_p = code == FLOAT_EXPR;
+ scalar_mode imode = float_expr_p ? rhs_mode : lhs_mode;
+ fltsz = GET_MODE_SIZE (float_expr_p ? lhs_mode : rhs_mode);
+ code1 = float_expr_p ? code : NOP_EXPR;
+ codecvt1 = float_expr_p ? NOP_EXPR : code;
+ FOR_EACH_2XWIDER_MODE (rhs_mode_iter, imode)
+   {
+ imode = rhs_mode_iter.require ();
+ if (GET_MODE_SIZE (imode) > fltsz)
+   break;
+
+ cvt_type
+   = build_nonstandard_integer_type (GET_MODE_BITSIZE (imode),
+ 0);
+ cvt_type = get_vectype_for_scalar_type (vinfo, cvt_type,
+ slp_node);
+ /* This should only happened for SLP as long as loop vectorizer
+only supports same-sized vector.  */
+ if (cvt_type == NULL_TREE
+ || maybe_ne (TYPE_VECTOR_SUBPARTS (cvt_type), nunits_in)
+ || !supportable_convert_operation ((tree_code) code1,
+vectype_out,
+cvt_type, &tc1)
+ || !supportable_convert_operation ((tree_code) codecvt1,
+cvt_type,
+vectype_in, &tc2))
+   continue;
+
+ found_mode = true;
+ break;
+   }
+
+ if (found_mode)
+   {
+ multi_step_cvt++;
+   

Re: [PATCH v7 2/6] libstdc++: use new built-in trait __is_reference for std::is_reference

2023-06-20 Thread Patrick Palka via Gcc-patches
On Mon, 12 Jun 2023, Ken Matsui via Libstdc++ wrote:

> This patch gets std::is_reference to dispatch to new built-in trait
> __is_reference.
> 
> libstdc++-v3/ChangeLog:
> 
>   * include/std/type_traits (is_reference): Use __is_reference built-in
>   trait.
>   (is_reference_v): Likewise.

LGTM

> 
> Signed-off-by: Ken Matsui 
> ---
>  libstdc++-v3/include/std/type_traits | 14 ++
>  1 file changed, 14 insertions(+)
> 
> diff --git a/libstdc++-v3/include/std/type_traits 
> b/libstdc++-v3/include/std/type_traits
> index 0e7a9c9c7f3..2a14df7e5f9 100644
> --- a/libstdc++-v3/include/std/type_traits
> +++ b/libstdc++-v3/include/std/type_traits
> @@ -639,6 +639,12 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
>// Composite type categories.
>  
>/// is_reference
> +#if __has_builtin(__is_reference)
> +  template
> +struct is_reference
> +: public __bool_constant<__is_reference(_Tp)>
> +{ };
> +#else
>template
>  struct is_reference
>  : public false_type
> @@ -653,6 +659,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
>  struct is_reference<_Tp&&>
>  : public true_type
>  { };
> +#endif
>  
>/// is_arithmetic
>template
> @@ -3192,12 +3199,19 @@ template 
>inline constexpr bool is_class_v = __is_class(_Tp);
>  template 
>inline constexpr bool is_function_v = is_function<_Tp>::value;
> +
> +#if __has_builtin(__is_reference)
> +template 
> +  inline constexpr bool is_reference_v = __is_reference(_Tp);
> +#else
>  template 
>inline constexpr bool is_reference_v = false;
>  template 
>inline constexpr bool is_reference_v<_Tp&> = true;
>  template 
>inline constexpr bool is_reference_v<_Tp&&> = true;
> +#endif
> +
>  template 
>inline constexpr bool is_arithmetic_v = is_arithmetic<_Tp>::value;
>  template 
> -- 
> 2.41.0
> 
> 



Re: [PATCH v7 1/6] c++: implement __is_reference built-in trait

2023-06-20 Thread Patrick Palka via Gcc-patches
On Mon, 12 Jun 2023, Ken Matsui via Libstdc++ wrote:

> This patch implements built-in trait for std::is_reference.
> 
> gcc/cp/ChangeLog:
> 
>   * cp-trait.def: Define __is_reference.
>   * constraint.cc (diagnose_trait_expr): Handle CPTK_IS_REFERENCE.
>   * semantics.cc (trait_expr_value): Likewise.
>   (finish_trait_expr): Likewise.
> 
> gcc/testsuite/ChangeLog:
> 
>   * g++.dg/ext/has-builtin-1.C: Test existence of __is_reference.
>   * g++.dg/ext/is_reference.C: New test.

LGTM

> 
> Signed-off-by: Ken Matsui 
> ---
>  gcc/cp/constraint.cc |  3 +++
>  gcc/cp/cp-trait.def  |  1 +
>  gcc/cp/semantics.cc  |  4 +++
>  gcc/testsuite/g++.dg/ext/has-builtin-1.C |  3 +++
>  gcc/testsuite/g++.dg/ext/is_reference.C  | 34 
>  5 files changed, 45 insertions(+)
>  create mode 100644 gcc/testsuite/g++.dg/ext/is_reference.C
> 
> diff --git a/gcc/cp/constraint.cc b/gcc/cp/constraint.cc
> index 8cf0f2d0974..f6951ee2670 100644
> --- a/gcc/cp/constraint.cc
> +++ b/gcc/cp/constraint.cc
> @@ -3705,6 +3705,9 @@ diagnose_trait_expr (tree expr, tree args)
>  case CPTK_HAS_VIRTUAL_DESTRUCTOR:
>inform (loc, "  %qT does not have a virtual destructor", t1);
>break;
> +case CPTK_IS_REFERENCE:
> +  inform (loc, "  %qT is not a reference", t1);
> +  break;
>  case CPTK_IS_ABSTRACT:
>inform (loc, "  %qT is not an abstract class", t1);
>break;
> diff --git a/gcc/cp/cp-trait.def b/gcc/cp/cp-trait.def
> index 8b7fece0cc8..1e3310cd682 100644
> --- a/gcc/cp/cp-trait.def
> +++ b/gcc/cp/cp-trait.def
> @@ -67,6 +67,7 @@ DEFTRAIT_EXPR (IS_CONVERTIBLE, "__is_convertible", 2)
>  DEFTRAIT_EXPR (IS_EMPTY, "__is_empty", 1)
>  DEFTRAIT_EXPR (IS_ENUM, "__is_enum", 1)
>  DEFTRAIT_EXPR (IS_FINAL, "__is_final", 1)
> +DEFTRAIT_EXPR (IS_REFERENCE, "__is_reference", 1)
>  DEFTRAIT_EXPR (IS_LAYOUT_COMPATIBLE, "__is_layout_compatible", 2)
>  DEFTRAIT_EXPR (IS_LITERAL_TYPE, "__is_literal_type", 1)
>  DEFTRAIT_EXPR (IS_NOTHROW_ASSIGNABLE, "__is_nothrow_assignable", 2)
> diff --git a/gcc/cp/semantics.cc b/gcc/cp/semantics.cc
> index a2e74a5d2c7..2f37bc353a1 100644
> --- a/gcc/cp/semantics.cc
> +++ b/gcc/cp/semantics.cc
> @@ -12075,6 +12075,9 @@ trait_expr_value (cp_trait_kind kind, tree type1, 
> tree type2)
>  case CPTK_IS_FINAL:
>return CLASS_TYPE_P (type1) && CLASSTYPE_FINAL (type1);
>  
> +case CPTK_IS_REFERENCE:
> +  return type_code1 == REFERENCE_TYPE;
> +
>  case CPTK_IS_LAYOUT_COMPATIBLE:
>return layout_compatible_type_p (type1, type2);
>  
> @@ -12289,6 +12292,7 @@ finish_trait_expr (location_t loc, cp_trait_kind 
> kind, tree type1, tree type2)
>  case CPTK_IS_ENUM:
>  case CPTK_IS_UNION:
>  case CPTK_IS_SAME:
> +case CPTK_IS_REFERENCE:
>break;
>  
>  case CPTK_IS_LAYOUT_COMPATIBLE:
> diff --git a/gcc/testsuite/g++.dg/ext/has-builtin-1.C 
> b/gcc/testsuite/g++.dg/ext/has-builtin-1.C
> index f343e153e56..b697673790c 100644
> --- a/gcc/testsuite/g++.dg/ext/has-builtin-1.C
> +++ b/gcc/testsuite/g++.dg/ext/has-builtin-1.C
> @@ -146,3 +146,6 @@
>  #if !__has_builtin (__remove_cvref)
>  # error "__has_builtin (__remove_cvref) failed"
>  #endif
> +#if !__has_builtin (__is_reference)
> +# error "__has_builtin (__is_reference) failed"
> +#endif
> diff --git a/gcc/testsuite/g++.dg/ext/is_reference.C 
> b/gcc/testsuite/g++.dg/ext/is_reference.C
> new file mode 100644
> index 000..b5ce4db7afd
> --- /dev/null
> +++ b/gcc/testsuite/g++.dg/ext/is_reference.C
> @@ -0,0 +1,34 @@
> +// { dg-do compile { target c++11 } }
> +
> +#include 
> +
> +using namespace __gnu_test;
> +
> +#define SA(X) static_assert((X),#X)
> +#define SA_TEST_CATEGORY(TRAIT, TYPE, EXPECT)\
> +  SA(TRAIT(TYPE) == EXPECT); \
> +  SA(TRAIT(const TYPE) == EXPECT);   \
> +  SA(TRAIT(volatile TYPE) == EXPECT);\
> +  SA(TRAIT(const volatile TYPE) == EXPECT)
> +
> +// Positive tests.
> +SA_TEST_CATEGORY(__is_reference, int&, true);
> +SA_TEST_CATEGORY(__is_reference, ClassType&, true);
> +SA(__is_reference(int(&)(int)));
> +SA_TEST_CATEGORY(__is_reference, int&&, true);
> +SA_TEST_CATEGORY(__is_reference, ClassType&&, true);
> +SA(__is_reference(int(&&)(int)));
> +SA_TEST_CATEGORY(__is_reference, IncompleteClass&, true);
> +
> +// Negative tests
> +SA_TEST_CATEGORY(__is_reference, void, false);
> +SA_TEST_CATEGORY(__is_reference, int*, false);
> +SA_TEST_CATEGORY(__is_reference, int[3], false);
> +SA(!__is_reference(int(int)));
> +SA(!__is_reference(int(*const)(int)));
> +SA(!__is_reference(int(*volatile)(int)));
> +SA(!__is_reference(int(*const volatile)(int)));
> +
> +// Sanity check.
> +SA_TEST_CATEGORY(__is_reference, ClassType, false);
> +SA_TEST_CATEGORY(__is_reference, IncompleteClass, false);
> -- 
> 2.41.0
> 
> 



Re: [PATCH v7 3/6] c++: implement __is_function built-in trait

2023-06-20 Thread Patrick Palka via Gcc-patches
On Mon, 12 Jun 2023, Ken Matsui via Gcc-patches wrote:

> This patch implements built-in trait for std::is_function.
> 
> gcc/cp/ChangeLog:
> 
>   * cp-trait.def: Define __is_function.
>   * constraint.cc (diagnose_trait_expr): Handle CPTK_IS_FUNCTION.
>   * semantics.cc (trait_expr_value): Likewise.
>   (finish_trait_expr): Likewise.
> 
> gcc/testsuite/ChangeLog:
> 
>   * g++.dg/ext/has-builtin-1.C: Test existence of __is_function.
>   * g++.dg/ext/is_function.C: New test.

LGTM

> 
> Signed-off-by: Ken Matsui 
> ---
>  gcc/cp/constraint.cc |  3 ++
>  gcc/cp/cp-trait.def  |  1 +
>  gcc/cp/semantics.cc  |  4 ++
>  gcc/testsuite/g++.dg/ext/has-builtin-1.C |  3 ++
>  gcc/testsuite/g++.dg/ext/is_function.C   | 58 
>  5 files changed, 69 insertions(+)
>  create mode 100644 gcc/testsuite/g++.dg/ext/is_function.C
> 
> diff --git a/gcc/cp/constraint.cc b/gcc/cp/constraint.cc
> index f6951ee2670..927605c6cb7 100644
> --- a/gcc/cp/constraint.cc
> +++ b/gcc/cp/constraint.cc
> @@ -3754,6 +3754,9 @@ diagnose_trait_expr (tree expr, tree args)
>  case CPTK_IS_UNION:
>inform (loc, "  %qT is not a union", t1);
>break;
> +case CPTK_IS_FUNCTION:
> +  inform (loc, "  %qT is not a function", t1);
> +  break;
>  case CPTK_IS_AGGREGATE:
>inform (loc, "  %qT is not an aggregate", t1);
>break;
> diff --git a/gcc/cp/cp-trait.def b/gcc/cp/cp-trait.def
> index 1e3310cd682..3cd3babc242 100644
> --- a/gcc/cp/cp-trait.def
> +++ b/gcc/cp/cp-trait.def
> @@ -83,6 +83,7 @@ DEFTRAIT_EXPR (IS_TRIVIALLY_ASSIGNABLE, 
> "__is_trivially_assignable", 2)
>  DEFTRAIT_EXPR (IS_TRIVIALLY_CONSTRUCTIBLE, "__is_trivially_constructible", 
> -1)
>  DEFTRAIT_EXPR (IS_TRIVIALLY_COPYABLE, "__is_trivially_copyable", 1)
>  DEFTRAIT_EXPR (IS_UNION, "__is_union", 1)
> +DEFTRAIT_EXPR (IS_FUNCTION, "__is_function", 1)
>  DEFTRAIT_EXPR (REF_CONSTRUCTS_FROM_TEMPORARY, 
> "__reference_constructs_from_temporary", 2)
>  DEFTRAIT_EXPR (REF_CONVERTS_FROM_TEMPORARY, 
> "__reference_converts_from_temporary", 2)
>  /* FIXME Added space to avoid direct usage in GCC 13.  */
> diff --git a/gcc/cp/semantics.cc b/gcc/cp/semantics.cc
> index 2f37bc353a1..b976633645a 100644
> --- a/gcc/cp/semantics.cc
> +++ b/gcc/cp/semantics.cc
> @@ -12072,6 +12072,9 @@ trait_expr_value (cp_trait_kind kind, tree type1, 
> tree type2)
>  case CPTK_IS_ENUM:
>return type_code1 == ENUMERAL_TYPE;
>  
> +case CPTK_IS_FUNCTION:
> +  return type_code1 == FUNCTION_TYPE;
> +
>  case CPTK_IS_FINAL:
>return CLASS_TYPE_P (type1) && CLASSTYPE_FINAL (type1);
>  
> @@ -12293,6 +12296,7 @@ finish_trait_expr (location_t loc, cp_trait_kind 
> kind, tree type1, tree type2)
>  case CPTK_IS_UNION:
>  case CPTK_IS_SAME:
>  case CPTK_IS_REFERENCE:
> +case CPTK_IS_FUNCTION:
>break;
>  
>  case CPTK_IS_LAYOUT_COMPATIBLE:
> diff --git a/gcc/testsuite/g++.dg/ext/has-builtin-1.C 
> b/gcc/testsuite/g++.dg/ext/has-builtin-1.C
> index b697673790c..90eb00ebf2d 100644
> --- a/gcc/testsuite/g++.dg/ext/has-builtin-1.C
> +++ b/gcc/testsuite/g++.dg/ext/has-builtin-1.C
> @@ -149,3 +149,6 @@
>  #if !__has_builtin (__is_reference)
>  # error "__has_builtin (__is_reference) failed"
>  #endif
> +#if !__has_builtin (__is_function)
> +# error "__has_builtin (__is_function) failed"
> +#endif
> diff --git a/gcc/testsuite/g++.dg/ext/is_function.C 
> b/gcc/testsuite/g++.dg/ext/is_function.C
> new file mode 100644
> index 000..2e1594b12ad
> --- /dev/null
> +++ b/gcc/testsuite/g++.dg/ext/is_function.C
> @@ -0,0 +1,58 @@
> +// { dg-do compile { target c++11 } }
> +
> +#include 
> +
> +using namespace __gnu_test;
> +
> +#define SA(X) static_assert((X),#X)
> +#define SA_TEST_CATEGORY(TRAIT, TYPE, EXPECT)\
> +  SA(TRAIT(TYPE) == EXPECT); \
> +  SA(TRAIT(const TYPE) == EXPECT);   \
> +  SA(TRAIT(volatile TYPE) == EXPECT);\
> +  SA(TRAIT(const volatile TYPE) == EXPECT)
> +
> +struct A
> +{ void fn(); };
> +
> +template
> +struct AHolder { };
> +
> +template
> +struct AHolder
> +{ using type = U; };
> +
> +// Positive tests.
> +SA(__is_function(int (int)));
> +SA(__is_function(ClassType (ClassType)));
> +SA(__is_function(float (int, float, int[], int&)));
> +SA(__is_function(int (int, ...)));
> +SA(__is_function(bool (ClassType) const));
> +SA(__is_function(AHolder::type));
> +
> +void fn();
> +SA(__is_function(decltype(fn)));
> +
> +// Negative tests.
> +SA_TEST_CATEGORY(__is_function, int, false);
> +SA_TEST_CATEGORY(__is_function, int*, false);
> +SA_TEST_CATEGORY(__is_function, int&, false);
> +SA_TEST_CATEGORY(__is_function, void, false);
> +SA_TEST_CATEGORY(__is_function, void*, false);
> +SA_TEST_CATEGORY(__is_function, void**, false);
> +SA_TEST_CATEGORY(__is_function, std::nullptr_t, false);
> +
> +SA_TEST_CATEGORY(__is_function, Abst

Re: [PATCH v7 4/6] libstdc++: use new built-in trait __is_function for std::is_function

2023-06-20 Thread Patrick Palka via Gcc-patches
On Mon, 12 Jun 2023, Ken Matsui via Libstdc++ wrote:

> This patch gets std::is_function to dispatch to new built-in trait
> __is_function.
> 
> libstdc++-v3/ChangeLog:
> 
>   * include/std/type_traits (is_function): Use __is_function built-in
>   trait.
>   (is_function_v): Likewise.

LGTM

> 
> Signed-off-by: Ken Matsui 
> ---
>  libstdc++-v3/include/std/type_traits | 13 +
>  1 file changed, 13 insertions(+)
> 
> diff --git a/libstdc++-v3/include/std/type_traits 
> b/libstdc++-v3/include/std/type_traits
> index 2a14df7e5f9..954b57518de 100644
> --- a/libstdc++-v3/include/std/type_traits
> +++ b/libstdc++-v3/include/std/type_traits
> @@ -594,6 +594,12 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
>  { };
>  
>/// is_function
> +#if __has_builtin(__is_function)
> +  template
> +struct is_function
> +: public __bool_constant<__is_function(_Tp)>
> +{ };
> +#else
>template
>  struct is_function
>  : public __bool_constant::value> { };
> @@ -605,6 +611,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
>template
>  struct is_function<_Tp&&>
>  : public false_type { };
> +#endif
>  
>  #define __cpp_lib_is_null_pointer 201309L
>  
> @@ -3197,8 +3204,14 @@ template 
>inline constexpr bool is_union_v = __is_union(_Tp);
>  template 
>inline constexpr bool is_class_v = __is_class(_Tp);
> +
> +#if __has_builtin(__is_function)
> +template 
> +  inline constexpr bool is_function_v = __is_function(_Tp);
> +#else
>  template 
>inline constexpr bool is_function_v = is_function<_Tp>::value;
> +#endif
>  
>  #if __has_builtin(__is_reference)
>  template 
> -- 
> 2.41.0
> 
> 



Re: [PATCH v7 5/6] c++, libstdc++: implement __is_void built-in trait

2023-06-20 Thread Patrick Palka via Gcc-patches
On Mon, 12 Jun 2023, Ken Matsui via Libstdc++ wrote:

> This patch implements built-in trait for std::is_void. Since the new built-in
> name is __is_void, to avoid unintentional macro replacement, this patch also
> involves the removal of the existing __is_void in helper_functions.h and
> cpp_type_traits.h and renaming __is_void to is_void in the test file,
> pr46567.C.

Hmm, I suspect an __is_void built-in won't show an improvement over
the current is_void implementation in terms of four explicit specializations.
And given the __is_void name conflict in cpp_type_traits.h (which means
GCC trunk will reject older libstdc++ headers at least until we get
smarter about how we recognize built-ins), I'm leaning towards not
implementing an __is_void built-in for now.

In that case we should probably define a built-in for is_object since
we can no longer implement it solely in terms of other built-ins, and
fortunately the name __is_object seems to never have been used in
libstdc++ so we won't have to deal with any name conflicts unlike with
__is_void.

Jonathan, what do you think?

> 
> gcc/cp/ChangeLog:
> 
>   * cp-trait.def: Define __is_void.
>   * constraint.cc (diagnose_trait_expr): Handle CPTK_IS_VOID.
>   * semantics.cc (trait_expr_value): Likewise.
>   (finish_trait_expr): Likewise.
> 
> gcc/testsuite/ChangeLog:
> 
>   * g++.dg/tm/pr46567.C (__is_void): Rename to ...
>   (is_void): ... this.
>   * g++.dg/ext/has-builtin-1.C: Test existence of __is_void.
>   * g++.dg/ext/is_void.C: New test.
> 
> libstdc++-v3/ChangeLog:
> 
>   * include/debug/helper_functions.h (_DiffTraits): Stop using
>   __is_void.
>   * include/bits/cpp_type_traits.h (__is_void): Remove unused __is_void.
>   * include/std/type_traits (is_void_v): Use __is_void built-in
>   trait.
> 
> Signed-off-by: Ken Matsui 
> ---
>  gcc/cp/constraint.cc  |  3 ++
>  gcc/cp/cp-trait.def   |  1 +
>  gcc/cp/semantics.cc   |  4 +++
>  gcc/testsuite/g++.dg/ext/has-builtin-1.C  |  3 ++
>  gcc/testsuite/g++.dg/ext/is_void.C| 35 +++
>  gcc/testsuite/g++.dg/tm/pr46567.C |  6 ++--
>  libstdc++-v3/include/bits/cpp_type_traits.h   | 15 
>  libstdc++-v3/include/debug/helper_functions.h |  5 ++-
>  libstdc++-v3/include/std/type_traits  |  6 
>  9 files changed, 57 insertions(+), 21 deletions(-)
>  create mode 100644 gcc/testsuite/g++.dg/ext/is_void.C
> 
> diff --git a/gcc/cp/constraint.cc b/gcc/cp/constraint.cc
> index 927605c6cb7..e8cd98eb2c7 100644
> --- a/gcc/cp/constraint.cc
> +++ b/gcc/cp/constraint.cc
> @@ -3757,6 +3757,9 @@ diagnose_trait_expr (tree expr, tree args)
>  case CPTK_IS_FUNCTION:
>inform (loc, "  %qT is not a function", t1);
>break;
> +case CPTK_IS_VOID:
> +  inform (loc, "  %qT is not a void type", t1);
> +  break;
>  case CPTK_IS_AGGREGATE:
>inform (loc, "  %qT is not an aggregate", t1);
>break;
> diff --git a/gcc/cp/cp-trait.def b/gcc/cp/cp-trait.def
> index 3cd3babc242..8e76668f6ed 100644
> --- a/gcc/cp/cp-trait.def
> +++ b/gcc/cp/cp-trait.def
> @@ -84,6 +84,7 @@ DEFTRAIT_EXPR (IS_TRIVIALLY_CONSTRUCTIBLE, 
> "__is_trivially_constructible", -1)
>  DEFTRAIT_EXPR (IS_TRIVIALLY_COPYABLE, "__is_trivially_copyable", 1)
>  DEFTRAIT_EXPR (IS_UNION, "__is_union", 1)
>  DEFTRAIT_EXPR (IS_FUNCTION, "__is_function", 1)
> +DEFTRAIT_EXPR (IS_VOID, "__is_void", 1)
>  DEFTRAIT_EXPR (REF_CONSTRUCTS_FROM_TEMPORARY, 
> "__reference_constructs_from_temporary", 2)
>  DEFTRAIT_EXPR (REF_CONVERTS_FROM_TEMPORARY, 
> "__reference_converts_from_temporary", 2)
>  /* FIXME Added space to avoid direct usage in GCC 13.  */
> diff --git a/gcc/cp/semantics.cc b/gcc/cp/semantics.cc
> index b976633645a..c4d44413dce 100644
> --- a/gcc/cp/semantics.cc
> +++ b/gcc/cp/semantics.cc
> @@ -12075,6 +12075,9 @@ trait_expr_value (cp_trait_kind kind, tree type1, 
> tree type2)
>  case CPTK_IS_FUNCTION:
>return type_code1 == FUNCTION_TYPE;
>  
> +case CPTK_IS_VOID:
> +  return VOID_TYPE_P (type1);
> +
>  case CPTK_IS_FINAL:
>return CLASS_TYPE_P (type1) && CLASSTYPE_FINAL (type1);
>  
> @@ -12297,6 +12300,7 @@ finish_trait_expr (location_t loc, cp_trait_kind 
> kind, tree type1, tree type2)
>  case CPTK_IS_SAME:
>  case CPTK_IS_REFERENCE:
>  case CPTK_IS_FUNCTION:
> +case CPTK_IS_VOID:
>break;
>  
>  case CPTK_IS_LAYOUT_COMPATIBLE:
> diff --git a/gcc/testsuite/g++.dg/ext/has-builtin-1.C 
> b/gcc/testsuite/g++.dg/ext/has-builtin-1.C
> index 90eb00ebf2d..b96cc9e6f50 100644
> --- a/gcc/testsuite/g++.dg/ext/has-builtin-1.C
> +++ b/gcc/testsuite/g++.dg/ext/has-builtin-1.C
> @@ -152,3 +152,6 @@
>  #if !__has_builtin (__is_function)
>  # error "__has_builtin (__is_function) failed"
>  #endif
> +#if !__has_builtin (__is_void)
> +# error "__has_builtin (__is_void) failed"
> +#endif
> diff --git 

Re: [PATCH] libcpp: Improve location for macro names [PR66290]

2023-06-20 Thread David Malcolm via Gcc-patches
On Fri, 2022-08-05 at 12:04 -0400, Lewis Hyatt via Gcc-patches wrote:
> 
> When libcpp reports diagnostics whose locus is a macro name (such as
> for
> -Wunused-macros), it uses the location in the cpp_macro object that
> was
> stored by _cpp_new_macro. This is currently set to pfile-
> >directive_line,
> which contains the line number only and no column information. This
> patch
> changes the stored location to the src_loc for the token defining the
> macro
> name, which includes the location and range information.

Sorry about the delay in reviewing this.

The patch looks good to me

Thanks
Dave


> 
> libcpp/ChangeLog:
> 
> PR c++/66290
> * macro.cc (_cpp_create_definition): Add location argument.
> * internal.h (_cpp_create_definition): Adjust prototype.
> * directives.cc (do_define): Pass new location argument to
> _cpp_create_definition.
> (do_undef): Stop passing inferior location to
> cpp_warning_with_line;
> the default from cpp_warning is better.
> (cpp_pop_definition): Pass new location argument to
> _cpp_create_definition.
> * pch.cc (cpp_read_state): Likewise.
> 
> gcc/testsuite/ChangeLog:
> 
> PR c++/66290
> * c-c++-common/cpp/macro-ranges.c: New test.
> * c-c++-common/cpp/line-2.c: Adapt to check for column
> information
> on macro-related libcpp warnings.
> * c-c++-common/cpp/line-3.c: Likewise.
> * c-c++-common/cpp/macro-arg-count-1.c: Likewise.
> * c-c++-common/cpp/pr58844-1.c: Likewise.
> * c-c++-common/cpp/pr58844-2.c: Likewise.
> * c-c++-common/cpp/warning-zero-location.c: Likewise.
> * c-c++-common/pragma-diag-14.c: Likewise.
> * c-c++-common/pragma-diag-15.c: Likewise.
> * g++.dg/modules/macro-2_d.C: Likewise.
> * g++.dg/modules/macro-4_d.C: Likewise.
> * g++.dg/modules/macro-4_e.C: Likewise.
> * g++.dg/spellcheck-macro-ordering.C: Likewise.
> * gcc.dg/builtin-redefine.c: Likewise.
> * gcc.dg/cpp/Wunused.c: Likewise.
> * gcc.dg/cpp/redef2.c: Likewise.
> * gcc.dg/cpp/redef3.c: Likewise.
> * gcc.dg/cpp/redef4.c: Likewise.
> * gcc.dg/cpp/ucnid-11-utf8.c: Likewise.
> * gcc.dg/cpp/ucnid-11.c: Likewise.
> * gcc.dg/cpp/undef2.c: Likewise.
> * gcc.dg/cpp/warn-redefined-2.c: Likewise.
> * gcc.dg/cpp/warn-redefined.c: Likewise.
> * gcc.dg/cpp/warn-unused-macros-2.c: Likewise.
> * gcc.dg/cpp/warn-unused-macros.c: Likewise.
> ---
> 
> Notes:
>     Hello-
>     
>     The PR (https://gcc.gnu.org/bugzilla/show_bug.cgi?id=66290) was
> originally
>     about the entirely wrong location for -Wunused-macros in C++
> mode, which
>     behavior was fixed by r13-1903, but before closing it out I
> wanted to also
>     address a second point brought up in the PR comments, namely that
> we do not
>     include column information when emitting diagnostics for macro
> names, such as
>     is done for -Wunused-macros. The attached patch updates the
> location stored in
>     the cpp_macro object so that it includes the column and range
> information for
>     the token comprising the macro name; previously, the location was
> just the
>     generic one pointing to the whole line.
>     
>     The change to libcpp is very small, the reason for all the
> testsuite changes is
>     that I have updated all tests explicitly looking for the
> columnless diagnostics
>     (with the "-:" syntax to dg-warning et al) so that they expect a
> column
>     instead. I also added a new test which verifies the expected
> range information
>     in diagnostics with carets.
>     
>     Bootstrap + regtest on x86-64 Linux looks good. Please let me
> know if it looks
>     OK? Thanks!
>     
>     -Lewis
> 
>  libcpp/directives.cc  |  13 +-
>  libcpp/internal.h |   2 +-
>  libcpp/macro.cc   |  12 +-
>  libcpp/pch.cc |   2 +-
>  gcc/testsuite/c-c++-common/cpp/line-2.c   |   2 +-
>  gcc/testsuite/c-c++-common/cpp/line-3.c   |   2 +-
>  .../c-c++-common/cpp/macro-arg-count-1.c  |   4 +-
>  gcc/testsuite/c-c++-common/cpp/macro-ranges.c |  52 ++
>  gcc/testsuite/c-c++-common/cpp/pr58844-1.c    |   4 +-
>  gcc/testsuite/c-c++-common/cpp/pr58844-2.c    |   4 +-
>  .../c-c++-common/cpp/warning-zero-location.c  |   2 +-
>  gcc/testsuite/c-c++-common/pragma-diag-14.c   |   2 +-
>  gcc/testsuite/c-c++-common/pragma-diag-15.c   |   2 +-
>  gcc/testsuite/g++.dg/modules/macro-2_d.C  |   4 +-
>  gcc/testsuite/g++.dg/modules/macro-4_d.C  |   4 +-
>  gcc/testsuite/g++.dg/modules/macro-4_e.C  |   2 +-
>  .../g++.dg/spellcheck-macro-ordering.C    |   2 +-
>  gcc/testsuite/gcc.dg/builtin-redefine.c   |  18 +-
>  gcc/testsuite/gcc.dg/cpp/Wunused.c    |   6 +-
>  gcc/testsuite/gcc.dg/cpp/redef2.c |

Re: [PATCH v7 0/6] c++, libstdc++: get std::is_object to dispatch to new built-in traits

2023-06-20 Thread Patrick Palka via Gcc-patches
On Thu, 15 Jun 2023, Ken Matsui via Libstdc++ wrote:

> Hi,
> 
> For those curious about the performance improvements of this patch, I
> conducted a benchmark that instantiates 256k specializations of
> is_object_v based on Patrick's code. You can find the benchmark code
> at this link:
> 
> https://github.com/ken-matsui/gcc-benches/blob/main/is_object_benchmark.cc
> 
> On my computer, using the gcc HEAD of this patch for a release build,
> the patch with -DUSE_BUILTIN took 64% less time and used 44-47% less
> memory compared to not using it.

That's more like it :D  Though the benchmark should also invoke the
trait on non-object types too, e.g. Instantiator& or Instantiator(int).

> 
> Sincerely,
> Ken Matsui
> 
> On Mon, Jun 12, 2023 at 3:49 PM Ken Matsui  wrote:
> >
> > Hi,
> >
> > This patch series gets std::is_object to dispatch to built-in traits and
> > implements the following built-in traits, on which std::is_object depends.
> >
> > * __is_reference
> > * __is_function
> > * __is_void
> >
> > std::is_object was depending on them with disjunction and negation.
> >
> > __not_<__or_<is_function<_Tp>, is_reference<_Tp>, is_void<_Tp>>>::type
> >
> > Therefore, this patch uses them directly instead of implementing an 
> > additional
> > built-in trait __is_object, which makes the compiler slightly bigger and
> > slower.
> >
> > __bool_constant<!(__is_function(_Tp) || __is_reference(_Tp) || __is_void(_Tp))>
> >
> > This would instantiate only __bool_constant<true> and
> > __bool_constant<false>,
> > which can be mostly shared. That is, the purpose of built-in traits is
> > considered as achieved.
> >
> > Changes in v7
> >
> > * Removed an unnecessary new line.
> >
> > Ken Matsui (6):
> >   c++: implement __is_reference built-in trait
> >   libstdc++: use new built-in trait __is_reference for std::is_reference
> >   c++: implement __is_function built-in trait
> >   libstdc++: use new built-in trait __is_function for std::is_function
> >   c++, libstdc++: implement __is_void built-in trait
> >   libstdc++: make std::is_object dispatch to new built-in traits
> >
> >  gcc/cp/constraint.cc  |  9 +++
> >  gcc/cp/cp-trait.def   |  3 +
> >  gcc/cp/semantics.cc   | 12 
> >  gcc/testsuite/g++.dg/ext/has-builtin-1.C  |  9 +++
> >  gcc/testsuite/g++.dg/ext/is_function.C| 58 +++
> >  gcc/testsuite/g++.dg/ext/is_reference.C   | 34 +++
> >  gcc/testsuite/g++.dg/ext/is_void.C| 35 +++
> >  gcc/testsuite/g++.dg/tm/pr46567.C |  6 +-
> >  libstdc++-v3/include/bits/cpp_type_traits.h   | 15 -
> >  libstdc++-v3/include/debug/helper_functions.h |  5 +-
> >  libstdc++-v3/include/std/type_traits  | 51 
> >  11 files changed, 216 insertions(+), 21 deletions(-)
> >  create mode 100644 gcc/testsuite/g++.dg/ext/is_function.C
> >  create mode 100644 gcc/testsuite/g++.dg/ext/is_reference.C
> >  create mode 100644 gcc/testsuite/g++.dg/ext/is_void.C
> >
> > --
> > 2.41.0
> >
> 
> 


Re: [PATCH] RISC-V: Fix out of range memory access of machine mode table

2023-06-20 Thread Jakub Jelinek via Gcc-patches
On Tue, Jun 20, 2023 at 02:08:07PM +, Li, Pan2 via Gcc-patches wrote:
> Thanks Jakub for the explanation, I have a try like below patch but I am not 
> quite sure it is expected, and where should I put the assertion.
> 
> > If yes, it needs to
> > be unsigned short, if not, we should add an assertion (e.g. on streaming
> > in the LTO table) that MAX_MACHINE_MODE <= 256.
> 
> diff --git a/gcc/lto-streamer-in.cc b/gcc/lto-streamer-in.cc
> index 2cb83406db5..93ef97ec5d3 100644
> --- a/gcc/lto-streamer-in.cc
> +++ b/gcc/lto-streamer-in.cc
> @@ -1985,8 +1985,6 @@ lto_input_mode_table (struct lto_file_decl_data 
> *file_data)
>  internal_error ("cannot read LTO mode table from %s",
>   file_data->file_name);
>  
> -  unsigned char *table = ggc_cleared_vec_alloc (1 << 8);
> -  file_data->mode_table = table;
>const struct lto_simple_header_with_strings *header
>  = (const struct lto_simple_header_with_strings *) data;
>int string_offset;
> @@ -1994,6 +1992,9 @@ lto_input_mode_table (struct lto_file_decl_data 
> *file_data)
>string_offset = sizeof (*header) + header->main_size;
>  
>lto_input_block ib (data + sizeof (*header), header->main_size, NULL);
> +  unsigned char *table = ggc_cleared_vec_alloc (
> +1 << ib.mode_bits);
> +  file_data->mode_table = table;
>data_in = lto_data_in_create (file_data, data + string_offset,
>   header->string_size, vNULL);
>bitpack_d bp = streamer_read_bitpack (&ib);

Your ib.mode_bits is again the same ceil_log2 (MAX_MACHINE_MODE) value.
You need to stream that value out in lto-streamer-out.cc as perhaps the
first thing in the bitpack and stream it back here, so some
   mode_bits = bp_unpack_value (&bp, 5);
or so (perhaps 4 would be enough if we only support up to 15 bits for mode).
I.e. tell the offloading compiler what value had the host compiler when
streaming LTO out.

Then move those 3 lines from the above after that.  I'd put it next to
mode_table, so file_data->mode_bits.

The 
  unsigned char *table = ggc_cleared_vec_alloc (
1 << ib.mode_bits);
formatting is wrong, ( shouldn't if at all possible be the last character
on a line.
In this case,
> --- a/gcc/lto-streamer.h
> +++ b/gcc/lto-streamer.h
> @@ -352,6 +352,8 @@ public:
>  
>const char *data;
>const unsigned char *mode_table;
> +  /* Indicates how many bits of one machine mode will have.  */
> +  const unsigned int mode_bits = ceil_log2 (MAX_MACHINE_MODE) ;

As I said earlier, I'd put it elsewhere.  The formatting is wrong
(no space before semicolon) and please don't add NSDMIs in structures
which don't have them already.

>  inline machine_mode
>  bp_unpack_machine_mode (struct bitpack_d *bp)
>  {
> -  return (machine_mode)
> -((class lto_input_block *)
> - bp->stream)->mode_table[bp_unpack_enum (bp, machine_mode, 1 << 8)];
> +  lto_input_block *input_block =  (class lto_input_block *)bp->stream;

Wrong formatting again, there shouldn't be two consecutive spaces in there.
On the other hand, there should be a space between *) and bp.

> +  int index = bp_unpack_enum (bp, machine_mode, input_block->mode_bits);
> +
> +  return (machine_mode)input_block->mode_table[index];

And here similarly.

Jakub



Re: [PATCH] RISC-V: Fix compiler warning of riscv_arg_has_vector

2023-06-20 Thread Lehua Ding
> All done. Welcome Lehua.
I have received the system notification email, thank you very much.


Best,
Lehua

[V10][PATCH 1/3] Introduce IR bit TYPE_INCLUDES_FLEXARRAY for the GCC extension [PR77650]

2023-06-20 Thread Qing Zhao via Gcc-patches
on a structure with a C99 flexible array member being nested in
another structure

GCC extension accepts the case when a struct with a flexible array member
is embedded into another struct or union (possibly recursively) as the last
field.
This patch is to introduce the IR bit TYPE_INCLUDES_FLEXARRAY (reuse the
existing IR bit TYPE_NO_NAMED_ARGS_STDARG_P), set it correctly in C FE,
stream it correctly in Middle-end, and print it during IR dumping.

PR C/77650

gcc/c/ChangeLog:

* c-decl.cc (finish_struct): Set TYPE_INCLUDES_FLEXARRAY for
struct/union type.

gcc/lto/ChangeLog:

* lto-common.cc (compare_tree_sccs_1): Compare bit
TYPE_NO_NAMED_ARGS_STDARG_P or TYPE_INCLUDES_FLEXARRAY properly
for its corresponding type.

gcc/ChangeLog:

* print-tree.cc (print_node): Print new bit type_include_flexarray.
* tree-core.h (struct tree_type_common): Use bit no_named_args_stdarg_p
as type_include_flexarray for RECORD_TYPE or UNION_TYPE.
* tree-streamer-in.cc (unpack_ts_type_common_value_fields): Stream
in bit no_named_args_stdarg_p properly for its corresponding type.
* tree-streamer-out.cc (pack_ts_type_common_value_fields): Stream
out bit no_named_args_stdarg_p properly for its corresponding type.
* tree.h (TYPE_INCLUDES_FLEXARRAY): New macro TYPE_INCLUDES_FLEXARRAY.
---
 gcc/c/c-decl.cc  | 11 +++
 gcc/lto/lto-common.cc|  5 -
 gcc/print-tree.cc|  5 +
 gcc/tree-core.h  |  2 ++
 gcc/tree-streamer-in.cc  |  5 -
 gcc/tree-streamer-out.cc |  5 -
 gcc/tree.h   |  7 ++-
 7 files changed, 36 insertions(+), 4 deletions(-)

diff --git a/gcc/c/c-decl.cc b/gcc/c/c-decl.cc
index 1af51c4acfc..e14f514cb6e 100644
--- a/gcc/c/c-decl.cc
+++ b/gcc/c/c-decl.cc
@@ -9267,6 +9267,17 @@ finish_struct (location_t loc, tree t, tree fieldlist, 
tree attributes,
   /* Set DECL_NOT_FLEXARRAY flag for FIELD_DECL x.  */
   DECL_NOT_FLEXARRAY (x) = !is_flexible_array_member_p (is_last_field, x);
 
+  /* Set TYPE_INCLUDES_FLEXARRAY for the context of x, t.
+when x is an array and is the last field.  */
+  if (TREE_CODE (TREE_TYPE (x)) == ARRAY_TYPE)
+   TYPE_INCLUDES_FLEXARRAY (t)
+ = is_last_field && flexible_array_member_type_p (TREE_TYPE (x));
+  /* Recursively set TYPE_INCLUDES_FLEXARRAY for the context of x, t
+when x is an union or record and is the last field.  */
+  else if (RECORD_OR_UNION_TYPE_P (TREE_TYPE (x)))
+   TYPE_INCLUDES_FLEXARRAY (t)
+ = is_last_field && TYPE_INCLUDES_FLEXARRAY (TREE_TYPE (x));
+
   if (DECL_NAME (x)
  || RECORD_OR_UNION_TYPE_P (TREE_TYPE (x)))
saw_named_field = true;
diff --git a/gcc/lto/lto-common.cc b/gcc/lto/lto-common.cc
index 537570204b3..f6b85bbc6f7 100644
--- a/gcc/lto/lto-common.cc
+++ b/gcc/lto/lto-common.cc
@@ -1275,7 +1275,10 @@ compare_tree_sccs_1 (tree t1, tree t2, tree **map)
   if (AGGREGATE_TYPE_P (t1))
compare_values (TYPE_TYPELESS_STORAGE);
   compare_values (TYPE_EMPTY_P);
-  compare_values (TYPE_NO_NAMED_ARGS_STDARG_P);
+  if (FUNC_OR_METHOD_TYPE_P (t1))
+   compare_values (TYPE_NO_NAMED_ARGS_STDARG_P);
+  if (RECORD_OR_UNION_TYPE_P (t1))
+   compare_values (TYPE_INCLUDES_FLEXARRAY);
   compare_values (TYPE_PACKED);
   compare_values (TYPE_RESTRICT);
   compare_values (TYPE_USER_ALIGN);
diff --git a/gcc/print-tree.cc b/gcc/print-tree.cc
index ccecd3dc6a7..62451b6cf4e 100644
--- a/gcc/print-tree.cc
+++ b/gcc/print-tree.cc
@@ -632,6 +632,11 @@ print_node (FILE *file, const char *prefix, tree node, int 
indent,
  && TYPE_CXX_ODR_P (node))
fputs (" cxx-odr-p", file);
 
+  if ((code == RECORD_TYPE
+  || code == UNION_TYPE)
+ && TYPE_INCLUDES_FLEXARRAY (node))
+   fputs (" includes-flexarray", file);
+
   /* The transparent-union flag is used for different things in
 different nodes.  */
   if ((code == UNION_TYPE || code == RECORD_TYPE)
diff --git a/gcc/tree-core.h b/gcc/tree-core.h
index c48a12b378f..668808a29d0 100644
--- a/gcc/tree-core.h
+++ b/gcc/tree-core.h
@@ -1713,6 +1713,8 @@ struct GTY(()) tree_type_common {
   unsigned typeless_storage : 1;
   unsigned empty_flag : 1;
   unsigned indivisible_p : 1;
+  /* TYPE_NO_NAMED_ARGS_STDARG_P for a stdarg function.
+ Or TYPE_INCLUDES_FLEXARRAY for RECORD_TYPE and UNION_TYPE.  */
   unsigned no_named_args_stdarg_p : 1;
   unsigned spare : 1;
 
diff --git a/gcc/tree-streamer-in.cc b/gcc/tree-streamer-in.cc
index c803800862c..be2bdbb7699 100644
--- a/gcc/tree-streamer-in.cc
+++ b/gcc/tree-streamer-in.cc
@@ -386,7 +386,10 @@ unpack_ts_type_common_value_fields (struct bitpack_d *bp, 
tree expr)
   if (AGGREGATE_TYPE_P (expr))
 TYPE_TYPELESS_STORAGE (expr) = (unsigned) bp_unpack_value (bp, 1);
   TYPE_EMPTY_P (expr) = (unsigned) bp_unpack_value (bp, 1);
-  TYPE_NO_NAMED_ARGS_STDAR

[V10][PATCH 3/3] Use TYPE_INCLUDES_FLEXARRAY in __builtin_object_size [PR101832]

2023-06-20 Thread Qing Zhao via Gcc-patches
__builtin_object_size should treat struct with TYPE_INCLUDES_FLEXARRAY as
flexible size.

gcc/ChangeLog:

PR tree-optimization/101832
* tree-object-size.cc (addr_object_size): Handle structure/union type
when it has flexible size.

gcc/testsuite/ChangeLog:

PR tree-optimization/101832
* gcc.dg/builtin-object-size-pr101832.c: New test.
---
 .../gcc.dg/builtin-object-size-pr101832.c | 134 ++
 gcc/tree-object-size.cc   |  23 ++-
 2 files changed, 156 insertions(+), 1 deletion(-)
 create mode 100644 gcc/testsuite/gcc.dg/builtin-object-size-pr101832.c

diff --git a/gcc/testsuite/gcc.dg/builtin-object-size-pr101832.c 
b/gcc/testsuite/gcc.dg/builtin-object-size-pr101832.c
new file mode 100644
index 000..60078e11634
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/builtin-object-size-pr101832.c
@@ -0,0 +1,134 @@
+/* PR 101832: 
+   GCC extension accepts the case when a struct with a C99 flexible array
+   member is embedded into another struct (possibly recursively).
+   __builtin_object_size will treat such struct as flexible size.
+   However, when a structure with non-C99 flexible array member, i.e, trailing
+   [0], [1], or [4], is embedded into another struct, the structure will not
+   be treated as flexible size.  */ 
+/* { dg-do run } */
+/* { dg-options "-O2" } */
+
+#include "builtin-object-size-common.h"
+
+#define expect(p, _v) do { \
+  size_t v = _v; \
+  if (p == v) \
+__builtin_printf ("ok:  %s == %zd\n", #p, p); \
+  else {\
+__builtin_printf ("WAT: %s == %zd (expected %zd)\n", #p, p, v); \
+FAIL (); \
+  } \
+} while (0);
+
+
+struct A {
+  int n;
+  char data[];
+};
+
+struct B {
+  int m;
+  struct A a;
+};
+
+struct C {
+  int q;
+  struct B b;
+};
+
+struct A0 {
+  int n;
+  char data[0];
+};
+
+struct B0 {
+  int m;
+  struct A0 a;
+};
+
+struct C0 {
+  int q;
+  struct B0 b;
+};
+
+struct A1 {
+  int n;
+  char data[1];
+};
+
+struct B1 {
+  int m;
+  struct A1 a;
+};
+
+struct C1 {
+  int q;
+  struct B1 b;
+};
+
+struct An {
+  int n;
+  char data[8];
+};
+
+struct Bn {
+  int m;
+  struct An a;
+};
+
+struct Cn {
+  int q;
+  struct Bn b;
+};
+
+volatile void *magic1, *magic2;
+
+int main (int argc, char *argv[])
+{
+  struct B *outer;
+  struct C *outest;
+
+  /* Make sure optimization can't find some other object size. */
+  outer = (void *)magic1;
+  outest = (void *)magic2;
+
+  expect (__builtin_object_size (&outer->a, 1), -1);
+  expect (__builtin_object_size (&outest->b, 1), -1);
+  expect (__builtin_object_size (&outest->b.a, 1), -1);
+
+  struct B0 *outer0;
+  struct C0 *outest0;
+
+  /* Make sure optimization can't find some other object size. */
+  outer0 = (void *)magic1;
+  outest0 = (void *)magic2;
+
+  expect (__builtin_object_size (&outer0->a, 1), sizeof (outer0->a));
+  expect (__builtin_object_size (&outest0->b, 1), sizeof (outest0->b));
+  expect (__builtin_object_size (&outest0->b.a, 1), sizeof (outest0->b.a));
+
+  struct B1 *outer1;
+  struct C1 *outest1;
+
+  /* Make sure optimization can't find some other object size. */
+  outer1 = (void *)magic1;
+  outest1 = (void *)magic2;
+
+  expect (__builtin_object_size (&outer1->a, 1), sizeof (outer1->a));
+  expect (__builtin_object_size (&outest1->b, 1), sizeof (outest1->b));
+  expect (__builtin_object_size (&outest1->b.a, 1), sizeof (outest1->b.a));
+
+  struct Bn *outern;
+  struct Cn *outestn;
+
+  /* Make sure optimization can't find some other object size. */
+  outern = (void *)magic1;
+  outestn = (void *)magic2;
+
+  expect (__builtin_object_size (&outern->a, 1), sizeof (outern->a));
+  expect (__builtin_object_size (&outestn->b, 1), sizeof (outestn->b));
+  expect (__builtin_object_size (&outestn->b.a, 1), sizeof (outestn->b.a));
+
+  DONE ();
+  return 0;
+}
diff --git a/gcc/tree-object-size.cc b/gcc/tree-object-size.cc
index 9a936a91983..a62af050056 100644
--- a/gcc/tree-object-size.cc
+++ b/gcc/tree-object-size.cc
@@ -633,11 +633,32 @@ addr_object_size (struct object_size_info *osi, 
const_tree ptr,
v = NULL_TREE;
break;
  case COMPONENT_REF:
-   if (TREE_CODE (TREE_TYPE (v)) != ARRAY_TYPE)
+   /* When the ref is not to an aggregate type, i.e, an array,
+  a record or a union, it will not have flexible size,
+  compute the object size directly.  */
+   if (!AGGREGATE_TYPE_P (TREE_TYPE (v)))
  {
v = NULL_TREE;
break;
  }
+   /* if the ref is to a record or union type, but the type
+  does not include a flexible array recursively, compute
+  the object size directly.  */
+   if (RECORD_OR_UNION_TYPE_P (TREE_TYPE (v)))
+ {
+   if (!TYPE_INCLUDES_FLEXARRAY (TREE_TYPE (v)))
+   

[V10][PATCH 2/3] Update documentation to clarify a GCC extension [PR77650]

2023-06-20 Thread Qing Zhao via Gcc-patches
on a structure with a C99 flexible array member being nested in
another structure.

"The GCC extension accepts a structure containing an ISO C99 "flexible array
member", or a union containing such a structure (possibly recursively)
to be a member of a structure.

 There are two situations:

   * A structure containing a C99 flexible array member, or a union
 containing such a structure, is the last field of another structure,
 for example:

  struct flex  { int length; char data[]; };
  union union_flex { int others; struct flex f; };

  struct out_flex_struct { int m; struct flex flex_data; };
  struct out_flex_union { int n; union union_flex flex_data; };

 In the above, both 'out_flex_struct.flex_data.data[]' and
 'out_flex_union.flex_data.f.data[]' are considered as flexible
 arrays too.

   * A structure containing a C99 flexible array member, or a union
 containing such a structure, is not the last field of another structure,
 for example:

  struct flex  { int length; char data[]; };

  struct mid_flex { int m; struct flex flex_data; int n; };

 In the above, accessing a member of the array 'mid_flex.flex_data.data[]'
 might have undefined behavior.  Compilers do not handle such a case
 consistently.  Any code relying on this case should be modified to ensure
 that flexible array members only end up at the ends of structures.

 Please use the warning option '-Wflex-array-member-not-at-end' to
 identify all such cases in the source code and modify them.  This extension
 is now deprecated.
"

PR C/77650

gcc/c-family/ChangeLog:

* c.opt: New option -Wflex-array-member-not-at-end.

gcc/c/ChangeLog:

* c-decl.cc (finish_struct): Issue warnings for new option.

gcc/ChangeLog:

* doc/extend.texi: Document GCC extension on a structure containing
a flexible array member to be a member of another structure.

gcc/testsuite/ChangeLog:

* gcc.dg/variable-sized-type-flex-array.c: New test.
---
 gcc/c-family/c.opt|  5 +++
 gcc/c/c-decl.cc   |  9 
 gcc/doc/extend.texi   | 44 ++-
 .../gcc.dg/variable-sized-type-flex-array.c   | 31 +
 4 files changed, 88 insertions(+), 1 deletion(-)
 create mode 100644 gcc/testsuite/gcc.dg/variable-sized-type-flex-array.c

diff --git a/gcc/c-family/c.opt b/gcc/c-family/c.opt
index cead1995561..fce66b1d225 100644
--- a/gcc/c-family/c.opt
+++ b/gcc/c-family/c.opt
@@ -737,6 +737,11 @@ Wformat-truncation=
 C ObjC C++ LTO ObjC++ Joined RejectNegative UInteger Var(warn_format_trunc) 
Warning LangEnabledBy(C ObjC C++ LTO ObjC++,Wformat=, warn_format >= 1, 0) 
IntegerRange(0, 2)
 Warn about calls to snprintf and similar functions that truncate output.
 
+Wflex-array-member-not-at-end
+C C++ Var(warn_flex_array_member_not_at_end) Warning
+Warn when a structure containing a C99 flexible array member as the last
+field is not at the end of another structure.
+
 Wif-not-aligned
 C ObjC C++ ObjC++ Var(warn_if_not_aligned) Init(1) Warning
 Warn when the field in a struct is not aligned.
diff --git a/gcc/c/c-decl.cc b/gcc/c/c-decl.cc
index e14f514cb6e..ecd10ebb69c 100644
--- a/gcc/c/c-decl.cc
+++ b/gcc/c/c-decl.cc
@@ -9278,6 +9278,15 @@ finish_struct (location_t loc, tree t, tree fieldlist, 
tree attributes,
TYPE_INCLUDES_FLEXARRAY (t)
  = is_last_field && TYPE_INCLUDES_FLEXARRAY (TREE_TYPE (x));
 
+  if (warn_flex_array_member_not_at_end
+ && !is_last_field
+ && RECORD_OR_UNION_TYPE_P (TREE_TYPE (x))
+ && TYPE_INCLUDES_FLEXARRAY (TREE_TYPE (x)))
+   warning_at (DECL_SOURCE_LOCATION (x),
+   OPT_Wflex_array_member_not_at_end,
+   "structure containing a flexible array member"
+   " is not at the end of another structure");
+
   if (DECL_NAME (x)
  || RECORD_OR_UNION_TYPE_P (TREE_TYPE (x)))
saw_named_field = true;
diff --git a/gcc/doc/extend.texi b/gcc/doc/extend.texi
index cdbd4b34a35..fbb1d9708ba 100644
--- a/gcc/doc/extend.texi
+++ b/gcc/doc/extend.texi
@@ -1751,7 +1751,49 @@ Flexible array members may only appear as the last 
member of a
 A structure containing a flexible array member, or a union containing
 such a structure (possibly recursively), may not be a member of a
 structure or an element of an array.  (However, these uses are
-permitted by GCC as extensions.)
+permitted by GCC as extensions, see details below.)
+@end itemize
+
+The GCC extension accepts a structure containing an ISO C99 @dfn{flexible array
+member}, or a union containing such a structure (possibly recursively)
+to be a member of a structure.
+
+There are two situations:
+
+@itemize @bullet
+@item
+A structure containing a C99 flexible array member, or a union containing
+such a structure, is the last field of another structure, for example:
+
+@s

[V10][PATCH 0/3] Accept and Handle the case when a structure including a FAM nested in another structure

2023-06-20 Thread Qing Zhao via Gcc-patches
This is the 10th version of the patch, which rebased on the latest trunk.
This is an important patch needed by Linux Kernel security project. 

compared to the 9th patch, the ONLY change is:

I split the 1st patch into the following two patches:

patch 1/3:  Introduce IR bit TYPE_INCLUDES_FLEXARRAY for the GCC extension 
[PR77650]
patch 3/3:  Use TYPE_INCLUDES_FLEXARRAY in __builtin_object_size[PR101832]

so, patch 1/3 includes all the FE changes and the changes in the Middle-end to support
the FE changes.
patch 3/3 only includes the change in gcc/tree-object-size.cc and the
testing case to support PR101832. 


Now, there are 3 patches:

patch 1/3:  Introduce IR bit TYPE_INCLUDES_FLEXARRAY for the GCC extension 
[PR77650]
patch 2/3:  Update documentation to clarify a GCC extension [PR77650]
patch 3/3:  Use TYPE_INCLUDES_FLEXARRAY in __builtin_object_size[PR101832]

among the above, the patch 1/3 and the patch 2/3 have been reviewed and
approved.

the patch 3/3 is a very simple change to use the new bit
TYPE_INCLUDES_FLEXARRAY in __builtin_object_size.

I have bootstrapped and regression-tested on both aarch64 and x86. no
issues.

I will commit the patch 1/3 and 2/3 tomorrow since they have been
approved already.

for the patch 3/3, I will wait for several days; if there are no
objections or new comments, I will commit it at the end of this week.

Please let me know if you have comments and suggestions.


thanks.
Qing


Qing Zhao (3):
  Introduce IR bit TYPE_INCLUDES_FLEXARRAY for the GCC extension
[PR77650]
  Update documentation to clarify a GCC extension [PR77650]
  Use TYPE_INCLUDES_FLEXARRAY in __builtin_object_size [PR101832]

-- 
2.31.1



Re: [PATCH 1/2] c++: implement __remove_pointer built-in trait

2023-06-20 Thread Patrick Palka via Gcc-patches
On Sat, 17 Jun 2023, Ken Matsui via Gcc-patches wrote:

> Hi,
> 
> I conducted a benchmark for remove_pointer as well as is_object. Just
> like the is_object benchmark, here is the benchmark code:
> 
> https://github.com/ken-matsui/gcc-benches/blob/main/remove_pointer_benchmark.cc
> 
> On my computer, using the gcc HEAD of this patch for a release build,
> the patch with -DUSE_BUILTIN took 8.7% less time and used 4.3-4.9%
> less memory on average compared to not using it. Although the
> performance improvement was not as significant as with is_object, the
> benchmark demonstrated that the compilation was consistently more
> efficient.

Thanks for the benchmark.  The improvement is smaller than I expected,
but that might be because the benchmark is "biased":

  template 
  struct Instantiator : Instantiator {
  static_assert(!std::is_pointer_v>);
  };

This only invokes remove_pointer_t on the non-pointer type Instantiator,
and so the benchmark doesn't factor in the performance of the trait when
invoked on pointer types, and traits typically will have different
performance characteristics depending on the kind of type it's given.

To more holistically assess the real-world performance of the trait the
benchmark should also consider pointer types and maybe also cv-qualified
types (given that the original implementation is in terms of
__remove_cv_t and thus performance of the original implementation may be
sensitive to cv-qualification).  So we should probably uniformly
benchmark these classes of types, via doing e.g.:

  static_assert(!std::is_pointer_v>);
  static_assert(!std::is_pointer_v>);
  static_assert(!std::is_pointer_v>);
  static_assert(!std::is_pointer_v>);

(We could consider other kinds of types too, e.g. reference types and
integral types, but it seems clear based on the implementations being
benchmarked that performance won't be sensitive to reference-ness
or integral-ness.)

> 
> Sincerely,
> Ken Matsui
> 
> On Thu, Jun 15, 2023 at 5:22 AM Ken Matsui  wrote:
> >
> > This patch implements built-in trait for std::remove_pointer.
> >
> > gcc/cp/ChangeLog:
> >
> > * cp-trait.def: Define __remove_pointer.
> > * semantics.cc (finish_trait_type): Handle CPTK_REMOVE_POINTER.
> >
> > gcc/testsuite/ChangeLog:
> >
> > * g++.dg/ext/has-builtin-1.C: Test existence of __remove_pointer.
> > * g++.dg/ext/remove_pointer.C: New test.
> >
> > Signed-off-by: Ken Matsui 
> > ---
> >  gcc/cp/cp-trait.def   |  1 +
> >  gcc/cp/semantics.cc   |  4 ++
> >  gcc/testsuite/g++.dg/ext/has-builtin-1.C  |  3 ++
> >  gcc/testsuite/g++.dg/ext/remove_pointer.C | 51 +++
> >  4 files changed, 59 insertions(+)
> >  create mode 100644 gcc/testsuite/g++.dg/ext/remove_pointer.C
> >
> > diff --git a/gcc/cp/cp-trait.def b/gcc/cp/cp-trait.def
> > index 8b7fece0cc8..07823e55579 100644
> > --- a/gcc/cp/cp-trait.def
> > +++ b/gcc/cp/cp-trait.def
> > @@ -90,6 +90,7 @@ DEFTRAIT_EXPR (IS_DEDUCIBLE, "__is_deducible ", 2)
> >  DEFTRAIT_TYPE (REMOVE_CV, "__remove_cv", 1)
> >  DEFTRAIT_TYPE (REMOVE_REFERENCE, "__remove_reference", 1)
> >  DEFTRAIT_TYPE (REMOVE_CVREF, "__remove_cvref", 1)
> > +DEFTRAIT_TYPE (REMOVE_POINTER, "__remove_pointer", 1)
> >  DEFTRAIT_TYPE (UNDERLYING_TYPE,  "__underlying_type", 1)
> >  DEFTRAIT_TYPE (TYPE_PACK_ELEMENT, "__type_pack_element", -1)
> >
> > diff --git a/gcc/cp/semantics.cc b/gcc/cp/semantics.cc
> > index 8fb47fd179e..885c7a6fb64 100644
> > --- a/gcc/cp/semantics.cc
> > +++ b/gcc/cp/semantics.cc
> > @@ -12373,6 +12373,10 @@ finish_trait_type (cp_trait_kind kind, tree type1, 
> > tree type2,
> >if (TYPE_REF_P (type1))
> > type1 = TREE_TYPE (type1);
> >return cv_unqualified (type1);
> > +case CPTK_REMOVE_POINTER:
> > +  if (TYPE_PTR_P (type1))
> > +type1 = TREE_TYPE (type1);
> > +  return type1;

Maybe add a newline before the 'case' to visually separate it from the
previous 'case'?  LGTM otherwise, thanks!

> >
> >  case CPTK_TYPE_PACK_ELEMENT:
> >return finish_type_pack_element (type1, type2, complain);
> > diff --git a/gcc/testsuite/g++.dg/ext/has-builtin-1.C 
> > b/gcc/testsuite/g++.dg/ext/has-builtin-1.C
> > index f343e153e56..e21e0a95509 100644
> > --- a/gcc/testsuite/g++.dg/ext/has-builtin-1.C
> > +++ b/gcc/testsuite/g++.dg/ext/has-builtin-1.C
> > @@ -146,3 +146,6 @@
> >  #if !__has_builtin (__remove_cvref)
> >  # error "__has_builtin (__remove_cvref) failed"
> >  #endif
> > +#if !__has_builtin (__remove_pointer)
> > +# error "__has_builtin (__remove_pointer) failed"
> > +#endif
> > diff --git a/gcc/testsuite/g++.dg/ext/remove_pointer.C 
> > b/gcc/testsuite/g++.dg/ext/remove_pointer.C
> > new file mode 100644
> > index 000..7b13db93950
> > --- /dev/null
> > +++ b/gcc/testsuite/g++.dg/ext/remove_pointer.C
> > @@ -0,0 +1,51 @@
> > +// { dg-do compile { target c++11 } }
> > +
> > +#define SA(X) static_assert((X),#X)
> > +
> > +SA(__is_same(__remove_point

Re: Re: [PATCH] VECT: Apply LEN_MASK_{LOAD,STORE} into vectorizer

2023-06-20 Thread 钟居哲
Thanks for Richi's comments.
I have sent a new patch addressing your comments.

I am so sorry, the V2 patch just broke on bootstrap.
I sent the V3 patch that fixed it.

Thanks.


juzhe.zh...@rivai.ai
 
From: Richard Biener
Date: 2023-06-20 20:18
To: Ju-Zhe Zhong
CC: gcc-patches; richard.sandiford
Subject: Re: [PATCH] VECT: Apply LEN_MASK_{LOAD,STORE} into vectorizer
On Tue, 20 Jun 2023, juzhe.zh...@rivai.ai wrote:
 
> From: Ju-Zhe Zhong 
> 
> This patch is apply LEN_MASK_{LOAD,STORE} into vectorizer.
> I refactor gimple IR build to make codes look cleaner.
> 
> gcc/ChangeLog:
> 
> * internal-fn.cc (expand_partial_store_optab_fn): Add 
> LEN_MASK_{LOAD,STORE} vectorizer support.
> (internal_load_fn_p): Ditto.
> (internal_store_fn_p): Ditto.
> (internal_fn_mask_index): Ditto.
> (internal_fn_stored_value_index): Ditto.
> (internal_len_load_store_bias): Ditto.
> * optabs-query.cc (can_vec_mask_load_store_p): Ditto.
> (get_len_load_store_mode): Ditto.
> * tree-vect-stmts.cc (check_load_store_for_partial_vectors): Ditto.
> (get_all_ones_mask): New function.
> (vectorizable_store): Add LEN_MASK_{LOAD,STORE} vectorizer support.
> (vectorizable_load): Ditto.
> 
> ---
>  gcc/internal-fn.cc |  35 +-
>  gcc/optabs-query.cc|  25 +++-
>  gcc/tree-vect-stmts.cc | 259 +
>  3 files changed, 213 insertions(+), 106 deletions(-)
> 
> diff --git a/gcc/internal-fn.cc b/gcc/internal-fn.cc
> index c911ae790cb..e10c21de5f1 100644
> --- a/gcc/internal-fn.cc
> +++ b/gcc/internal-fn.cc
> @@ -2949,7 +2949,7 @@ expand_partial_load_optab_fn (internal_fn, gcall *stmt, 
> convert_optab optab)
>   * OPTAB.  */
>  
>  static void
> -expand_partial_store_optab_fn (internal_fn, gcall *stmt, convert_optab optab)
> +expand_partial_store_optab_fn (internal_fn ifn, gcall *stmt, convert_optab 
> optab)
>  {
>class expand_operand ops[5];
>tree type, lhs, rhs, maskt, biast;
> @@ -2957,7 +2957,7 @@ expand_partial_store_optab_fn (internal_fn, gcall 
> *stmt, convert_optab optab)
>insn_code icode;
>  
>maskt = gimple_call_arg (stmt, 2);
> -  rhs = gimple_call_arg (stmt, 3);
> +  rhs = gimple_call_arg (stmt, internal_fn_stored_value_index (ifn));
>type = TREE_TYPE (rhs);
>lhs = expand_call_mem_ref (type, stmt, 0);
>  
> @@ -4435,6 +4435,7 @@ internal_load_fn_p (internal_fn fn)
>  case IFN_GATHER_LOAD:
>  case IFN_MASK_GATHER_LOAD:
>  case IFN_LEN_LOAD:
> +case IFN_LEN_MASK_LOAD:
>return true;
>  
>  default:
> @@ -4455,6 +4456,7 @@ internal_store_fn_p (internal_fn fn)
>  case IFN_SCATTER_STORE:
>  case IFN_MASK_SCATTER_STORE:
>  case IFN_LEN_STORE:
> +case IFN_LEN_MASK_STORE:
>return true;
>  
>  default:
> @@ -4494,6 +4496,10 @@ internal_fn_mask_index (internal_fn fn)
>  case IFN_MASK_STORE_LANES:
>return 2;
>  
> +case IFN_LEN_MASK_LOAD:
> +case IFN_LEN_MASK_STORE:
> +  return 3;
> +
>  case IFN_MASK_GATHER_LOAD:
>  case IFN_MASK_SCATTER_STORE:
>return 4;
> @@ -4519,6 +4525,9 @@ internal_fn_stored_value_index (internal_fn fn)
>  case IFN_LEN_STORE:
>return 3;
>  
> +case IFN_LEN_MASK_STORE:
> +  return 4;
> +
>  default:
>return -1;
>  }
> @@ -4583,13 +4592,31 @@ internal_len_load_store_bias (internal_fn ifn, 
> machine_mode mode)
>  {
>optab optab = direct_internal_fn_optab (ifn);
>insn_code icode = direct_optab_handler (optab, mode);
> +  int bias_argno = 3;
> +  if (icode == CODE_FOR_nothing)
> +{
> +  machine_mode mask_mode
> + = targetm.vectorize.get_mask_mode (mode).require ();
> +  if (ifn == IFN_LEN_LOAD)
> + {
> +   /* Try LEN_MASK_LOAD.  */
> +   optab = direct_internal_fn_optab (IFN_LEN_MASK_LOAD);
> + }
> +  else
> + {
> +   /* Try LEN_MASK_STORE.  */
> +   optab = direct_internal_fn_optab (IFN_LEN_MASK_STORE);
> + }
> +  icode = convert_optab_handler (optab, mode, mask_mode);
> +  bias_argno = 4;
> +}
>  
>if (icode != CODE_FOR_nothing)
>  {
>/* For now we only support biases of 0 or -1.  Try both of them.  */
> -  if (insn_operand_matches (icode, 3, GEN_INT (0)))
> +  if (insn_operand_matches (icode, bias_argno, GEN_INT (0)))
>  return 0;
> -  if (insn_operand_matches (icode, 3, GEN_INT (-1)))
> +  if (insn_operand_matches (icode, bias_argno, GEN_INT (-1)))
>  return -1;
>  }
>  
> diff --git a/gcc/optabs-query.cc b/gcc/optabs-query.cc
> index 276f8408dd7..4394d391200 100644
> --- a/gcc/optabs-query.cc
> +++ b/gcc/optabs-query.cc
> @@ -566,11 +566,14 @@ can_vec_mask_load_store_p (machine_mode mode,
> bool is_load)
>  {
>optab op = is_load ? maskload_optab : maskstore_optab;
> +  optab len_op = is_load ? len_maskload_optab : len_maskstore_optab;
>machine_mode vmode;
>  
>/* If mode is vector mode, check it directly.  */
>if (VECTOR_MODE_P (mode))
> -r

[PATCH V3] VECT: Apply LEN_MASK_{LOAD,STORE} into vectorizer

2023-06-20 Thread juzhe . zhong
From: Ju-Zhe Zhong 

gcc/ChangeLog:

* internal-fn.cc (expand_partial_store_optab_fn): Add 
LEN_MASK_{LOAD,STORE} vectorizer support.
(internal_load_fn_p): Ditto.
(internal_store_fn_p): Ditto.
(internal_fn_mask_index): Ditto.
(internal_fn_stored_value_index): Ditto.
(internal_len_load_store_bias): Ditto.
* optabs-query.cc (can_vec_mask_load_store_p): Ditto.
(get_len_load_store_mode): Ditto.
* tree-vect-stmts.cc (check_load_store_for_partial_vectors): Ditto.
(get_all_ones_mask): New function.
(vectorizable_store): Add LEN_MASK_{LOAD,STORE} vectorizer support.
(vectorizable_load): Ditto.
D
---
 gcc/internal-fn.cc |  35 +-
 gcc/optabs-query.cc|  25 -
 gcc/tree-vect-stmts.cc | 234 ++---
 3 files changed, 227 insertions(+), 67 deletions(-)

diff --git a/gcc/internal-fn.cc b/gcc/internal-fn.cc
index c911ae790cb..e10c21de5f1 100644
--- a/gcc/internal-fn.cc
+++ b/gcc/internal-fn.cc
@@ -2949,7 +2949,7 @@ expand_partial_load_optab_fn (internal_fn, gcall *stmt, 
convert_optab optab)
  * OPTAB.  */
 
 static void
-expand_partial_store_optab_fn (internal_fn, gcall *stmt, convert_optab optab)
+expand_partial_store_optab_fn (internal_fn ifn, gcall *stmt, convert_optab 
optab)
 {
   class expand_operand ops[5];
   tree type, lhs, rhs, maskt, biast;
@@ -2957,7 +2957,7 @@ expand_partial_store_optab_fn (internal_fn, gcall *stmt, 
convert_optab optab)
   insn_code icode;
 
   maskt = gimple_call_arg (stmt, 2);
-  rhs = gimple_call_arg (stmt, 3);
+  rhs = gimple_call_arg (stmt, internal_fn_stored_value_index (ifn));
   type = TREE_TYPE (rhs);
   lhs = expand_call_mem_ref (type, stmt, 0);
 
@@ -4435,6 +4435,7 @@ internal_load_fn_p (internal_fn fn)
 case IFN_GATHER_LOAD:
 case IFN_MASK_GATHER_LOAD:
 case IFN_LEN_LOAD:
+case IFN_LEN_MASK_LOAD:
   return true;
 
 default:
@@ -4455,6 +4456,7 @@ internal_store_fn_p (internal_fn fn)
 case IFN_SCATTER_STORE:
 case IFN_MASK_SCATTER_STORE:
 case IFN_LEN_STORE:
+case IFN_LEN_MASK_STORE:
   return true;
 
 default:
@@ -4494,6 +4496,10 @@ internal_fn_mask_index (internal_fn fn)
 case IFN_MASK_STORE_LANES:
   return 2;
 
+case IFN_LEN_MASK_LOAD:
+case IFN_LEN_MASK_STORE:
+  return 3;
+
 case IFN_MASK_GATHER_LOAD:
 case IFN_MASK_SCATTER_STORE:
   return 4;
@@ -4519,6 +4525,9 @@ internal_fn_stored_value_index (internal_fn fn)
 case IFN_LEN_STORE:
   return 3;
 
+case IFN_LEN_MASK_STORE:
+  return 4;
+
 default:
   return -1;
 }
@@ -4583,13 +4592,31 @@ internal_len_load_store_bias (internal_fn ifn, 
machine_mode mode)
 {
   optab optab = direct_internal_fn_optab (ifn);
   insn_code icode = direct_optab_handler (optab, mode);
+  int bias_argno = 3;
+  if (icode == CODE_FOR_nothing)
+{
+  machine_mode mask_mode
+   = targetm.vectorize.get_mask_mode (mode).require ();
+  if (ifn == IFN_LEN_LOAD)
+   {
+ /* Try LEN_MASK_LOAD.  */
+ optab = direct_internal_fn_optab (IFN_LEN_MASK_LOAD);
+   }
+  else
+   {
+ /* Try LEN_MASK_STORE.  */
+ optab = direct_internal_fn_optab (IFN_LEN_MASK_STORE);
+   }
+  icode = convert_optab_handler (optab, mode, mask_mode);
+  bias_argno = 4;
+}
 
   if (icode != CODE_FOR_nothing)
 {
   /* For now we only support biases of 0 or -1.  Try both of them.  */
-  if (insn_operand_matches (icode, 3, GEN_INT (0)))
+  if (insn_operand_matches (icode, bias_argno, GEN_INT (0)))
return 0;
-  if (insn_operand_matches (icode, 3, GEN_INT (-1)))
+  if (insn_operand_matches (icode, bias_argno, GEN_INT (-1)))
return -1;
 }
 
diff --git a/gcc/optabs-query.cc b/gcc/optabs-query.cc
index 276f8408dd7..4394d391200 100644
--- a/gcc/optabs-query.cc
+++ b/gcc/optabs-query.cc
@@ -566,11 +566,14 @@ can_vec_mask_load_store_p (machine_mode mode,
   bool is_load)
 {
   optab op = is_load ? maskload_optab : maskstore_optab;
+  optab len_op = is_load ? len_maskload_optab : len_maskstore_optab;
   machine_mode vmode;
 
   /* If mode is vector mode, check it directly.  */
   if (VECTOR_MODE_P (mode))
-return convert_optab_handler (op, mode, mask_mode) != CODE_FOR_nothing;
+return convert_optab_handler (op, mode, mask_mode) != CODE_FOR_nothing
+  || convert_optab_handler (len_op, mode, mask_mode)
+   != CODE_FOR_nothing;
 
   /* Otherwise, return true if there is some vector mode with
  the mask load/store supported.  */
@@ -584,7 +587,9 @@ can_vec_mask_load_store_p (machine_mode mode,
   vmode = targetm.vectorize.preferred_simd_mode (smode);
   if (VECTOR_MODE_P (vmode)
   && targetm.vectorize.get_mask_mode (vmode).exists (&mask_mode)
-  && convert_optab_handler (op, vmode, mask_mode) != CODE_FOR_nothing)
+  && (convert_optab_handler (op, vmode, mask_mode) != CODE

Re: [committed] libstdc++: Optimize std::to_array for trivial types [PR110167]

2023-06-20 Thread Jonathan Wakely via Gcc-patches
On Tue, 20 Jun 2023 at 09:29, Jonathan Wakely  wrote:

>
>
> On Tue, 20 Jun 2023 at 01:54, Patrick Palka  wrote:
>
>> On Fri, 9 Jun 2023, Jonathan Wakely via Libstdc++ wrote:
>>
>> > Tested powerpc64le-linux. Pushed to trunk.
>> >
>> > This makes sense to backport after some soak time on trunk.
>> >
>> > -- >8 --
>> >
>> > As reported in PR libstdc++/110167, std::to_array compiles extremely
>> > slowly for very large arrays. It needs to instantiate a very large
>> > specialization of std::index_sequence and then create a very large
>> > aggregate initializer from the pack expansion. For trivial types we can
>> > simply default-initialize the std::array and then use memcpy to copy the
>> > values. For non-trivial types we need to use the existing
>> > implementation, despite the compilation cost.
>> >
>> > As also noted in the PR, using a generic lambda instead of the
>> > __to_array helper compiles faster since gcc-13. It also produces
>> > slightly smaller code at -O1, due to additional inlining. The code at
>> > -Os, -O2 and -O3 seems to be the same. This new implementation requires
>> > __cpp_generic_lambdas >= 201707L (i.e. P0428R2) but that is supported
>> > since Clang 10 and since Intel icc 2021.5.0 (and since GCC 10.1).
>> >
>> > libstdc++-v3/ChangeLog:
>> >
>> >   PR libstdc++/110167
>> >   * include/std/array (to_array): Initialize arrays of trivial
>> >   types using memcpy. For non-trivial types, use lambda
>> >   expressions instead of a separate helper function.
>> >   (__to_array): Remove.
>> >   * testsuite/23_containers/array/creation/110167.cc: New test.
>> > ---
>> >  libstdc++-v3/include/std/array| 53 +--
>> >  .../23_containers/array/creation/110167.cc| 14 +
>> >  2 files changed, 51 insertions(+), 16 deletions(-)
>> >  create mode 100644
>> libstdc++-v3/testsuite/23_containers/array/creation/110167.cc
>> >
>> > diff --git a/libstdc++-v3/include/std/array
>> b/libstdc++-v3/include/std/array
>> > index 70280c1beeb..b791d86ddb2 100644
>> > --- a/libstdc++-v3/include/std/array
>> > +++ b/libstdc++-v3/include/std/array
>> > @@ -414,19 +414,8 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
>> >return std::move(std::get<_Int>(__arr));
>> >  }
>> >
>> > -#if __cplusplus > 201703L
>> > +#if __cplusplus >= 202002L && __cpp_generic_lambdas >= 201707L
>> >  #define __cpp_lib_to_array 201907L
>> > -
>> > -  template
>> > -constexpr array, sizeof...(_Idx)>
>> > -__to_array(_Tp (&__a)[sizeof...(_Idx)], index_sequence<_Idx...>)
>> > -{
>> > -  if constexpr (_Move)
>> > - return {{std::move(__a[_Idx])...}};
>> > -  else
>> > - return {{__a[_Idx]...}};
>> > -}
>> > -
>> >template
>> >  [[nodiscard]]
>> >  constexpr array, _Nm>
>> > @@ -436,8 +425,24 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
>> >static_assert(!is_array_v<_Tp>);
>> >static_assert(is_constructible_v<_Tp, _Tp&>);
>> >if constexpr (is_constructible_v<_Tp, _Tp&>)
>> > - return __to_array(__a, make_index_sequence<_Nm>{});
>> > -  __builtin_unreachable(); // FIXME: see PR c++/91388
>> > + {
>> > +   if constexpr (is_trivial_v<_Tp> && _Nm != 0)
>>
>> redundant _Nm != 0 test?
>>
>
> Ah yes, I added it below to ensure we don't use memcpy with a null
> __arr.data() and forgot to remove it here.
>
>
>>
>> > + {
>> > +   array, _Nm> __arr;
>> > +   if (!__is_constant_evaluated() && _Nm != 0)
>> > + __builtin_memcpy(__arr.data(), __a, sizeof(__a));
>> > +   else
>> > + for (size_t __i = 0; __i < _Nm; ++__i)
>> > +   __arr._M_elems[__i] = __a[__i];
>> > +   return __arr;
>> > + }
>> > +   else
>> > + return [&__a](index_sequence<_Idx...>) {
>> > +   return array, _Nm>{{ __a[_Idx]... }};
>> > + }(make_index_sequence<_Nm>{});
>> > + }
>> > +  else
>> > + __builtin_unreachable(); // FIXME: see PR c++/91388
>> >  }
>> >
>> >template
>> > @@ -449,8 +454,24 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
>> >static_assert(!is_array_v<_Tp>);
>> >static_assert(is_move_constructible_v<_Tp>);
>> >if constexpr (is_move_constructible_v<_Tp>)
>> > - return __to_array<1>(__a, make_index_sequence<_Nm>{});
>> > -  __builtin_unreachable(); // FIXME: see PR c++/91388
>> > + {
>> > +   if constexpr (is_trivial_v<_Tp>)
>> > + {
>> > +   array, _Nm> __arr;
>> > +   if (!__is_constant_evaluated() && _Nm != 0)
>> > + __builtin_memcpy(__arr.data(), __a, sizeof(__a));
>> > +   else
>> > + for (size_t __i = 0; __i < _Nm; ++__i)
>> > +   __arr._M_elems[__i] = std::move(__a[__i]);
>>
>> IIUC this std::move is unnecessary for trivial arrays?
>>
>
> Good point, thanks.
>
> That makes the lvalue and rvalue overloads identical for trivial types. It
> seems a shame to duplicate the code, so the rvalue one could do:
>
>  

Re: [PATCH] RISC-V: Fix compiler warning of riscv_arg_has_vector

2023-06-20 Thread Mark Wielaard
Hi all,

On Tue, 2023-06-20 at 07:11 -0600, Jeff Law wrote:
> On 6/20/23 04:56, Robin Dapp wrote:
> > > Could you merge it ?
> > > By the way, could Lehua get the write access?
> > 
> > IMHO nothing stands in the way but I'll defer to Jeff to have
> > the "official seal" :)
> > Once he ACKs Lehua needs to go the usual way of requesting
> > sourceware access via https://sourceware.org/cgi-bin/pdw/ps_form.cgi.
> Lehua fills out that form.  List me as the approver and the process will 
> run from there.  Takes a day or two for everything to get into place.

All done. Welcome Lehua.

> ps.  If Lehua has already filled out the form with Robin as the approver,
> that's fine too.  Might take a bit longer as I suspect the
> IT folks may not recognize Robin. 

Also, Robin is right: you are on the hook as approver for the "official
seal" :) because the "IT folks" check that the approver is listed as a
gcc maintainer and does not just have write-after-approval status.

Cheers,

Mark


RE: [PATCH] RISC-V: Fix out of range memory access of machine mode table

2023-06-20 Thread Li, Pan2 via Gcc-patches
Thanks Jakub for the explanation. I gave it a try with the patch below, but I am not
quite sure it is what is expected, or where I should put the assertion.

> If yes, it needs to
> be unsigned short, if not, we should add an assertion (e.g. on streaming
> in the LTO table) that MAX_MACHINE_MODE <= 256.

diff --git a/gcc/lto-streamer-in.cc b/gcc/lto-streamer-in.cc
index 2cb83406db5..93ef97ec5d3 100644
--- a/gcc/lto-streamer-in.cc
+++ b/gcc/lto-streamer-in.cc
@@ -1985,8 +1985,6 @@ lto_input_mode_table (struct lto_file_decl_data 
*file_data)
 internal_error ("cannot read LTO mode table from %s",
file_data->file_name);
 
-  unsigned char *table = ggc_cleared_vec_alloc (1 << 8);
-  file_data->mode_table = table;
   const struct lto_simple_header_with_strings *header
 = (const struct lto_simple_header_with_strings *) data;
   int string_offset;
@@ -1994,6 +1992,9 @@ lto_input_mode_table (struct lto_file_decl_data 
*file_data)
   string_offset = sizeof (*header) + header->main_size;
 
   lto_input_block ib (data + sizeof (*header), header->main_size, NULL);
+  unsigned char *table = ggc_cleared_vec_alloc (
+1 << ib.mode_bits);
+  file_data->mode_table = table;
   data_in = lto_data_in_create (file_data, data + string_offset,
header->string_size, vNULL);
   bitpack_d bp = streamer_read_bitpack (&ib);
@@ -2001,13 +2002,13 @@ lto_input_mode_table (struct lto_file_decl_data 
*file_data)
   table[VOIDmode] = VOIDmode;
   table[BLKmode] = BLKmode;
   unsigned int m;
-  while ((m = bp_unpack_value (&bp, 8)) != VOIDmode)
+  while ((m = bp_unpack_value (&bp, ib.mode_bits)) != VOIDmode)
 {
   enum mode_class mclass
= bp_unpack_enum (&bp, mode_class, MAX_MODE_CLASS);
   poly_uint16 size = bp_unpack_poly_value (&bp, 16);
   poly_uint16 prec = bp_unpack_poly_value (&bp, 16);
-  machine_mode inner = (machine_mode) bp_unpack_value (&bp, 8);
+  machine_mode inner = (machine_mode) bp_unpack_value (&bp, ib.mode_bits);
   poly_uint16 nunits = bp_unpack_poly_value (&bp, 16);
   unsigned int ibit = 0, fbit = 0;
   unsigned int real_fmt_len = 0;
@@ -2018,8 +2019,8 @@ lto_input_mode_table (struct lto_file_decl_data 
*file_data)
case MODE_UFRACT:
case MODE_ACCUM:
case MODE_UACCUM:
- ibit = bp_unpack_value (&bp, 8);
- fbit = bp_unpack_value (&bp, 8);
+ ibit = bp_unpack_value (&bp, ib.mode_bits);
+ fbit = bp_unpack_value (&bp, ib.mode_bits);
  break;
case MODE_FLOAT:
case MODE_DECIMAL_FLOAT:
diff --git a/gcc/lto-streamer.h b/gcc/lto-streamer.h
index fc7133d07ba..f1d826d59e4 100644
--- a/gcc/lto-streamer.h
+++ b/gcc/lto-streamer.h
@@ -352,6 +352,8 @@ public:
 
   const char *data;
   const unsigned char *mode_table;
+  /* Indicates how many bits of one machine mode will have.  */
+  const unsigned int mode_bits = ceil_log2 (MAX_MACHINE_MODE) ;
   unsigned int p;
   unsigned int len;
 };
diff --git a/gcc/tree-streamer.cc b/gcc/tree-streamer.cc
index ed65a7692e3..a28ef9c7920 100644
--- a/gcc/tree-streamer.cc
+++ b/gcc/tree-streamer.cc
@@ -35,7 +35,7 @@ along with GCC; see the file COPYING3.  If not see
During streaming in, we translate the on the disk mode using this
table.  For normal LTO it is set to identity, for ACCEL_COMPILER
depending on the mode_table content.  */
-unsigned char streamer_mode_table[1 << 8];
+unsigned char streamer_mode_table[MAX_MACHINE_MODE];
 
 /* Check that all the TS_* structures handled by the streamer_write_* and
streamer_read_* routines are exactly ALL the structures defined in
diff --git a/gcc/tree-streamer.h b/gcc/tree-streamer.h
index 170d61cf20b..9aa248cd2f5 100644
--- a/gcc/tree-streamer.h
+++ b/gcc/tree-streamer.h
@@ -75,7 +75,7 @@ void streamer_write_tree_body (struct output_block *, tree);
 void streamer_write_integer_cst (struct output_block *, tree);
 
 /* In tree-streamer.cc.  */
-extern unsigned char streamer_mode_table[1 << 8];
+extern unsigned char streamer_mode_table[MAX_MACHINE_MODE];
 void streamer_check_handled_ts_structures (void);
 bool streamer_tree_cache_insert (struct streamer_tree_cache_d *, tree,
 hashval_t, unsigned *);
@@ -108,15 +108,18 @@ inline void
 bp_pack_machine_mode (struct bitpack_d *bp, machine_mode mode)
 {
   streamer_mode_table[mode] = 1;
-  bp_pack_enum (bp, machine_mode, 1 << 8, mode);
+  int last = 1 << ceil_log2 (MAX_MACHINE_MODE);
+
+  bp_pack_enum (bp, machine_mode, last, mode);
 }
 
 inline machine_mode
 bp_unpack_machine_mode (struct bitpack_d *bp)
 {
-  return (machine_mode)
-  ((class lto_input_block *)
-   bp->stream)->mode_table[bp_unpack_enum (bp, machine_mode, 1 << 8)];
+  lto_input_block *input_block =  (class lto_input_block *)bp->stream;
+  int index = bp_unpack_enum (bp, machine_mode, input_block->mode_bits);
+
+  return (machine_mode)input_block->mode_table[index];
 }
 
 #endif  /* GCC_TREE_STREAMER_H  */

Pan

-Or

RE: [PATCH v2] RISC-V: Set the natural size of constant vector mask modes to one RVV data vector.

2023-06-20 Thread Li, Pan2 via Gcc-patches
Committed, thanks Jeff and Juzhe.

Pan

-Original Message-
From: Gcc-patches  On Behalf 
Of Jeff Law via Gcc-patches
Sent: Tuesday, June 20, 2023 10:12 PM
To: juzhe.zh...@rivai.ai; Li Xu ; gcc-patches 

Cc: kito.cheng ; palmer 
Subject: Re: [PATCH v2] RISC-V: Set the natural size of constant vector mask 
modes to one RVV data vector.



On 6/20/23 00:47, juzhe.zh...@rivai.ai wrote:
> LGTM. Thanks!
OK for the trunk, of course.
jeff


Re: [PATCH v2] RISC-V: Set the natural size of constant vector mask modes to one RVV data vector.

2023-06-20 Thread Jeff Law via Gcc-patches




On 6/20/23 00:47, juzhe.zh...@rivai.ai wrote:

LGTM. Thanks!

OK for the trunk, of course.
jeff


Re: [PATCH][RFC] c-family: Implement __has_feature and __has_extension [PR60512]

2023-06-20 Thread Iain Sandoe
Hi Alex

again, thanks for working on this and for fixing the SDK blocker.

> On 20 Jun 2023, at 13:30, Alex Coplan  wrote:
> 

> The patch can now survive bootstrap on Darwin (it looks like we'll need
> to adjust some Objective-C++ tests in light of the new pedwarn, but that
> looks to be straightforward).

Yes, I’ll deal with that soon (I was trying to decide whether to fix the
header we have copied from GNUStep, or whether to mark it as a system
header).

>> (one reason to allow target opt-in/out of specific features)
>> 
>>> with the following omissions:
>> 
>>> - Objective-C-specific features.
>> 
>> I can clearly append the objective-c(++) cases to the end of the respective
>> lists, but then we need to make them conditional on language, version and
>> dialect (some will not be appropriate to GNU runtime).
>> 
>> this is why I think we need more flexible predicates on declaring features
>> and extensions.
> 
> Would it help mitigate these concerns if I implemented some Objective-C
> features as part of this patch (say, those implemented by your WIP
> patch)?
> 
> My feeling is that the vast majority of extensions / features have
> similar logic, so we should exploit that redundancy to keep things terse
> in the encoding for the general case. Where we need more flexible
> predicates (e.g. for objc_nonfragile_abi in your WIP patch), those can
> be handled on a case-by-case basis by adding a new enumerator and logic
> to handle that specially.
> 
> What do you think, does that sound OK to you?

Sketching out what you have in mind using one or two examples would be
helpful.  Again, the fact that some of the answers are target-dependent, is
what makes me think of needing a little more generality.

>> What about things like this:
>> 
>> attribute_availability_tvos, 
>> attribute_availability_watchos, 
>> attribute_availability_driverkit, 
> 
> FWIW, clang looks to define these unconditionally, so restricting these
> to a given target would be deviating from its precedent.

Hmm.. I did not check that although (for the sake of keeping target-specific
code localised) my current availability attribute implementation is Darwin-
specific.

Having said that, interoperability with clang is also a very useful goal - for
Darwin, the SDK headers have only been (fully) tested with clang up to
now and I am sure we will find more gotchas as we expand what we can
parse.

> However, I don't think it would be hard to extend the implementation in
> this patch to support target-specific features if required. I think
> perhaps a langhook that targets can call to add their own features would
> be a reasonable approach.

Indeed, that could work if the result is needed later than pre-processing.

In my patch, IIRC, I added another entry to the libcpp callbacks to handle
target-specific __has_ queries.

cheers
Iain




Re: [PATCH] RISC-V: Add tuple vector mode psABI checking and simplify code

2023-06-20 Thread Robin Dapp via Gcc-patches
Hi,

I'm going to commit the attached.  Thanks Lehua for reporting.

Regards
 Robin


>From 1a4dfe90f251e38e27104f2fa11feecd3b04c4c1 Mon Sep 17 00:00:00 2001
From: Robin Dapp 
Date: Tue, 20 Jun 2023 15:52:16 +0200
Subject: [PATCH] RISC-V: testsuite: Add missing -mabi=lp64d.

This fixes more cases of missing -mabi=lp64d.

gcc/testsuite/ChangeLog:

* gcc.target/riscv/rvv/autovec/vls-vlmax/full-vec-move1.c: Add
-mabi=lp64d.
* gcc.target/riscv/rvv/autovec/vls-vlmax/vec_extract-1.c: Ditto.
* gcc.target/riscv/rvv/autovec/vls-vlmax/vec_extract-2.c: Ditto.
* gcc.target/riscv/rvv/autovec/vls-vlmax/vec_extract-3.c: Ditto.
* gcc.target/riscv/rvv/autovec/vls-vlmax/vec_extract-4.c: Ditto.
* gcc.target/riscv/rvv/autovec/vls-vlmax/vec_set-1.c: Ditto.
* gcc.target/riscv/rvv/autovec/vls-vlmax/vec_set-2.c: Ditto.
* gcc.target/riscv/rvv/autovec/vls-vlmax/vec_set-3.c: Ditto.
* gcc.target/riscv/rvv/autovec/vls-vlmax/vec_set-4.c: Ditto.
---
 .../gcc.target/riscv/rvv/autovec/vls-vlmax/full-vec-move1.c | 2 +-
 .../gcc.target/riscv/rvv/autovec/vls-vlmax/vec_extract-1.c  | 2 +-
 .../gcc.target/riscv/rvv/autovec/vls-vlmax/vec_extract-2.c  | 2 +-
 .../gcc.target/riscv/rvv/autovec/vls-vlmax/vec_extract-3.c  | 2 +-
 .../gcc.target/riscv/rvv/autovec/vls-vlmax/vec_extract-4.c  | 2 +-
 .../gcc.target/riscv/rvv/autovec/vls-vlmax/vec_set-1.c  | 2 +-
 .../gcc.target/riscv/rvv/autovec/vls-vlmax/vec_set-2.c  | 2 +-
 .../gcc.target/riscv/rvv/autovec/vls-vlmax/vec_set-3.c  | 2 +-
 .../gcc.target/riscv/rvv/autovec/vls-vlmax/vec_set-4.c  | 2 +-
 9 files changed, 9 insertions(+), 9 deletions(-)

diff --git 
a/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls-vlmax/full-vec-move1.c 
b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls-vlmax/full-vec-move1.c
index c32c31ecd69..9ed7c4f1205 100644
--- a/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls-vlmax/full-vec-move1.c
+++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls-vlmax/full-vec-move1.c
@@ -1,5 +1,5 @@
 /* { dg-do compile } */
-/* { dg-additional-options "-std=c99 -O3 -march=rv64gcv_zvl128b 
-fno-vect-cost-model --param=riscv-autovec-preference=fixed-vlmax" } */
+/* { dg-additional-options "-std=c99 -O3 -march=rv64gcv_zvl128b -mabi=lp64d 
-fno-vect-cost-model --param=riscv-autovec-preference=fixed-vlmax" } */
 
 #include 
 #include 
diff --git 
a/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls-vlmax/vec_extract-1.c 
b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls-vlmax/vec_extract-1.c
index 34efd5f700a..9cb167a8cdc 100644
--- a/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls-vlmax/vec_extract-1.c
+++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls-vlmax/vec_extract-1.c
@@ -1,5 +1,5 @@
 /* { dg-do compile } */
-/* { dg-additional-options "-march=rv64gcv_zvfh -Wno-pedantic -Wno-psabi" } */
+/* { dg-additional-options "-march=rv64gcv_zvfh -mabi=lp64d -Wno-pedantic 
-Wno-psabi" } */
 
 #include 
 
diff --git 
a/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls-vlmax/vec_extract-2.c 
b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls-vlmax/vec_extract-2.c
index 5f3168a320a..2837ff58e2d 100644
--- a/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls-vlmax/vec_extract-2.c
+++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls-vlmax/vec_extract-2.c
@@ -1,5 +1,5 @@
 /* { dg-do compile } */
-/* { dg-additional-options "-march=rv64gcv_zvfh -Wno-pedantic -Wno-psabi" } */
+/* { dg-additional-options "-march=rv64gcv_zvfh -mabi=lp64d -Wno-pedantic 
-Wno-psabi" } */
 
 #include 
 
diff --git 
a/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls-vlmax/vec_extract-3.c 
b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls-vlmax/vec_extract-3.c
index 7210327a4ff..47f30ed79f1 100644
--- a/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls-vlmax/vec_extract-3.c
+++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls-vlmax/vec_extract-3.c
@@ -1,5 +1,5 @@
 /* { dg-do compile } */
-/* { dg-additional-options "-march=rv64gcv_zvfh -Wno-pedantic -Wno-psabi" } */
+/* { dg-additional-options "-march=rv64gcv_zvfh -mabi=lp64d -Wno-pedantic 
-Wno-psabi" } */
 
 #include 
 
diff --git 
a/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls-vlmax/vec_extract-4.c 
b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls-vlmax/vec_extract-4.c
index c5cb56a88c7..f7169f07506 100644
--- a/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls-vlmax/vec_extract-4.c
+++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls-vlmax/vec_extract-4.c
@@ -1,5 +1,5 @@
 /* { dg-do compile } */
-/* { dg-additional-options "-march=rv64gcv_zvfh -Wno-pedantic -Wno-psabi" } */
+/* { dg-additional-options "-march=rv64gcv_zvfh -mabi=lp64d -Wno-pedantic 
-Wno-psabi" } */
 
 #include 
 
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls-vlmax/vec_set-1.c 
b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls-vlmax/vec_set-1.c
index 28f11150f8f..3d60e635869 100644
--- a/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls-vlmax/vec_set-1.c
+++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls

RE: [PATCH V3] RISC-V: Optimize codegen of VLA SLP

2023-06-20 Thread Li, Pan2 via Gcc-patches
Committed, thanks Robin and Jeff.

Pan

-----Original Message-----
From: Gcc-patches On Behalf 
Of Jeff Law via Gcc-patches
Sent: Tuesday, June 20, 2023 9:18 PM
To: Robin Dapp ; Juzhe-Zhong ; 
gcc-patches@gcc.gnu.org
Cc: kito.ch...@gmail.com; kito.ch...@sifive.com; pal...@dabbelt.com; 
pal...@rivosinc.com
Subject: Re: [PATCH V3] RISC-V: Optimize codegen of VLA SLP



On 6/20/23 03:01, Robin Dapp wrote:
> LGTM.
Likewise -- that V2/V3 is a nice improvement over the original V1 approach.

jeff


Re: [COMMITTED] ada: Add CHERI intrinsic bindings and helper functions.

2023-06-20 Thread Marc Poulhiès via Gcc-patches
Hi,

>> The package Interfaces.CHERI provides intrinsic bindings and
>> helper functions to allow software to query, create, and
>> manipulate CHERI capabilities.
>
> I'm curious what the motivation for these intrinsic wrappers is, given that
> GCC trunk doesn't currently support them. Out of interest, can you share what
> the use case for these is?

We share the same Ada frontend with different GCC compilers and
contribute it in GCC's master branch.

You're correct that this particular change is not useful (yet) with
master, but we are testing/using it with a CHERI-aware GCC.

Does that answer your question?

Marc


Re: [PATCH] RISC-V: Add tuple vector mode psABI checking and simplify code

2023-06-20 Thread Robin Dapp via Gcc-patches
> By the way, shouldn't these cases have the `-mabi=lp64d` option added,
> otherwise I get the following failure message when I run tests on RV32 GCC.
> 
>   FAIL: gcc.target/riscv/rvv/autovec/vls-vlmax/vec_set-1.c -std=c99 -O3 
> -ftree-vectorize --param riscv-autovec-preference=fixed-vlmax (test for 
> excess errors)
>   Excess errors.
>   cc1: error: ABI requires '-march=rv32'

Arg, yes definitely, sorry.  I keep forgetting this... Will fix.

Regards
 Robin



Re: [PATCH V2] VECT: Apply LEN_MASK_{LOAD,STORE} into vectorizer

2023-06-20 Thread juzhe.zh...@rivai.ai
Hi, Richi. I am not sure why I failed to send this patch to your @suse email 
the first time.
Now I am cc'ing your gmail address.

This is the V2 patch addressing your comments for V1.

Thanks.


juzhe.zh...@rivai.ai
 
From: juzhe.zhong
Date: 2023-06-20 21:42
To: gcc-patches
CC: richard.sandiford; rguenther; Ju-Zhe Zhong
Subject: [PATCH V2] VECT: Apply LEN_MASK_{LOAD,STORE} into vectorizer
From: Ju-Zhe Zhong 
 
gcc/ChangeLog:
 
* internal-fn.cc (expand_partial_store_optab_fn): Add 
LEN_MASK_{LOAD,STORE} vectorizer support.
(internal_load_fn_p): Ditto.
(internal_store_fn_p): Ditto.
(internal_fn_mask_index): Ditto.
(internal_fn_stored_value_index): Ditto.
(internal_len_load_store_bias): Ditto.
* optabs-query.cc (can_vec_mask_load_store_p): Ditto.
(get_len_load_store_mode): Ditto.
* tree-vect-stmts.cc (check_load_store_for_partial_vectors): Ditto.
(get_all_ones_mask): New function.
(vectorizable_store): Add LEN_MASK_{LOAD,STORE} vectorizer support.
(vectorizable_load): Ditto.
 
---
gcc/internal-fn.cc |  35 +-
gcc/optabs-query.cc|  25 -
gcc/tree-vect-stmts.cc | 234 ++---
3 files changed, 227 insertions(+), 67 deletions(-)
 
diff --git a/gcc/internal-fn.cc b/gcc/internal-fn.cc
index c911ae790cb..e10c21de5f1 100644
--- a/gcc/internal-fn.cc
+++ b/gcc/internal-fn.cc
@@ -2949,7 +2949,7 @@ expand_partial_load_optab_fn (internal_fn, gcall *stmt, 
convert_optab optab)
  * OPTAB.  */
static void
-expand_partial_store_optab_fn (internal_fn, gcall *stmt, convert_optab optab)
+expand_partial_store_optab_fn (internal_fn ifn, gcall *stmt, convert_optab 
optab)
{
   class expand_operand ops[5];
   tree type, lhs, rhs, maskt, biast;
@@ -2957,7 +2957,7 @@ expand_partial_store_optab_fn (internal_fn, gcall *stmt, 
convert_optab optab)
   insn_code icode;
   maskt = gimple_call_arg (stmt, 2);
-  rhs = gimple_call_arg (stmt, 3);
+  rhs = gimple_call_arg (stmt, internal_fn_stored_value_index (ifn));
   type = TREE_TYPE (rhs);
   lhs = expand_call_mem_ref (type, stmt, 0);
@@ -4435,6 +4435,7 @@ internal_load_fn_p (internal_fn fn)
 case IFN_GATHER_LOAD:
 case IFN_MASK_GATHER_LOAD:
 case IFN_LEN_LOAD:
+case IFN_LEN_MASK_LOAD:
   return true;
 default:
@@ -4455,6 +4456,7 @@ internal_store_fn_p (internal_fn fn)
 case IFN_SCATTER_STORE:
 case IFN_MASK_SCATTER_STORE:
 case IFN_LEN_STORE:
+case IFN_LEN_MASK_STORE:
   return true;
 default:
@@ -4494,6 +4496,10 @@ internal_fn_mask_index (internal_fn fn)
 case IFN_MASK_STORE_LANES:
   return 2;
+case IFN_LEN_MASK_LOAD:
+case IFN_LEN_MASK_STORE:
+  return 3;
+
 case IFN_MASK_GATHER_LOAD:
 case IFN_MASK_SCATTER_STORE:
   return 4;
@@ -4519,6 +4525,9 @@ internal_fn_stored_value_index (internal_fn fn)
 case IFN_LEN_STORE:
   return 3;
+case IFN_LEN_MASK_STORE:
+  return 4;
+
 default:
   return -1;
 }
@@ -4583,13 +4592,31 @@ internal_len_load_store_bias (internal_fn ifn, 
machine_mode mode)
{
   optab optab = direct_internal_fn_optab (ifn);
   insn_code icode = direct_optab_handler (optab, mode);
+  int bias_argno = 3;
+  if (icode == CODE_FOR_nothing)
+{
+  machine_mode mask_mode
+ = targetm.vectorize.get_mask_mode (mode).require ();
+  if (ifn == IFN_LEN_LOAD)
+ {
+   /* Try LEN_MASK_LOAD.  */
+   optab = direct_internal_fn_optab (IFN_LEN_MASK_LOAD);
+ }
+  else
+ {
+   /* Try LEN_MASK_STORE.  */
+   optab = direct_internal_fn_optab (IFN_LEN_MASK_STORE);
+ }
+  icode = convert_optab_handler (optab, mode, mask_mode);
+  bias_argno = 4;
+}
   if (icode != CODE_FOR_nothing)
 {
   /* For now we only support biases of 0 or -1.  Try both of them.  */
-  if (insn_operand_matches (icode, 3, GEN_INT (0)))
+  if (insn_operand_matches (icode, bias_argno, GEN_INT (0)))
return 0;
-  if (insn_operand_matches (icode, 3, GEN_INT (-1)))
+  if (insn_operand_matches (icode, bias_argno, GEN_INT (-1)))
return -1;
 }
diff --git a/gcc/optabs-query.cc b/gcc/optabs-query.cc
index 276f8408dd7..4394d391200 100644
--- a/gcc/optabs-query.cc
+++ b/gcc/optabs-query.cc
@@ -566,11 +566,14 @@ can_vec_mask_load_store_p (machine_mode mode,
   bool is_load)
{
   optab op = is_load ? maskload_optab : maskstore_optab;
+  optab len_op = is_load ? len_maskload_optab : len_maskstore_optab;
   machine_mode vmode;
   /* If mode is vector mode, check it directly.  */
   if (VECTOR_MODE_P (mode))
-return convert_optab_handler (op, mode, mask_mode) != CODE_FOR_nothing;
+return convert_optab_handler (op, mode, mask_mode) != CODE_FOR_nothing
+|| convert_optab_handler (len_op, mode, mask_mode)
+ != CODE_FOR_nothing;
   /* Otherwise, return true if there is some vector mode with
  the mask load/store supported.  */
@@ -584,7 +587,9 @@ can_vec_mask_load_store_p (machine_mode mode,
   vmode = targetm.vectorize.preferred_simd_mode (smode);
   

Re: [PATCH] RISC-V: Add tuple vector mode psABI checking and simplify code

2023-06-20 Thread Lehua Ding
> -/* { dg-additional-options "-march=rv64gcv_zvfh -Wno-pedantic" } */
> +/* { dg-additional-options "-march=rv64gcv_zvfh -Wno-pedantic -Wno-psabi" 
} */

By the way, shouldn't these cases have the `-mabi=lp64d` option added,
otherwise I get the following failure message when I run tests on RV32 GCC.


  FAIL: gcc.target/riscv/rvv/autovec/vls-vlmax/vec_set-1.c -std=c99 -O3 
-ftree-vectorize --param riscv-autovec-preference=fixed-vlmax (test for excess 
errors)
  Excess errors.
  cc1: error: ABI requires '-march=rv32'



Best,
Lehua


-- Original --
From:  "Robin Dapp"

[PATCH V2] VECT: Apply LEN_MASK_{LOAD,STORE} into vectorizer

2023-06-20 Thread juzhe . zhong
From: Ju-Zhe Zhong 

gcc/ChangeLog:

* internal-fn.cc (expand_partial_store_optab_fn): Add 
LEN_MASK_{LOAD,STORE} vectorizer support.
(internal_load_fn_p): Ditto.
(internal_store_fn_p): Ditto.
(internal_fn_mask_index): Ditto.
(internal_fn_stored_value_index): Ditto.
(internal_len_load_store_bias): Ditto.
* optabs-query.cc (can_vec_mask_load_store_p): Ditto.
(get_len_load_store_mode): Ditto.
* tree-vect-stmts.cc (check_load_store_for_partial_vectors): Ditto.
(get_all_ones_mask): New function.
(vectorizable_store): Add LEN_MASK_{LOAD,STORE} vectorizer support.
(vectorizable_load): Ditto.

---
 gcc/internal-fn.cc |  35 +-
 gcc/optabs-query.cc|  25 -
 gcc/tree-vect-stmts.cc | 234 ++---
 3 files changed, 227 insertions(+), 67 deletions(-)

diff --git a/gcc/internal-fn.cc b/gcc/internal-fn.cc
index c911ae790cb..e10c21de5f1 100644
--- a/gcc/internal-fn.cc
+++ b/gcc/internal-fn.cc
@@ -2949,7 +2949,7 @@ expand_partial_load_optab_fn (internal_fn, gcall *stmt, 
convert_optab optab)
  * OPTAB.  */
 
 static void
-expand_partial_store_optab_fn (internal_fn, gcall *stmt, convert_optab optab)
+expand_partial_store_optab_fn (internal_fn ifn, gcall *stmt, convert_optab 
optab)
 {
   class expand_operand ops[5];
   tree type, lhs, rhs, maskt, biast;
@@ -2957,7 +2957,7 @@ expand_partial_store_optab_fn (internal_fn, gcall *stmt, 
convert_optab optab)
   insn_code icode;
 
   maskt = gimple_call_arg (stmt, 2);
-  rhs = gimple_call_arg (stmt, 3);
+  rhs = gimple_call_arg (stmt, internal_fn_stored_value_index (ifn));
   type = TREE_TYPE (rhs);
   lhs = expand_call_mem_ref (type, stmt, 0);
 
@@ -4435,6 +4435,7 @@ internal_load_fn_p (internal_fn fn)
 case IFN_GATHER_LOAD:
 case IFN_MASK_GATHER_LOAD:
 case IFN_LEN_LOAD:
+case IFN_LEN_MASK_LOAD:
   return true;
 
 default:
@@ -4455,6 +4456,7 @@ internal_store_fn_p (internal_fn fn)
 case IFN_SCATTER_STORE:
 case IFN_MASK_SCATTER_STORE:
 case IFN_LEN_STORE:
+case IFN_LEN_MASK_STORE:
   return true;
 
 default:
@@ -4494,6 +4496,10 @@ internal_fn_mask_index (internal_fn fn)
 case IFN_MASK_STORE_LANES:
   return 2;
 
+case IFN_LEN_MASK_LOAD:
+case IFN_LEN_MASK_STORE:
+  return 3;
+
 case IFN_MASK_GATHER_LOAD:
 case IFN_MASK_SCATTER_STORE:
   return 4;
@@ -4519,6 +4525,9 @@ internal_fn_stored_value_index (internal_fn fn)
 case IFN_LEN_STORE:
   return 3;
 
+case IFN_LEN_MASK_STORE:
+  return 4;
+
 default:
   return -1;
 }
@@ -4583,13 +4592,31 @@ internal_len_load_store_bias (internal_fn ifn, 
machine_mode mode)
 {
   optab optab = direct_internal_fn_optab (ifn);
   insn_code icode = direct_optab_handler (optab, mode);
+  int bias_argno = 3;
+  if (icode == CODE_FOR_nothing)
+{
+  machine_mode mask_mode
+   = targetm.vectorize.get_mask_mode (mode).require ();
+  if (ifn == IFN_LEN_LOAD)
+   {
+ /* Try LEN_MASK_LOAD.  */
+ optab = direct_internal_fn_optab (IFN_LEN_MASK_LOAD);
+   }
+  else
+   {
+ /* Try LEN_MASK_STORE.  */
+ optab = direct_internal_fn_optab (IFN_LEN_MASK_STORE);
+   }
+  icode = convert_optab_handler (optab, mode, mask_mode);
+  bias_argno = 4;
+}
 
   if (icode != CODE_FOR_nothing)
 {
   /* For now we only support biases of 0 or -1.  Try both of them.  */
-  if (insn_operand_matches (icode, 3, GEN_INT (0)))
+  if (insn_operand_matches (icode, bias_argno, GEN_INT (0)))
return 0;
-  if (insn_operand_matches (icode, 3, GEN_INT (-1)))
+  if (insn_operand_matches (icode, bias_argno, GEN_INT (-1)))
return -1;
 }
 
diff --git a/gcc/optabs-query.cc b/gcc/optabs-query.cc
index 276f8408dd7..4394d391200 100644
--- a/gcc/optabs-query.cc
+++ b/gcc/optabs-query.cc
@@ -566,11 +566,14 @@ can_vec_mask_load_store_p (machine_mode mode,
   bool is_load)
 {
   optab op = is_load ? maskload_optab : maskstore_optab;
+  optab len_op = is_load ? len_maskload_optab : len_maskstore_optab;
   machine_mode vmode;
 
   /* If mode is vector mode, check it directly.  */
   if (VECTOR_MODE_P (mode))
-return convert_optab_handler (op, mode, mask_mode) != CODE_FOR_nothing;
+return convert_optab_handler (op, mode, mask_mode) != CODE_FOR_nothing
+  || convert_optab_handler (len_op, mode, mask_mode)
+   != CODE_FOR_nothing;
 
   /* Otherwise, return true if there is some vector mode with
  the mask load/store supported.  */
@@ -584,7 +587,9 @@ can_vec_mask_load_store_p (machine_mode mode,
   vmode = targetm.vectorize.preferred_simd_mode (smode);
   if (VECTOR_MODE_P (vmode)
   && targetm.vectorize.get_mask_mode (vmode).exists (&mask_mode)
-  && convert_optab_handler (op, vmode, mask_mode) != CODE_FOR_nothing)
+  && (convert_optab_handler (op, vmode, mask_mode) != CODE_

[PATCH] libstdc++: Use RAII in std::vector::_M_realloc_insert

2023-06-20 Thread Jonathan Wakely via Gcc-patches
I intend to push this to trunk once testing finishes.

I generated the diff with -b so the whitespace changes aren't shown,
because there was some re-indenting that makes the diff look larger than
it really is.

Honza, I don't think this is likely to make much difference for the PR
110287 testcases, but I think it simplifies the code and so is an
improvement in terms of maintenance and readability.

-- >8 --

Replace the try-block with RAII types for deallocating storage and
destroying elements.

libstdc++-v3/ChangeLog:

* include/bits/vector.tcc (_M_realloc_insert): Replace try-block
with RAII types.
---
 libstdc++-v3/include/bits/vector.tcc | 142 +--
 1 file changed, 89 insertions(+), 53 deletions(-)

diff --git a/libstdc++-v3/include/bits/vector.tcc 
b/libstdc++-v3/include/bits/vector.tcc
index acd11e2dc68..cda52fbbc4a 100644
--- a/libstdc++-v3/include/bits/vector.tcc
+++ b/libstdc++-v3/include/bits/vector.tcc
@@ -458,73 +458,109 @@ _GLIBCXX_BEGIN_NAMESPACE_CONTAINER
 _M_realloc_insert(iterator __position, const _Tp& __x)
 #endif
 {
-  const size_type __len =
-   _M_check_len(size_type(1), "vector::_M_realloc_insert");
+  const size_type __len = _M_check_len(1u, "vector::_M_realloc_insert");
   pointer __old_start = this->_M_impl._M_start;
   pointer __old_finish = this->_M_impl._M_finish;
   const size_type __elems_before = __position - begin();
   pointer __new_start(this->_M_allocate(__len));
   pointer __new_finish(__new_start);
-  __try
+
+  // RAII guard for allocated storage.
+  struct _Guard
+  {
+   pointer _M_storage; // Storage to deallocate
+   size_type _M_len;
+   _Tp_alloc_type& _M_alloc;
+
+   _GLIBCXX20_CONSTEXPR
+   _Guard(pointer __s, size_type __l, _Tp_alloc_type& __a)
+   : _M_storage(__s), _M_len(__l), _M_alloc(__a)
+   { }
+
+   _GLIBCXX20_CONSTEXPR
+   ~_Guard()
{
- // The order of the three operations is dictated by the C++11
- // case, where the moves could alter a new element belonging
- // to the existing vector.  This is an issue only for callers
- // taking the element by lvalue ref (see last bullet of C++11
- // [res.on.arguments]).
- _Alloc_traits::construct(this->_M_impl,
-  __new_start + __elems_before,
+ if (_M_storage)
+   __gnu_cxx::__alloc_traits<_Tp_alloc_type>::
+ deallocate(_M_alloc, _M_storage, _M_len);
+   }
+
+  private:
+   _Guard(const _Guard&);
+  };
+  _Guard __guard(__new_start, __len, _M_impl);
+
+  // The order of the three operations is dictated by the C++11
+  // case, where the moves could alter a new element belonging
+  // to the existing vector.  This is an issue only for callers
+  // taking the element by lvalue ref (see last bullet of C++11
+  // [res.on.arguments]).
+
+  // If this throws, the existing elements are unchanged.
 #if __cplusplus >= 201103L
-  std::forward<_Args>(__args)...);
+  _Alloc_traits::construct(this->_M_impl,
+  std::__to_address(__new_start + __elems_before),
+  std::forward<_Args>(__args)...);
 #else
-  __x);
+  _Alloc_traits::construct(this->_M_impl,
+  __new_start + __elems_before,
+  __x);
 #endif
- __new_finish = pointer();
 
 #if __cplusplus >= 201103L
- if _GLIBCXX17_CONSTEXPR (_S_use_relocate())
-   {
- __new_finish = _S_relocate(__old_start, __position.base(),
-__new_start, _M_get_Tp_allocator());
-
- ++__new_finish;
-
- __new_finish = _S_relocate(__position.base(), __old_finish,
-__new_finish, _M_get_Tp_allocator());
-   }
- else
-#endif
-   {
- __new_finish
-   = std::__uninitialized_move_if_noexcept_a
-   (__old_start, __position.base(),
-__new_start, _M_get_Tp_allocator());
-
- ++__new_finish;
-
- __new_finish
-   = std::__uninitialized_move_if_noexcept_a
-   (__position.base(), __old_finish,
-__new_finish, _M_get_Tp_allocator());
-   }
-   }
-  __catch(...)
+  if _GLIBCXX17_CONSTEXPR (_S_use_relocate())
{
- if (!__new_finish)
-   _Alloc_traits::destroy(this->_M_impl,
-  __new_start + __elems_before);
- else
-   std::_Destroy(__new_start, __new_finish, _M_get_Tp_allocator());
- _M_deallocate(__new_start, __len);
- __throw_exception_again;
+ // Relocation cannot throw.
+ __new_finish = _S_relocate(__old_start, __position.base(),
+

Re: [PATCH] Improve DSE to handle stores before __builtin_unreachable ()

2023-06-20 Thread Jeff Law via Gcc-patches




On 6/20/23 00:59, Richard Biener via Gcc-patches wrote:

DSE isn't good at identifying program points that end lifetime
of variables that are not associated with virtual operands.  But
at least for those that end basic-blocks we can handle the simple
case where this ending is in the same basic-block as the definition
we want to elide.  That should catch quite some common cases already.

Bootstrapped and tested on x86_64-unknown-linux-gnu.

As you can see from the testcase I had to adjust, this can possibly
lead to more severe issues when one forgets a return (the C++ frontend
places __builtin_unreachable () there).  I'm still planning to push
this improvement unless I hear objections.
Thanks,
Richard.

* tree-ssa-dse.cc (dse_classify_store): When we found
no defs and the basic-block with the original definition
ends in __builtin_unreachable[_trap] the store is dead.

* gcc.dg/tree-ssa/ssa-dse-47.c: New testcase.
* c-c++-common/asan/pr106558.c: Avoid undefined behavior
due to missing return.
I thought during the introduction of erroneous path isolation that we 
concluded stores, calls and such had observable side effects that must 
be preserved, even when we hit a block that leads to __builtin_unreachable.


Don't get me wrong, I'm all for removing the memory references if it's 
safe to do so.


Jeff


Re: [PATCH] RISC-V: Fix compiler warning of riscv_arg_has_vector

2023-06-20 Thread Lehua Ding
> Lehua fills out that form.  List me as the approver and the process 
will
> run from there.  Takes a day or two for everything to get into place.


I just followed this step to submit the form, thanks to Robin, Jeff and Juzhe.


Best,
Lehua

Re: [PATCH 1/2] c++: implement __remove_pointer built-in trait

2023-06-20 Thread Ken Matsui via Gcc-patches
Just a quick update, the benchmark code link has been updated and can
now be accessed at
https://github.com/ken-matsui/gcc-benches/blob/main/remove_pointer.cc.
I have also created a report file which can be found at
https://github.com/ken-matsui/gcc-benches/blob/main/remove_pointer.md.

On Sat, Jun 17, 2023 at 5:35 AM Ken Matsui  wrote:
>
> Hi,
>
> I conducted a benchmark for remove_pointer as well as is_object. Just
> like the is_object benchmark, here is the benchmark code:
>
> https://github.com/ken-matsui/gcc-benches/blob/main/remove_pointer_benchmark.cc
>
> On my computer, using the gcc HEAD of this patch for a release build,
> the patch with -DUSE_BUILTIN took 8.7% less time and used 4.3-4.9%
> less memory on average compared to not using it. Although the
> performance improvement was not as significant as with is_object, the
> benchmark demonstrated that the compilation was consistently more
> efficient.
>
> Sincerely,
> Ken Matsui
>
> On Thu, Jun 15, 2023 at 5:22 AM Ken Matsui  wrote:
> >
> > This patch implements built-in trait for std::remove_pointer.
> >
> > gcc/cp/ChangeLog:
> >
> > * cp-trait.def: Define __remove_pointer.
> > * semantics.cc (finish_trait_type): Handle CPTK_REMOVE_POINTER.
> >
> > gcc/testsuite/ChangeLog:
> >
> > * g++.dg/ext/has-builtin-1.C: Test existence of __remove_pointer.
> > * g++.dg/ext/remove_pointer.C: New test.
> >
> > Signed-off-by: Ken Matsui 
> > ---
> >  gcc/cp/cp-trait.def   |  1 +
> >  gcc/cp/semantics.cc   |  4 ++
> >  gcc/testsuite/g++.dg/ext/has-builtin-1.C  |  3 ++
> >  gcc/testsuite/g++.dg/ext/remove_pointer.C | 51 +++
> >  4 files changed, 59 insertions(+)
> >  create mode 100644 gcc/testsuite/g++.dg/ext/remove_pointer.C
> >
> > diff --git a/gcc/cp/cp-trait.def b/gcc/cp/cp-trait.def
> > index 8b7fece0cc8..07823e55579 100644
> > --- a/gcc/cp/cp-trait.def
> > +++ b/gcc/cp/cp-trait.def
> > @@ -90,6 +90,7 @@ DEFTRAIT_EXPR (IS_DEDUCIBLE, "__is_deducible ", 2)
> >  DEFTRAIT_TYPE (REMOVE_CV, "__remove_cv", 1)
> >  DEFTRAIT_TYPE (REMOVE_REFERENCE, "__remove_reference", 1)
> >  DEFTRAIT_TYPE (REMOVE_CVREF, "__remove_cvref", 1)
> > +DEFTRAIT_TYPE (REMOVE_POINTER, "__remove_pointer", 1)
> >  DEFTRAIT_TYPE (UNDERLYING_TYPE,  "__underlying_type", 1)
> >  DEFTRAIT_TYPE (TYPE_PACK_ELEMENT, "__type_pack_element", -1)
> >
> > diff --git a/gcc/cp/semantics.cc b/gcc/cp/semantics.cc
> > index 8fb47fd179e..885c7a6fb64 100644
> > --- a/gcc/cp/semantics.cc
> > +++ b/gcc/cp/semantics.cc
> > @@ -12373,6 +12373,10 @@ finish_trait_type (cp_trait_kind kind, tree type1, 
> > tree type2,
> >if (TYPE_REF_P (type1))
> > type1 = TREE_TYPE (type1);
> >return cv_unqualified (type1);
> > +case CPTK_REMOVE_POINTER:
> > +  if (TYPE_PTR_P (type1))
> > +type1 = TREE_TYPE (type1);
> > +  return type1;
> >
> >  case CPTK_TYPE_PACK_ELEMENT:
> >return finish_type_pack_element (type1, type2, complain);
> > diff --git a/gcc/testsuite/g++.dg/ext/has-builtin-1.C 
> > b/gcc/testsuite/g++.dg/ext/has-builtin-1.C
> > index f343e153e56..e21e0a95509 100644
> > --- a/gcc/testsuite/g++.dg/ext/has-builtin-1.C
> > +++ b/gcc/testsuite/g++.dg/ext/has-builtin-1.C
> > @@ -146,3 +146,6 @@
> >  #if !__has_builtin (__remove_cvref)
> >  # error "__has_builtin (__remove_cvref) failed"
> >  #endif
> > +#if !__has_builtin (__remove_pointer)
> > +# error "__has_builtin (__remove_pointer) failed"
> > +#endif
> > diff --git a/gcc/testsuite/g++.dg/ext/remove_pointer.C 
> > b/gcc/testsuite/g++.dg/ext/remove_pointer.C
> > new file mode 100644
> > index 000..7b13db93950
> > --- /dev/null
> > +++ b/gcc/testsuite/g++.dg/ext/remove_pointer.C
> > @@ -0,0 +1,51 @@
> > +// { dg-do compile { target c++11 } }
> > +
> > +#define SA(X) static_assert((X),#X)
> > +
> > +SA(__is_same(__remove_pointer(int), int));
> > +SA(__is_same(__remove_pointer(int*), int));
> > +SA(__is_same(__remove_pointer(int**), int*));
> > +
> > +SA(__is_same(__remove_pointer(const int*), const int));
> > +SA(__is_same(__remove_pointer(const int**), const int*));
> > +SA(__is_same(__remove_pointer(int* const), int));
> > +SA(__is_same(__remove_pointer(int** const), int*));
> > +SA(__is_same(__remove_pointer(int* const* const), int* const));
> > +
> > +SA(__is_same(__remove_pointer(volatile int*), volatile int));
> > +SA(__is_same(__remove_pointer(volatile int**), volatile int*));
> > +SA(__is_same(__remove_pointer(int* volatile), int));
> > +SA(__is_same(__remove_pointer(int** volatile), int*));
> > +SA(__is_same(__remove_pointer(int* volatile* volatile), int* volatile));
> > +
> > +SA(__is_same(__remove_pointer(const volatile int*), const volatile int));
> > +SA(__is_same(__remove_pointer(const volatile int**), const volatile int*));
> > +SA(__is_same(__remove_pointer(const int* volatile), const int));
> > +SA(__is_same(__remove_pointer(volatile int* const), volatile int));
> > +SA(__is_same(__remove_pointer(int* con

Re: [PATCH v7 0/6] c++, libstdc++: get std::is_object to dispatch to new built-in traits

2023-06-20 Thread Ken Matsui via Gcc-patches
Just a quick update, the benchmark code link has been updated and can
now be accessed at
https://github.com/ken-matsui/gcc-benches/blob/main/is_object.cc. I
have also created a report file which can be found at
https://github.com/ken-matsui/gcc-benches/blob/main/is_object.md.

On Thu, Jun 15, 2023 at 3:49 AM Ken Matsui  wrote:
>
> Hi,
>
> For those curious about the performance improvements of this patch, I
> conducted a benchmark that instantiates 256k specializations of
> is_object_v based on Patrick's code. You can find the benchmark code
> at this link:
>
> https://github.com/ken-matsui/gcc-benches/blob/main/is_object_benchmark.cc
>
> On my computer, using the gcc HEAD of this patch for a release build,
> the patch with -DUSE_BUILTIN took 64% less time and used 44-47% less
> memory compared to not using it.
>
> Sincerely,
> Ken Matsui
>
> On Mon, Jun 12, 2023 at 3:49 PM Ken Matsui  wrote:
> >
> > Hi,
> >
> > This patch series gets std::is_object to dispatch to built-in traits and
> > implements the following built-in traits, on which std::is_object depends.
> >
> > * __is_reference
> > * __is_function
> > * __is_void
> >
> > std::is_object was depending on them with disjunction and negation.
> >
> > __not_<__or_<is_function<_Tp>, is_reference<_Tp>, is_void<_Tp>>>::type
> >
> > Therefore, this patch uses them directly instead of implementing an 
> > additional
> > built-in trait __is_object, which makes the compiler slightly bigger and
> > slower.
> >
> > __bool_constant<!(__is_function(_Tp) || __is_reference(_Tp) || __is_void(_Tp))>
> >
> > This would instantiate only __bool_constant<true> and 
> > __bool_constant<false>,
> > which can be mostly shared. That is, the purpose of built-in traits is
> > considered as achieved.
> >
> > Changes in v7
> >
> > * Removed an unnecessary new line.
> >
> > Ken Matsui (6):
> >   c++: implement __is_reference built-in trait
> >   libstdc++: use new built-in trait __is_reference for std::is_reference
> >   c++: implement __is_function built-in trait
> >   libstdc++: use new built-in trait __is_function for std::is_function
> >   c++, libstdc++: implement __is_void built-in trait
> >   libstdc++: make std::is_object dispatch to new built-in traits
> >
> >  gcc/cp/constraint.cc  |  9 +++
> >  gcc/cp/cp-trait.def   |  3 +
> >  gcc/cp/semantics.cc   | 12 
> >  gcc/testsuite/g++.dg/ext/has-builtin-1.C  |  9 +++
> >  gcc/testsuite/g++.dg/ext/is_function.C| 58 +++
> >  gcc/testsuite/g++.dg/ext/is_reference.C   | 34 +++
> >  gcc/testsuite/g++.dg/ext/is_void.C| 35 +++
> >  gcc/testsuite/g++.dg/tm/pr46567.C |  6 +-
> >  libstdc++-v3/include/bits/cpp_type_traits.h   | 15 -
> >  libstdc++-v3/include/debug/helper_functions.h |  5 +-
> >  libstdc++-v3/include/std/type_traits  | 51 
> >  11 files changed, 216 insertions(+), 21 deletions(-)
> >  create mode 100644 gcc/testsuite/g++.dg/ext/is_function.C
> >  create mode 100644 gcc/testsuite/g++.dg/ext/is_reference.C
> >  create mode 100644 gcc/testsuite/g++.dg/ext/is_void.C
> >
> > --
> > 2.41.0
> >


Re: [PATCH V3] RISC-V: Optimize codegen of VLA SLP

2023-06-20 Thread Jeff Law via Gcc-patches




On 6/20/23 03:01, Robin Dapp wrote:

LGTM.

Likewise -- that V2/V3 is a nice improvement over the original V1 approach.

jeff


Re: [PATCH] RISC-V: Add tuple vector mode psABI checking and simplify code

2023-06-20 Thread Lehua Ding
> Actually they are already in for a bit :)
> 51795b910737 (Robin Dapp 2023-06-01 14:18:57 +0200  1) /* { dg-do 
compile } */
>I thought something is special about them that they somehow didn't run
> on your machine or so.


The time I mentioned is the commit time shown at this link:
https://gcc.gnu.org/git/?p=gcc.git;a=commit;h=51795b91073798c718df6fafb01303861641a5af.


authorRobin Dapp 

Re: [PATCH] RISC-V: Fix compiler warning of riscv_arg_has_vector

2023-06-20 Thread Jeff Law via Gcc-patches




On 6/20/23 04:56, Robin Dapp wrote:

Could you merge it ?
By the way, could Lehua get the write access?


IMHO nothing stands in the way but I'll defer to Jeff to have
the "official seal" :)
Once he ACKs Lehua needs to go the usual way of requesting
sourceware access via https://sourceware.org/cgi-bin/pdw/ps_form.cgi.
Lehua fills out that form.  List me as the approver and the process will 
run from there.  Takes a day or two for everything to get into place.


jeff

ps.  If Lehua has already filled out the form with Robin as the 
approver, that's fine too.  Might take a bit longer as I suspect the IT 
folks may not recognize Robin.


Re: [PATCH] RISC-V: Fix compiler warning of riscv_arg_has_vector

2023-06-20 Thread Robin Dapp via Gcc-patches
> Could you merge it ?

Committed.

Regards
 Robin


Re: [PATCH] RISC-V: Add tuple vector mode psABI checking and simplify code

2023-06-20 Thread Robin Dapp via Gcc-patches
> Oh, I think I know why. These cases of yours were added yesterday,
> while I submitted the patch the day before; Pan then helped me merge
> it yesterday, after your cases had gone in. Sorry for introducing this
> issue, I'll submit a new fix patch.

Actually they are already in for a bit :)
51795b910737 (Robin Dapp 2023-06-01 14:18:57 +0200  1) /* { dg-do compile } */

I thought something is special about them that they somehow didn't run
on your machine or so.

But no need for a new patch, thanks.  I already have it and will commit
it soon.

Regards
 Robin


Re: [COMMITTED] ada: Add CHERI intrinsic bindings and helper functions.

2023-06-20 Thread Alex Coplan via Gcc-patches
Hi,

On 20/06/2023 09:47, Marc Poulhiès via Gcc-patches wrote:
> From: Daniel King 
> 
> The package Interfaces.CHERI provides intrinsic bindings and
> helper functions to allow software to query, create, and
> manipulate CHERI capabilities.

I'm curious what the motivation for these intrinsic wrappers is, given that
GCC trunk doesn't currently support them. Out of interest, can you share what
the use case for these is?

Thanks,
Alex

> 
> gcc/ada/
> 
>   * libgnat/i-cheri.ads: Add CHERI intrinsics and helper functions.
>   * libgnat/i-cheri.adb: Likewise
> 
> Tested on x86_64-pc-linux-gnu, committed on master.
> 
> ---
>  gcc/ada/libgnat/i-cheri.adb |  75 ++
>  gcc/ada/libgnat/i-cheri.ads | 470 
>  2 files changed, 545 insertions(+)
>  create mode 100644 gcc/ada/libgnat/i-cheri.adb
>  create mode 100644 gcc/ada/libgnat/i-cheri.ads


Re: [PATCH] RISC-V: Add tuple vector mode psABI checking and simplify code

2023-06-20 Thread Lehua Ding
> Lehua, would they not show up in your test runs?  You fixed several
> other tests but these somehow not?


Oh, I think I know why. These cases of yours were added yesterday,
while I submitted the patch the day before; Pan then helped me merge
it yesterday, after your cases had gone in. Sorry for introducing this
issue, I'll submit a new fix patch.


Best,
Lehua

[PATCH] RISC-V: Implement autovec copysign.

2023-06-20 Thread Robin Dapp via Gcc-patches
Hi,

this adds vector copysign, ncopysign and xorsign as well as the
accompanying tests.

In order to easily match the ncopysign patterns I changed the
builtin implementation slightly.  Juzhe might want to comment
on that.  For now I kept the <nx> attribute's name even though
it doesn't emit an "n" anymore.

Regards
 Robin


gcc/ChangeLog:

* config/riscv/autovec.md (copysign3): Add expander.
(xorsign3): Ditto.
* config/riscv/riscv-vector-builtins-bases.cc (class vfsgnjn):
New class.
* config/riscv/vector-iterators.md (copysign): Remove ncopysign.
(xorsign): Ditto.
(n): Ditto.
(x): Ditto.
* config/riscv/vector.md (@pred_ncopysign): Split off.
(@pred_ncopysign_scalar): Ditto.

gcc/testsuite/ChangeLog:

* gcc.target/riscv/rvv/autovec/binop/copysign-run.c: New test.
* gcc.target/riscv/rvv/autovec/binop/copysign-rv64gcv.c: New test.
* gcc.target/riscv/rvv/autovec/binop/copysign-rv32gcv.c: New test.
* gcc.target/riscv/rvv/autovec/binop/copysign-template.h: New test.
* gcc.target/riscv/rvv/autovec/binop/copysign-zvfh-run.c: New test.
---
 gcc/config/riscv/autovec.md   | 43 +
 .../riscv/riscv-vector-builtins-bases.cc  | 18 +++-
 gcc/config/riscv/vector-iterators.md  |  9 +-
 gcc/config/riscv/vector.md| 43 +
 .../riscv/rvv/autovec/binop/copysign-run.c| 89 +++
 .../rvv/autovec/binop/copysign-rv32gcv.c  |  8 ++
 .../rvv/autovec/binop/copysign-rv64gcv.c  |  8 ++
 .../rvv/autovec/binop/copysign-template.h | 78 
 .../rvv/autovec/binop/copysign-zvfh-run.c | 83 +
 9 files changed, 371 insertions(+), 8 deletions(-)
 create mode 100644 
gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/copysign-run.c
 create mode 100644 
gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/copysign-rv32gcv.c
 create mode 100644 
gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/copysign-rv64gcv.c
 create mode 100644 
gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/copysign-template.h
 create mode 100644 
gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/copysign-zvfh-run.c

diff --git a/gcc/config/riscv/autovec.md b/gcc/config/riscv/autovec.md
index f1641d7e1ea..f2e69aaf102 100644
--- a/gcc/config/riscv/autovec.md
+++ b/gcc/config/riscv/autovec.md
@@ -804,3 +804,46 @@ (define_expand "3"
 riscv_vector::RVV_BINOP, operands);
   DONE;
 })
+
+;; 
---
+;;  [FP] Sign copying
+;; 
---
+;; Includes:
+;; - vfsgnj.vv/vfsgnjn.vv
+;; - vfsgnj.vf/vfsgnjn.vf
+;; 
---
+
+;; Leave the pattern like this as to still allow combine to match
+;; a negated copysign (see vector.md) before adding the UNSPEC_VPREDICATE 
later.
+(define_insn_and_split "copysign3"
+  [(set (match_operand:VF 0 "register_operand"  "=vd, vd, vr, vr")
+(unspec:VF
+ [(match_operand:VF 1 "register_operand"" vr, vr, vr, vr")
+ (match_operand:VF 2 "register_operand" " vr, vr, vr, vr")] 
UNSPEC_VCOPYSIGN))]
+  "TARGET_VECTOR && can_create_pseudo_p ()"
+  "#"
+  "&& 1"
+  [(const_int 0)]
+{
+  riscv_vector::emit_vlmax_insn (code_for_pred (UNSPEC_VCOPYSIGN, mode),
+riscv_vector::RVV_BINOP, operands);
+  DONE;
+}
+  [(set_attr "type" "vfsgnj")
+   (set_attr "mode" "")])
+
+;; 
---
+;; Includes:
+;; - vfsgnjx.vv
+;; - vfsgnjx.vf
+;; 
---
+(define_expand "xorsign3"
+  [(match_operand:VF_AUTO 0 "register_operand")
+(match_operand:VF_AUTO 1 "register_operand")
+(match_operand:VF_AUTO 2 "register_operand")]
+  "TARGET_VECTOR"
+{
+  riscv_vector::emit_vlmax_insn (code_for_pred (UNSPEC_VXORSIGN, mode),
+riscv_vector::RVV_BINOP, operands);
+  DONE;
+})
diff --git a/gcc/config/riscv/riscv-vector-builtins-bases.cc 
b/gcc/config/riscv/riscv-vector-builtins-bases.cc
index c6c53dc13a5..0313986f6b9 100644
--- a/gcc/config/riscv/riscv-vector-builtins-bases.cc
+++ b/gcc/config/riscv/riscv-vector-builtins-bases.cc
@@ -1212,7 +1212,7 @@ public:
   }
 };
 
-/* Implements vfsqrt7/vfrec7/vfclass/vfsgnj/vfsgnjn/vfsgnjx.  */
+/* Implements vfsqrt7/vfrec7/vfclass/vfsgnj/vfsgnjx.  */
 template
 class float_misc : public function_base
 {
@@ -1227,6 +1227,20 @@ public:
   }
 };
 
+/* Implements vfsgnjn.  */
+class vfsgnjn : public function_base
+{
+public:
+  rtx expand (function_expander &e) const override
+  {
+if (e.op_info->op == OP_TYPE_vf)
+  return e.use_exact_insn (code_for_pred_ncopysign_scalar (e.vector_mode 
()));
+if (e.op_info->op == OP_TYPE_vv)
+  r

Re: [PATCH] RISC-V: Fix vmul test expectation.

2023-06-20 Thread Robin Dapp via Gcc-patches
I just noticed there is also a -ffast-math missing in vadd-run.c
as well as one redundant in vrem-rv32gcv.c and added it to the
patch.

Going to commit the attached as obvious.

Regards
 Robin

Subject: [PATCH] RISC-V: testsuite: Fix vmul test expectation and fix 
-ffast-math.

I forgot to check for vfmul in the multiplication tests as well as
some -ffast-math arguments.  Fix this.

gcc/testsuite/ChangeLog:

* gcc.target/riscv/rvv/autovec/binop/vadd-run.c: Add
-ffast-math.
* gcc.target/riscv/rvv/autovec/binop/vadd-zvfh-run.c: Ditto.
* gcc.target/riscv/rvv/autovec/binop/vrem-rv32gcv.c: Remove
-ffast-math.
* gcc.target/riscv/rvv/autovec/binop/vmul-rv32gcv.c: Check for
vfmul.
* gcc.target/riscv/rvv/autovec/binop/vmul-rv64gcv.c: Ditto.
---
 gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/vadd-run.c | 2 +-
 .../gcc.target/riscv/rvv/autovec/binop/vadd-zvfh-run.c  | 2 +-
 gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/vmul-rv32gcv.c | 1 +
 gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/vmul-rv64gcv.c | 1 +
 gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/vrem-rv32gcv.c | 2 +-
 5 files changed, 5 insertions(+), 3 deletions(-)

diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/vadd-run.c 
b/gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/vadd-run.c
index 5db0a3c79be..12fb952118e 100644
--- a/gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/vadd-run.c
+++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/vadd-run.c
@@ -1,5 +1,5 @@
 /* { dg-do run { target { riscv_vector } } } */
-/* { dg-additional-options "-std=c99 -fno-vect-cost-model 
--param=riscv-autovec-preference=fixed-vlmax" } */
+/* { dg-additional-options "-std=c99 -fno-vect-cost-model 
--param=riscv-autovec-preference=fixed-vlmax -ffast-math" } */
 
 #include "vadd-template.h"
 
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/vadd-zvfh-run.c 
b/gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/vadd-zvfh-run.c
index 1a11fe0fb30..30b467f50c7 100644
--- a/gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/vadd-zvfh-run.c
+++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/vadd-zvfh-run.c
@@ -1,5 +1,5 @@
 /* { dg-do run { target { riscv_vector && riscv_zvfh_hw } } } */
-/* { dg-additional-options "-std=c99 -fno-vect-cost-model 
--param=riscv-autovec-preference=fixed-vlmax" } */
+/* { dg-additional-options "-std=c99 -fno-vect-cost-model 
--param=riscv-autovec-preference=fixed-vlmax -ffast-math" } */
 
 #include "vadd-template.h"
 
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/vmul-rv32gcv.c 
b/gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/vmul-rv32gcv.c
index 1900c21121b..7d3dfade0ee 100644
--- a/gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/vmul-rv32gcv.c
+++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/vmul-rv32gcv.c
@@ -4,3 +4,4 @@
 #include "vmul-template.h"
 
 /* { dg-final { scan-assembler-times {\tvmul\.vv} 16 } } */
+/* { dg-final { scan-assembler-times {\tvfmul\.vv} 6 } } */
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/vmul-rv64gcv.c 
b/gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/vmul-rv64gcv.c
index c8508bcc1f7..a549d6f7be4 100644
--- a/gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/vmul-rv64gcv.c
+++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/vmul-rv64gcv.c
@@ -4,3 +4,4 @@
 #include "vmul-template.h"
 
 /* { dg-final { scan-assembler-times {\tvmul\.vv} 16 } } */
+/* { dg-final { scan-assembler-times {\tvfmul\.vv} 6 } } */
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/vrem-rv32gcv.c 
b/gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/vrem-rv32gcv.c
index c6fe79e37b8..86607d03777 100644
--- a/gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/vrem-rv32gcv.c
+++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/vrem-rv32gcv.c
@@ -1,4 +1,4 @@
-/* { dg-additional-options "-std=c99 -fno-vect-cost-model -march=rv32gcv 
-mabi=ilp32d --param=riscv-autovec-preference=fixed-vlmax -ffast-math" } */
+/* { dg-additional-options "-std=c99 -fno-vect-cost-model -march=rv32gcv 
-mabi=ilp32d --param=riscv-autovec-preference=fixed-vlmax" } */
 
 #include "vrem-template.h"
 
-- 
2.40.1



Re: [PATCH][RFC] c-family: Implement __has_feature and __has_extension [PR60512]

2023-06-20 Thread Alex Coplan via Gcc-patches
Hi Iain,

On 14/05/2023 17:05, Iain Sandoe wrote:
> Hi Alex,
> 
> thanks for working on this.
> 
> I’ve applied this patch and evaluated on a few Darwin versions (which is the
> target currently most affected, I believe):
> 
> > On 9 May 2023, at 13:07, Alex Coplan  wrote:
> 
> > This patch implements clang's __has_feature and __has_extension in GCC.
> 
> Thanks, this blocks consuming Darwin SDK headers “properly” (PR 90709 as
> linked to  60512) (which is why I had a WIP patch too).
> 
> So I am very keen to see this land in GCC-14, but have some  issues to deal 
> with and would be looking for ideas about how to handle them by extending or
> amending the patch.
> 
> The main concern I have at the moment is that it seems to me that we need
> more flexible and general predicates for declaring feature/ext support:
> 
>   a) on target (see below for examples)
>   b) on potentially multiple flags and language version at the same time (see 
> below)
>   c) what about features that exist for a closed range of language versions?
> 
> As mentioned by Jakub in a conversation about this on irc (months ago!) the
> current identifiers potentially clash with user symbols.
> 
> IFF we add feature designations (which IMO we should, since this approach does
> help simplify testcases and configurations) we should add them into the
> implementation namespace:
> 
> e.g. ( for C) 
> _GNU_nested_functions or __nested_functions
> 
> > Currently the patch aims to implement all documented features (and some
> > undocumented ones) following the documentation at
> > https://clang.llvm.org/docs/LanguageExtensions.html
> 
> TL;DR 
> without guards or target-specific opt out this breaks bootstrap on Darwin.

Thanks for trying out the patch and pointing this out, this blocker has
now been addressed by relaxing the C++ parser as per
g:b106f11dc6adb8df15cc5c268896d314c76ca35f.

The patch can now survive bootstrap on Darwin (it looks like we'll need
to adjust some Objective-C++ tests in light of the new pedwarn, but that
looks to be straightforward).



> (one reason to allow target opt-in/out of specific features)
> 
> > with the following omissions:
> 
> > - Objective-C-specific features.
> 
> I can clearly append the objective-c(++) cases to the end of the respective
> lists, but then we need to make them conditional on language, version and
> dialect (some will not be appropriate to GNU runtime).
> 
> this is why I think we need more flexible predicates on declaring features
> and extensions.

Would it help mitigate these concerns if I implemented some Objective-C
features as part of this patch (say, those implemented by your WIP
patch)?

My feeling is that the vast majority of extensions / features have
similar logic, so we should exploit that redundancy to keep things terse
in the encoding for the general case. Where we need more flexible
predicates (e.g. for objc_nonfragile_abi in your WIP patch), those can
be handled on a case-by-case basis by adding a new enumerator and logic
to handle that specially.

What do you think, does that sound OK to you?

> 
> 
> 
> index 2b4c82facf7..5b8429244b2 100644
> --- a/gcc/c-family/c-common.cc
> +++ b/gcc/c-family/c-common.cc
> 
> +struct hf_feature_info
> 
> +  { "enumerator_attributes",   0, 0 },
> +  { "tls", 0, 0 },
> 
> Do all GCC targets support tls?

This is a good point. In clang, the features tls, c_thread_local, and
cxx_thread_local are all gated on whether the target supports TLS.
But in clang, it is a hard error to use TLS variables on a target which
doesn't support TLS. So it seems the features are used to check whether
code can make use of TLS constructs.

In GCC, AFAICT, TLS variables never get rejected, since GCC just uses
emulated TLS in the case that the target doesn't support TLS for real.

This then begs the question of how these features should be interpreted.
For c{,xx}_thread_local I'd expect that we want them to return true
whenever the language-level constructs are useable (even if we end up
using emutls).

I think it's defensible to take the position that GCC "always supports
TLS" since (AFAIK) you can make use of thread-local variables regardless
of whether the target really supports TLS (since you just get emutls if
it doesn't). So it's not clear which interpretation we should use for
the "tls" feature.

> 
> What about things like this:
> 
>  attribute_availability_tvos, 
>  attribute_availability_watchos, 
>  attribute_availability_driverkit, 

FWIW, clang looks to define these unconditionally, so restricting these
to a given target would be deviating from its precedent.

However, I don't think it would be hard to extend the implementation in
this patch to support target-specific features if required. I think
perhaps a langhook that targets can call to add their own features would
be a reasonable approach.

> ?
> 
> Even if they are implemented centrally, it is unlikely that all targets would 
> want
> to claim support (although note that the availabili

Re: [PATCH] VECT: Apply LEN_MASK_{LOAD,STORE} into vectorizer

2023-06-20 Thread Richard Biener via Gcc-patches
On Tue, 20 Jun 2023, juzhe.zh...@rivai.ai wrote:

> From: Ju-Zhe Zhong 
> 
> This patch applies LEN_MASK_{LOAD,STORE} in the vectorizer.
> I refactored the gimple IR building to make the code look cleaner.
> 
> gcc/ChangeLog:
> 
> * internal-fn.cc (expand_partial_store_optab_fn): Add 
> LEN_MASK_{LOAD,STORE} vectorizer support.
> (internal_load_fn_p): Ditto.
> (internal_store_fn_p): Ditto.
> (internal_fn_mask_index): Ditto.
> (internal_fn_stored_value_index): Ditto.
> (internal_len_load_store_bias): Ditto.
> * optabs-query.cc (can_vec_mask_load_store_p): Ditto.
> (get_len_load_store_mode): Ditto.
> * tree-vect-stmts.cc (check_load_store_for_partial_vectors): Ditto.
> (get_all_ones_mask): New function.
> (vectorizable_store): Add LEN_MASK_{LOAD,STORE} vectorizer support.
> (vectorizable_load): Ditto.
> 
> ---
>  gcc/internal-fn.cc |  35 +-
>  gcc/optabs-query.cc|  25 +++-
>  gcc/tree-vect-stmts.cc | 259 +
>  3 files changed, 213 insertions(+), 106 deletions(-)
> 
> diff --git a/gcc/internal-fn.cc b/gcc/internal-fn.cc
> index c911ae790cb..e10c21de5f1 100644
> --- a/gcc/internal-fn.cc
> +++ b/gcc/internal-fn.cc
> @@ -2949,7 +2949,7 @@ expand_partial_load_optab_fn (internal_fn, gcall *stmt, 
> convert_optab optab)
>   * OPTAB.  */
>  
>  static void
> -expand_partial_store_optab_fn (internal_fn, gcall *stmt, convert_optab optab)
> +expand_partial_store_optab_fn (internal_fn ifn, gcall *stmt, convert_optab 
> optab)
>  {
>class expand_operand ops[5];
>tree type, lhs, rhs, maskt, biast;
> @@ -2957,7 +2957,7 @@ expand_partial_store_optab_fn (internal_fn, gcall 
> *stmt, convert_optab optab)
>insn_code icode;
>  
>maskt = gimple_call_arg (stmt, 2);
> -  rhs = gimple_call_arg (stmt, 3);
> +  rhs = gimple_call_arg (stmt, internal_fn_stored_value_index (ifn));
>type = TREE_TYPE (rhs);
>lhs = expand_call_mem_ref (type, stmt, 0);
>  
> @@ -4435,6 +4435,7 @@ internal_load_fn_p (internal_fn fn)
>  case IFN_GATHER_LOAD:
>  case IFN_MASK_GATHER_LOAD:
>  case IFN_LEN_LOAD:
> +case IFN_LEN_MASK_LOAD:
>return true;
>  
>  default:
> @@ -4455,6 +4456,7 @@ internal_store_fn_p (internal_fn fn)
>  case IFN_SCATTER_STORE:
>  case IFN_MASK_SCATTER_STORE:
>  case IFN_LEN_STORE:
> +case IFN_LEN_MASK_STORE:
>return true;
>  
>  default:
> @@ -4494,6 +4496,10 @@ internal_fn_mask_index (internal_fn fn)
>  case IFN_MASK_STORE_LANES:
>return 2;
>  
> +case IFN_LEN_MASK_LOAD:
> +case IFN_LEN_MASK_STORE:
> +  return 3;
> +
>  case IFN_MASK_GATHER_LOAD:
>  case IFN_MASK_SCATTER_STORE:
>return 4;
> @@ -4519,6 +4525,9 @@ internal_fn_stored_value_index (internal_fn fn)
>  case IFN_LEN_STORE:
>return 3;
>  
> +case IFN_LEN_MASK_STORE:
> +  return 4;
> +
>  default:
>return -1;
>  }
> @@ -4583,13 +4592,31 @@ internal_len_load_store_bias (internal_fn ifn, 
> machine_mode mode)
>  {
>optab optab = direct_internal_fn_optab (ifn);
>insn_code icode = direct_optab_handler (optab, mode);
> +  int bias_argno = 3;
> +  if (icode == CODE_FOR_nothing)
> +{
> +  machine_mode mask_mode
> + = targetm.vectorize.get_mask_mode (mode).require ();
> +  if (ifn == IFN_LEN_LOAD)
> + {
> +   /* Try LEN_MASK_LOAD.  */
> +   optab = direct_internal_fn_optab (IFN_LEN_MASK_LOAD);
> + }
> +  else
> + {
> +   /* Try LEN_MASK_STORE.  */
> +   optab = direct_internal_fn_optab (IFN_LEN_MASK_STORE);
> + }
> +  icode = convert_optab_handler (optab, mode, mask_mode);
> +  bias_argno = 4;
> +}
>  
>if (icode != CODE_FOR_nothing)
>  {
>/* For now we only support biases of 0 or -1.  Try both of them.  */
> -  if (insn_operand_matches (icode, 3, GEN_INT (0)))
> +  if (insn_operand_matches (icode, bias_argno, GEN_INT (0)))
>   return 0;
> -  if (insn_operand_matches (icode, 3, GEN_INT (-1)))
> +  if (insn_operand_matches (icode, bias_argno, GEN_INT (-1)))
>   return -1;
>  }
>  
> diff --git a/gcc/optabs-query.cc b/gcc/optabs-query.cc
> index 276f8408dd7..4394d391200 100644
> --- a/gcc/optabs-query.cc
> +++ b/gcc/optabs-query.cc
> @@ -566,11 +566,14 @@ can_vec_mask_load_store_p (machine_mode mode,
>  bool is_load)
>  {
>optab op = is_load ? maskload_optab : maskstore_optab;
> +  optab len_op = is_load ? len_maskload_optab : len_maskstore_optab;
>machine_mode vmode;
>  
>/* If mode is vector mode, check it directly.  */
>if (VECTOR_MODE_P (mode))
> -return convert_optab_handler (op, mode, mask_mode) != CODE_FOR_nothing;
> +return convert_optab_handler (op, mode, mask_mode) != CODE_FOR_nothing
> +|| convert_optab_handler (len_op, mode, mask_mode)
> + != CODE_FOR_nothing;
>  
>/* Otherwise, return true if there is som

Re: [PATCH] RISC-V: Add tuple vector mode psABI checking and simplify code

2023-06-20 Thread Robin Dapp via Gcc-patches
> Committed, thanks Jeff.

The vec_set/vec_extract tests FAIL since this commit.  I'm going to
commit the attached as obvious.

Lehua, did they not show up in your test runs?  You fixed several
other tests, but somehow not these?

Regards
 Robin

Subject: [PATCH] RISC-V: testsuite: Add -Wno-psabi to vec_set/vec_extract
 testcases.

This fixes some fallout from the recent psabi changes.

gcc/testsuite/ChangeLog:

* gcc.target/riscv/rvv/autovec/vls-vlmax/vec_extract-1.c: Add
-Wno-psabi.
* gcc.target/riscv/rvv/autovec/vls-vlmax/vec_extract-2.c: Ditto.
* gcc.target/riscv/rvv/autovec/vls-vlmax/vec_extract-3.c: Ditto.
* gcc.target/riscv/rvv/autovec/vls-vlmax/vec_extract-4.c: Ditto.
* gcc.target/riscv/rvv/autovec/vls-vlmax/vec_extract-run.c:
Ditto.
* gcc.target/riscv/rvv/autovec/vls-vlmax/vec_set-1.c: Ditto.
* gcc.target/riscv/rvv/autovec/vls-vlmax/vec_set-2.c: Ditto.
* gcc.target/riscv/rvv/autovec/vls-vlmax/vec_set-3.c: Ditto.
* gcc.target/riscv/rvv/autovec/vls-vlmax/vec_set-4.c: Ditto.
* gcc.target/riscv/rvv/autovec/vls-vlmax/vec_set-run.c: Ditto.
---
 .../gcc.target/riscv/rvv/autovec/vls-vlmax/vec_extract-1.c  | 2 +-
 .../gcc.target/riscv/rvv/autovec/vls-vlmax/vec_extract-2.c  | 2 +-
 .../gcc.target/riscv/rvv/autovec/vls-vlmax/vec_extract-3.c  | 2 +-
 .../gcc.target/riscv/rvv/autovec/vls-vlmax/vec_extract-4.c  | 2 +-
 .../gcc.target/riscv/rvv/autovec/vls-vlmax/vec_extract-run.c| 2 +-
 .../gcc.target/riscv/rvv/autovec/vls-vlmax/vec_set-1.c  | 2 +-
 .../gcc.target/riscv/rvv/autovec/vls-vlmax/vec_set-2.c  | 2 +-
 .../gcc.target/riscv/rvv/autovec/vls-vlmax/vec_set-3.c  | 2 +-
 .../gcc.target/riscv/rvv/autovec/vls-vlmax/vec_set-4.c  | 2 +-
 .../gcc.target/riscv/rvv/autovec/vls-vlmax/vec_set-run.c| 2 +-
 10 files changed, 10 insertions(+), 10 deletions(-)

diff --git 
a/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls-vlmax/vec_extract-1.c 
b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls-vlmax/vec_extract-1.c
index 1a6e6dd83ee..34efd5f700a 100644
--- a/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls-vlmax/vec_extract-1.c
+++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls-vlmax/vec_extract-1.c
@@ -1,5 +1,5 @@
 /* { dg-do compile } */
-/* { dg-additional-options "-march=rv64gcv_zvfh -Wno-pedantic" } */
+/* { dg-additional-options "-march=rv64gcv_zvfh -Wno-pedantic -Wno-psabi" } */
 
 #include 
 
diff --git 
a/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls-vlmax/vec_extract-2.c 
b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls-vlmax/vec_extract-2.c
index 884c38e0bd8..5f3168a320a 100644
--- a/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls-vlmax/vec_extract-2.c
+++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls-vlmax/vec_extract-2.c
@@ -1,5 +1,5 @@
 /* { dg-do compile } */
-/* { dg-additional-options "-march=rv64gcv_zvfh -Wno-pedantic" } */
+/* { dg-additional-options "-march=rv64gcv_zvfh -Wno-pedantic -Wno-psabi" } */
 
 #include 
 
diff --git 
a/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls-vlmax/vec_extract-3.c 
b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls-vlmax/vec_extract-3.c
index 844ad392df0..7210327a4ff 100644
--- a/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls-vlmax/vec_extract-3.c
+++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls-vlmax/vec_extract-3.c
@@ -1,5 +1,5 @@
 /* { dg-do compile } */
-/* { dg-additional-options "-march=rv64gcv_zvfh -Wno-pedantic" } */
+/* { dg-additional-options "-march=rv64gcv_zvfh -Wno-pedantic -Wno-psabi" } */
 
 #include 
 
diff --git 
a/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls-vlmax/vec_extract-4.c 
b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls-vlmax/vec_extract-4.c
index 04c234e7d2d..c5cb56a88c7 100644
--- a/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls-vlmax/vec_extract-4.c
+++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls-vlmax/vec_extract-4.c
@@ -1,5 +1,5 @@
 /* { dg-do compile } */
-/* { dg-additional-options "-march=rv64gcv_zvfh -Wno-pedantic" } */
+/* { dg-additional-options "-march=rv64gcv_zvfh -Wno-pedantic -Wno-psabi" } */
 
 #include 
 
diff --git 
a/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls-vlmax/vec_extract-run.c 
b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls-vlmax/vec_extract-run.c
index dd22dae5eb9..43110c0bb8d 100644
--- a/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls-vlmax/vec_extract-run.c
+++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls-vlmax/vec_extract-run.c
@@ -1,5 +1,5 @@
 /* { dg-do run { target { riscv_vector } } } */
-/* { dg-additional-options "-std=c99 -Wno-pedantic" } */
+/* { dg-additional-options "-std=c99 -Wno-pedantic -Wno-psabi" } */
 
 #include 
 
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls-vlmax/vec_set-1.c 
b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls-vlmax/vec_set-1.c
index 4fb4e822b93..28f11150f8f 100644
--- a/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls-vlmax/vec_set-1.c
+++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls-vlmax/vec_set-1.

[committed] Fortran: Fix parse-dump-tree for OpenMP ALLOCATE clause

2023-06-20 Thread Tobias Burnus

A rather obvious fix. The allocator(...) bit showed up with -fdump-parse-tree.

Committed as Rev. r14-1988-g99e3214f582b08

Side remark:

Regarding the example inside the commit log: OpenMP 5.1 permitted derived-type
components. Due to global wording changes, OpenMP 5.2 disabled it for both
the new 'allocators' and for the old 'allocate' directive. This turned out to be
an accidental change and it will be permitted in a future OpenMP version again
(for 'allocators' as the executable form of the 'allocate' directive was 
deprecated
and then removed; in GCC, we will have to support it for both for legacy support
reasons). – While n->expr is now supported for the dump, it is currently 
unreachable
as dt-component parsing is disabled for both allocate and allocators. (To be 
changed.)

Tobias
-
Siemens Electronic Design Automation GmbH; Anschrift: Arnulfstraße 201, 80634 
München; Gesellschaft mit beschränkter Haftung; Geschäftsführer: Thomas 
Heurung, Frank Thürauf; Sitz der Gesellschaft: München; Registergericht 
München, HRB 106955
commit 99e3214f582b08b69b11b53eb3fc73b0919ef4f1
Author: Tobias Burnus 
Date:   Tue Jun 20 13:46:11 2023 +0200

Fortran: Fix parse-dump-tree for OpenMP ALLOCATE clause

Commit r14-1301-gd64e8e1224708e added u2.allocator to gfc_omp_namelist
    for better readability and to permit using namelist->expr for code
like the following:
  !$omp allocators allocate(align(32) : dt%alloc_comp)
allocate (dt%alloc_comp(5))
  !$omp allocate(dt%alloc_comp2) align(64)
allocate (dt%alloc_comp2(10))
However, for the parse-tree dump the change was incomplete.

gcc/fortran/ChangeLog:

* dump-parse-tree.cc (show_omp_namelist): Fix dump of the allocator
modifier of OMP_LIST_ALLOCATE.

diff --git a/gcc/fortran/dump-parse-tree.cc b/gcc/fortran/dump-parse-tree.cc
index 99c8bdaadce..effcebe9325 100644
--- a/gcc/fortran/dump-parse-tree.cc
+++ b/gcc/fortran/dump-parse-tree.cc
@@ -1370,31 +1370,34 @@ show_omp_namelist (int list_type, gfc_omp_namelist *n)
 		  fputc (list_type == OMP_LIST_AFFINITY ? ':' : ',', dumpfile);
 		}
 	}
 	  ns_iter = n->u2.ns;
 	}
   if (list_type == OMP_LIST_ALLOCATE)
 	{
-	  if (n->expr)
+	  if (n->u2.allocator)
 	{
 	  fputs ("allocator(", dumpfile);
 	  show_expr (n->u2.allocator);
 	  fputc (')', dumpfile);
 	}
 	  if (n->expr && n->u.align)
 	fputc (',', dumpfile);
 	  if (n->u.align)
 	{
 	  fputs ("align(", dumpfile);
 	  show_expr (n->u.align);
 	  fputc (')', dumpfile);
 	}
-	  if (n->expr || n->u.align)
+	  if (n->u2.allocator || n->u.align)
 	fputc (':', dumpfile);
-	  fputs (n->sym->name, dumpfile);
+	  if (n->expr)
+	show_expr (n->expr);
+	  else
+	fputs (n->sym->name, dumpfile);
 	  if (n->next)
 	fputs (") ALLOCATE(", dumpfile);
 	  continue;
 	}
   if (list_type == OMP_LIST_REDUCTION)
 	switch (n->u.reduction_op)
 	  {


[COMMITTED] ada: Further fixes to handling of private views in instances

2023-06-20 Thread Marc Poulhiès via Gcc-patches
From: Eric Botcazou 

This removes more bypasses for private views in instances that are present
in type predicates (Conforming_Types, Covers, Specific_Type and Wrong_Type),
which in exchange requires additional work in Sem_Ch12 to restore the proper
view of types during the instantiation of generic bodies.

The main mechanism for this is the Has_Private_View flag, but it comes with
the limitations that 1) there must be a direct reference to the global type
in the generic construct (either a reference to a global object of this type
or the explicit declaration of a local object of this type), which is not
always the case e.g. for loop parameters and 2) it can deal with a single
type at a time, e.g. it cannot deal with an array type and its component
type if their respective views are not the same in the instance.

To overcome the second limitation, a new Has_Secondary_Private_View flag
is introduced to deal with a secondary type, which as of this writing is
either the component type of an array type or the designated type of an
access type (together they make up the vast majority of the problematic
cases for the Has_Private_View flag alone). This new mechanism subsumes
a specific treatment for them that was added in Copy_Generic_Node a few
years ago, although a specific treatment still needs to be preserved for
comparison and equality operators in a narrower case.

Additional handling is also introduced to overcome the first limitation
for loop parameters in Copy_Generic_Node, and a relaxed condition is used
in Exp_Ch7.Convert_View to generate an unchecked conversion between views.

gcc/ada/

* exp_ch7.adb (Convert_View): Detect more cases of mismatches for
private types and use Implementation_Base_Type as main criterion.
* gen_il-fields.ads (Opt_Field_Enum): Add
Has_Secondary_Private_View.
* gen_il-gen-gen_nodes.adb (N_Expanded_Name): Likewise.
(N_Direct_Name): Likewise.
(N_Op): Likewise.
* sem_ch12.ads (Check_Private_View): Document the usage of second
flag Has_Secondary_Private_View.
* sem_ch12.adb (Get_Associated_Entity): New function to retrieve
the ultimate associated entity, if any.
(Check_Private_View): Implement Has_Secondary_Private_View
support.
(Copy_Generic_Node): Remove specific treatment for Component_Type
of an array type and Designated_Type of an access type. Add
specific treatment for comparison and equality operators, as well
as iterator and loop parameter specifications.
(Instantiate_Type): Implement Has_Secondary_Private_View support.
(Requires_Delayed_Save): Call Get_Associated_Entity.
(Set_Global_Type): Implement Has_Secondary_Private_View support.
* sem_ch6.adb (Conforming_Types): Remove bypass for private views
in instances.
* sem_type.adb (Covers): Return true if Is_Subtype_Of does so.
Remove bypass for private views in instances.
(Specific_Type): Likewise.
* sem_util.adb (Wrong_Type): Likewise.
* sinfo.ads (Has_Secondary_Private_View): Document new flag.

Tested on x86_64-pc-linux-gnu, committed on master.

---
 gcc/ada/exp_ch7.adb  |  12 +-
 gcc/ada/gen_il-fields.ads|   1 +
 gcc/ada/gen_il-gen-gen_nodes.adb |   7 +-
 gcc/ada/sem_ch12.adb | 295 ---
 gcc/ada/sem_ch12.ads |   4 +-
 gcc/ada/sem_ch6.adb  |  17 +-
 gcc/ada/sem_type.adb |  31 ++--
 gcc/ada/sem_util.adb |  50 --
 gcc/ada/sinfo.ads|  39 ++--
 9 files changed, 219 insertions(+), 237 deletions(-)

diff --git a/gcc/ada/exp_ch7.adb b/gcc/ada/exp_ch7.adb
index f82301c0acd..1b16839ddf3 100644
--- a/gcc/ada/exp_ch7.adb
+++ b/gcc/ada/exp_ch7.adb
@@ -4413,11 +4413,13 @@ package body Exp_Ch7 is
   if Is_Abstract_Subprogram (Proc) and then Is_Tagged_Type (Ftyp) then
  return Unchecked_Convert_To (Class_Wide_Type (Ftyp), Arg);
 
-  elsif Ftyp /= Atyp
-and then Present (Atyp)
-and then (Is_Private_Type (Ftyp) or else Is_Private_Type (Atyp))
-and then Base_Type (Underlying_Type (Atyp)) =
- Base_Type (Underlying_Type (Ftyp))
+  elsif Present (Atyp)
+and then Atyp /= Ftyp
+and then (Is_Private_Type (Ftyp)
+   or else Is_Private_Type (Atyp)
+   or else Is_Private_Type (Base_Type (Atyp)))
+and then Implementation_Base_Type (Atyp) =
+ Implementation_Base_Type (Ftyp)
   then
  return Unchecked_Convert_To (Ftyp, Arg);
 
diff --git a/gcc/ada/gen_il-fields.ads b/gcc/ada/gen_il-fields.ads
index c62523d9075..a017f45d9a6 100644
--- a/gcc/ada/gen_il-fields.ads
+++ b/gcc/ada/gen_il-fields.ads
@@ -210,6 +210,7 @@ package Gen_IL.Fields is
   Has_Pragma_Suppress_All,
   Has_Private_View,
   Has_Relative_Deadline_Pragma,
+  Has_Secondary_Private_View,
   

[COMMITTED] ada: Minor tweaks

2023-06-20 Thread Marc Poulhiès via Gcc-patches
From: Eric Botcazou 

gcc/ada/

* gcc-interface/decl.cc (gnat_to_gnu_entity) : Pass
the NULL_TREE explicitly and test imported_p in lieu of
Is_Imported. : Remove public_flag local variable and
make extern_flag local variable a constant.

Tested on x86_64-pc-linux-gnu, committed on master.

---
 gcc/ada/gcc-interface/decl.cc | 14 ++
 1 file changed, 6 insertions(+), 8 deletions(-)

diff --git a/gcc/ada/gcc-interface/decl.cc b/gcc/ada/gcc-interface/decl.cc
index b2b77787bc0..494b24e2111 100644
--- a/gcc/ada/gcc-interface/decl.cc
+++ b/gcc/ada/gcc-interface/decl.cc
@@ -1162,7 +1162,7 @@ gnat_to_gnu_entity (Entity_Id gnat_entity, tree gnu_expr, 
bool definition)
 
gnu_expr = build_unary_op (ADDR_EXPR, gnu_type, gnu_expr);
 
-   create_var_decl (gnu_entity_name, gnu_ext_name,
+   create_var_decl (gnu_entity_name, NULL_TREE,
 TREE_TYPE (gnu_expr), gnu_expr,
 const_flag, Is_Public (gnat_entity),
 imported_p, static_flag, volatile_flag,
@@ -1533,7 +1533,7 @@ gnat_to_gnu_entity (Entity_Id gnat_entity, tree gnu_expr, 
bool definition)
 
/* If this name is external or a name was specified, use it, but don't
   use the Interface_Name with an address clause (see cd30005).  */
-   if ((Is_Public (gnat_entity) && !Is_Imported (gnat_entity))
+   if ((Is_Public (gnat_entity) && !imported_p)
|| (Present (Interface_Name (gnat_entity))
&& No (Address_Clause (gnat_entity
  gnu_ext_name = create_concat_name (gnat_entity, NULL);
@@ -3977,10 +3977,9 @@ gnat_to_gnu_entity (Entity_Id gnat_entity, tree 
gnu_expr, bool definition)
  = gnu_ext_name_for_subprog (gnat_entity, gnu_entity_name);
const enum inline_status_t inline_status
  = inline_status_for_subprog (gnat_entity);
-   bool public_flag = Is_Public (gnat_entity) || imported_p;
/* Subprograms marked both Intrinsic and Always_Inline need not
   have a body of their own.  */
-   bool extern_flag
+   const bool extern_flag
  = ((Is_Public (gnat_entity) && !definition)
 || imported_p
 || (Is_Intrinsic_Subprogram (gnat_entity)
@@ -4135,10 +4134,9 @@ gnat_to_gnu_entity (Entity_Id gnat_entity, tree 
gnu_expr, bool definition)
else
  gnu_decl
= create_subprog_decl (gnu_entity_name, gnu_ext_name,
-  gnu_type, gnu_param_list,
-  inline_status, public_flag,
-  extern_flag, artificial_p,
-  debug_info_p,
+  gnu_type, gnu_param_list, inline_status,
+  Is_Public (gnat_entity) || imported_p,
+  extern_flag, artificial_p, debug_info_p,
   definition && imported_p, attr_list,
   gnat_entity);
  }
-- 
2.40.0



  1   2   >