date:20221024

On 10/25/22 07:01, Martin Liška wrote:
> |More riscv_get_valid_option_values out of|

* Move

[PATCH] riscv: fix cross compiler

Move riscv_get_valid_option_values out of the #ifdef.

Fixes:
riscv/riscv-common.cc:1748:40: error: ‘riscv_get_valid_option_values’ was not 
declared in this scope

Now I get:

./xgcc -B. --completion=-mcp
-mcpu=sifive-e20
-mcpu=sifive-e21
-mcpu=sifive-e24
-mcpu=sifive-e31
-mcpu=sifive-e34
-mcpu=sifive-e76
-mcpu=sifive-s21
-mcpu=sifive-s51
-mcpu=sifive-s54
-mcpu=sifive-s76
-mcpu=sifive-u54
-mcpu=sifive-u74

./xgcc -B. --completion=-mtune
-mtune=rocket
-mtune=sifive-3-series
-mtune=sifive-5-series
-mtune=sifive-7-series
-mtune=thead-c906
-mtune=size
-mtune=sifive-e20
-mtune=sifive-e21
-mtune=sifive-e24
-mtune=sifive-e31
-mtune=sifive-e34
-mtune=sifive-e76
-mtune=sifive-s21
-mtune=sifive-s51
-mtune=sifive-s54
-mtune=sifive-s76
-mtune=sifive-u54
-mtune=sifive-u74

Ready for master?

Thanks,
Martin

gcc/ChangeLog:

* common/config/riscv/riscv-common.cc
  (riscv_get_valid_option_values): Get out of ifdef.
---
 gcc/common/config/riscv/riscv-common.cc | 8 
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/gcc/common/config/riscv/riscv-common.cc 
b/gcc/common/config/riscv/riscv-common.cc
index 697bfe435c8..dead3802f83 100644
--- a/gcc/common/config/riscv/riscv-common.cc
+++ b/gcc/common/config/riscv/riscv-common.cc
@@ -1691,6 +1691,10 @@ riscv_compute_multilib (
 return xstrdup (multilib_infos[best_match_multi_lib].path.c_str ());
 }
 
+#undef TARGET_COMPUTE_MULTILIB
+#define TARGET_COMPUTE_MULTILIB riscv_compute_multilib
+#endif
+
 vec
 riscv_get_valid_option_values (int option_code,
   const char *prefix ATTRIBUTE_UNUSED)
@@ -1726,10 +1730,6 @@ riscv_get_valid_option_values (int option_code,
   return v;
 }
 
-#undef TARGET_COMPUTE_MULTILIB
-#define TARGET_COMPUTE_MULTILIB riscv_compute_multilib
-#endif
-
 /* Implement TARGET_OPTION_OPTIMIZATION_TABLE.  */
 static const struct default_options riscv_option_optimization_table[] =
   {
-- 
2.38.0

Re: [PATCH] [x86_64] Zhaoxin lujiazui enablement

Hello.

I noticed this patch set which is kind of related to 
https://gcc.gnu.org/bugzilla/show_bug.cgi?id=107364.

And I have a couple of questions:

1) I noticed you drop AVX and F16C features for the newly added "lujiazui". Why 
do you need it?
   I would expect these features would be properly detected by cpuid?

2) If you really need it, can you please test for me the attached patch? It 
should come up
   with a new function.

3) I have a question about:

  else if (vendor == signature_CENTAUR_ebx && family < 0x07)
cpu_model->__cpu_vendor = VENDOR_CENTAUR;
  else if (vendor == signature_SHANGHAI_ebx
|| vendor == signature_CENTAUR_ebx)

Are there any signature_CENTAUR_ebx models with family == 0x7 ?
Similarly, are there any signature_SHANGHAI_ebx models with family < 0x7 ?

Thanks,
Martin

From fa0bd99da8fd92b15a2cee55737a5962657da212 Mon Sep 17 00:00:00 2001
From: Martin Liska 
Date: Tue, 25 Oct 2022 06:28:44 +0200
Subject: [PATCH] i386: add reset_cpu_feature

gcc/ChangeLog:

	* common/config/i386/cpuinfo.h (has_cpu_feature): Add comment.
	(reset_cpu_feature): New.
	(get_zhaoxin_cpu): Use reset_cpu_feature.
---
 gcc/common/config/i386/cpuinfo.h | 38 +++-
 1 file changed, 33 insertions(+), 5 deletions(-)

diff --git a/gcc/common/config/i386/cpuinfo.h b/gcc/common/config/i386/cpuinfo.h
index d45451c5704..19ea7132fd5 100644
--- a/gcc/common/config/i386/cpuinfo.h
+++ b/gcc/common/config/i386/cpuinfo.h
@@ -76,6 +76,8 @@ has_cpu_feature (struct __processor_model *cpu_model,
 }
 }
 
+/* Save FEATURE to either CPU_MODEL or CPU_FEATURES2.  */
+
 static inline void
 set_cpu_feature (struct __processor_model *cpu_model,
 		 unsigned int *cpu_features2,
@@ -100,6 +102,32 @@ set_cpu_feature (struct __processor_model *cpu_model,
 }
 }
 
+/* Drop FEATURE from either CPU_MODEL or CPU_FEATURES2.  */
+
+static inline void
+reset_cpu_feature (struct __processor_model *cpu_model,
+		   unsigned int *cpu_features2,
+		   enum processor_features feature)
+{
+  unsigned index, offset;
+  unsigned f = feature;
+
+  if (f < 32)
+{
+  /* The first 32 features.  */
+  cpu_model->__cpu_features[0] &= ~(1U << f);
+}
+  else
+{
+  /* The rest of features.  cpu_features2[i] contains features from
+	 (32 + i * 32) to (31 + 32 + i * 32), inclusively.  */
+  f -= 32;
+  index = f / 32;
+  offset = f % 32;
+  cpu_features2[index] &= ~(1U << offset);
+}
+}
+
 /* Get the specific type of AMD CPU and return AMD CPU name.  Return
NULL for unknown AMD CPU.  */
 
@@ -565,11 +593,11 @@ get_zhaoxin_cpu (struct __processor_model *cpu_model,
   cpu_model->__cpu_type = ZHAOXIN_FAM7H;
   if (model == 0x3b)
 	{
-	cpu = "lujiazui";
-	CHECK___builtin_cpu_is ("lujiazui");
-	cpu_model->__cpu_features[0] &= ~(1U <<(FEATURE_AVX & 31));
-	cpu_features2[0] &= ~(1U <<((FEATURE_F16C - 32) & 31));
-	cpu_model->__cpu_subtype = ZHAOXIN_FAM7H_LUJIAZUI;
+	  cpu = "lujiazui";
+	  CHECK___builtin_cpu_is ("lujiazui");
+	  reset_cpu_feature (cpu_model, cpu_features2, FEATURE_AVX);
+	  reset_cpu_feature (cpu_model, cpu_features2, FEATURE_F16C);
+	  cpu_model->__cpu_subtype = ZHAOXIN_FAM7H_LUJIAZUI;
 	}
   break;
 default:
-- 
2.38.0

[PATCH][pushed] i386: fix pedantic warning

Pushed. Sorry for the stupid error, I noticed the test became
UNRESOLVED, but I forgot to investigate that.

Cheers,
Martin

PR target/107364

gcc/ChangeLog:

* common/config/i386/i386-cpuinfo.h (enum processor_vendor):
Fix pedantic warning.
---
 gcc/common/config/i386/i386-cpuinfo.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/gcc/common/config/i386/i386-cpuinfo.h 
b/gcc/common/config/i386/i386-cpuinfo.h
index 586a1a2d0f6..761af2715f0 100644
--- a/gcc/common/config/i386/i386-cpuinfo.h
+++ b/gcc/common/config/i386/i386-cpuinfo.h
@@ -37,7 +37,7 @@ enum processor_vendor
 
   /* Maximum values must be at the end of this enum.  */
   VENDOR_MAX,
-  BUILTIN_VENDOR_MAX = VENDOR_OTHER,
+  BUILTIN_VENDOR_MAX = VENDOR_OTHER
 };
 
 /* Any new types or subtypes have to be inserted at the end. */
-- 
2.38.0

[r13-3463 Regression] FAIL: gcc.target/i386/builtin_target.c (test for excess errors) on Linux/x86_64

2022-10-24 Thread haochen.jiang via Gcc-patches

On Linux/x86_64,

f751bf4c5d1aaa1aacfcbdec62881c5ea1175dfb is the first bad commit
commit f751bf4c5d1aaa1aacfcbdec62881c5ea1175dfb
Author: Martin Liska 
Date:   Mon Oct 24 15:34:39 2022 +0200

x86: fix VENDOR_MAX enum value

caused

FAIL: gcc.target/i386/builtin_target.c (test for excess errors)

with GCC configured with

../../gcc/configure 
--prefix=/export/users/haochenj/src/gcc-bisect/master/master/r13-3463/usr 
--enable-clocale=gnu --with-system-zlib --with-demangler-in-ld 
--with-fpmath=sse --enable-languages=c,c++,fortran --enable-cet --without-isl 
--enable-libmpx x86_64-linux --disable-bootstrap

To reproduce:

$ cd {build_dir}/gcc && make check 
RUNTESTFLAGS="i386.exp=gcc.target/i386/builtin_target.c 
--target_board='unix{-m32}'"
$ cd {build_dir}/gcc && make check 
RUNTESTFLAGS="i386.exp=gcc.target/i386/builtin_target.c 
--target_board='unix{-m32\ -march=cascadelake}'"
$ cd {build_dir}/gcc && make check 
RUNTESTFLAGS="i386.exp=gcc.target/i386/builtin_target.c 
--target_board='unix{-m64}'"
$ cd {build_dir}/gcc && make check 
RUNTESTFLAGS="i386.exp=gcc.target/i386/builtin_target.c 
--target_board='unix{-m64\ -march=cascadelake}'"

(Please do not reply to this email; for questions about this report, contact me
at haochen dot jiang at intel.com)

[PATCH] RISC-V: ADJUST_NUNITS according to -march.

2022-10-24 Thread juzhe . zhong

From: Ju-Zhe Zhong 

This patch fixed PR107357: https://gcc.gnu.org/bugzilla/show_bug.cgi?id=107357
gcc/ChangeLog:

* config/riscv/riscv-modes.def (VECTOR_BOOL_MODE): Set to minimum size.
(ADJUST_NUNITS): Adjust according to -march.
(ADJUST_BYTESIZE): Ditto.
* config/riscv/riscv-protos.h (riscv_v_ext_enabled_vector_mode_p): 
Remove.
(riscv_v_ext_vector_mode_p): Change function implementation.
* config/riscv/riscv-vector-builtins.cc (rvv_switcher::rvv_switcher): 
Change to riscv_v_ext_vector_mode_p.
(register_builtin_type): Ditto.
* config/riscv/riscv.cc (riscv_v_ext_vector_mode_p): Change to enabled 
modes.
(ENTRY): Ditto.
(riscv_v_ext_enabled_vector_mode_p): Remove.
(riscv_v_adjust_nunits): New function.
(riscv_vector_mode_supported_p): Use riscv_v_ext_vector_mode_p instead.
* config/riscv/riscv.h (riscv_v_adjust_nunits): New function.

---
 gcc/config/riscv/riscv-modes.def  | 63 ---
 gcc/config/riscv/riscv-protos.h   |  2 +-
 gcc/config/riscv/riscv-vector-builtins.cc |  4 +-
 gcc/config/riscv/riscv.cc | 33 +---
 gcc/config/riscv/riscv.h  |  1 +
 5 files changed, 50 insertions(+), 53 deletions(-)

diff --git a/gcc/config/riscv/riscv-modes.def b/gcc/config/riscv/riscv-modes.def
index ea88442e117..556b5c55253 100644
--- a/gcc/config/riscv/riscv-modes.def
+++ b/gcc/config/riscv/riscv-modes.def
@@ -37,21 +37,24 @@ FLOAT_MODE (TF, 16, ieee_quad_format);
| VNx32BI  | 1 | 2 |
| VNx64BI  | N/A   | 1 |  */
 
-VECTOR_BOOL_MODE (VNx1BI, 1, BI, 8);
-VECTOR_BOOL_MODE (VNx2BI, 2, BI, 8);
-VECTOR_BOOL_MODE (VNx4BI, 4, BI, 8);
-VECTOR_BOOL_MODE (VNx8BI, 8, BI, 8);
-VECTOR_BOOL_MODE (VNx16BI, 16, BI, 8);
-VECTOR_BOOL_MODE (VNx32BI, 32, BI, 8);
+/* For RVV modes, each boolean value occupies 1-bit.
+   4th argument is specify the minmial possible size of the vector mode,
+   and will adjust to the right size by ADJUST_BYTESIZE.  */
+VECTOR_BOOL_MODE (VNx1BI, 1, BI, 1);
+VECTOR_BOOL_MODE (VNx2BI, 2, BI, 1);
+VECTOR_BOOL_MODE (VNx4BI, 4, BI, 1);
+VECTOR_BOOL_MODE (VNx8BI, 8, BI, 1);
+VECTOR_BOOL_MODE (VNx16BI, 16, BI, 2);
+VECTOR_BOOL_MODE (VNx32BI, 32, BI, 4);
 VECTOR_BOOL_MODE (VNx64BI, 64, BI, 8);
 
-ADJUST_NUNITS (VNx1BI, riscv_vector_chunks * 1);
-ADJUST_NUNITS (VNx2BI, riscv_vector_chunks * 2);
-ADJUST_NUNITS (VNx4BI, riscv_vector_chunks * 4);
-ADJUST_NUNITS (VNx8BI, riscv_vector_chunks * 8);
-ADJUST_NUNITS (VNx16BI, riscv_vector_chunks * 16);
-ADJUST_NUNITS (VNx32BI, riscv_vector_chunks * 32);
-ADJUST_NUNITS (VNx64BI, riscv_vector_chunks * 64);
+ADJUST_NUNITS (VNx1BI, riscv_v_adjust_nunits (VNx1BImode, 1));
+ADJUST_NUNITS (VNx2BI, riscv_v_adjust_nunits (VNx2BImode, 2));
+ADJUST_NUNITS (VNx4BI, riscv_v_adjust_nunits (VNx4BImode, 4));
+ADJUST_NUNITS (VNx8BI, riscv_v_adjust_nunits (VNx8BImode, 8));
+ADJUST_NUNITS (VNx16BI, riscv_v_adjust_nunits (VNx16BImode, 16));
+ADJUST_NUNITS (VNx32BI, riscv_v_adjust_nunits (VNx32BImode, 32));
+ADJUST_NUNITS (VNx64BI, riscv_v_adjust_nunits (VNx64BImode, 64));
 
 ADJUST_ALIGNMENT (VNx1BI, 1);
 ADJUST_ALIGNMENT (VNx2BI, 1);
@@ -67,7 +70,7 @@ ADJUST_BYTESIZE (VNx4BI, riscv_vector_chunks * 
riscv_bytes_per_vector_chunk);
 ADJUST_BYTESIZE (VNx8BI, riscv_vector_chunks * riscv_bytes_per_vector_chunk);
 ADJUST_BYTESIZE (VNx16BI, riscv_vector_chunks * riscv_bytes_per_vector_chunk);
 ADJUST_BYTESIZE (VNx32BI, riscv_vector_chunks * riscv_bytes_per_vector_chunk);
-ADJUST_BYTESIZE (VNx64BI, riscv_vector_chunks * riscv_bytes_per_vector_chunk);
+ADJUST_BYTESIZE (VNx64BI, riscv_v_adjust_nunits (VNx64BImode, 8));
 
 /*
| Mode| MIN_VLEN=32 | MIN_VLEN=32 | MIN_VLEN=64 | MIN_VLEN=64 |
@@ -101,13 +104,13 @@ ADJUST_BYTESIZE (VNx64BI, riscv_vector_chunks * 
riscv_bytes_per_vector_chunk);
   VECTOR_MODES_WITH_PREFIX (VNx, INT, 8 * NVECS, 0);   
\
   VECTOR_MODES_WITH_PREFIX (VNx, FLOAT, 8 * NVECS, 0); 
\

\
-  ADJUST_NUNITS (VB##QI, riscv_vector_chunks * NVECS * 8); 
\
-  ADJUST_NUNITS (VH##HI, riscv_vector_chunks * NVECS * 4); 
\
-  ADJUST_NUNITS (VS##SI, riscv_vector_chunks * NVECS * 2); 
\
-  ADJUST_NUNITS (VD##DI, riscv_vector_chunks * NVECS); 
\
-  ADJUST_NUNITS (VH##HF, riscv_vector_chunks * NVECS * 4); 
\
-  ADJUST_NUNITS (VS##SF, riscv_vector_chunks * NVECS * 2); 
\
-  ADJUST_NUNITS (VD##DF, riscv_vector_chunks * NVECS); 
\
+  ADJUST_NUNITS (VB##QI, riscv_v_adjust_nunits (VB##QI##mode, NVECS * 8)); 
\
+  ADJUST_NUNITS (VH##HI, riscv_v_adjust_nunits (VH##HI##mode, NVECS * 4)); 
\
+  ADJUST_NUNITS (VS##SI, riscv_v_adjust_nunits (VS##SI##mode, NVECS * 2)); 
\
+  ADJUST_NUNITS

Re: [PATCH zero-call-used-regs] Add leafy mode for zero-call-used-regs

2022-10-24 Thread Alexandre Oliva via Gcc-patches

Hello, Qing,

It was a pleasure to meet you at the Cauldron.

On Oct 21, 2022, Qing Zhao  wrote:

> Hi, Alexandre,
> Could you please explain a little bit on the motivation of this patch first?

It was a suggestion I got after the Cauldron presentation.
It made sense to me, and was easy enough to implement.

'all' for leaf functions is likely wasteful.  If no other functions are
called, one can determine exactly which registers might carry
information out and thus need zeroing, and 'used' is thus likely enough,
depending on the purpose of register scrubbing.  (In some scenarios, it
might make sense to want scrubbing of all registers, even unused ones
that carry incoming values)

Though some functions are coded as leaf functions, others may become
leaf functions because of inlining or other optimizations.  It's hard
for users to predict, so it makes sense to have a mode that tells the
compiler to figure it out.

There's room for a follow-up improvement, to save on a little more
potentially-wasteful anti-leaking scrubbing even in non-leaf functions:
for this purpose, they need not scrub registers that they don't use
themselves, if all potential callees are known to have scrubbed them.

I have not (yet?) implemented this variant; I haven't even found a name
I'm happy with for it.  (seal?  plug?  cork?  another leak antonym?)

I'm not entirely happy with leafy either, FWIW.  Bikeshedding anyone? :-)

https://gcc.gnu.org/pipermail/gcc-patches/2022-October/604083.html

-- 
Alexandre Oliva, happy hacker                https://FSFLA.org/blogs/lxo/
   Free Software Activist   GNU Toolchain Engineer
Disinformation flourishes because many people care deeply about injustice
but very few check the facts.  Ask me about

[PATCH v6] tree-optimization/101186 - extend FRE with "equivalence map" for condition prediction

2022-10-24 Thread Di Zhao OS via Gcc-patches

Sorry for the late update. I've been on a vacation and then I
spent some time updating and verifying the patch.

Attached is a new version of the patch. There are some changes:

1. Store equivalences in a vn_pval chain in vn_ssa_aux, rather than
   in the expression hash table. (Following Richard's suggestion.)
2. Extracted insert_single_predicated_value function.
3. Simplify record_equiv_from_prev_phi a bit.
4. Changed some of the functions' names and tried to improve the
   comments a little.

Current status of the new testcases in the patch:

ssa-fre-200.c   Can also be optimized by evrp
ssa-fre-201.c   Not optimized in trunk.
ssa-fre-202.c   foo() can be removed by evrp; while x + b is not
folded.
ssa-pre-34.c    Not optimized in trunk.

Initially, this patch was motivated by removing the unreachable code
in cases like ssa-pre-34.c, in which we need to use the equivalence
relation produced by a preceding condition for another condition.
VRP didn't optimize that because it needs jump threading to make
the relation valid at the second condition.

After browsing the mechanisms of VRP and FRE, it seems to me there
are two options: 1) Teach VRP to identify related but not threaded
conditions. That might require introducing value-numbering into VRP 
to detect common expressions, and I think is too much for this. 
2) Introduce temporary equivalence in sccvn, which I thought would
change less on current code. (And along the reviews and updating
patch I see how ad-hoc it was.)

I saw from the talk about VN that there's a plan to replace predicated
values with ranger. So how is that going? Is there something I can help
with? (For the case ssa-pre-34.c, I think it may still need the
predicated-value support, to look up related conditional expressions.)

Below are my answers to the questions from the last review:

> >  /* Valid hashtables storing information we have proven to be
> > correct.  */
> > @@ -490,9 +492,9 @@ VN_INFO (tree name)
> > nary->predicated_values = 0;
> > nary->u.result = boolean_true_node;
> > vn_nary_op_insert_into (nary, valid_info->nary);
> > -   gcc_assert (nary->unwind_to == NULL);
> 
> why's that?  doesn't this mean unwinding will be broken?

Previously, the predicate "argument_x == NULL" or "argument_x != NULL"
was always new here (because argument_x's VN had just been inserted).
But with the patch, there can already be a slot for "argument_x == NULL"
or "argument_x != NULL". It won't break unwinding as the
new value is not linked to the unwind-chain.

> 
> > /* Also do not link it into the undo chain.  */
> > last_inserted_nary = nary->next;
> > +   /* There could be a predicate already.  */
> > nary->next = (vn_nary_op_t)(void *)-1;
> > nary = alloc_vn_nary_op_noinit (2, _tables_insert_obstack);
> > init_vn_nary_op_from_pieces (nary, 2, EQ_EXPR,

> >  /* Compute and return the hash value for nary operation VBO1.  */
> >  
> >  hashval_t
> > @@ -4226,6 +4342,9 @@ init_vn_nary_op_from_stmt (vn_nary_op_t vno, gassign 
> > *stmt)
> >for (i = 0; i < vno->length; ++i)
> > vno->op[i] = gimple_op (stmt, i + 1);
> >  }
> > +  /* Insert and lookup N-ary results by the operands' equivalence heads.  
> > */
> > +  if (gimple_bb (stmt))
> > +lookup_equiv_heads (vno->length, vno->op, vno->op, gimple_bb (stmt));
> 
> That seems like the wrong place, the function didn't even valueize before.

To utilize temp-equivalences and get a more simplified result, n-ary
expressions should always be inserted and looked up by the operands'
equivalence heads. So in practically all the places where
init_vn_nary_op_from_stmt is used, lookup_equiv_heads (changed to
get_equiv_heads) should be called. As I haven't found a better place
to put that, I just left it here in the patch.

> >  visit_nary_op (tree lhs, gassign *stmt)
> >  {
> >vn_nary_op_t vnresult;
> > -  tree result = vn_nary_op_lookup_stmt (stmt, );
> > -  if (! result && vnresult)
> > +  unsigned length = vn_nary_length_from_stmt (stmt);
> > +  vn_nary_op_t vno
> > += XALLOCAVAR (struct vn_nary_op_s, sizeof_vn_nary_op (length));
> > +  init_vn_nary_op_from_stmt (vno, stmt);
> > +  tree result = NULL_TREE;
> > +  /* Try to get a simplified result.  */
> > +  /* Do not simplify variable used in PHI at loop exit, or
> > + simplify_peeled_chrec/constant_after_peeling may miss the loop.  */
> > +  gimple *use_stmt;
> > +  use_operand_p use_p;
> > +  if (!(single_imm_use (lhs, _p, _stmt)
> > +   && gimple_code (use_stmt) == GIMPLE_PHI
> > +   && single_succ_p (gimple_bb (use_stmt))
> > +   && (single_succ_edge (gimple_bb (use_stmt))->flags & EDGE_DFS_BACK)))
> > +result = fold_const_from_equiv_heads (vno->length, vno->opcode, 
> > vno->op,
> > + vno->type);
> 
> copy propagating conditional equivalences has proved problematic, why
> do this at all when there's no actual simplification?  It's a bit odd that
> we need a special fold_const_from_equiv_heads

Re: [PATCH] Add -gcodeview option

2022-10-24 Thread Mark Harmstone


On 24/10/22 12:08, Martin Storsjö wrote:
Hmm, what does this end up passing to the linker in the end - does it just pass "-pdb="? (What does the "*" parameter do here?) If that's the case - that sounds reasonable - assuming that if a user passes an extra -Wl,--pdb,myspecificname.pdb, that would take precedence (i.e. be passed after the compiler's default one). 


That's right. The "*" means "all languages".

Mark

Re: [PATCH-2, rs6000] Reverse V8HI on Power8 by vector rotation [PR100866]

2022-10-24 Thread Segher Boessenkool

Hi!

On Mon, Oct 24, 2022 at 11:14:20AM +0800, HAO CHEN GUI wrote:
>   This patch implements V8HI byte reverse on Power8 by vector rotation.

Please put *byte* reverse as the commit subject as well?

> It should be more efficient than the original vector permute. The patch comes from
> Xionghu's comments in PR. I just added a test case for it.

Yeah, on all existing CPUs such a rotate is as fast or faster than a
permute insn.  And for bigger modes, we need more insns (two dependent
rotates for V4SI), and that is unlikely to be faster than a single
permutation, certainly not if code can be unrolled.

Okay for trunk.  Thanks!

Segher

Re: [PATCH 1/2] Add a parameter for the builtin function of prefetch to align with LLVM

2022-10-24 Thread Segher Boessenkool

On Mon, Oct 24, 2022 at 11:00:26AM +0100, Richard Sandiford wrote:
> Segher Boessenkool  writes:
> > On Thu, Oct 20, 2022 at 07:34:13AM +0000, Jiang, Haochen wrote:
> >> > > +  /* Argument 3 must be either zero or one.  */
> >> > > +  if (INTVAL (op3) != 0 && INTVAL (op3) != 1)
> >> > > +{
> >> > > +  warning (0, "invalid fourth argument to %<__builtin_prefetch%>;"
> >> > > +  " using one");
> >> > 
> >> > "using 1" makes sense maybe, but "using one" reads as "using an
> >> > argument", not very sane.
> >> > 
> >> > An error would be better here anyway?
> >> 
> >> Will change to 1 to avoid confusion in that. The reason why this is a 
> >> warning
> >> is because previous ones related to constant arguments out of range in 
> >> prefetch
> >> are also using warning.
> >
> > Please don't repeat historical mistakes.  You might not want to fix the
> > existing code (since that can in theory break existing user code), but
> > that is not a reason to punish users of a new feature as well ;-)
> 
> I agree an error would be appropriate for something like
> __builtin_clear_cache.  But __builtin_prefetch is a hint only.
> Nothing should break if the compiler simply evaluates the arguments
> and does nothing else.
> 
> Using a warning in that situation means that, if the ranges of
> parameters are increased in future, older compilers won't needlessly
> reject new code.

It means that if we want "2" to have a new meaning in the future, we can
not do that, since it will use the semantics of "1" on older compilers
(and that might well not be compatible).

And for what?  Is it ever so convenient for people to write random
numbers here?


Segher

[committed] analyzer: fix ICE on va_copy [PR107349]

Successfully bootstrapped & regrtested on x86_64-pc-linux-gnu.
Lightly tested with aarch64-unknown-linux.
Pushed to trunk as r13-3470-g687d11fd74e7de

gcc/analyzer/ChangeLog:
PR analyzer/107349
* varargs.cc (get_va_copy_arg): Fix the non-pointer case.

gcc/testsuite/ChangeLog:
PR analyzer/107349
* gcc.dg/analyzer/stdarg-1-ms_abi.c (pr107349): New.
* gcc.dg/analyzer/stdarg-1-sysv_abi.c (pr107349): New.
* gcc.dg/analyzer/stdarg-1.c (pr107349): New.

Signed-off-by: David Malcolm 
---
 gcc/analyzer/varargs.cc   | 5 ++---
 gcc/testsuite/gcc.dg/analyzer/stdarg-1-ms_abi.c   | 6 ++
 gcc/testsuite/gcc.dg/analyzer/stdarg-1-sysv_abi.c | 6 ++
 gcc/testsuite/gcc.dg/analyzer/stdarg-1.c  | 6 ++
 4 files changed, 20 insertions(+), 3 deletions(-)

diff --git a/gcc/analyzer/varargs.cc b/gcc/analyzer/varargs.cc
index 20c83dbbadc..e4dbad79f42 100644
--- a/gcc/analyzer/varargs.cc
+++ b/gcc/analyzer/varargs.cc
@@ -171,9 +171,8 @@ get_va_copy_arg (const region_model *model,
   const svalue *arg_sval = model->get_rvalue (arg, ctxt);
   if (const svalue *cast = arg_sval->maybe_undo_cast ())
 arg_sval = cast;
-  /* Expect a POINTER_TYPE; does it point to an array type?  */
-  gcc_assert (TREE_CODE (TREE_TYPE (arg)) == POINTER_TYPE);
-  if (TREE_CODE (TREE_TYPE (TREE_TYPE (arg))) == ARRAY_TYPE)
+  if (TREE_CODE (TREE_TYPE (arg)) == POINTER_TYPE
+  && TREE_CODE (TREE_TYPE (TREE_TYPE (arg))) == ARRAY_TYPE)
 {
   /* va_list_arg_type_node is a pointer to a va_list;
 return *ARG_SVAL.  */
diff --git a/gcc/testsuite/gcc.dg/analyzer/stdarg-1-ms_abi.c 
b/gcc/testsuite/gcc.dg/analyzer/stdarg-1-ms_abi.c
index b0143a7d3e3..e55f10de66b 100644
--- a/gcc/testsuite/gcc.dg/analyzer/stdarg-1-ms_abi.c
+++ b/gcc/testsuite/gcc.dg/analyzer/stdarg-1-ms_abi.c
@@ -435,3 +435,9 @@ void test_va_arg_after_return (void)
   __analyzer_called_by_test_va_arg_after_return (42, 1066);
   i = __builtin_va_arg (global_ap, int); /* { dg-warning "dereferencing 
pointer 'global_ap' to within stale stack frame" } */
 }
+
+void pr107349 (void)
+{
+  __builtin_ms_va_list x,y;
+  __builtin_ms_va_copy(x,y); /* { dg-warning "use of uninitialized value 'y'" 
} */
+} /* { dg-warning "missing call to 'va_end'" } */
diff --git a/gcc/testsuite/gcc.dg/analyzer/stdarg-1-sysv_abi.c 
b/gcc/testsuite/gcc.dg/analyzer/stdarg-1-sysv_abi.c
index 1dc97ea3a44..fb49b3584e2 100644
--- a/gcc/testsuite/gcc.dg/analyzer/stdarg-1-sysv_abi.c
+++ b/gcc/testsuite/gcc.dg/analyzer/stdarg-1-sysv_abi.c
@@ -435,3 +435,9 @@ void test_va_arg_after_return (void)
   __analyzer_called_by_test_va_arg_after_return (42, 1066);
   i = __builtin_va_arg (global_ap, int); /* { dg-warning "dereferencing 
pointer 'global_ap' to within stale stack frame" } */
 }
+
+void pr107349 (void)
+{
+  __builtin_sysv_va_list x,y;
+  __builtin_sysv_va_copy(x,y); /* { dg-warning "use of uninitialized value 
'y'" } */
+} /* { dg-warning "missing call to 'va_end'" } */
diff --git a/gcc/testsuite/gcc.dg/analyzer/stdarg-1.c 
b/gcc/testsuite/gcc.dg/analyzer/stdarg-1.c
index f23d28c5b89..f2766a50522 100644
--- a/gcc/testsuite/gcc.dg/analyzer/stdarg-1.c
+++ b/gcc/testsuite/gcc.dg/analyzer/stdarg-1.c
@@ -431,3 +431,9 @@ void test_va_arg_after_return (void)
   __analyzer_called_by_test_va_arg_after_return (42, 1066);
   i = __builtin_va_arg (global_ap, int); /* { dg-warning "dereferencing 
pointer 'global_ap' to within stale stack frame" } */
 }
+
+void pr107349 (void)
+{
+  __builtin_va_list x,y;
+  __builtin_va_copy(x,y); /* { dg-warning "use of uninitialized value 'y'" } */
+} /* { dg-warning "missing call to 'va_end'" } */
-- 
2.26.3

[committed] diagnostics: fix ICE in sarif output with NULL filename [PR107366]

Successfully bootstrapped & regrtested on x86_64-pc-linux-gnu.
Pushed to trunk as r13-3469-g2e8a0553918adc.

gcc/ChangeLog:
PR analyzer/107366
* diagnostic-format-sarif.cc
(sarif_builder::maybe_make_physical_location_object): Gracefully
reject locations with NULL filename.

gcc/testsuite/ChangeLog:
PR analyzer/107366
* gcc.dg/analyzer/sarif-pr107366.c: New test.

Signed-off-by: David Malcolm 
---
 gcc/diagnostic-format-sarif.cc|  2 +-
 .../gcc.dg/analyzer/sarif-pr107366.c  | 20 +++
 2 files changed, 21 insertions(+), 1 deletion(-)
 create mode 100644 gcc/testsuite/gcc.dg/analyzer/sarif-pr107366.c

diff --git a/gcc/diagnostic-format-sarif.cc b/gcc/diagnostic-format-sarif.cc
index fc28d160c38..7110db4edd6 100644
--- a/gcc/diagnostic-format-sarif.cc
+++ b/gcc/diagnostic-format-sarif.cc
@@ -595,7 +595,7 @@ sarif_builder::make_location_object (const diagnostic_event 
)
 json::object *
 sarif_builder::maybe_make_physical_location_object (location_t loc)
 {
-  if (loc <= BUILTINS_LOCATION)
+  if (loc <= BUILTINS_LOCATION || LOCATION_FILE (loc) == NULL)
 return NULL;
 
   json::object *phys_loc_obj = new json::object ();
diff --git a/gcc/testsuite/gcc.dg/analyzer/sarif-pr107366.c 
b/gcc/testsuite/gcc.dg/analyzer/sarif-pr107366.c
new file mode 100644
index 000..997cf56586d
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/analyzer/sarif-pr107366.c
@@ -0,0 +1,20 @@
+/* { dg-do compile } */
+/* { dg-additional-options "-fdiagnostics-format=sarif-file" } */
+
+typedef enum {
+  HWLOC_TOPOLOGY_DIFF_OBJ_ATTR_INFO
+} hwloc_topology_diff_obj_attr_type_t;
+enum { HWLOC_TOPOLOGY_DIFF_OBJ_ATTR } hwloc_apply_diff_one_diff_0_0;
+
+void
+hwloc_apply_diff_one() {
+  switch (hwloc_apply_diff_one_diff_0_0)
+  case HWLOC_TOPOLOGY_DIFF_OBJ_ATTR: {
+hwloc_topology_diff_obj_attr_type_t obj_attr_2_0_0;
+switch (obj_attr_2_0_0)
+case HWLOC_TOPOLOGY_DIFF_OBJ_ATTR_INFO: {
+  unsigned ii = 0;
+}
+  }
+}
+
-- 
2.26.3

[committed] analyzer: handle (NULL == ) [PR107345]

Successfully bootstrapped & regrtested on x86_64-pc-linux-gnu.
Pushed to trunk as r13-3468-g18faaeb3af42f3.

gcc/analyzer/ChangeLog:
PR analyzer/107345
* region-model.cc (region_model::eval_condition_without_cm):
Ensure that constants are on the right-hand side before checking
for them.

gcc/testsuite/ChangeLog:
PR analyzer/107345
* gcc.dg/analyzer/pr107345.c: New test.

Signed-off-by: David Malcolm 
---
 gcc/analyzer/region-model.cc | 15 ---
 gcc/testsuite/gcc.dg/analyzer/pr107345.c | 17 +
 2 files changed, 29 insertions(+), 3 deletions(-)
 create mode 100644 gcc/testsuite/gcc.dg/analyzer/pr107345.c

diff --git a/gcc/analyzer/region-model.cc b/gcc/analyzer/region-model.cc
index 608fcd58fab..7c44fc9e253 100644
--- a/gcc/analyzer/region-model.cc
+++ b/gcc/analyzer/region-model.cc
@@ -4212,10 +4212,19 @@ region_model::eval_condition_without_cm (const svalue 
*lhs,
/* Otherwise, only known through constraints.  */
   }
 
-  /* If we have a pair of constants, compare them.  */
   if (const constant_svalue *cst_lhs = lhs->dyn_cast_constant_svalue ())
-if (const constant_svalue *cst_rhs = rhs->dyn_cast_constant_svalue ())
-  return constant_svalue::eval_condition (cst_lhs, op, cst_rhs);
+{
+  /* If we have a pair of constants, compare them.  */
+  if (const constant_svalue *cst_rhs = rhs->dyn_cast_constant_svalue ())
+   return constant_svalue::eval_condition (cst_lhs, op, cst_rhs);
+  else
+   {
+ /* When we have one constant, put it on the RHS.  */
+ std::swap (lhs, rhs);
+ op = swap_tree_comparison (op);
+   }
+}
+  gcc_assert (lhs->get_kind () != SK_CONSTANT);
 
   /* Handle comparison against zero.  */
   if (const constant_svalue *cst_rhs = rhs->dyn_cast_constant_svalue ())
diff --git a/gcc/testsuite/gcc.dg/analyzer/pr107345.c 
b/gcc/testsuite/gcc.dg/analyzer/pr107345.c
new file mode 100644
index 000..540596d1182
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/analyzer/pr107345.c
@@ -0,0 +1,17 @@
+/* Ensure the analyzer treats (NULL == ) as being false for this case,
+   where the logic is sufficiently complicated to not be optimized away.  */
+
+#include 
+
+int main() {   
+  int e = 10086;
+  int *f = 
+  int g = 0;
+  int *h[2][1];
+  h[1][0] = f;
+  if (g == (h[1][0])) { /* { dg-warning "comparison between pointer and 
integer" } */
+unsigned int *i = 0;
+  }
+  printf("NPD_FLAG: %d\n ", *f);
+  return 0;
+}
-- 
2.26.3

[committed] analyzer: simplify sm_state_map lookup

Successfully bootstrapped & regrtested on x86_64-pc-linux-gnu.
Pushed to trunk as r13-3467-g53881c47e4b357.

gcc/analyzer/ChangeLog:
* engine.cc (impl_region_model_context::get_malloc_map): Replace
with...
(impl_region_model_context::get_state_map_by_name): ...this.
(impl_region_model_context::get_fd_map): Delete.
(impl_region_model_context::get_taint_map): Delete.
* exploded-graph.h (impl_region_model_context::get_fd_map):
Delete.
(impl_region_model_context::get_malloc_map): Delete.
(impl_region_model_context::get_taint_map): Delete.
(impl_region_model_context::get_state_map_by_name): New.
* region-model.h (region_model_context::get_state_map_by_name):
New vfunc.
(region_model_context::get_fd_map): Convert from vfunc to
function.
(region_model_context::get_malloc_map): Likewise.
(region_model_context::get_taint_map): Likewise.
(noop_region_model_context::get_state_map_by_name): New.
(noop_region_model_context::get_fd_map): Delete.
(noop_region_model_context::get_malloc_map): Delete.
(noop_region_model_context::get_taint_map): Delete.
(region_model_context_decorator::get_state_map_by_name): New.
(region_model_context_decorator::get_fd_map): Delete.
(region_model_context_decorator::get_malloc_map): Delete.
(region_model_context_decorator::get_taint_map): Delete.

Signed-off-by: David Malcolm 
---
 gcc/analyzer/engine.cc| 47 
 gcc/analyzer/exploded-graph.h | 13 ++
 gcc/analyzer/region-model.h   | 80 +++
 3 files changed, 48 insertions(+), 92 deletions(-)

diff --git a/gcc/analyzer/engine.cc b/gcc/analyzer/engine.cc
index a664a99eb78..52978dd0d37 100644
--- a/gcc/analyzer/engine.cc
+++ b/gcc/analyzer/engine.cc
@@ -214,50 +214,21 @@ impl_region_model_context::terminate_path ()
 }
 
 bool
-impl_region_model_context::get_malloc_map (sm_state_map **out_smap,
-  const state_machine **out_sm,
-  unsigned *out_sm_idx)
-{
-  unsigned malloc_sm_idx;
-  if (!m_ext_state.get_sm_idx_by_name ("malloc", _sm_idx))
-return false;
-
-  *out_smap = m_new_state->m_checker_states[malloc_sm_idx];
-  *out_sm = _ext_state.get_sm (malloc_sm_idx);
-  *out_sm_idx = malloc_sm_idx;
-  return true;
-}
-
-bool
-impl_region_model_context::get_fd_map (sm_state_map **out_smap,
-  const state_machine **out_sm,
-  unsigned *out_sm_idx)
-{
-  unsigned fd_sm_idx;
-  if (!m_ext_state.get_sm_idx_by_name ("file-descriptor", _sm_idx))
-return false;
-
-  *out_smap = m_new_state->m_checker_states[fd_sm_idx];
-  *out_sm = _ext_state.get_sm (fd_sm_idx);
-  *out_sm_idx = fd_sm_idx;
-  return true;
-}
-
-bool
-impl_region_model_context::get_taint_map (sm_state_map **out_smap,
- const state_machine **out_sm,
- unsigned *out_sm_idx)
+impl_region_model_context::get_state_map_by_name (const char *name,
+ sm_state_map **out_smap,
+ const state_machine **out_sm,
+ unsigned *out_sm_idx)
 {
   if (!m_new_state)
 return false;
 
-  unsigned taint_sm_idx;
-  if (!m_ext_state.get_sm_idx_by_name ("taint", _sm_idx))
+  unsigned sm_idx;
+  if (!m_ext_state.get_sm_idx_by_name (name, _idx))
 return false;
 
-  *out_smap = m_new_state->m_checker_states[taint_sm_idx];
-  *out_sm = _ext_state.get_sm (taint_sm_idx);
-  *out_sm_idx = taint_sm_idx;
+  *out_smap = m_new_state->m_checker_states[sm_idx];
+  *out_sm = _ext_state.get_sm (sm_idx);
+  *out_sm_idx = sm_idx;
   return true;
 }
 
diff --git a/gcc/analyzer/exploded-graph.h b/gcc/analyzer/exploded-graph.h
index ad278e277dc..5996252f1fb 100644
--- a/gcc/analyzer/exploded-graph.h
+++ b/gcc/analyzer/exploded-graph.h
@@ -96,15 +96,10 @@ class impl_region_model_context : public 
region_model_context
   {
 return _ext_state;
   }
-  bool get_fd_map (sm_state_map **out_smap,
-  const state_machine **out_sm,
-  unsigned *out_sm_idx) final override;
-  bool get_malloc_map (sm_state_map **out_smap,
-  const state_machine **out_sm,
-  unsigned *out_sm_idx) final override;
-  bool get_taint_map (sm_state_map **out_smap,
-  const state_machine **out_sm,
-  unsigned *out_sm_idx) final override;
+  bool get_state_map_by_name (const char *name,
+ sm_state_map **out_smap,
+ const state_machine **out_sm,
+ unsigned *out_sm_idx) override;
 
   const gimple *get_stmt () const override { return m_stmt; }

[committed] analyzer: handle "pipe" and "pipe2" [PR106300]

Successfully bootstrapped & regrtested on x86_64-pc-linux-gnu.
Pushed to trunk as r13-3466-g792f039fc37faa.

gcc/analyzer/ChangeLog:
PR analyzer/106300
* engine.cc (impl_region_model_context::get_fd_map): New.
* exploded-graph.h (impl_region_model_context::get_fd_map): New
decl.
* region-model-impl-calls.cc (region_model::impl_call_pipe): New.
* region-model.cc (region_model::update_for_int_cst_return): New,
based on...
(region_model::update_for_zero_return): ...this.  Reimplement in
terms of the former.
(region_model::on_call_pre): Handle "pipe" and "pipe2".
(region_model::on_call_post): Likewise.
* region-model.h (region_model::impl_call_pipe): New decl.
(region_model::update_for_int_cst_return): New decl.
(region_model::mark_as_valid_fd): New decl.
(region_model_context::get_fd_map): New pure virtual fn.
(noop_region_model_context::get_fd_map): New.
(region_model_context_decorator::get_fd_map): New.
* sm-fd.cc: Include "analyzer/program-state.h".
(fd_state_machine::describe_state_change): Handle transitions from
start state to valid states.
(fd_state_machine::mark_as_valid_fd): New.
(fd_state_machine::on_stmt): Add missing return for "creat".
(region_model::mark_as_valid_fd): New.

gcc/ChangeLog:
PR analyzer/106300
* doc/invoke.texi (Static Analyzer Options): Add "pipe" and
"pipe2" to the list of functions the analyzer has hardcoded
knowledge of.

gcc/testsuite/ChangeLog:
PR analyzer/106300
* gcc.dg/analyzer/pipe-1.c: New test.
* gcc.dg/analyzer/pipe-glibc.c: New test.
* gcc.dg/analyzer/pipe-manpages.c: New test.
* gcc.dg/analyzer/pipe2-1.c: New test.

Signed-off-by: David Malcolm 
---
 gcc/analyzer/engine.cc| 15 
 gcc/analyzer/exploded-graph.h |  3 +
 gcc/analyzer/region-model-impl-calls.cc   | 70 +
 gcc/analyzer/region-model.cc  | 35 -
 gcc/analyzer/region-model.h   | 26 ++-
 gcc/analyzer/sm-fd.cc | 56 +-
 gcc/doc/invoke.texi   |  1 +
 gcc/testsuite/gcc.dg/analyzer/pipe-1.c| 38 ++
 gcc/testsuite/gcc.dg/analyzer/pipe-glibc.c| 71 +
 gcc/testsuite/gcc.dg/analyzer/pipe-manpages.c | 76 +++
 gcc/testsuite/gcc.dg/analyzer/pipe2-1.c   | 38 ++
 11 files changed, 420 insertions(+), 9 deletions(-)
 create mode 100644 gcc/testsuite/gcc.dg/analyzer/pipe-1.c
 create mode 100644 gcc/testsuite/gcc.dg/analyzer/pipe-glibc.c
 create mode 100644 gcc/testsuite/gcc.dg/analyzer/pipe-manpages.c
 create mode 100644 gcc/testsuite/gcc.dg/analyzer/pipe2-1.c

diff --git a/gcc/analyzer/engine.cc b/gcc/analyzer/engine.cc
index 46bcaeda837..a664a99eb78 100644
--- a/gcc/analyzer/engine.cc
+++ b/gcc/analyzer/engine.cc
@@ -228,6 +228,21 @@ impl_region_model_context::get_malloc_map (sm_state_map 
**out_smap,
   return true;
 }
 
+bool
+impl_region_model_context::get_fd_map (sm_state_map **out_smap,
+  const state_machine **out_sm,
+  unsigned *out_sm_idx)
+{
+  unsigned fd_sm_idx;
+  if (!m_ext_state.get_sm_idx_by_name ("file-descriptor", _sm_idx))
+return false;
+
+  *out_smap = m_new_state->m_checker_states[fd_sm_idx];
+  *out_sm = _ext_state.get_sm (fd_sm_idx);
+  *out_sm_idx = fd_sm_idx;
+  return true;
+}
+
 bool
 impl_region_model_context::get_taint_map (sm_state_map **out_smap,
  const state_machine **out_sm,
diff --git a/gcc/analyzer/exploded-graph.h b/gcc/analyzer/exploded-graph.h
index 11e46cab160..ad278e277dc 100644
--- a/gcc/analyzer/exploded-graph.h
+++ b/gcc/analyzer/exploded-graph.h
@@ -96,6 +96,9 @@ class impl_region_model_context : public region_model_context
   {
 return _ext_state;
   }
+  bool get_fd_map (sm_state_map **out_smap,
+  const state_machine **out_sm,
+  unsigned *out_sm_idx) final override;
   bool get_malloc_map (sm_state_map **out_smap,
   const state_machine **out_sm,
   unsigned *out_sm_idx) final override;
diff --git a/gcc/analyzer/region-model-impl-calls.cc 
b/gcc/analyzer/region-model-impl-calls.cc
index 8f4940a4d55..52c4205cbeb 100644
--- a/gcc/analyzer/region-model-impl-calls.cc
+++ b/gcc/analyzer/region-model-impl-calls.cc
@@ -563,6 +563,76 @@ region_model::impl_call_memset (const call_details )
   fill_region (sized_dest_reg, fill_value_u8);
 }
 
+/* Handle the on_call_post part of "pipe".  */
+
+void
+region_model::impl_call_pipe (const call_details )
+{
+  class failure : public failed_call_info
+  {
+  public:
+failure (const call_details ) : failed_call_info (cd) {}
+
+bool update_model (region_model *model,
+

Re: Adding a new thread model to GCC

2022-10-24 Thread Jacek Caban via Gcc-patches

On 10/24/22 05:40, LIU Hao via Gcc-patches wrote:

在 2022/10/21 20:34, i.nix...@autistici.org 写道:

got it...
anyway it seems logical to me the way I proposed :)

Below is a message forwarded from mingw-w64-public, elaborating the
necessity of a new thread model.

As there are objections from other mingw-w64 developers, I am putting
those patches against mingw-w64-crt on hold for now. Despite that, all
threading facilities - mutexes, condition variables, once flags, etc.
- are still fully functional within the mcf thread model.

In addition, I will keep maintaining my personal builds (from GCC 12
release branch) with these patches at https://gcc-mcf.lhmouse.com/.

Forwarded Message
在 2022/10/23 18:06, Jacek Caban 写道:
>
> Please, let's not do that. It's possible to fix existing
implementations, we don't need to make

> things more complicated than they are.
>

Okay okay, I think I have to compose a thorough list of problems that
we are facing at the moment, and had better have a permalink to the
mailing list archive that I can reference elsewhere. I have been tired
of repeating the same grounds of arguments again and again:

1. In a DLL, destructors of static objects and callbacks that are
registered
with `atexit()`, are executed by `LdrShutdownProcess()`, after all
the other
threads have been terminated by `ZwTerminateProcess(NULL, status)`.
This means
that, if another thread has been terminated while holding a mutex,
the mutex
can never get unlocked. If a destructor attempts to lock the same
mutex,
deadlocks will occur. Destructors of executables do not suffer
from this

issue, because they are executed before `RtlExitUserProcess()`.

Standard behavior: Static destructors and exit callbacks should be
executed
while other threads are running. If another thread attempts to
access a
destroyed object, the behavior is undefined; the user is
responsible to

prevent this from happening, by joining or suspending it.

2. Following 1, in a DLL, static destructors and exit callbacks are still
invoked when `_Exit()` or `quick_exit()` is called.

Standard behavior: `_Exit()` should not perform any cleanup; not
even open
files are flushed. `quick_exit()` shall invoke all quick-exit
callbacks in

reverse order, then call `_Exit()`.

3. There is a use-after-free bug [1] about thread-local destructors. I
suspect

this is caused by emutls, because GCC uses `__cxa_thread_atexit()` to
register thread-local destructors, which could interleave with
`emutls_destroy()` [2].

Standard behavior: This is not allowed to happen. mcfgthread
solves this

issue by running thread-local destructors and thread-specific key
destructors as two separate passes [3].

[1] https://gcc.gnu.org/bugzilla/show_bug.cgi?id=80816
[2]
https://github.com/gcc-mirror/gcc/blob/f84e4fb44aa26b71fbc64e0532fd24d96e5caa3f/libgcc/emutls.c#L96
[3]
https://github.com/lhmouse/mcfgthread/blob/63e034d375caf585e2921cd3455f1048feb2172d/src/xglobals.c#L249

4. In the win32 thread model, thread-specific key destructors are
called at

process exit [4], after static destructors.

Standard behavior: They shall be called only when a thread exits,
and the
associated thread-specific values are not a null pointer. They
shall not be

called when a program terminates; instead, users are responsible for
deallocating such resources before calling `exit()`. This
requirement is
missing in POSIX, but formally specified by ISO/IEC 9899:2017, as
the 4th

paragraph in '7.26.6.1 The tss_create function'.

[4]
https://github.com/mingw-w64/mingw-w64/blob/d0a034a04d312434b842c4869a8a900568d8db98/mingw-w64-crt/crt/tlsthrd.c#L134

Those 4 points describe problems that you solve in the new threading
model, but there is no reason they can't be fixed for existing threading
models. In fact, ideally they would be fixed for all threading models.
Except now we need to worry about one more threading model, meaning that
future bugs will be even harder to fix.

5. Wait operations, of timed mutexes and condition variables, should take
absolute time points as `struct timespec`.

Standard behavior: Both POSIX and ISO C specify them as such,
while all
Windows APIs take relative durations as a 32-bit integer of
milliseconds,

which can also easily overflow.

This also may be supported in existing threading models. Overflow is
trivial to fix by waiting in a loop. (There are other reasons why OS
support for absolute timeout is slightly better, but the price of this
design decision makes it questionable. I plan to elaborate more on that
on mingw ML, but I need to find time to do a bit of research first).

Jacek

[committed] wwwdocs: search: Remove trailing slashes on tags

2022-10-24 Thread Gerald Pfeifer

HTML 5 recommends against those.

Pushed.
---
 htdocs/search.html | 46 +++---
 1 file changed, 23 insertions(+), 23 deletions(-)

diff --git a/htdocs/search.html b/htdocs/search.html
index 83f26fb5..b3cfa3ce 100644
--- a/htdocs/search.html
+++ b/htdocs/search.html
@@ -35,8 +35,8 @@ Sort by: 
 Search-syntax
 
 
-
-
+
+
 
 
 
 Web page areas:
 
+value="g/a|g/b|g/c|g/d|g/e|g/f|g/g|g/h|g/i|g/j|g/l|g/mi|g/n|g/p|g/r|g/s|g/ti|g/th|g/w">
 "Main" GCC
-
+
 
 Documentation
 
@@ -89,29 +89,29 @@ Mailing lists, including those for the old egcs project.  
Ticking a box is like
 
 
 
-
+
  gcc
 
 
-
+
  ...-bugs
 
 
-
+
  ...-patches
 
 
-
+
  ...-help
 
 
 
-
+
  ...-cvs
 
 
-
+
  ...-testresults
 
 
@@ -1205,23 +1205,23 @@ Mailing lists, including those for the old egcs 
project.  Ticking a box is like
 
 
 
-
+
  ...-regression
 
 
-
+
  ...-prs
 
 
-
+
  ...-announce
 
 
-
+
  ...-cvs-wwwdocs
 
 
-
+
  fortran
 
 
@@ -1605,15 +1605,15 @@ Mailing lists, including those for the old egcs 
project.  Ticking a box is like
 
 
 
-
+
  libstdc++
 
 
-
+
  ...-cvs
 
 
-
+
  ...-prs
 
 
@@ -1907,19 +1907,19 @@ Mailing lists, including those for the old egcs 
project.  Ticking a box is like
 
 
 
-
+
  java
 
 
-
+
  ...-patches
 
 
-
+
  ...-prs
 
 
-
+
  ...-announce
 
 
@@ -2235,7 +2235,7 @@ Mailing lists, including those for the old egcs project.  
Ticking a box is like
 
 
 
-  
+
 
 
 
-- 
2.38.0

Re: [PATCH] c, c++: Fix up excess precision handling of scalar_to_vector conversion [PR107358]


On 10/24/22 03:19, Jakub Jelinek wrote:

Hi!

As mentioned earlier in the C++ excess precision support mail, the following
testcase is broken with excess precision both in C and C++ (though just in C++
it was triggered in real-world code).
scalar_to_vector is called in both FEs after the excess precision promotions
(or stripping of EXCESS_PRECISION_EXPR), so we can then get invalid
diagnostics that say float vector + float involves truncation (on ia32
from long double to float).

The following patch fixes that by calling scalar_to_vector on the operands
before the excess precision promotions, let scalar_to_vector just do the
diagnostics (it does e.g. fold_for_warn so it will fold
EXCESS_PRECISION_EXPR around REAL_CST to constants etc.) but will then
do the actual conversions using the excess precision promoted operands
(so say if we have vector double + (float + float) we don't actually do
vector double + (float) ((long double) float + (long double) float)
but
vector double + (double) ((long double) float + (long double) float)).

Bootstrapped/regtested on x86_64-linux and i686-linux, ok for trunk?


OK on Wednesday if Joseph doesn't object.


2022-10-24  Jakub Jelinek  

PR c++/107358
c/
* c-typeck.cc (build_binary_op): Pass operands before excess precision
promotions to scalar_to_vector call.
cp/
* typeck.cc (cp_build_binary_op): Pass operands before excess precision
promotions to scalar_to_vector call.
testsuite/
* c-c++-common/pr107358.c: New test.
* g++.dg/cpp1y/pr68180.C: Remove -fexcess-precision=fast from
dg-options.

--- gcc/c/c-typeck.cc.jj2022-10-14 09:35:56.10261 +0200
+++ gcc/c/c-typeck.cc   2022-10-22 17:54:24.378839301 +0200
@@ -11995,8 +11995,8 @@ build_binary_op (location_t location, en
if ((gnu_vector_type_p (type0) && code1 != VECTOR_TYPE)
|| (gnu_vector_type_p (type1) && code0 != VECTOR_TYPE))
  {
-  enum stv_conv convert_flag = scalar_to_vector (location, code, op0, op1,
-true);
+  enum stv_conv convert_flag = scalar_to_vector (location, code, orig_op0,
+orig_op1, true);
  
switch (convert_flag)

{
--- gcc/cp/typeck.cc.jj 2022-10-20 13:54:22.535670240 +0200
+++ gcc/cp/typeck.cc2022-10-22 17:56:58.589715301 +0200
@@ -5191,6 +5191,8 @@ cp_build_binary_op (const op_location_t
  
orig_type0 = type0 = TREE_TYPE (op0);

orig_type1 = type1 = TREE_TYPE (op1);
+  tree non_ep_op0 = op0;
+  tree non_ep_op1 = op1;
  
/* The expression codes of the data types of the arguments tell us

   whether the arguments are integers, floating, pointers, etc.  */
@@ -5303,8 +5305,9 @@ cp_build_binary_op (const op_location_t
if ((gnu_vector_type_p (type0) && code1 != VECTOR_TYPE)
|| (gnu_vector_type_p (type1) && code0 != VECTOR_TYPE))
  {
-  enum stv_conv convert_flag = scalar_to_vector (location, code, op0, op1,
-complain & tf_error);
+  enum stv_conv convert_flag
+   = scalar_to_vector (location, code, non_ep_op0, non_ep_op1,
+   complain & tf_error);
  
switch (convert_flag)

  {
--- gcc/testsuite/c-c++-common/pr107358.c.jj2022-10-22 18:46:59.390375310 
+0200
+++ gcc/testsuite/c-c++-common/pr107358.c   2022-10-22 18:01:52.973660719 
+0200
@@ -0,0 +1,30 @@
+/* PR c++/107358 */
+/* { dg-do compile } */
+/* { dg-options "-O2 -fexcess-precision=standard" } */
+
+typedef float __attribute__((vector_size (4 * sizeof (float A;
+typedef double __attribute__((vector_size (2 * sizeof (double B;
+
+void
+foo (A *x)
+{
+  *x = *x - 124.225514990f;
+}
+
+void
+bar (A *x, float y)
+{
+  *x = *x - y;
+}
+
+void
+baz (B *x)
+{
+  *x = *x + 124.225514990f;
+}
+
+void
+qux (B *x, double y)
+{
+  *x = *x + y;
+}
--- gcc/testsuite/g++.dg/cpp1y/pr68180.C.jj 2022-10-14 09:28:28.339159477 
+0200
+++ gcc/testsuite/g++.dg/cpp1y/pr68180.C2022-10-22 17:59:07.012946513 
+0200
@@ -1,6 +1,6 @@
  // PR c++/68180
  // { dg-do compile { target c++14 } }
-// { dg-additional-options "-Wno-psabi -fexcess-precision=fast" }
+// { dg-additional-options "-Wno-psabi" }
  
  typedef float __attribute__( ( vector_size( 16 ) ) ) float32x4_t;

  constexpr float32x4_t fill(float x) {

Jakub

[PATCH v2] Always use TYPE_MODE instead of DECL_MODE for vector field

2022-10-24 Thread H.J. Lu via Gcc-patches

On Mon, Oct 24, 2022 at 12:12 AM Richard Biener
 wrote:
>
> On Fri, Oct 21, 2022 at 6:18 PM H.J. Lu  wrote:
> >
> > On Fri, Oct 21, 2022 at 2:33 AM Richard Biener
> >  wrote:
> > >
> > > On Thu, Oct 20, 2022 at 6:58 PM H.J. Lu via Gcc-patches
> > >  wrote:
> > > >
> > > > commit e034c5c895722e0092d2239cd8c2991db77d6d39
> > > > Author: Jakub Jelinek 
> > > > Date:   Sat Dec 2 08:54:47 2017 +0100
> > > >
> > > > PR target/78643
> > > > PR target/80583
> > > > * expr.c (get_inner_reference): If DECL_MODE of a non-bitfield
> > > > is BLKmode for vector field with vector raw mode, use TYPE_MODE
> > > > instead of DECL_MODE.
> > > >
> > > > fixed the case where DECL_MODE of a vector field is BLKmode and its
> > > > TYPE_MODE is a vector mode because of target attribute.  Remove the
> > > > BLKmode check for the case where DECL_MODE of a vector field is a vector
> > > > mode and its TYPE_MODE is BLKmode because of target attribute.
> > > >
> > > > gcc/
> > > >
> > > > PR target/107304
> > > > * expr.c (get_inner_reference): Always use TYPE_MODE for vector
> > > > field with vector raw mode.
> > > >
> > > > gcc/testsuite/
> > > >
> > > > PR target/107304
> > > > * gcc.target/i386/pr107304.c: New test.
> > > > ---
> > > >  gcc/expr.cc  |  3 +-
> > > >  gcc/testsuite/gcc.target/i386/pr107304.c | 39 
> > > >  2 files changed, 40 insertions(+), 2 deletions(-)
> > > >  create mode 100644 gcc/testsuite/gcc.target/i386/pr107304.c
> > > >
> > > > diff --git a/gcc/expr.cc b/gcc/expr.cc
> > > > index efe387e6173..9145193c2c1 100644
> > > > --- a/gcc/expr.cc
> > > > +++ b/gcc/expr.cc
> > > > @@ -7905,8 +7905,7 @@ get_inner_reference (tree exp, poly_int64_pod 
> > > > *pbitsize,
> > > >   /* For vector fields re-check the target flags, as DECL_MODE
> > > >  could have been set with different target flags than
> > > >  the current function has.  */
> > > > - if (mode == BLKmode
> > > > - && VECTOR_TYPE_P (TREE_TYPE (field))
> > > > + if (VECTOR_TYPE_P (TREE_TYPE (field))
> > > >   && VECTOR_MODE_P (TYPE_MODE_RAW (TREE_TYPE (field
> > >
> > > Isn't the check on TYPE_MODE_RAW also wrong then?  Btw, the mode could
> >
> > TYPE_MODE_RAW is always set to a vector mode for a vector type:
> >
> >/* Find an appropriate mode for the vector type.  */
> > if (TYPE_MODE (type) == VOIDmode)
> >   SET_TYPE_MODE (type,
> >  mode_for_vector (SCALAR_TYPE_MODE (innertype),
> >   nunits).else_blk ());
>
> But mode_for_vector can return a MODE_INT!

You are right.

>   /* For integers, try mapping it to a same-sized scalar mode.  */
>   if (GET_MODE_CLASS (innermode) == MODE_INT)
> {
>   poly_uint64 nbits = nunits * GET_MODE_BITSIZE (innermode);
>   if (int_mode_for_size (nbits, 0).exists ()
>   && have_regs_of_mode[mode])
> return mode;
>
> > But TYPE_MODE returns BLKmode if the vector mode is unsupported.
> >
> > > also be an integer mode.
> >
> > For a vector field, mode is either BLK mode or the vector mode.  Jakub,
> > can you comment on it?
>
> I think that for
>
> typedef int v2si __attribute__((vector_size(8)));
>
> struct X { int i; v2si j; };
>
> v2si should get DImode with -mno-sse?
>

Currently GCC generates

(insn 31 32 33 (set (subreg:DI (reg:V2SI 105) 0)
(reg:DI 84 [ _3 ])) "y2.c":12:11 -1
 (nil))

With my patch, v2si gets DImode directly without SUBREG.

Here is the v2 patch with the updated commit message:

Remove the BLKmode check for the case where DECL_MODE
of a vector field is a vector mode and its TYPE_MODE isn't a
vector mode because of target attribute.

OK for master?

Thanks.

-- 
H.J.
From 25995549d541ac30f17d28d51d53483298fa74e2 Mon Sep 17 00:00:00 2001
From: "H.J. Lu" 
Date: Wed, 19 Oct 2022 12:53:35 -0700
Subject: [PATCH v2] Always use TYPE_MODE instead of DECL_MODE for vector field

commit e034c5c895722e0092d2239cd8c2991db77d6d39
Author: Jakub Jelinek 
Date:   Sat Dec 2 08:54:47 2017 +0100

	PR target/78643
	PR target/80583
	* expr.c (get_inner_reference): If DECL_MODE of a non-bitfield
	is BLKmode for vector field with vector raw mode, use TYPE_MODE
	instead of DECL_MODE.

fixed the case where DECL_MODE of a vector field is BLKmode and its
TYPE_MODE is a vector mode because of target attribute.  Remove the
BLKmode check for the case where DECL_MODE of a vector field is a vector
mode and its TYPE_MODE isn't a vector mode because of target attribute.

gcc/

	PR target/107304
	* expr.c (get_inner_reference): Always use TYPE_MODE for vector
	field with vector raw mode.

gcc/testsuite/

	PR target/107304
	* gcc.target/i386/pr107304.c: New test.
---
 gcc/expr.cc  |  3 +-
 gcc/testsuite/gcc.target/i386/pr107304.c | 39 
 2 files changed, 40

Re: [PATCH] c++: remove use_default_args parm of coerce_template_parms


On 10/24/22 12:27, Patrick Palka wrote:

It looks like the parameter use_default_args introduced in r110693 is
effectively unused ever since r7-5536-g3c75aaa3d884ef removed the last
(and probably only) 'coerce_template_parms (..., true, false)' call, so
this patch gets rid of it.

In passing, I noticed we currently define wrapper overloads of
coerce_template_parms that effectively default complain and
require_all_args, but it seems cleaner to specify default arguments
for the main overload instead.  And I suppose we should also give
c_innermost_t_p the same defaults.

However, I'm not sure about defaulting complain to tf_none since it's
contrary to how we default every other complain parameter to either
tf_error or tf_warning_or_error (for sake of non-SFINAE callers), so
this tf_none default seems surprising.  And since just three callers
(all in constraint.cc) use this complain=tf_none default, and just one
caller (determine_specialization) passes complain=tf_warning_or_error,
it seems best to just not default complain here.


Agreed, it's probably better in general not to default complain, that's 
a source of SFINAE bugs.



Bootstrapped and regtested on x86_64-pc-linux-gnu, does this look OK for
trunk?


OK.


gcc/cp/ChangeLog:

* constraint.cc (resolve_function_concept_overload): Explicitly
pass tf_none to coerce_template_parms.
(resolve_concept_check): Likewise.
(normalize_concept_check): Likewise.
* cp-tree.h (coerce_template_parms): Declare the main overload
and default its last parameter to true.  Remove other overloads.
* pt.cc (determine_specialization): Adjust calls to
coerce_template_parms and coerce_innermost_template_parms after
removing their last parameter.
(coerce_template_args_for_ttp): Likewise.
(coerce_ttp_args_for_tta): Likewise.
(coerce_template_template_parms): Likewise.
(coerce_template_parms): Remove use_default_args parameter.
Remove wrapper overloads.  No longer static.
(coerce_innermost_template_parms): Remove use_default_args
parameter.
(lookup_template_class): As with determine_specialization.
(finish_template_variable): Likewise.
(tsubst_decl): Likewise.
(instantiate_alias_template): Likewise.
(fn_type_unification): Likewise.
(resolve_overloaded_unification): Likewise.
(resolve_nondeduced_context): Likewise.
(get_partial_spec_bindings): Likewise.
---
  gcc/cp/constraint.cc |   6 +--
  gcc/cp/cp-tree.h |   4 +-
  gcc/cp/pt.cc | 107 +++
  3 files changed, 32 insertions(+), 85 deletions(-)

diff --git a/gcc/cp/constraint.cc b/gcc/cp/constraint.cc
index 74898ca1a23..5e6a3bcf059 100644
--- a/gcc/cp/constraint.cc
+++ b/gcc/cp/constraint.cc
@@ -323,7 +323,7 @@ resolve_function_concept_overload (tree ovl, tree args)
/* Remember the candidate if we can deduce a substitution.  */
++processing_template_decl;
tree parms = TREE_VALUE (DECL_TEMPLATE_PARMS (tmpl));
-  if (tree subst = coerce_template_parms (parms, args, tmpl))
+  if (tree subst = coerce_template_parms (parms, args, tmpl, tf_none))
  {
if (subst == error_mark_node)
  ++nerrs;
@@ -404,7 +404,7 @@ resolve_concept_check (tree check)
tree args = TREE_OPERAND (id, 1);
tree parms = INNERMOST_TEMPLATE_PARMS (DECL_TEMPLATE_PARMS (tmpl));
++processing_template_decl;
-  tree result = coerce_template_parms (parms, args, tmpl);
+  tree result = coerce_template_parms (parms, args, tmpl, tf_none);
--processing_template_decl;
if (result == error_mark_node)
  return error_mark_node;
@@ -726,7 +726,7 @@ normalize_concept_check (tree check, tree args, norm_info 
info)
/* Turn on template processing; coercing non-type template arguments
   will automatically assume they're non-dependent.  */
++processing_template_decl;
-  tree subst = coerce_template_parms (parms, targs, tmpl);
+  tree subst = coerce_template_parms (parms, targs, tmpl, tf_none);
--processing_template_decl;
if (subst == error_mark_node)
  return error_mark_node;
diff --git a/gcc/cp/cp-tree.h b/gcc/cp/cp-tree.h
index 60a25101049..b210943ab25 100644
--- a/gcc/cp/cp-tree.h
+++ b/gcc/cp/cp-tree.h
@@ -7459,8 +7459,8 @@ extern tree get_function_template_decl
(const_tree);
  extern tree resolve_nondeduced_context(tree, tsubst_flags_t);
  extern tree resolve_nondeduced_context_or_error   (tree, tsubst_flags_t);
  extern hashval_t iterative_hash_template_arg  (tree arg, hashval_t val);
-extern tree coerce_template_parms   (tree, tree, tree);
-extern tree coerce_template_parms   (tree, tree, tree, 
tsubst_flags_t);
+extern tree coerce_template_parms  (tree, tree, tree, 
tsubst_flags_t,
+bool = true);
  extern tree

libgomp/nvptx: Prepare for reverse-offload callback handling, resolve spurious SIGSEGVs (was: [Patch][v5] libgomp/nvptx: Prepare for reverse-offload callback handling)

2022-10-24 Thread Thomas Schwinge

Hi!

On 2022-10-24T21:11:04+0200, I wrote:
> On 2022-10-24T21:05:46+0200, I wrote:
>> On 2022-10-24T16:07:25+0200, Jakub Jelinek via Gcc-patches 
>>  wrote:
>>> On Wed, Oct 12, 2022 at 10:55:26AM +0200, Tobias Burnus wrote:
 libgomp/nvptx: Prepare for reverse-offload callback handling
>>
>>> Ok, thanks.
>>
>> Per commit r13-3460-g131d18e928a3ea1ab2d3bf61aa92d68a8a254609
>> "libgomp/nvptx: Prepare for reverse-offload callback handling",
>> I'm seeing a lot of libgomp execution test regressions.  Random
>> example, 'libgomp.c-c++-common/error-1.c':
>>
>> [...]
>>   GOMP_OFFLOAD_run: kernel main$_omp_fn$0: launch [(teams: 1), 1, 1] 
>> [(lanes: 32), (threads: 8), 1]
>>
>> Thread 1 "a.out" received signal SIGSEGV, Segmentation fault.
>> 0x7793b87d in GOMP_OFFLOAD_run (ord=, 
>> tgt_fn=, tgt_vars=, args=) at 
>> [...]/source-gcc/libgomp/plugin/plugin-nvptx.c:2127
>> 2127if (__atomic_load_n (_dev->rev_data->fn, 
>> __ATOMIC_ACQUIRE) != 0)
>> (gdb) print ptx_dev
>> $1 = (struct ptx_device *) 0x6a55a0
>> (gdb) print ptx_dev->rev_data
>> $2 = (struct rev_offload *) 0x
>> (gdb) print ptx_dev->rev_data->fn
>> Cannot access memory at address 0x
>>
>> Why is it even taking this 'if (reverse_offload)' code path, which isn't
>> applicable to this test case (as far as I understand)?  (Well, the answer
>> is 'bool reverse_offload = ptx_dev->rev_data != NULL;', but why is that?)
>
> Well.
>
> --- a/libgomp/plugin/plugin-nvptx.c
> +++ b/libgomp/plugin/plugin-nvptx.c
>
> @@ -329,6 +332,7 @@ struct ptx_device
>pthread_mutex_t lock;
>  } omp_stacks;
>
> +  struct rev_offload *rev_data;
>struct ptx_device *next;
>  };
>
> ... but as far as I can tell, this is never initialized in
> 'nvptx_open_device', which does 'ptx_dev = GOMP_PLUGIN_malloc ([...]);'.
> Would the following be the correct fix (currently testing)?
>
> --- libgomp/plugin/plugin-nvptx.c
> +++ libgomp/plugin/plugin-nvptx.c
> @@ -546,6 +546,8 @@ nvptx_open_device (int n)
>ptx_dev->omp_stacks.size = 0;
>pthread_mutex_init (_dev->omp_stacks.lock, NULL);
>
> +  ptx_dev->rev_data = NULL;
> +
>return ptx_dev;
>  }

That did clean up libgomp execution test regressions; pushed to
master branch commit 205538832b7033699047900cf25928f5920d8b93
"libgomp/nvptx: Prepare for reverse-offload callback handling, resolve spurious 
SIGSEGVs",
see attached.


Grüße
 Thomas


-
Siemens Electronic Design Automation GmbH; Anschrift: Arnulfstraße 201, 80634 
München; Gesellschaft mit beschränkter Haftung; Geschäftsführer: Thomas 
Heurung, Frank Thürauf; Sitz der Gesellschaft: München; Registergericht 
München, HRB 106955
>From 205538832b7033699047900cf25928f5920d8b93 Mon Sep 17 00:00:00 2001
From: Thomas Schwinge 
Date: Mon, 24 Oct 2022 21:11:47 +0200
Subject: [PATCH] libgomp/nvptx: Prepare for reverse-offload callback handling,
 resolve spurious SIGSEGVs

Per commit r13-3460-g131d18e928a3ea1ab2d3bf61aa92d68a8a254609
"libgomp/nvptx: Prepare for reverse-offload callback handling",
I'm seeing a lot of libgomp execution test regressions.  Random
example, 'libgomp.c-c++-common/error-1.c':

[...]
  GOMP_OFFLOAD_run: kernel main$_omp_fn$0: launch [(teams: 1), 1, 1] [(lanes: 32), (threads: 8), 1]

Thread 1 "a.out" received signal SIGSEGV, Segmentation fault.
0x7793b87d in GOMP_OFFLOAD_run (ord=, tgt_fn=, tgt_vars=, args=) at [...]/source-gcc/libgomp/plugin/plugin-nvptx.c:2127
2127if (__atomic_load_n (_dev->rev_data->fn, __ATOMIC_ACQUIRE) != 0)
(gdb) print ptx_dev
$1 = (struct ptx_device *) 0x6a55a0
(gdb) print ptx_dev->rev_data
$2 = (struct rev_offload *) 0x
(gdb) print ptx_dev->rev_data->fn
Cannot access memory at address 0x

	libgomp/
	* plugin/plugin-nvptx.c (nvptx_open_device): Initialize
	'ptx_dev->rev_data'.
---
 libgomp/plugin/plugin-nvptx.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/libgomp/plugin/plugin-nvptx.c b/libgomp/plugin/plugin-nvptx.c
index ad057edabec..0768fca350b 100644
--- a/libgomp/plugin/plugin-nvptx.c
+++ b/libgomp/plugin/plugin-nvptx.c
@@ -546,6 +546,8 @@ nvptx_open_device (int n)
   ptx_dev->omp_stacks.size = 0;
   pthread_mutex_init (_dev->omp_stacks.lock, NULL);
 
+  ptx_dev->rev_data = NULL;
+
   return ptx_dev;
 }
 
-- 
2.35.1

Re: [Patch][v5] libgomp/nvptx: Prepare for reverse-offload callback handling

2022-10-24 Thread Tobias Burnus


Hi Tobias!

On 24.10.22 21:11, Thomas Schwinge wrote:

On 2022-10-24T21:05:46+0200, I wrote:

On 2022-10-24T16:07:25+0200, Jakub Jelinek via Gcc-patches 
 wrote:

On Wed, Oct 12, 2022 at 10:55:26AM +0200, Tobias Burnus wrote:

libgomp/nvptx: Prepare for reverse-offload callback handling

Well.
 +  struct rev_offload *rev_data;
... but as far as I can tell, this is never initialized in
'nvptx_open_device', which does 'ptx_dev = GOMP_PLUGIN_malloc ([...]);'.
Would the following be the correct fix (currently testing)?

 --- libgomp/plugin/plugin-nvptx.c
 +++ libgomp/plugin/plugin-nvptx.c
 @@ -546,6 +546,8 @@ nvptx_open_device (int n)
ptx_dev->omp_stacks.size = 0;
pthread_mutex_init (_dev->omp_stacks.lock, NULL);

 +  ptx_dev->rev_data = NULL;
 +
return ptx_dev;
  }


LGTM and I think it is obvious – albeit I am not sure why it did not
fail when testing it here.

Thanks,

Tobias

-
Siemens Electronic Design Automation GmbH; Anschrift: Arnulfstraße 201, 80634 
München; Gesellschaft mit beschränkter Haftung; Geschäftsführer: Thomas 
Heurung, Frank Thürauf; Sitz der Gesellschaft: München; Registergericht 
München, HRB 106955

Re: [Patch][v5] libgomp/nvptx: Prepare for reverse-offload callback handling

2022-10-24 Thread Thomas Schwinge

Hi Tobias!

On 2022-10-24T21:05:46+0200, I wrote:
> On 2022-10-24T16:07:25+0200, Jakub Jelinek via Gcc-patches 
>  wrote:
>> On Wed, Oct 12, 2022 at 10:55:26AM +0200, Tobias Burnus wrote:
>>> libgomp/nvptx: Prepare for reverse-offload callback handling
>
>> Ok, thanks.
>
> Per commit r13-3460-g131d18e928a3ea1ab2d3bf61aa92d68a8a254609
> "libgomp/nvptx: Prepare for reverse-offload callback handling",
> I'm seeing a lot of libgomp execution test regressions.  Random
> example, 'libgomp.c-c++-common/error-1.c':
>
> [...]
>   GOMP_OFFLOAD_run: kernel main$_omp_fn$0: launch [(teams: 1), 1, 1] 
> [(lanes: 32), (threads: 8), 1]
>
> Thread 1 "a.out" received signal SIGSEGV, Segmentation fault.
> 0x7793b87d in GOMP_OFFLOAD_run (ord=, 
> tgt_fn=, tgt_vars=, args=) at 
> [...]/source-gcc/libgomp/plugin/plugin-nvptx.c:2127
> 2127if (__atomic_load_n (_dev->rev_data->fn, 
> __ATOMIC_ACQUIRE) != 0)
> (gdb) print ptx_dev
> $1 = (struct ptx_device *) 0x6a55a0
> (gdb) print ptx_dev->rev_data
> $2 = (struct rev_offload *) 0x
> (gdb) print ptx_dev->rev_data->fn
> Cannot access memory at address 0x
>
> Why is it even taking this 'if (reverse_offload)' code path, which isn't
> applicable to this test case (as far as I understand)?  (Well, the answer
> is 'bool reverse_offload = ptx_dev->rev_data != NULL;', but why is that?)

Well.

--- a/libgomp/plugin/plugin-nvptx.c
+++ b/libgomp/plugin/plugin-nvptx.c

@@ -329,6 +332,7 @@ struct ptx_device
   pthread_mutex_t lock;
 } omp_stacks;

+  struct rev_offload *rev_data;
   struct ptx_device *next;
 };

... but as far as I can tell, this is never initialized in
'nvptx_open_device', which does 'ptx_dev = GOMP_PLUGIN_malloc ([...]);'.
Would the following be the correct fix (currently testing)?

--- libgomp/plugin/plugin-nvptx.c
+++ libgomp/plugin/plugin-nvptx.c
@@ -546,6 +546,8 @@ nvptx_open_device (int n)
   ptx_dev->omp_stacks.size = 0;
   pthread_mutex_init (&ptx_dev->omp_stacks.lock, NULL);

+  ptx_dev->rev_data = NULL;
+
   return ptx_dev;
 }



Grüße
 Thomas
-
Siemens Electronic Design Automation GmbH; Anschrift: Arnulfstraße 201, 80634 
München; Gesellschaft mit beschränkter Haftung; Geschäftsführer: Thomas 
Heurung, Frank Thürauf; Sitz der Gesellschaft: München; Registergericht 
München, HRB 106955

Re: [Patch][v5] libgomp/nvptx: Prepare for reverse-offload callback handling

2022-10-24 Thread Thomas Schwinge

Hi Tobias!

On 2022-10-24T16:07:25+0200, Jakub Jelinek via Gcc-patches 
 wrote:
> On Wed, Oct 12, 2022 at 10:55:26AM +0200, Tobias Burnus wrote:
>> libgomp/nvptx: Prepare for reverse-offload callback handling

> Ok, thanks.

Per commit r13-3460-g131d18e928a3ea1ab2d3bf61aa92d68a8a254609
"libgomp/nvptx: Prepare for reverse-offload callback handling",
I'm seeing a lot of libgomp execution test regressions.  Random
example, 'libgomp.c-c++-common/error-1.c':

[...]
  GOMP_OFFLOAD_run: kernel main$_omp_fn$0: launch [(teams: 1), 1, 1] 
[(lanes: 32), (threads: 8), 1]

Thread 1 "a.out" received signal SIGSEGV, Segmentation fault.
0x7793b87d in GOMP_OFFLOAD_run (ord=<optimized out>, 
tgt_fn=<optimized out>, tgt_vars=<optimized out>, args=<optimized out>) at 
[...]/source-gcc/libgomp/plugin/plugin-nvptx.c:2127
2127      if (__atomic_load_n (&ptx_dev->rev_data->fn, 
__ATOMIC_ACQUIRE) != 0)
(gdb) print ptx_dev
$1 = (struct ptx_device *) 0x6a55a0
(gdb) print ptx_dev->rev_data
$2 = (struct rev_offload *) 0x
(gdb) print ptx_dev->rev_data->fn
Cannot access memory at address 0x

Why is it even taking this 'if (reverse_offload)' code path, which isn't
applicable to this test case (as far as I understand)?  (Well, the answer
is 'bool reverse_offload = ptx_dev->rev_data != NULL;', but why is that?)

Grüße
 Thomas
-
Siemens Electronic Design Automation GmbH; Anschrift: Arnulfstraße 201, 80634 
München; Gesellschaft mit beschränkter Haftung; Geschäftsführer: Thomas 
Heurung, Frank Thürauf; Sitz der Gesellschaft: München; Registergericht 
München, HRB 106955

Re: [PATCH] [X86_64]: Enable support for next generation AMD Zen4 CPU

2022-10-24 Thread Alexander Monakov

On Mon, 24 Oct 2022, Jan Hubička wrote:

> > By the way, it appears pre-existing znver[123] models are also causing
> > some kind
> > of combinatorial blow-up, but before znver4 it was not a blocking issue:
> >
> > https://gcc.gnu.org/bugzilla/show_bug.cgi?id=87832
> 
> 
> It is really easy to make DFA size to grow if there are possibly many
> instructions in the pipeline (as every possible state of a modelled pipeline
> needs to be a new state of the automaton). This is essentially
> depth_of_pipeline * number_of_units with additional states to represent
> special instructions and this naturally keeps growing.
> 
> We could try to break the FP automata into multiple ones, but there are
> instructions that can go down any pipe which makes this hard
> or we can try to reduce number of different reservation types (possibly by
> breaking the automaton to znver1-3 and 4 or so).
> With znver2 model I experimented with broken up version and common one and
> ended up with smaller binary for combined one.

Looking at znver1.md again, I think the problem is caused by incorrect modeling
of division instructions: they have descriptions like

(define_insn_reservation "znver1_idiv_DI" 41
(and (eq_attr "cpu" "znver1,znver2")
 (and (eq_attr "type" "idiv")
  (and (eq_attr "mode" "DI")
   (eq_attr "memory" "none"))))
"znver1-double,znver1-ieu2*41")

which says that DImode idiv has latency 41 (which is correct) and that it
occupies 2nd integer execution unit for 41 consecutive cycles, but that is
not correct:

1) the division instruction is partially pipelined, and has throughput 1/14

2) for the most part it occupies a separate division unit, not the general
arithmetic unit.

(incidentally, I think the blowup is caused by interaction of such super-long
41-cycle paths with the rest of reservations)

I think we should fix this by modeling the separate division unit properly, and
fixing reservations to use the measured reciprocal throughput of those
instructions (available from uops.info). The following patch does that for
integer divisions and completely eliminates the integer part of the problem; the
issue with floating-point divisions remains.

Top 5 znver table sizes, before:

68692 r znver1_ieu_check
68692 r znver1_ieu_transitions
99792 r znver1_ieu_min_issue_delay
428108 r znver1_fp_min_issue_delay
856216 r znver1_fp_transitions

After:

1454 r znver1_ieu_translate
1454 r znver1_translate
2304 r znver1_ieu_transitions
428108 r znver1_fp_min_issue_delay
856216 r znver1_fp_transitions

Will you help getting this reviewed for trunk?



diff --git a/gcc/config/i386/znver1.md b/gcc/config/i386/znver1.md
index 9c25b4e27..39b59343d 100644
--- a/gcc/config/i386/znver1.md
+++ b/gcc/config/i386/znver1.md
@@ -24,7 +24,7 @@
 ;; AMD znver1, znver2 and znver3 Scheduling
 ;; Modeling automatons for zen decoders, integer execution pipes,
 ;; AGU pipes and floating point execution units.
-(define_automaton "znver1, znver1_ieu, znver1_fp, znver1_agu")
+(define_automaton "znver1, znver1_ieu, znver1_fp, znver1_agu, znver1_idiv")
 
 ;; Decoders unit has 4 decoders and all of them can decode fast path
 ;; and vector type instructions.
@@ -50,6 +50,7 @@
 (define_cpu_unit "znver1-ieu1" "znver1_ieu")
 (define_cpu_unit "znver1-ieu2" "znver1_ieu")
 (define_cpu_unit "znver1-ieu3" "znver1_ieu")
+(define_cpu_unit "znver1-idiv" "znver1_idiv")
 (define_reservation "znver1-ieu" 
"znver1-ieu0|znver1-ieu1|znver1-ieu2|znver1-ieu3")
 
 ;; 2 AGU pipes in znver1 and 3 AGU pipes in znver2 and znver3
@@ -176,28 +177,28 @@
  (and (eq_attr "type" "idiv")
   (and (eq_attr "mode" "DI")
(eq_attr "memory" "none"
-"znver1-double,znver1-ieu2*41")
+"znver1-double,znver1-idiv*14")
 
 (define_insn_reservation "znver1_idiv_SI" 25
 (and (eq_attr "cpu" "znver1,znver2")
  (and (eq_attr "type" "idiv")
   (and (eq_attr "mode" "SI")
(eq_attr "memory" "none"
-"znver1-double,znver1-ieu2*25")
+"znver1-double,znver1-idiv*14")
 
 (define_insn_reservation "znver1_idiv_HI" 17
 (and (eq_attr "cpu" "znver1,znver2")
  (and (eq_attr "type" "idiv")
   (and (eq_attr "mode" "HI")
(eq_attr "memory" "none"
-"znver1-double,znver1-ieu2*17")
+"znver1-double,znver1-idiv*14")
 
 (define_insn_reservation "znver1_idiv_QI" 12
 (and (eq_attr "cpu" "znver1,znver2")
  (and (eq_attr "type" "idiv")

[PATCH (pushed)] x86: fix VENDOR_MAX enum value

Hi.

The following patch fixes detection of VENDOR_CENTAUR CPU vendors when 
-march=native is used.
Problem was that BUILTIN_VENDOR_MAX dropped the current enum value to 
VENDOR_OTHER (4)
and thus VENDOR_MAX was 5 and not 7.

Patch can bootstrap on x86_64-linux-gnu and survives regression tests.
I'm going to push it as obvious.

PR target/107364

gcc/ChangeLog:

* common/config/i386/i386-cpuinfo.h (enum processor_vendor):
  Reorder enum values as BUILTIN_VENDOR_MAX should not point
  in the middle of the valid enum values.

Martin

---
 gcc/common/config/i386/i386-cpuinfo.h | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/gcc/common/config/i386/i386-cpuinfo.h 
b/gcc/common/config/i386/i386-cpuinfo.h
index 9893fc422bc..586a1a2d0f6 100644
--- a/gcc/common/config/i386/i386-cpuinfo.h
+++ b/gcc/common/config/i386/i386-cpuinfo.h
@@ -34,8 +34,10 @@ enum processor_vendor
   VENDOR_CENTAUR,
   VENDOR_CYRIX,
   VENDOR_NSC,
+
+  /* Maximum values must be at the end of this enum.  */
+  VENDOR_MAX,
   BUILTIN_VENDOR_MAX = VENDOR_OTHER,
-  VENDOR_MAX
 };
 
 /* Any new types or subtypes have to be inserted at the end. */
-- 
2.38.0

Re: [OG12 commit] vect: WORKAROUND vectorizer bug




> Am 24.10.2022 um 18:51 schrieb Andrew Stubbs :
> 
> I've committed this to the OG12 branch to remove some test failures. We 
> probably ought to have something on mainline also, but a proper fix would be 
> better.
> 
> Without this, the libgomp.oacc-c-c++-common/private-variables.c testcase 
> fails to compile due to an ICE.  The OpenACC worker broadcasting code is 
> creating SLP optimizable loads and stores in amdgcn address-space-4. 
> Previously this was "ok" as SLP didn't work with less than 64-lane vectors, 
> but the newly implemented smaller vectors are working as intended and 
> optimizing this.
> 
> Unfortunately the vectorizer is losing the address-space data from the 
> intermediate types, and it all falls apart during expand when it tries to 
> convert a 32-bit address into a 64-bit address and that's not something that 
> works. At first sight it looks like we could possibly make that work with 
> POINTERS_EXTEND_UNSIGNED, but that only changes the error message. 
> Fundamentally we need to make sure that various instances of "vectype" have 
> the correct address space, but my attempts to do so showed that that's a 
> larger task than I have time for right now.

I seem to recall there were issues like this in the past that I fixed, so any testcase that 
exposes this with just a gcn cc1 would be nice to have.

Richard 

> 
> This patch simply prevents the vectorizer working in the case where it would 
> break. This should not be a regression because this code didn't vectorize at 
> all, previously.
> 
> Andrew
> <221024-workarround-vec-addrspace-bug.patch>

Re: [PATCH v2] c++: ICE with invalid structured bindings [PR107276]


On 10/24/22 13:13, Marek Polacek wrote:

On Mon, Oct 24, 2022 at 10:31:50AM -0400, Jason Merrill wrote:

On 10/21/22 19:29, Marek Polacek wrote:

This test ICEs in C++23 because we reach the new code in do_auto_deduction:

30468   if (cxx_dialect >= cxx23
30469   && context == adc_return_type
30470   && (!AUTO_IS_DECLTYPE (auto_node)
30471   || !unparenthesized_id_or_class_member_access_p (init))
30472   && (r = treat_lvalue_as_rvalue_p (maybe_undo_parenthesized_ref 
(init),
30473 /*return*/true)))

where 'init' is "VIEW_CONVERT_EXPR error (y)", and then the move
in treat_lvalue_as_rvalue_p returns error_mark_node whereupon
set_implicit_rvalue_p crashes.

I don't think such V_C_Es are useful so let's not create them.  But that
won't fix the ICE so I'm checking the return value of move.  A structured
bindings decl can have an error type, that is set in cp_finish_decomp:

   8908   TREE_TYPE (first) = error_mark_node;

therefore I think treat_lvalue_as_rvalue_p just needs to cope.

Bootstrapped/regtested on x86_64-pc-linux-gnu, ok for trunk?

PR c++/107276

gcc/cp/ChangeLog:

* typeck.cc (treat_lvalue_as_rvalue_p): Check the return value of move.

gcc/ChangeLog:

* tree.cc (maybe_wrap_with_location): Don't create a location wrapper
when the type is erroneous.

gcc/testsuite/ChangeLog:

* g++.dg/cpp2a/decomp4.C: New test.
---
   gcc/cp/typeck.cc | 7 ++-
   gcc/testsuite/g++.dg/cpp2a/decomp4.C | 8 
   gcc/tree.cc  | 3 ++-
   3 files changed, 16 insertions(+), 2 deletions(-)
   create mode 100644 gcc/testsuite/g++.dg/cpp2a/decomp4.C

diff --git a/gcc/cp/typeck.cc b/gcc/cp/typeck.cc
index 16e7d85793d..5ca191759f6 100644
--- a/gcc/cp/typeck.cc
+++ b/gcc/cp/typeck.cc
@@ -10726,7 +10726,12 @@ treat_lvalue_as_rvalue_p (tree expr, bool return_p)
 if (DECL_CONTEXT (retval) != current_function_decl)
   return NULL_TREE;
 if (return_p)
-return set_implicit_rvalue_p (move (expr));
+{
+  expr = move (expr);
+  if (expr == error_mark_node)
+   return NULL_TREE;
+  return set_implicit_rvalue_p (expr);
+}
 /* if the operand of a throw-expression is a (possibly parenthesized)
id-expression that names an implicitly movable entity whose scope does 
not
diff --git a/gcc/testsuite/g++.dg/cpp2a/decomp4.C 
b/gcc/testsuite/g++.dg/cpp2a/decomp4.C
new file mode 100644
index 000..28b3f172b53
--- /dev/null
+++ b/gcc/testsuite/g++.dg/cpp2a/decomp4.C
@@ -0,0 +1,8 @@
+// PR c++/107276
+// { dg-do run { target c++20 } }
+
+auto f(auto x) {
+  auto [y] = x; // { dg-error "cannot decompose" }
+  return y;
+}
+int i = f(0);
diff --git a/gcc/tree.cc b/gcc/tree.cc
index 81a6ceaf181..4e5b1df4d85 100644
--- a/gcc/tree.cc
+++ b/gcc/tree.cc
@@ -14352,7 +14352,8 @@ maybe_wrap_with_location (tree expr, location_t loc)
 /* For now, don't add wrappers to exceptional tree nodes, to minimize
any impact of the wrapper nodes.  */
-  if (EXCEPTIONAL_CLASS_P (expr))
+  if (EXCEPTIONAL_CLASS_P (expr)
+  || (TREE_TYPE (expr) && EXCEPTIONAL_CLASS_P (TREE_TYPE (expr))))


I think check error_operand_p instead; I don't think it makes sense to look
for other exceptional nodes in TREE_TYPE.


Makes sense.  I don't suppose you want to replace the whole condition with
error_operand_p, only what comes after ||.  Updated patch below, thanks.

Bootstrapped/regtested on x86_64-pc-linux-gnu, ok for trunk?


OK.


-- >8 --
This test ICEs in C++23 because we reach the new code in do_auto_deduction:

30468   if (cxx_dialect >= cxx23
30469   && context == adc_return_type
30470   && (!AUTO_IS_DECLTYPE (auto_node)
30471   || !unparenthesized_id_or_class_member_access_p (init))
30472   && (r = treat_lvalue_as_rvalue_p (maybe_undo_parenthesized_ref 
(init),
30473 /*return*/true)))

where 'init' is "VIEW_CONVERT_EXPR error (y)", and then the move
in treat_lvalue_as_rvalue_p returns error_mark_node whereupon
set_implicit_rvalue_p crashes.

I don't think such V_C_Es are useful so let's not create them.  But that
won't fix the ICE so I'm checking the return value of move.  A structured
bindings decl can have an error type, that is set in cp_finish_decomp:

  8908   TREE_TYPE (first) = error_mark_node;

therefore I think treat_lvalue_as_rvalue_p just needs to cope.

PR c++/107276

gcc/cp/ChangeLog:

* typeck.cc (treat_lvalue_as_rvalue_p): Check the return value of move.

gcc/ChangeLog:

* tree.cc (maybe_wrap_with_location): Don't create a location wrapper
when the type is erroneous.

gcc/testsuite/ChangeLog:

* g++.dg/cpp2a/decomp4.C: New test.
---
  gcc/cp/typeck.cc | 7 ++-
  gcc/testsuite/g++.dg/cpp2a/decomp4.C | 8 
  gcc/tree.cc  | 2 +-
  3 files changed, 15

Re: [PATCH] c++: Implement -Wdangling-reference [PR106393]


On 10/21/22 19:28, Marek Polacek wrote:

This patch implements a new experimental warning (enabled by -Wextra) to
detect references bound to temporaries whose lifetime has ended.  The


Great!


primary motivation is the Note in
<https://en.cppreference.com/w/cpp/algorithm/max>:

   Capturing the result of std::max by reference produces a dangling reference
   if one of the parameters is a temporary and that parameter is returned:

   int n = 1;
   const int& r = std::max(n-1, n+1); // r is dangling

That's because both temporaries for n-1 and n+1 are destroyed at the end
of the full expression.  With this warning enabled, you'll get:

g.C:3:12: warning: possibly dangling reference to a temporary 
[-Wdangling-reference]
 3 | const int& r = std::max(n-1, n+1);
   |^
g.C:3:24: note: the temporary was destroyed at the end of the full expression 
'std::max((n - 1), (n + 1))'
 3 | const int& r = std::max(n-1, n+1);
   |^~

The warning works by checking if a reference is initialized with a function
that returns a reference, and at least one parameter of the function is
a reference that is bound to a temporary.  It assumes that such a function
actually returns one of its arguments!  (I added code to check_return_expr
to suppress the warning when we've seen the definition of the function
and we can say that it can return something other than its parameter.)


Hmm, that misses returning a reference to a subobject or container 
element that will also go away when the object is destroyed.  Does it 
also avoid a lot of false positives?



It doesn't warn when the function in question is a member function, otherwise
it'd emit loads of warnings for valid code like obj.emplace({0}, 0).


We had discussed warning if the object argument is a temporary (and for 
the above check, the function returns *this)?



It warns in member initializer lists as well:

   const int& f(const int& i) { return i; }
   struct S {
 const int &r; // { dg-warning "dangling reference" }
 S() : r(f(10)) { } // { dg-message "destroyed" }
   };

I've run the testsuite/bootstrap with the warning enabled by default.
There were just a few FAILs:
* g++.dg/warn/Wdangling-pointer-2.C
* 20_util/any/misc/any_cast.cc
* 20_util/forward/c_neg.cc
* 20_util/forward/f_neg.cc
* experimental/any/misc/any_cast.cc
all of these look like genuine bugs.  A bootstrap with the warning
enabled by default passed.

When testing a previous version of the patch, there were many FAILs in
libstdc++'s 22_locale/; all of them because the warning triggered on

   const test_type& obj = std::use_facet<test_type>(std::locale());

but this code looks valid -- std::use_facet doesn't return a reference
to its parameter.  Therefore I added code to suppress the warning when
the call is std::use_facet.  Now 22_locale/* pass even with the warning
on.  We could exclude more std:: functions like this if desirable.


Instead of adding special cases in the compiler, let's disable the 
warning around the definition of use_facet (and adjust the compiler as 
needed so that avoids the warning).


I was remembering range adaptors being a stated motivation for Nico's 
P2012, but looking back at the paper I now see that this problem was 
avoided for them by disallowing rvalue arguments to range composition.



Bootstrapped/regtested on x86_64-pc-linux-gnu, ok for trunk?

PR c++/106393

gcc/c-family/ChangeLog:

* c.opt (Wdangling-reference): New.

gcc/cp/ChangeLog:

* call.cc (expr_represents_temporary_p): New, factored out of
conv_binds_ref_to_temporary.
(conv_binds_ref_to_temporary): Don't return false just because a ck_base
is missing.  Use expr_represents_temporary_p.
(find_initializing_call_expr): New.
(do_warn_dangling_reference): New.
(extend_ref_init_temps): Call do_warn_dangling_reference.
* typeck.cc (check_return_expr): Suppress -Wdangling-reference
warnings.

gcc/ChangeLog:

* doc/invoke.texi: Document -Wdangling-reference.

gcc/testsuite/ChangeLog:

* g++.dg/cpp23/elision4.C: Use -Wdangling-reference, add dg-warning.
* g++.dg/cpp23/elision7.C: Likewise.
* g++.dg/warn/Wdangling-reference1.C: New test.
* g++.dg/warn/Wdangling-reference2.C: New test.


Could use a test with a virtual base.


---
  gcc/c-family/c.opt|   4 +
  gcc/cp/call.cc| 138 --
  gcc/cp/cp-tree.h  |   4 +-
  gcc/cp/typeck.cc  |  10 ++
  gcc/doc/invoke.texi   |  34 -
  gcc/testsuite/g++.dg/cpp23/elision4.C |   5 +-
  gcc/testsuite/g++.dg/cpp23/elision7.C |   3 +-
  .../g++.dg/warn/Wdangling-reference1.C| 103 +
  .../g++.dg/warn/Wdangling-reference2.C|  28 
  9 files changed, 312 insertions(+), 17 deletions(-)
  create mode 100644

[PATCH v2] c++: ICE with invalid structured bindings [PR107276]

2022-10-24 Thread Marek Polacek via Gcc-patches

On Mon, Oct 24, 2022 at 10:31:50AM -0400, Jason Merrill wrote:
> On 10/21/22 19:29, Marek Polacek wrote:
> > This test ICEs in C++23 because we reach the new code in do_auto_deduction:
> > 
> > 30468   if (cxx_dialect >= cxx23
> > 30469   && context == adc_return_type
> > 30470   && (!AUTO_IS_DECLTYPE (auto_node)
> > 30471   || !unparenthesized_id_or_class_member_access_p (init))
> > 30472   && (r = treat_lvalue_as_rvalue_p (maybe_undo_parenthesized_ref 
> > (init),
> > 30473 /*return*/true)))
> > 
> > where 'init' is "VIEW_CONVERT_EXPR error (y)", and then the move
> > in treat_lvalue_as_rvalue_p returns error_mark_node whereupon
> > set_implicit_rvalue_p crashes.
> > 
> > I don't think such V_C_Es are useful so let's not create them.  But that
> > won't fix the ICE so I'm checking the return value of move.  A structured
> > bindings decl can have an error type, that is set in cp_finish_decomp:
> > 
> >   8908   TREE_TYPE (first) = error_mark_node;
> > 
> > therefore I think treat_lvalue_as_rvalue_p just needs to cope.
> > 
> > Bootstrapped/regtested on x86_64-pc-linux-gnu, ok for trunk?
> > 
> > PR c++/107276
> > 
> > gcc/cp/ChangeLog:
> > 
> > * typeck.cc (treat_lvalue_as_rvalue_p): Check the return value of move.
> > 
> > gcc/ChangeLog:
> > 
> > * tree.cc (maybe_wrap_with_location): Don't create a location wrapper
> > when the type is erroneous.
> > 
> > gcc/testsuite/ChangeLog:
> > 
> > * g++.dg/cpp2a/decomp4.C: New test.
> > ---
> >   gcc/cp/typeck.cc | 7 ++-
> >   gcc/testsuite/g++.dg/cpp2a/decomp4.C | 8 
> >   gcc/tree.cc  | 3 ++-
> >   3 files changed, 16 insertions(+), 2 deletions(-)
> >   create mode 100644 gcc/testsuite/g++.dg/cpp2a/decomp4.C
> > 
> > diff --git a/gcc/cp/typeck.cc b/gcc/cp/typeck.cc
> > index 16e7d85793d..5ca191759f6 100644
> > --- a/gcc/cp/typeck.cc
> > +++ b/gcc/cp/typeck.cc
> > @@ -10726,7 +10726,12 @@ treat_lvalue_as_rvalue_p (tree expr, bool return_p)
> > if (DECL_CONTEXT (retval) != current_function_decl)
> >   return NULL_TREE;
> > if (return_p)
> > -return set_implicit_rvalue_p (move (expr));
> > +{
> > +  expr = move (expr);
> > +  if (expr == error_mark_node)
> > +   return NULL_TREE;
> > +  return set_implicit_rvalue_p (expr);
> > +}
> > /* if the operand of a throw-expression is a (possibly parenthesized)
> >id-expression that names an implicitly movable entity whose scope 
> > does not
> > diff --git a/gcc/testsuite/g++.dg/cpp2a/decomp4.C 
> > b/gcc/testsuite/g++.dg/cpp2a/decomp4.C
> > new file mode 100644
> > index 000..28b3f172b53
> > --- /dev/null
> > +++ b/gcc/testsuite/g++.dg/cpp2a/decomp4.C
> > @@ -0,0 +1,8 @@
> > +// PR c++/107276
> > +// { dg-do run { target c++20 } }
> > +
> > +auto f(auto x) {
> > +  auto [y] = x; // { dg-error "cannot decompose" }
> > +  return y;
> > +}
> > +int i = f(0);
> > diff --git a/gcc/tree.cc b/gcc/tree.cc
> > index 81a6ceaf181..4e5b1df4d85 100644
> > --- a/gcc/tree.cc
> > +++ b/gcc/tree.cc
> > @@ -14352,7 +14352,8 @@ maybe_wrap_with_location (tree expr, location_t loc)
> > /* For now, don't add wrappers to exceptional tree nodes, to minimize
> >any impact of the wrapper nodes.  */
> > -  if (EXCEPTIONAL_CLASS_P (expr))
> > +  if (EXCEPTIONAL_CLASS_P (expr)
> > +  || (TREE_TYPE (expr) && EXCEPTIONAL_CLASS_P (TREE_TYPE (expr))))
> 
> I think check error_operand_p instead; I don't think it makes sense to look
> for other exceptional nodes in TREE_TYPE.

Makes sense.  I don't suppose you want to replace the whole condition with
error_operand_p, only what comes after ||.  Updated patch below, thanks.

Bootstrapped/regtested on x86_64-pc-linux-gnu, ok for trunk?

-- >8 --
This test ICEs in C++23 because we reach the new code in do_auto_deduction:

30468   if (cxx_dialect >= cxx23
30469   && context == adc_return_type
30470   && (!AUTO_IS_DECLTYPE (auto_node)
30471   || !unparenthesized_id_or_class_member_access_p (init))
30472   && (r = treat_lvalue_as_rvalue_p (maybe_undo_parenthesized_ref 
(init),
30473 /*return*/true)))

where 'init' is "VIEW_CONVERT_EXPR error (y)", and then the move
in treat_lvalue_as_rvalue_p returns error_mark_node whereupon
set_implicit_rvalue_p crashes.

I don't think such V_C_Es are useful so let's not create them.  But that
won't fix the ICE so I'm checking the return value of move.  A structured
bindings decl can have an error type, that is set in cp_finish_decomp:

 8908   TREE_TYPE (first) = error_mark_node;

therefore I think treat_lvalue_as_rvalue_p just needs to cope.

PR c++/107276

gcc/cp/ChangeLog:

* typeck.cc (treat_lvalue_as_rvalue_p): Check the return value of move.

gcc/ChangeLog:

* tree.cc (maybe_wrap_with_location): Don't create a location wrapper
when the type

[OG12 commit] vect: WORKAROUND vectorizer bug

2022-10-24 Thread Andrew Stubbs

I've committed this to the OG12 branch to remove some test failures. We 
probably ought to have something on mainline also, but a proper fix 
would be better.


Without this, the libgomp.oacc-c-c++-common/private-variables.c testcase 
fails to compile due to an ICE.  The OpenACC worker broadcasting code is 
creating SLP optimizable loads and stores in amdgcn address-space-4. 
Previously this was "ok" as SLP didn't work with less than 64-lane 
vectors, but the newly implemented smaller vectors are working as 
intended and optimizing this.


Unfortunately the vectorizer is losing the address-space data from the 
intermediate types, and it all falls apart during expand when it tries 
to convert a 32-bit address into a 64-bit address and that's not 
something that works. At first sight it looks like we could possibly 
make that work with POINTERS_EXTEND_UNSIGNED, but that only changes the 
error message. Fundamentally we need to make sure that various instances 
of "vectype" have the correct address space, but my attempts to do so 
showed that that's a larger task than I have time for right now.


This patch simply prevents the vectorizer working in the case where it 
would break. This should not be a regression because this code didn't 
vectorize at all, previously.


Andrew

vect: WORKAROUND vectorizer bug

This patch disables vectorization of memory accesses to non-default address
spaces where the pointer size is different to the usual pointer size.  This
condition typically occurs in OpenACC programs on amdgcn, where LDS memory is
used for broadcasting gang-private variables between threads. In particular,
see libgomp.oacc-c-c++-common/private-variables.c

The problem is that the address space information is dropped from the various
types in the middle-end and eventually it triggers an ICE trying to do an
address conversion.  That ICE can be avoided by defining
POINTERS_EXTEND_UNSIGNED, but that just produces wrong RTL code later on.

A correct solution would ensure that all the vectypes have the correct address
spaces, but I don't have time for that right now.

gcc/ChangeLog:

* tree-vect-data-refs.cc (vect_analyze_data_refs): Workaround an
address-space bug.

diff --git a/gcc/tree-vect-data-refs.cc b/gcc/tree-vect-data-refs.cc
index 09223baf718..70b671ed94a 100644
--- a/gcc/tree-vect-data-refs.cc
+++ b/gcc/tree-vect-data-refs.cc
@@ -4598,7 +4598,21 @@ vect_analyze_data_refs (vec_info *vinfo, poly_uint64 
*min_vf, bool *fatal)
   /* Set vectype for STMT.  */
   scalar_type = TREE_TYPE (DR_REF (dr));
   tree vectype = get_vectype_for_scalar_type (vinfo, scalar_type);
-  if (!vectype)
+
+  /* FIXME: If the object is in an address-space in which the pointer size
+is different to the default address space then vectorizing here will
+lead to an ICE down the road because the address space information
+gets lost.  This work-around fixes the problem until we have a proper
+solution.  */
+  tree base_object = DR_REF (dr);
+  tree op = (TREE_CODE (base_object) == COMPONENT_REF
+|| TREE_CODE (base_object) == ARRAY_REF
+? TREE_OPERAND (base_object, 0) : base_object);
+  addr_space_t as = TYPE_ADDR_SPACE (TREE_TYPE (op));
+  bool addr_space_bug = (!ADDR_SPACE_GENERIC_P (as)
+&& targetm.addr_space.pointer_mode (as) != Pmode);
+
+  if (!vectype || addr_space_bug)
 {
   if (dump_enabled_p ())
 {

[OG12 commit] amdgcn: disallow USM on gfx908

2022-10-24 Thread Andrew Stubbs

I've committed this patch to the devel/omp/gcc-12 branch. I will have to 
fold it into my previous OpenMP memory management patch series when I 
repost it.


The GFX908 (MI100) devices only partially support the Unified Shared 
Memory model that we have, and only then with additional kernel boot 
parameters.  Without that there are unexplained memory access faults at 
runtime, and it's generally not a good user experience.


For this reason we have decided not to support USM on gfx908 in the 
toolchain. The gfx90a (MI200) devices are still supported.


Andrew

amdgcn: disallow USM on gfx908

It does work, but not well and only with the amdgpu.noreply=0 kernel boot
option.

gcc/ChangeLog:

* config/gcn/gcn.cc (gcn_init_cumulative_args): Disallow gfx908.

diff --git a/gcc/config/gcn/gcn.cc b/gcc/config/gcn/gcn.cc
index 9c2fd4c5b8a..720c0a08a13 100644
--- a/gcc/config/gcn/gcn.cc
+++ b/gcc/config/gcn/gcn.cc
@@ -2905,6 +2905,7 @@ gcn_init_cumulative_args (CUMULATIVE_ARGS *cum /* 
Argument info to init */ ,
case PROCESSOR_FIJI:
case PROCESSOR_VEGA10:
case PROCESSOR_VEGA20:
+   case PROCESSOR_GFX908:
  error ("GPU architecture does not support Unified Shared Memory");
default:
  ;

Re: [PATCH] builtins: Add various complex builtins for _Float{16,32,64,128,32x,64x,128x}




On 10/21/22 09:42, Jakub Jelinek wrote:

Hi!

On top of the pending
https://gcc.gnu.org/pipermail/gcc-patches/2022-October/603665.html
https://gcc.gnu.org/pipermail/gcc-patches/2022-October/604080.html
the following patch adds some complex builtins which have libm
implementation in glibc 2.26 and later on various arches.
It is needed for libstdc++ _Float128 support when long double is not
IEEE quad.

Tested on x86_64-linux, ok for trunk?

2022-10-21  Jakub Jelinek  

* builtin-types.def (BT_COMPLEX_FLOAT16, BT_COMPLEX_FLOAT32,
BT_COMPLEX_FLOAT64, BT_COMPLEX_FLOAT128, BT_COMPLEX_FLOAT32X,
BT_COMPLEX_FLOAT64X, BT_COMPLEX_FLOAT128X,
BT_FN_COMPLEX_FLOAT16_COMPLEX_FLOAT16,
BT_FN_COMPLEX_FLOAT32_COMPLEX_FLOAT32,
BT_FN_COMPLEX_FLOAT64_COMPLEX_FLOAT64,
BT_FN_COMPLEX_FLOAT128_COMPLEX_FLOAT128,
BT_FN_COMPLEX_FLOAT32X_COMPLEX_FLOAT32X,
BT_FN_COMPLEX_FLOAT64X_COMPLEX_FLOAT64X,
BT_FN_COMPLEX_FLOAT128X_COMPLEX_FLOAT128X,
BT_FN_FLOAT16_COMPLEX_FLOAT16, BT_FN_FLOAT32_COMPLEX_FLOAT32,
BT_FN_FLOAT64_COMPLEX_FLOAT64, BT_FN_FLOAT128_COMPLEX_FLOAT128,
BT_FN_FLOAT32X_COMPLEX_FLOAT32X, BT_FN_FLOAT64X_COMPLEX_FLOAT64X,
BT_FN_FLOAT128X_COMPLEX_FLOAT128X,
BT_FN_COMPLEX_FLOAT16_COMPLEX_FLOAT16_COMPLEX_FLOAT16,
BT_FN_COMPLEX_FLOAT32_COMPLEX_FLOAT32_COMPLEX_FLOAT32,
BT_FN_COMPLEX_FLOAT64_COMPLEX_FLOAT64_COMPLEX_FLOAT64,
BT_FN_COMPLEX_FLOAT128_COMPLEX_FLOAT128_COMPLEX_FLOAT128,
BT_FN_COMPLEX_FLOAT32X_COMPLEX_FLOAT32X_COMPLEX_FLOAT32X,
BT_FN_COMPLEX_FLOAT64X_COMPLEX_FLOAT64X_COMPLEX_FLOAT64X,
BT_FN_COMPLEX_FLOAT128X_COMPLEX_FLOAT128X_COMPLEX_FLOAT128X): New.
* builtins.def (CABS_TYPE, CACOSH_TYPE, CARG_TYPE, CASINH_TYPE,
CPOW_TYPE, CPROJ_TYPE): Define and undefine later.
(BUILT_IN_CABS, BUILT_IN_CACOSH, BUILT_IN_CACOS, BUILT_IN_CARG,
BUILT_IN_CASINH, BUILT_IN_CASIN, BUILT_IN_CATANH, BUILT_IN_CATAN,
BUILT_IN_CCOSH, BUILT_IN_CCOS, BUILT_IN_CEXP, BUILT_IN_CLOG,
BUILT_IN_CPOW, BUILT_IN_CPROJ, BUILT_IN_CSINH, BUILT_IN_CSIN,
BUILT_IN_CSQRT, BUILT_IN_CTANH, BUILT_IN_CTAN): Add
DEF_EXT_LIB_FLOATN_NX_BUILTINS.
* fold-const-call.cc (fold_const_call_sc, fold_const_call_cc,
fold_const_call_ccc): Add various CASE_CFN_*_FN: cases when
CASE_CFN_* is present.
* gimple-ssa-backprop.cc (backprop::process_builtin_call_use):
Likewise.
* builtins.cc (expand_builtin, fold_builtin_1): Likewise.
* fold-const.cc (negate_mathfn_p, tree_expr_finite_p,
tree_expr_maybe_signaling_nan_p, tree_expr_maybe_nan_p,
tree_expr_maybe_real_minus_zero_p, tree_call_nonnegative_warnv_p):
Likewise.


OK

jeff

[PATCH] c++: remove use_default_args parm of coerce_template_parms

2022-10-24 Thread Patrick Palka via Gcc-patches

It looks like the parameter use_default_args introduced in r110693 is
effectively unused ever since r7-5536-g3c75aaa3d884ef removed the last
(and probably only) 'coerce_template_parms (..., true, false)' call, so
this patch gets rid of it.

In passing, I noticed we currently define wrapper overloads of
coerce_template_parms that effectively default complain and
require_all_args, but it seems cleaner to specify default arguments
for the main overload instead.  And I suppose we should also give
coerce_innermost_template_parms the same defaults.

However, I'm not sure about defaulting complain to tf_none since it's
contrary to how we default every other complain parameter to either
tf_error or tf_warning_or_error (for sake of non-SFINAE callers), so
this tf_none default seems surprising.  And since just three callers
(all in constraint.cc) use this complain=tf_none default, and just one
caller (determine_specialization) passes complain=tf_warning_or_error,
it seems best to just not default complain here.

Bootstrapped and regtested on x86_64-pc-linux-gnu, does this look OK for
trunk?

gcc/cp/ChangeLog:

* constraint.cc (resolve_function_concept_overload): Explicitly
pass tf_none to coerce_template_parms.
(resolve_concept_check): Likewise.
(normalize_concept_check): Likewise.
* cp-tree.h (coerce_template_parms): Declare the main overload
and default its last parameter to true.  Remove other overloads.
* pt.cc (determine_specialization): Adjust calls to
coerce_template_parms and coerce_innermost_template_parms after
removing their last parameter.
(coerce_template_args_for_ttp): Likewise.
(coerce_ttp_args_for_tta): Likewise.
(coerce_template_template_parms): Likewise.
(coerce_template_parms): Remove use_default_args parameter.
Remove wrapper overloads.  No longer static.
(coerce_innermost_template_parms): Remove use_default_args
parameter.
(lookup_template_class): As with determine_specialization.
(finish_template_variable): Likewise.
(tsubst_decl): Likewise.
(instantiate_alias_template): Likewise.
(fn_type_unification): Likewise.
(resolve_overloaded_unification): Likewise.
(resolve_nondeduced_context): Likewise.
(get_partial_spec_bindings): Likewise.
---
 gcc/cp/constraint.cc |   6 +--
 gcc/cp/cp-tree.h |   4 +-
 gcc/cp/pt.cc | 107 +++
 3 files changed, 32 insertions(+), 85 deletions(-)

diff --git a/gcc/cp/constraint.cc b/gcc/cp/constraint.cc
index 74898ca1a23..5e6a3bcf059 100644
--- a/gcc/cp/constraint.cc
+++ b/gcc/cp/constraint.cc
@@ -323,7 +323,7 @@ resolve_function_concept_overload (tree ovl, tree args)
   /* Remember the candidate if we can deduce a substitution.  */
   ++processing_template_decl;
   tree parms = TREE_VALUE (DECL_TEMPLATE_PARMS (tmpl));
-  if (tree subst = coerce_template_parms (parms, args, tmpl))
+  if (tree subst = coerce_template_parms (parms, args, tmpl, tf_none))
 {
   if (subst == error_mark_node)
 ++nerrs;
@@ -404,7 +404,7 @@ resolve_concept_check (tree check)
   tree args = TREE_OPERAND (id, 1);
   tree parms = INNERMOST_TEMPLATE_PARMS (DECL_TEMPLATE_PARMS (tmpl));
   ++processing_template_decl;
-  tree result = coerce_template_parms (parms, args, tmpl);
+  tree result = coerce_template_parms (parms, args, tmpl, tf_none);
   --processing_template_decl;
   if (result == error_mark_node)
 return error_mark_node;
@@ -726,7 +726,7 @@ normalize_concept_check (tree check, tree args, norm_info 
info)
   /* Turn on template processing; coercing non-type template arguments
  will automatically assume they're non-dependent.  */
   ++processing_template_decl;
-  tree subst = coerce_template_parms (parms, targs, tmpl);
+  tree subst = coerce_template_parms (parms, targs, tmpl, tf_none);
   --processing_template_decl;
   if (subst == error_mark_node)
 return error_mark_node;
diff --git a/gcc/cp/cp-tree.h b/gcc/cp/cp-tree.h
index 60a25101049..b210943ab25 100644
--- a/gcc/cp/cp-tree.h
+++ b/gcc/cp/cp-tree.h
@@ -7459,8 +7459,8 @@ extern tree get_function_template_decl
(const_tree);
 extern tree resolve_nondeduced_context (tree, tsubst_flags_t);
 extern tree resolve_nondeduced_context_or_error(tree, tsubst_flags_t);
 extern hashval_t iterative_hash_template_arg   (tree arg, hashval_t val);
-extern tree coerce_template_parms   (tree, tree, tree);
-extern tree coerce_template_parms   (tree, tree, tree, 
tsubst_flags_t);
+extern tree coerce_template_parms  (tree, tree, tree, 
tsubst_flags_t,
+bool = true);
 extern tree canonicalize_type_argument (tree, tsubst_flags_t);
 extern void register_local_specialization   (tree, tree);
 extern tree retrieve_local_specialization   (tree);
diff --git

[OG12 commit] amdgcn, libgomp: USM allocation update

2022-10-24 Thread Andrew Stubbs

I've committed this patch to the devel/omp/gcc-12 branch. I will have to 
fold it into my previous OpenMP memory management patch series when I 
repost it.


The patch changes the internal memory allocation method such that memory 
is allocated in the regular heap and then marked as "coarse-grained", as 
opposed to allocating coarse-grained memory in the first place. The 
difference is that this is CPU first, not GPU first, which is typically 
the right way around, especially when we are using this for all possible 
allocations.


Andrew

amdgcn, libgomp: USM allocation update

Allocate Unified Shared Memory via malloc and hsa_amd_svm_attributes_set,
instead of hsa_allocate_memory.  This scheme should be more efficient for
memory that is first accessed by the CPU.

libgomp/ChangeLog:

* plugin/plugin-gcn.c (HSA_AMD_SYSTEM_INFO_SVM_SUPPORTED): New.
(HSA_SYSTEM_INFO_SVM_ACCESSIBLE_BY_DEFAULT): New.
(HSA_AMD_SVM_ATTRIB_GLOBAL_FLAG): New.
(HSA_AMD_SVM_GLOBAL_FLAG_COARSE_GRAINED): New.
(hsa_amd_svm_attribute_pair_t): New.
(struct hsa_runtime_fn_info): Add hsa_amd_svm_attributes_set_fn.
(dump_hsa_system_info): Dump HSA_AMD_SYSTEM_INFO_SVM_SUPPORTED and
HSA_SYSTEM_INFO_SVM_ACCESSIBLE_BY_DEFAULT.
(DLSYM_OPT_FN): New.
(init_hsa_runtime_functions): Add hsa_amd_svm_attributes_set.
(GOMP_OFFLOAD_usm_alloc): Use malloc and hsa_amd_svm_attributes_set.
(GOMP_OFFLOAD_usm_free): Use regular free.
* testsuite/libgomp.c/usm-1.c: Add -mxnack=on for amdgcn.
* testsuite/libgomp.c/usm-2.c: Likewise.
* testsuite/libgomp.c/usm-3.c: Likewise.
* testsuite/libgomp.c/usm-4.c: Likewise.

diff --git a/libgomp/plugin/plugin-gcn.c b/libgomp/plugin/plugin-gcn.c
index dd493f63912..4871a6a793b 100644
--- a/libgomp/plugin/plugin-gcn.c
+++ b/libgomp/plugin/plugin-gcn.c
@@ -113,6 +113,16 @@ struct gcn_thread
   int async;
 };
 
+/* TEMPORARY IMPORT, UNTIL hsa_ext_amd.h GETS UPDATED.  */
+const static int HSA_AMD_SYSTEM_INFO_SVM_SUPPORTED = 0x201;
+const static int HSA_SYSTEM_INFO_SVM_ACCESSIBLE_BY_DEFAULT = 0x202;
+const static int HSA_AMD_SVM_ATTRIB_GLOBAL_FLAG = 0;
+const static int HSA_AMD_SVM_GLOBAL_FLAG_COARSE_GRAINED = 1;
+typedef struct hsa_amd_svm_attribute_pair_s {
+  uint64_t attribute;
+  uint64_t value;
+} hsa_amd_svm_attribute_pair_t;
+
 /* As an HSA runtime is dlopened, following structure defines function
pointers utilized by the HSA plug-in.  */
 
@@ -195,6 +205,9 @@ struct hsa_runtime_fn_info
   hsa_status_t (*hsa_code_object_deserialize_fn)
 (void *serialized_code_object, size_t serialized_code_object_size,
  const char *options, hsa_code_object_t *code_object);
+  hsa_status_t (*hsa_amd_svm_attributes_set_fn)
+(void* ptr, size_t size, hsa_amd_svm_attribute_pair_t* attribute_list,
+ size_t attribute_count);
 };
 
 /* Structure describing the run-time and grid properties of an HSA kernel
@@ -720,6 +733,24 @@ dump_hsa_system_info (void)
 }
   else
 GCN_WARNING ("HSA_SYSTEM_INFO_EXTENSIONS: FAILED\n");
+
+  bool svm_supported;
+  status = hsa_fns.hsa_system_get_info_fn
+(HSA_AMD_SYSTEM_INFO_SVM_SUPPORTED, &svm_supported);
+  if (status == HSA_STATUS_SUCCESS)
+GCN_DEBUG ("HSA_AMD_SYSTEM_INFO_SVM_SUPPORTED: %s\n",
+  (svm_supported ? "TRUE" : "FALSE"));
+  else
+GCN_WARNING ("HSA_AMD_SYSTEM_INFO_SVM_SUPPORTED: FAILED\n");
+
+  bool svm_accessible;
+  status = hsa_fns.hsa_system_get_info_fn
+(HSA_SYSTEM_INFO_SVM_ACCESSIBLE_BY_DEFAULT, &svm_accessible);
+  if (status == HSA_STATUS_SUCCESS)
+GCN_DEBUG ("HSA_SYSTEM_INFO_SVM_ACCESSIBLE_BY_DEFAULT: %s\n",
+  (svm_accessible ? "TRUE" : "FALSE"));
+  else
+GCN_WARNING ("HSA_SYSTEM_INFO_SVM_ACCESSIBLE_BY_DEFAULT: FAILED\n");
 }
 
 /* Dump information about the available hardware.  */
@@ -1361,6 +1392,8 @@ init_hsa_runtime_functions (void)
   hsa_fns.function##_fn = dlsym (handle, #function); \
   if (hsa_fns.function##_fn == NULL) \
 return false;
+#define DLSYM_OPT_FN(function) \
+  hsa_fns.function##_fn = dlsym (handle, #function);
   void *handle = dlopen (hsa_runtime_lib, RTLD_LAZY);
   if (handle == NULL)
 return false;
@@ -1395,6 +1428,7 @@ init_hsa_runtime_functions (void)
   DLSYM_FN (hsa_signal_load_acquire)
   DLSYM_FN (hsa_queue_destroy)
   DLSYM_FN (hsa_code_object_deserialize)
+  DLSYM_OPT_FN (hsa_amd_svm_attributes_set)
   return true;
 #undef DLSYM_FN
 }
@@ -3886,15 +3920,38 @@ static struct usm_splay_tree_s usm_map = { NULL };
 
 /* Allocate memory suitable for Unified Shared Memory.
 
-   In fact, AMD memory need only be "coarse grained", which target
-   allocations already are.  We do need to track allocations so that
-   GOMP_OFFLOAD_is_usm_ptr can look them up.  */
+   Normal heap memory is already enabled for USM, but by default it is "fine-
+   grained" memory, meaning that the GPU must access it via the system bus,
+   slowly.  Changing the page to

Re: [PATCH] builtins: Add __builtin_nextafterf16b builtin




On 10/21/22 01:23, Jakub Jelinek wrote:

Hi!

On top of the pending
https://gcc.gnu.org/pipermail/gcc-patches/2022-October/603665.html
patch, the following patch adds another needed builtin.
The above patch adds among other things __builtin_nextafterf16
builtin which we need in order to constexpr evaluate
std::nextafter(_Float16) overload (patch for that to be posted momentarily).
While there is inline implementation of the overload, it isn't constant
evaluation friendly, and the builtin doesn't need libm implementation
because it will be used only during constant expression evaluation.
We need the same thing also for std::nextafter(__gnu_cxx::__bfloat16_t)
though and this patch does that.

Bootstrapped/regtested on x86_64-linux and i686-linux, ok for trunk?

2022-10-21  Jakub Jelinek  

* builtin-types.def (BT_FN_BFLOAT16_BFLOAT16_BFLOAT16): New.
* builtins.def (BUILT_IN_NEXTAFTERF16B): New builtin.
* fold-const-call.cc (fold_const_call_sss): Handle
CFN_BUILT_IN_NEXTAFTERF16B.


OK

jeff

Re: [PATCH] c, c++: Fix up excess precision handling of scalar_to_vector conversion [PR107358]




On 10/24/22 01:19, Jakub Jelinek via Gcc-patches wrote:

Hi!

As mentioned earlier in the C++ excess precision support mail, the following
testcase is broken with excess precision both in C and C++ (though just in C++
it was triggered in real-world code).
scalar_to_vector is called in both FEs after the excess precision promotions
(or stripping of EXCESS_PRECISION_EXPR), so we can then get invalid
diagnostics that say float vector + float involves truncation (on ia32
from long double to float).

The following patch fixes that by calling scalar_to_vector on the operands
before the excess precision promotions, let scalar_to_vector just do the
diagnostics (it does e.g. fold_for_warn so it will fold
EXCESS_PRECISION_EXPR around REAL_CST to constants etc.) but will then
do the actual conversions using the excess precision promoted operands
(so say if we have vector double + (float + float) we don't actually do
vector double + (float) ((long double) float + (long double) float)
but
vector double + (double) ((long double) float + (long double) float)

Bootstrapped/regtested on x86_64-linux and i686-linux, ok for trunk?

2022-10-24  Jakub Jelinek  

PR c++/107358
c/
* c-typeck.cc (build_binary_op): Pass operands before excess precision
promotions to scalar_to_vector call.
cp/
* typeck.cc (cp_build_binary_op): Pass operands before excess precision
promotions to scalar_to_vector call.
testsuite/
* c-c++-common/pr107358.c: New test.
* g++.dg/cpp1y/pr68180.C: Remove -fexcess-precision=fast from
dg-options.


OK

jeff

Re: [PATCH RFA] tree: add build_string_literal overloads




On 10/20/22 14:20, Jason Merrill via Gcc-patches wrote:

Tested x86_64-pc-linux-gnu, OK for trunk?

-- >8 --

Simplify several calls to build_string_literal by not requiring redundant
strlen or IDENTIFIER_* in the caller.

I also corrected a wrong comment on IDENTIFIER_LENGTH.

gcc/ChangeLog:

* tree.h (build_string_literal): New one-argument overloads that
take tree (identifier) and const char *.
* builtins.cc (fold_builtin_FILE)
(fold_builtin_FUNCTION)
* gimplify.cc (gimple_add_init_for_auto_var)
* vtable-verify.cc (verify_bb_vtables): Simplify calls.

gcc/cp/ChangeLog:

* cp-gimplify.cc (fold_builtin_source_location)
* vtable-class-hierarchy.cc (register_all_pairs): Simplify calls to
build_string_literal.
(build_string_from_id): Remove.


OK

jeff

Re: [PATCH] microblaze: use strverscmp() in MICROBLAZE_VERSION_COMPARE()




On 10/16/22 12:15, Ovidiu Panait via Gcc-patches wrote:

Currently, combining '-mxl-multiply-high' with -mcpu=v11.0 produces the
following bogus warning:

   echo "int main(){}" | ./microblazeel-linux-gnu-gcc -mxl-multiply-high \
   -mno-xl-soft-mul -mcpu=v11.0 -nostdlib -x c -
   warning: '-mxl-multiply-high' can be used only with '-mcpu=v6.00.a' or 
greater

Since strcasecmp() doesn't properly compare single-digit cpu versions with
double-digit versions, switch MICROBLAZE_VERSION_COMPARE() to use strverscmp()
instead.

* config/microblaze/microblaze.cc (MICROBLAZE_VERSION_COMPARE): Use
strverscmp() to fix bogus warnings when passing multi-digit -mcpu
versions on the command line.


But strverscmp doesn't ignore character case.  ISTM that you need to 
canonicalize the case first if you're going to use strverscmp.



Jeff

Re: [PATCH] RISC-V: Add h extension support




On 10/24/22 03:55, Kito Cheng wrote:

`h` was the prefix of multi-letter extension names, but it became an
extension in its own right in a later RISC-V ISA spec.

Fortunately, no extension that has actually been defined is prefixed
with `h`, so we can just change that.

gcc/ChangeLog:

* common/config/riscv/riscv-common.cc (riscv_ext_version_table):
Add `h`.
(riscv_supported_std_ext): Ditto.
(multi_letter_subset_rank): Remove `h`.
(riscv_subset_list::parse_std_ext): Handle `h` as single letter
extension.
(riscv_subset_list::parse): Ditto.

gcc/testsuite/ChangeLog:

* testsuite/gcc.target/riscv/arch-18.c: New.
* testsuite/gcc.target/riscv/arch-5.c: Remove test for prefixed
with `h`.
* testsuite/gcc.target/riscv/predef-23.c: New.


OK.


Jeff

Re: [RFC] how to handle the combination of -fstrict-flex-arrays + -Warray-bounds

2022-10-24 Thread Qing Zhao via Gcc-patches



> On Oct 24, 2022, at 3:30 AM, Richard Biener  wrote:
> 
> On Sat, 22 Oct 2022, Martin Sebor wrote:
> 
>> On 10/21/22 09:29, Qing Zhao wrote:
>>> Hi,
>>> 
>>> (FAM below refers to Flexible Array Members):
>>> 
>>> I need inputs on  how to handle the combination of -fstrict-flex-arrays +
>>> -Warray-bounds.
>>> 
>>> Our initial goal is to update -Warray-bounds with multiple levels of
>>> -fstrict-flex-arrays=N
>>> to issue warnings according to the different levels of “N”.
>>> However, after detailed study, I found that this goal was very hard to be
>>> achieved.
>>> 
>>> 1. -fstrict-flex-arrays and its levels
>>> 
>>> The new option -fstrict-flex-arrays has 4 levels:
>>> 
>>> level   trailing arrays
>>> treated as FAM
>>> 
>>>   0 [],[0],[1],[n]  the default without option
>>>   1 [],[0],[1]
>>>   2 [],[0]
>>>   3 []  the default when option specified
>>>   without value
>>> 
>>> 2. -Warray-bounds and its levels
>>> 
>>> The option -Warray-bounds currently has 2 levels:
>>> 
>>> level   trailing arrays
>>> treated as FAM
>>> 
>>>   1 [],[0],[1]   the default when option specified
>>>   without value
>>>   2 []  
>>> 
>>> i.e,
>>> When -Warray-bounds=1, it treats [],[0],[1] as FAM, the same level as
>>> -fstrict-flex-arrays=1;
>>> When -Warray-bounds=2, it only treat [] as FAM, the same level as
>>> -fstrict-flex-arrays=3;
>>> 
>>> 3. How to handle the combination of  -fstrict-flex-arrays and
>>> -Warray-bounds?
>>> 
>>> Question 1:  when -fstrict-flex-arrays does not present, the default is
>>> -fstrict-flex-arrays=0,
>>> which treats [],[0],[1],[n] as FAM, so should we update
>>> the default behavior
>>> of -Warray-bounds to treat any trailing array [n] as
>>> FAMs?
>>> 
>>> My immediate answer to Q1 is NO, we shouldn’t, that will be a big regression
>>> on -Warray-bounds, right?
>> 
>> Yes, it would disable -Warray-bounds in the cases where it warns
>> for past-the-end accesses to trailing arrays with two or more
>> elements.  Diagnosing those has historically (i.e., before recent
>> changes) been a design goal.
>> 
>>> 
>>> Question 2:  when -fstrict-flex-arrays=N1 and -Warray-bounds=N2 present at
>>> the same time,
>>>  Which one has higher priority? N1 or N2?
>>> 
>>> -fstrict-flex-arrays=N1 controls how the compiler code generation treats the
>>> trailing arrays as FAMs, it seems
>>> reasonable to give higher priority to N1,
>> 
>> I tend to agree.  In other words, set N2' = min(N1, N2).
> 
> Yes.  Or do nothing and treat them independently.

I prefer treating them independently. 

If there is no multiple levels of -Warray-bounds, it’s safe and reasonable to 
control -Warray-bounds with 
different levels of -fstrict-flex-arrays=N.  However, the current 
-Warray-bounds already has multiple levels which
have been exposed to and been used by the end users. Changing their behavior 
will impact the end-users.


> Can you check whether
> it's possible to distinguish -Warray-bounds from -Warray-bounds=N?

The current difference between -Warray-bounds and -Warray-bounds=2 is:  
-Warray-bounds=2 
will NOT treat 0-length arrays and 1-element arrays as FAMs. Therefore report 
out-of-bounds
 access to 0-length arrays or 1-element arrays.


>  I'd
> say that explicit -Warray-bounds=N should exactly get the documented
> set of diagnostics, independent of -fstrict-flex-arrays=N.

If we decide to make -fstrict-flex-arrays=N1 and -Warray-bounds=N2 
independently.
How about -fstrict-flex-array=N and -Wstringop-overflow (-Wstringop-overread, 
etc)? 
Shall we control -Wstringop-overflow with -fstrict-flex-array=N?  Or treat them 
independently?

Qing
> 
>>> However, then should we completely disable the level of -Warray-bounds
>>> N2 under such situation?
>>> 
>>> I really don’t know what’s the best way to handle the conflict  between N1
>>> and N2.
>>> 
>>> Can we completely cancel the 2 levels of -Warray-bounds, and always honor
>>> the level of -fstrict-flex-arrays?
>>> 
>>> Any comments or suggestion will be helpful.
>> 
>> The recent -fstrict-flex-array changes aside, IIRC, there's only
>> a subtle distinction between the two -Warray-bounds levels (since
>> level 1 started warning on a number of instances that only level
>> 2 used to diagnose a few releases ago).  I think that subset of
>> level 2 could be merged into level 1 without increasing the rate
>> of false positives.  Then level 2 could be assigned a new set of
>> potential problems to detect (such as past-the-end accesses to
>> trailing one-element arrays).
>> 
>> Martin
>> 
>> 
> 
> -- 
> Richard Biener 
> SUSE Software Solutions Germany GmbH, Frankenstrasse 146, 90461 Nuernberg,
> Germany; GF: Ivo Totev, Andrew Myers, Andrew McDonald, Boudien Moerman;
> HRB 36809 (AG Nuernberg)

Re: [PATCH] [X86_64]: Enable support for next generation AMD Zen4 CPU

2022-10-24 Thread Jan Hubička via Gcc-patches

On Mon, Oct 24, 2022 at 4:26 PM Alexander Monakov 
wrote:

> > > > This grew insn-automata.cc from 201502 lines to 639968 lines and the
> > > > build of the automata (genautomata) to several minutes in my dev
> tree.
> > >
> > > Yeah, in my unoptimized non-bootstrapped development tree genautomata
> > > now takes over 12 minutes on a fast box, that is simply not acceptable.
> >
> > Thank you for notifying us.
> >
> > tejassanjay.jo...@amd.com has posted a patch for review to fix this (as
> per Honza's comments).
> > Ref: https://gcc.gnu.org/pipermail/gcc-patches/2022-October/604144.html

This patch is OK

>
>
> By the way, it appears pre-existing znver[123] models are also causing
> some kind
> of combinatorial blow-up, but before znver4 it was not a blocking issue:
>
> https://gcc.gnu.org/bugzilla/show_bug.cgi?id=87832

It is really easy to make the DFA size grow if there are possibly many
instructions in the pipeline (as every possible state of a modelled pipeline
needs to be a new state of the automaton). This is essentially
depth_of_pipeline * number_of_units with additional states to represent
special instructions and this naturally keeps growing.

We could try to break the FP automata into multiple ones, but there are
instructions that can go down any pipe which makes this hard
or we can try to reduce the number of different reservation types (possibly by
breaking the automaton to znver1-3 and 4 or so).
With znver2 model I experimented with broken up version and common one and
ended up with smaller binary for combined one.

Honza

>
>
> Alexander
>

Re: [PATCH] c++: ICE with invalid structured bindings [PR107276]


On 10/21/22 19:29, Marek Polacek wrote:

This test ICEs in C++23 because we reach the new code in do_auto_deduction:

30468   if (cxx_dialect >= cxx23
30469   && context == adc_return_type
30470   && (!AUTO_IS_DECLTYPE (auto_node)
30471   || !unparenthesized_id_or_class_member_access_p (init))
30472   && (r = treat_lvalue_as_rvalue_p (maybe_undo_parenthesized_ref 
(init),
30473 /*return*/true)))

where 'init' is "VIEW_CONVERT_EXPR error (y)", and then the move
in treat_lvalue_as_rvalue_p returns error_mark_node whereupon
set_implicit_rvalue_p crashes.

I don't think such V_C_Es are useful so let's not create them.  But that
won't fix the ICE so I'm checking the return value of move.  A structured
bindings decl can have an error type, that is set in cp_finish_decomp:

  8908   TREE_TYPE (first) = error_mark_node;

therefore I think treat_lvalue_as_rvalue_p just needs to cope.

Bootstrapped/regtested on x86_64-pc-linux-gnu, ok for trunk?

PR c++/107276

gcc/cp/ChangeLog:

* typeck.cc (treat_lvalue_as_rvalue_p): Check the return value of move.

gcc/ChangeLog:

* tree.cc (maybe_wrap_with_location): Don't create a location wrapper
when the type is erroneous.

gcc/testsuite/ChangeLog:

* g++.dg/cpp2a/decomp4.C: New test.
---
  gcc/cp/typeck.cc | 7 ++-
  gcc/testsuite/g++.dg/cpp2a/decomp4.C | 8 
  gcc/tree.cc  | 3 ++-
  3 files changed, 16 insertions(+), 2 deletions(-)
  create mode 100644 gcc/testsuite/g++.dg/cpp2a/decomp4.C

diff --git a/gcc/cp/typeck.cc b/gcc/cp/typeck.cc
index 16e7d85793d..5ca191759f6 100644
--- a/gcc/cp/typeck.cc
+++ b/gcc/cp/typeck.cc
@@ -10726,7 +10726,12 @@ treat_lvalue_as_rvalue_p (tree expr, bool return_p)
if (DECL_CONTEXT (retval) != current_function_decl)
  return NULL_TREE;
if (return_p)
-return set_implicit_rvalue_p (move (expr));
+{
+  expr = move (expr);
+  if (expr == error_mark_node)
+   return NULL_TREE;
+  return set_implicit_rvalue_p (expr);
+}
  
/* if the operand of a throw-expression is a (possibly parenthesized)

   id-expression that names an implicitly movable entity whose scope does 
not
diff --git a/gcc/testsuite/g++.dg/cpp2a/decomp4.C 
b/gcc/testsuite/g++.dg/cpp2a/decomp4.C
new file mode 100644
index 000..28b3f172b53
--- /dev/null
+++ b/gcc/testsuite/g++.dg/cpp2a/decomp4.C
@@ -0,0 +1,8 @@
+// PR c++/107276
+// { dg-do run { target c++20 } }
+
+auto f(auto x) {
+  auto [y] = x; // { dg-error "cannot decompose" }
+  return y;
+}
+int i = f(0);
diff --git a/gcc/tree.cc b/gcc/tree.cc
index 81a6ceaf181..4e5b1df4d85 100644
--- a/gcc/tree.cc
+++ b/gcc/tree.cc
@@ -14352,7 +14352,8 @@ maybe_wrap_with_location (tree expr, location_t loc)
  
/* For now, don't add wrappers to exceptional tree nodes, to minimize

   any impact of the wrapper nodes.  */
-  if (EXCEPTIONAL_CLASS_P (expr))
+  if (EXCEPTIONAL_CLASS_P (expr)
+  || (TREE_TYPE (expr) && EXCEPTIONAL_CLASS_P (TREE_TYPE (expr


I think check error_operand_p instead; I don't think it makes sense to 
look for other exceptional nodes in TREE_TYPE.


Jason

RE: [PATCH] [X86_64]: Enable support for next generation AMD Zen4 CPU

2022-10-24 Thread Alexander Monakov

> > > This grew insn-automata.cc from 201502 lines to 639968 lines and the
> > > build of the automata (genautomata) to several minutes in my dev tree.
> >
> > Yeah, in my unoptimized non-bootstrapped development tree genautomata
> > now takes over 12 minutes on a fast box, that is simply not acceptable.
> 
> Thank you for notifying us.
> 
> tejassanjay.jo...@amd.com has posted a patch for review to fix this (as per 
> Honza's comments).
> Ref: https://gcc.gnu.org/pipermail/gcc-patches/2022-October/604144.html

By the way, it appears pre-existing znver[123] models are also causing some kind
of combinatorial blow-up, but before znver4 it was not a blocking issue:

https://gcc.gnu.org/bugzilla/show_bug.cgi?id=87832

Alexander

[PATCH] RISC-V: Fix typo.

2022-10-24 Thread juzhe . zhong

From: Ju-Zhe Zhong 

gcc/ChangeLog:

* config/riscv/riscv-modes.def (ADJUST_BYTESIZE): Fix typo.

---
 gcc/config/riscv/riscv-modes.def | 46 
 1 file changed, 23 insertions(+), 23 deletions(-)

diff --git a/gcc/config/riscv/riscv-modes.def b/gcc/config/riscv/riscv-modes.def
index 95f69e87e23..ea88442e117 100644
--- a/gcc/config/riscv/riscv-modes.def
+++ b/gcc/config/riscv/riscv-modes.def
@@ -71,29 +71,29 @@ ADJUST_BYTESIZE (VNx64BI, riscv_vector_chunks * 
riscv_bytes_per_vector_chunk);
 
 /*
| Mode| MIN_VLEN=32 | MIN_VLEN=32 | MIN_VLEN=64 | MIN_VLEN=64 |
-   | | LMUL|  SEW/LMUL   | LMUL| SEW/LMUL|
-   | VNx1QI  | MF4 |  32 | MF8 | 64  |
-   | VNx2QI  | MF2 |  16 | MF4 | 32  |
-   | VNx4QI  | M1  |  8  | MF2 | 16  |
-   | VNx8QI  | M2  |  4  | M1  | 8   |
-   | VNx16QI | M4  |  2  | M2  | 4   |
-   | VNx32QI | M8  |  1  | M4  | 2   |
-   | VNx64QI | N/A |  N/A| M8  | 1   |
-   | VNx1(HI|HF) | MF2 |  32 | MF4 | 64  |
-   | VNx2(HI|HF) | M1  |  16 | MF2 | 32  |
-   | VNx4(HI|HF) | M2  |  8  | M1  | 16  |
-   | VNx8(HI|HF) | M4  |  4  | M2  | 8   |
-   | VNx16(HI|HF)| M8  |  2  | M4  | 4   |
-   | VNx32(HI|HF)| N/A |  N/A| M8  | 2   |
-   | VNx1(SI|SF) | M1  |  32 | MF2 | 64  |
-   | VNx2(SI|SF) | M2  |  16 | M1  | 32  |
-   | VNx4(SI|SF) | M4  |  8  | M2  | 16  |
-   | VNx8(SI|SF) | M8  |  4  | M4  | 8   |
-   | VNx16(SI|SF)| N/A |  N/A| M8  | 4   |
-   | VNx1(DI|DF) | N/A |  N/A| M1  | 64  |
-   | VNx2(DI|DF) | N/A |  N/A| M2  | 32  |
-   | VNx4(DI|DF) | N/A |  N/A| M4  | 16  |
-   | VNx8(DI|DF) | N/A |  N/A| M8  | 8   |
+   | | LMUL| SEW/LMUL| LMUL| SEW/LMUL|
+   | VNx1QI  | MF4 | 32  | MF8 | 64  |
+   | VNx2QI  | MF2 | 16  | MF4 | 32  |
+   | VNx4QI  | M1  | 8   | MF2 | 16  |
+   | VNx8QI  | M2  | 4   | M1  | 8   |
+   | VNx16QI | M4  | 2   | M2  | 4   |
+   | VNx32QI | M8  | 1   | M4  | 2   |
+   | VNx64QI | N/A | N/A | M8  | 1   |
+   | VNx1(HI|HF) | MF2 | 32  | MF4 | 64  |
+   | VNx2(HI|HF) | M1  | 16  | MF2 | 32  |
+   | VNx4(HI|HF) | M2  | 8   | M1  | 16  |
+   | VNx8(HI|HF) | M4  | 4   | M2  | 8   |
+   | VNx16(HI|HF)| M8  | 2   | M4  | 4   |
+   | VNx32(HI|HF)| N/A | N/A | M8  | 2   |
+   | VNx1(SI|SF) | M1  | 32  | MF2 | 64  |
+   | VNx2(SI|SF) | M2  | 16  | M1  | 32  |
+   | VNx4(SI|SF) | M4  | 8   | M2  | 16  |
+   | VNx8(SI|SF) | M8  | 4   | M4  | 8   |
+   | VNx16(SI|SF)| N/A | N/A | M8  | 4   |
+   | VNx1(DI|DF) | N/A | N/A | M1  | 64  |
+   | VNx2(DI|DF) | N/A | N/A | M2  | 32  |
+   | VNx4(DI|DF) | N/A | N/A | M4  | 16  |
+   | VNx8(DI|DF) | N/A | N/A | M8  | 8   |
 */
 
 /* Define RVV modes whose sizes are multiples of 64-bit chunks.  */
-- 
2.36.1

Re: [RFC] how to handle the combination of -fstrict-flex-arrays + -Warray-bounds

2022-10-24 Thread Qing Zhao via Gcc-patches



> On Oct 22, 2022, at 12:54 PM, Martin Sebor  wrote:
> 
> On 10/21/22 09:29, Qing Zhao wrote:
>> Hi,
>> (FAM below refers to Flexible Array Members):
>> I need inputs on  how to handle the combination of -fstrict-flex-arrays + 
>> -Warray-bounds.
>> Our initial goal is to update -Warray-bounds with multiple levels of 
>> -fstrict-flex-arrays=N
>> to issue warnings according to the different levels of “N”.
>> However, after detailed study, I found that this goal was very hard to be 
>> achieved.
>> 1. -fstrict-flex-arrays and its levels
>> The new option -fstrict-flex-arrays has 4 levels:
>> level   trailing arrays
>> treated as FAM
>>   0 [],[0],[1],[n]   the default without option
>>   1 [],[0],[1]
>>   2 [],[0]
>>   3 []   the default when option specified 
>> without value
>> 2. -Warray-bounds and its levels
>> The option -Warray-bounds currently has 2 levels:
>> level   trailing arrays
>> treated as FAM
>>   1 [],[0],[1]the default when option specified 
>> without value
>>   2 []   
>> i.e,
>> When -Warray-bounds=1, it treats [],[0],[1] as FAM, the same level as 
>> -fstrict-flex-arrays=1;
>> When -Warray-bounds=2, it only treat [] as FAM, the same level as 
>> -fstrict-flex-arrays=3;
>> 3. How to handle the combination of  -fstrict-flex-arrays and -Warray-bounds?
>> Question 1:  when -fstrict-flex-arrays does not present, the default is 
>> -fstrict-flex-arrays=0,
>> which treats [],[0],[1],[n] as FAM, so should we update 
>> the default behavior
>> of -Warray-bounds to treat any trailing array [n] as 
>> FAMs?
>> My immediate answer to Q1 is NO, we shouldn’t, that will be a big regression 
>> on -Warray-bounds, right?
> 
> Yes, it would disable -Warray-bounds in the cases where it warns
> for past-the-end accesses to trailing arrays with two or more
> elements.  Diagnosing those has historically (i.e., before recent
> changes) been a design goal.
> 
>> Question 2:  when -fstrict-flex-arrays=N1 and -Warray-bounds=N2 present at 
>> the same time,
>>  Which one has higher priority? N1 or N2?
>> -fstrict-flex-arrays=N1 controls how the compiler code generation treats the 
>> trailing arrays as FAMs, it seems
>> reasonable to give higher priority to N1,
> 
> I tend to agree.  In other words, set N2' = min(N1, N2).
> 
>> However, then should we completely disable the level of -Warray-bounds
>> N2 under such situation?
>> I really don’t know what’s the best way to handle the conflict  between N1 
>> and N2.
>> Can we completely cancel the 2 levels of -Warray-bounds, and always honor 
>> the level of -fstrict-flex-arrays?
>> Any comments or suggestion will be helpful.
> 
> The recent -fstrict-flex-array changes aside, IIRC, there's only
> a subtle distinction between the two -Warray-bounds levels (since
> level 1 started warning on a number of instances that only level
> 2 used to diagnose a few releases ago).  

From the doc: (and I also checked the source code)

 -Warray-bounds=2
This warning level also warns about out of bounds accesses to trailing
struct members of one-element array types (@pxref{Zero Length}) and about
the intermediate results of pointer arithmetic that may yield out of bounds
values.  This warning level may give a larger number of false positives and
is deactivated by default.

As I understand, -Warray-bounds=1 (i.e., -Warray-bounds) will report 
out-of-bounds access to trailing arrays with two or more elements, and treat 
trailing arrays with 0 or 1 as FAMs;
-Warray-bounds=2 will report out-of-bounds access to trailing arrays with 0 or 
1 elements in addition to -Warray-bounds=1. 

Is the above understanding correct?


> I think that subset of
> level 2 could be merged into level 1 without increasing the rate
> of false positives.  Then level 2 could be assigned a new set of
> potential problems to detect (such as past-the-end accesses to
> trailing one-element arrays).

If I understand correctly, Current Level 2 already include warning about 
past-the-end accesses to trailing one-element arrays (and also 0-length 
arrays).  

Qing

> 
> Martin

Re: [PATCH] c++: Fix up constexpr handling of char/signed char/short pre/post inc/decrement [PR105774]


On 10/24/22 03:27, Jakub Jelinek wrote:

Hi!

signed char, char or short int pre/post inc/decrement are represented by
normal {PRE,POST}_{INC,DEC}REMENT_EXPRs in the FE and only gimplification
ensures that the {PLUS,MINUS}_EXPR is done in unsigned version of those
types:
 case PREINCREMENT_EXPR:
 case PREDECREMENT_EXPR:
 case POSTINCREMENT_EXPR:
 case POSTDECREMENT_EXPR:
   {
 tree type = TREE_TYPE (TREE_OPERAND (*expr_p, 0));
 if (INTEGRAL_TYPE_P (type) && c_promoting_integer_type_p (type))
   {
 if (!TYPE_OVERFLOW_WRAPS (type))
   type = unsigned_type_for (type);
 return gimplify_self_mod_expr (expr_p, pre_p, post_p, 1, type);
   }
 break;
   }
This means during constant evaluation we need to do it similarly (either
using unsigned_type_for or using widening to integer_type_node).
The following patch does the latter.

Bootstrapped/regtested on x86_64-linux and i686-linux, ok for trunk?


OK.


2022-10-24  Jakub Jelinek  

PR c++/105774
* constexpr.cc (cxx_eval_increment_expression): For signed types
that promote to int, evaluate PLUS_EXPR or MINUS_EXPR in int type.

* g++.dg/cpp1y/constexpr-105774.C: New test.

--- gcc/cp/constexpr.cc.jj  2022-10-21 18:04:47.869797312 +0200
+++ gcc/cp/constexpr.cc 2022-10-23 18:43:27.003390282 +0200
@@ -6234,6 +6234,18 @@ cxx_eval_increment_expression (const con
offset = fold_build1 (NEGATE_EXPR, TREE_TYPE (offset), offset);
mod = fold_build2 (POINTER_PLUS_EXPR, type, val, offset);
  }
+  else if (c_promoting_integer_type_p (type)
+  && !TYPE_UNSIGNED (type)
+  && TYPE_PRECISION (type) < TYPE_PRECISION (integer_type_node))
+{
+  offset = fold_convert (integer_type_node, offset);
+  mod = fold_convert (integer_type_node, val);
+  tree t = fold_build2 (inc ? PLUS_EXPR : MINUS_EXPR, integer_type_node,
+   mod, offset);
+  mod = fold_convert (type, t);
+  if (TREE_OVERFLOW_P (mod) && !TREE_OVERFLOW_P (t))
+   TREE_OVERFLOW (mod) = false;
+}
else
  mod = fold_build2 (inc ? PLUS_EXPR : MINUS_EXPR, type, val, offset);
if (!ptr)
--- gcc/testsuite/g++.dg/cpp1y/constexpr-105774.C.jj2022-10-23 
18:44:15.587729613 +0200
+++ gcc/testsuite/g++.dg/cpp1y/constexpr-105774.C   2022-10-23 
18:33:54.754170726 +0200
@@ -0,0 +1,15 @@
+// PR c++/105774
+// { dg-do compile { target c++14 } }
+
+constexpr signed char
+foo ()
+{
+#if __SCHAR_MAX__ < __INT_MAX__
+  signed char x = __SCHAR_MAX__;
+#else
+  signed char x = 0;
+#endif
+  return ++x;
+}
+
+constexpr auto a = foo ();

Jakub

Re: [Patch][v5] libgomp/nvptx: Prepare for reverse-offload callback handling

On Wed, Oct 12, 2022 at 10:55:26AM +0200, Tobias Burnus wrote:
> libgomp/nvptx: Prepare for reverse-offload callback handling
> 
> This patch adds a stub 'gomp_target_rev' in the host's target.c, which will
> later handle the reverse offload.
> For nvptx, it adds support for forwarding the offload gomp_target_ext call
> to the host by setting values in a struct on the device and querying it on
> the host - invoking gomp_target_rev on the result.
> 
> include/ChangeLog:
> 
>   * cuda/cuda.h (enum CUdevice_attribute): Add
>   CU_DEVICE_ATTRIBUTE_UNIFIED_ADDRESSING.
>   (CU_MEMHOSTALLOC_DEVICEMAP): Define.
>   (cuMemHostAlloc): Add prototype.
> 
> libgomp/ChangeLog:
> 
>   * config/nvptx/icv-device.c (GOMP_DEVICE_NUM_VAR): Remove
>   'static' for this variable.
>   * config/nvptx/libgomp-nvptx.h: New file.
>   * config/nvptx/target.c: Include it.
>   (GOMP_ADDITIONAL_ICVS): Declare extern var.
>   (GOMP_REV_OFFLOAD_VAR): Declare var.
>   (GOMP_target_ext): Handle reverse offload.
>   * libgomp-plugin.h (GOMP_PLUGIN_target_rev): New prototype.
>   * libgomp-plugin.c (GOMP_PLUGIN_target_rev): New, call ...
>   * target.c (gomp_target_rev): ... this new stub function.
>   * libgomp.h (gomp_target_rev): Declare.
>   * libgomp.map (GOMP_PLUGIN_1.4): New; add GOMP_PLUGIN_target_rev.
>   * plugin/cuda-lib.def (cuMemHostAlloc): Add.
>   * plugin/plugin-nvptx.c: Include libgomp-nvptx.h.
>   (struct ptx_device): Add rev_data member. 
>   (nvptx_open_device): Remove async_engines query, last used in
>   r10-304-g1f4c5b9b; add unified-address assert check.
>   (GOMP_OFFLOAD_get_num_devices): Claim unified address
>   support.
>   (GOMP_OFFLOAD_load_image): Free rev_fn_table if no
>   offload functions exist. Make offload var available
>   on host and device.
>   (rev_off_dev_to_host_cpy, rev_off_host_to_dev_cpy): New.
>   (GOMP_OFFLOAD_run): Handle reverse offload.

Ok, thanks.

Jakub

Re: Ping (c,c++): Handling of main() function for freestanding


On 10/23/22 07:54, Arsen Arsenović via Gcc-patches wrote:

On Friday, 21 October 2022 23:02:02 CEST Joseph Myers wrote:

I have no objections to the C changes.


Great!  Thanks for the review.  I don't have push rights currently, so I
must ask that someone else pushes this patch for me.

Have a great day!


Done, thanks.

Jason

Re: [PATCH] [PR tree-optimization/107355] Handle NANs in abs range-op entry.

2022-10-24 Thread Aldy Hernandez via Gcc-patches

Tested on x86-64 Linux.

Pushed.

On Mon, Oct 24, 2022 at 3:33 PM Aldy Hernandez  wrote:
>
> The problem here is that the threader is coming up with a path where
> the only valid result is a NAN.  When the abs op1_range entry is
> trying to add the negative possibility, it attempts to get the bounds
> of the working range.  NANs don't have bounds so they need to be
> special cased.
>
> PR tree-optimization/107355
>
> gcc/ChangeLog:
>
> * range-op-float.cc (foperator_abs::op1_range): Handle NAN.
>
> gcc/testsuite/ChangeLog:
>
> * gcc.dg/tree-ssa/pr107355.c: New test.
> ---
>  gcc/range-op-float.cc|  9 +
>  gcc/testsuite/gcc.dg/tree-ssa/pr107355.c | 13 +
>  2 files changed, 22 insertions(+)
>  create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/pr107355.c
>
> diff --git a/gcc/range-op-float.cc b/gcc/range-op-float.cc
> index 8777bc70d71..04208c88dd1 100644
> --- a/gcc/range-op-float.cc
> +++ b/gcc/range-op-float.cc
> @@ -1269,6 +1269,15 @@ foperator_abs::op1_range (frange , tree type,
>positives.update_nan (/*sign=*/false);
>positives.intersect (lhs);
>r = positives;
> +  // Add -NAN if relevant.
> +  if (r.maybe_isnan ())
> +{
> +  frange neg_nan;
> +  neg_nan.set_nan (type, true);
> +  r.union_ (neg_nan);
> +}
> +  if (r.known_isnan ())
> +return true;
>// Then add the negative of each pair:
>// ABS(op1) = [5,20] would yield op1 => [-20,-5][5,20].
>r.union_ (frange (type,
> diff --git a/gcc/testsuite/gcc.dg/tree-ssa/pr107355.c 
> b/gcc/testsuite/gcc.dg/tree-ssa/pr107355.c
> new file mode 100644
> index 000..40796344bfb
> --- /dev/null
> +++ b/gcc/testsuite/gcc.dg/tree-ssa/pr107355.c
> @@ -0,0 +1,13 @@
> +// { dg-do compile }
> +// { dg-options "-O2 -fno-guess-branch-probability 
> -fsanitize=float-cast-overflow --param=max-jump-thread-duplication-stmts=240" 
> }
> +
> +float f;
> +
> +void
> +foo (double d)
> +{
> +  (char) f;
> +  long l = __builtin_fabs (d);
> +  (char) f;
> +  (long) d;
> +}
> --
> 2.37.3
>

[PATCH] [PR tree-optimization/107355] Handle NANs in abs range-op entry.

2022-10-24 Thread Aldy Hernandez via Gcc-patches

The problem here is that the threader is coming up with a path where
the only valid result is a NAN.  When the abs op1_range entry is
trying to add the negative possibility, it attempts to get the bounds
of the working range.  NANs don't have bounds so they need to be
special cased.

PR tree-optimization/107355

gcc/ChangeLog:

* range-op-float.cc (foperator_abs::op1_range): Handle NAN.

gcc/testsuite/ChangeLog:

* gcc.dg/tree-ssa/pr107355.c: New test.
---
 gcc/range-op-float.cc|  9 +
 gcc/testsuite/gcc.dg/tree-ssa/pr107355.c | 13 +
 2 files changed, 22 insertions(+)
 create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/pr107355.c

diff --git a/gcc/range-op-float.cc b/gcc/range-op-float.cc
index 8777bc70d71..04208c88dd1 100644
--- a/gcc/range-op-float.cc
+++ b/gcc/range-op-float.cc
@@ -1269,6 +1269,15 @@ foperator_abs::op1_range (frange , tree type,
   positives.update_nan (/*sign=*/false);
   positives.intersect (lhs);
   r = positives;
+  // Add -NAN if relevant.
+  if (r.maybe_isnan ())
+{
+  frange neg_nan;
+  neg_nan.set_nan (type, true);
+  r.union_ (neg_nan);
+}
+  if (r.known_isnan ())
+return true;
   // Then add the negative of each pair:
   // ABS(op1) = [5,20] would yield op1 => [-20,-5][5,20].
   r.union_ (frange (type,
diff --git a/gcc/testsuite/gcc.dg/tree-ssa/pr107355.c 
b/gcc/testsuite/gcc.dg/tree-ssa/pr107355.c
new file mode 100644
index 000..40796344bfb
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/tree-ssa/pr107355.c
@@ -0,0 +1,13 @@
+// { dg-do compile }
+// { dg-options "-O2 -fno-guess-branch-probability 
-fsanitize=float-cast-overflow --param=max-jump-thread-duplication-stmts=240" }
+
+float f;
+
+void
+foo (double d)
+{
+  (char) f;
+  long l = __builtin_fabs (d);
+  (char) f;
+  (long) d;
+}
-- 
2.37.3

Re: vect: Make vect_check_gather_scatter reject offsets that aren't multiples of BITS_PER_UNIT [PR107346]

On Mon, 24 Oct 2022, Andre Vieira (lists) wrote:

> 
> On 24/10/2022 13:46, Richard Biener wrote:
> > On Mon, 24 Oct 2022, Andre Vieira (lists) wrote:
> >
> >> On 24/10/2022 08:17, Richard Biener wrote:
> >>> Can you check why vect_find_stmt_data_reference doesn't trip on the
> >>>
> >>> if (TREE_CODE (DR_REF (dr)) == COMPONENT_REF
> >>> && DECL_BIT_FIELD (TREE_OPERAND (DR_REF (dr), 1)))
> >>>   {
> >>> free_data_ref (dr);
> >>> return opt_result::failure_at (stmt,
> >>>"not vectorized:"
> >>>" statement is an unsupported"
> >>>" bitfield access %G", stmt);
> >>>   }
> >> It used to, which is why this test didn't trigger the error before my
> >> patch,
> >> but we lower it to BIT_FIELD_REFs in ifcvt now so it is no longer a
> >> DECL_BIT_FIELD.
> >>
> >> But that is a red-herring, if you change the test structure's 'type Int24
> >> is
> >> mod 2**24;' to 'type Int24 is mod 2**32;', thus making the field we access
> >> a
> >> normal 32-bit integer, the field no longer is a DECL_BIT_FIELD and thus my
> >> lowering does nothing. However, you will still get the failure because the
> >> field before it is a packed 4-bit field, making the offset to the field we
> >> are
> >> accessing less than BITS_PER_UNIT.
> > Hmm, so the _intent_ of DECL_BIT_FIELD_REPRESENTATIVE is to definitely
> > _not_ be a DECL_BIT_FIELD (well, that's the whole point!).   So this
> > shows an issue with setting up DECL_BIT_FIELD_REPRESENTATIVE?  Of course
> > for a type with an alignment less than BITS_PER_UNIT (is StructB actually
> > such a type?) there cannot be a representative that isn't, so maybe
> > we should then set DECL_BIT_FIELD on it with a condition like Eric
> > mentions?
> I could do this, but it would not resolve the latent issue as I could still
> reproduce it without using any of the bitfield lowering code, see below.
> >
> >>> ?  I think we should amend this check and I guess that
> >>> checking multiple_p on DECL_FIELD_BIT_OFFSET should be enough?
> >> That won't work either, unless we do the same walk-through the full access
> >> as
> >> we do in get_inner_reference.
> > I suppose we should not "if-convert" bit field accesses with a
> > DECL_BIT_FIELD representative.  There isn't any benefit doing that
> > (not for general bitfield lowering either).
> Changing if-convert would merely change this testcase but we could still
> trigger using a different structure type, changing the size of Int24 to 32
> bits rather than 24:
> package Loop_Optimization23_Pkg is
>   type Nibble is mod 2**4;
>   type Int24  is mod 2**32;  -- Changed this from 24->32
>   type StructA is record
>     a : Nibble;
>     b : Int24;
>   end record;
>   pragma Pack(StructA);
>   type StructB is record
>     a : Nibble;
>     b : StructA;
>   end record;
>   pragma Pack(StructB);
>   type ArrayOfStructB is array(0..100) of StructB;
>   procedure Foo (X : in out ArrayOfStructB);
> end Loop_Optimization23_Pkg;
> 
> This would yield a DR_REF (dr): (*x_7(D))[_1].b.b  where the last 'b' isn't a
> DECL_BIT_FIELD anymore, but the first one still is and still has the
> non-multiple of BITS_PER_UNIT offset. Thus passing the
> vect_find_stmt_data_reference check and triggering the
> vect_check_gather_scatter failure. So unless we go and make sure we always set
> the DECL_BIT_FIELD on all subsequent accesses of a DECL_BIT_FIELD 'struct'
> (which is odd enough on its own) then we are better off catching the issue in
> vect_check_gather_scatter ?

But it's not only an issue with scatter-gather, other load/store handling
assumes it can create a pointer to the start of the access and thus
requires BITS_PER_UNIT alignment for each of them.  So we need to fail
at data-ref analysis somehow.

Richard.

Re: vect: Make vect_check_gather_scatter reject offsets that aren't multiples of BITS_PER_UNIT [PR107346]

2022-10-24 Thread Andre Vieira (lists) via Gcc-patches




On 24/10/2022 13:46, Richard Biener wrote:

On Mon, 24 Oct 2022, Andre Vieira (lists) wrote:


On 24/10/2022 08:17, Richard Biener wrote:

Can you check why vect_find_stmt_data_reference doesn't trip on the

if (TREE_CODE (DR_REF (dr)) == COMPONENT_REF
&& DECL_BIT_FIELD (TREE_OPERAND (DR_REF (dr), 1)))
  {
free_data_ref (dr);
return opt_result::failure_at (stmt,
   "not vectorized:"
   " statement is an unsupported"
   " bitfield access %G", stmt);
  }

It used to, which is why this test didn't trigger the error before my patch,
but we lower it to BIT_FIELD_REFs in ifcvt now so it is no longer a
DECL_BIT_FIELD.

But that is a red-herring, if you change the test structure's 'type Int24 is
mod 2**24;' to 'type Int24 is mod 2**32;', thus making the field we access a
normal 32-bit integer, the field no longer is a DECL_BIT_FIELD and thus my
lowering does nothing. However, you will still get the failure because the
field before it is a packed 4-bit field, making the offset to the field we are
accessing less than BITS_PER_UNIT.

Hmm, so the _intent_ of DECL_BIT_FIELD_REPRESENTATIVE is to definitely
_not_ be a DECL_BIT_FIELD (well, that's the whole point!).   So this
shows an issue with setting up DECL_BIT_FIELD_REPRESENTATIVE?  Of course
for a type with an alignment less than BITS_PER_UNIT (is StructB actually
such a type?) there cannot be a representative that isn't, so maybe
we should then set DECL_BIT_FIELD on it with a condition like Eric
mentions?
I could do this, but it would not resolve the latent issue as I could 
still reproduce it without using any of the bitfield lowering code, see 
below.



?  I think we should amend this check and I guess that
checking multiple_p on DECL_FIELD_BIT_OFFSET should be enough?

That won't work either, unless we do the same walk-through the full access as
we do in get_inner_reference.

I suppose we should not "if-convert" bit field accesses with a
DECL_BIT_FIELD representative.  There isn't any benefit doing that
(not for general bitfield lowering either).
Changing if-convert would merely change this testcase but we could still 
trigger using a different structure type, changing the size of Int24 to 
32 bits rather than 24:

package Loop_Optimization23_Pkg is
  type Nibble is mod 2**4;
  type Int24  is mod 2**32;  -- Changed this from 24->32
  type StructA is record
    a : Nibble;
    b : Int24;
  end record;
  pragma Pack(StructA);
  type StructB is record
    a : Nibble;
    b : StructA;
  end record;
  pragma Pack(StructB);
  type ArrayOfStructB is array(0..100) of StructB;
  procedure Foo (X : in out ArrayOfStructB);
end Loop_Optimization23_Pkg;

This would yield a DR_REF (dr): (*x_7(D))[_1].b.b  where the last 'b' 
isn't a DECL_BIT_FIELD anymore, but the first one still is and still has 
the non-multiple of BITS_PER_UNIT offset. Thus passing the 
vect_find_stmt_data_reference check and triggering the 
vect_check_gather_scatter failure. So unless we go and make sure we 
always set the DECL_BIT_FIELD on all subsequent accesses of a 
DECL_BIT_FIELD 'struct' (which is odd enough on its own) then we are 
better off catching the issue in vect_check_gather_scatter ?

Re: [Patch] OpenMP: Fix reverse offload GOMP_TARGET_REV IFN corner cases [PR107236]

On Tue, Oct 18, 2022 at 09:27:04PM +0200, Tobias Burnus wrote:
> The cgraph_node::create_clone issue is exposed with -O2 for the existing
> libgomp.fortran/reverse-offload-1.f90.
> 
> omp-offload.cc
> 
>   PR middle-end/107236
> 
> gcc/ChangeLog:
>   * omp-expand.cc (expand_omp_target): Set calls_declare_variant_alt
>   in DECL_CONTEXT and not to cfun->decl.
>   * cgraphclones.cc (cgraph_node::create_clone): Copy also the
>   node's calls_declare_variant_alt value.
> 
> gcc/testsuite/ChangeLog:
>   * gfortran.dg/gomp/target-device-ancestor-6.f90: New test.

LGTM, thanks.

Jakub

Re: vect: Fix wrong shift_n after widening on BE [PR107338]

On Mon, Oct 24, 2022 at 12:43 PM Kewen.Lin  wrote:
>
> Hi,
>
> As PR107338 shows, with the use of widening loads, the
> container_type can become a wider type, it causes us to
> get wrong shift_n since the BIT_FIELD_REF offset actually
> becomes bigger on BE.  Taking the case in PR107338 as
> example, at the beginning the container type is short and
> BIT_FIELD_REF offset is 8 and size is 4, with unpacking to
> wider type int, the high 16 bits are zero, by viewing it
> as type int, its offset actually becomes 24.  So the
> shift_n should be 4 (32 - 24 - 4) instead of 20 (32 - 8
> - 4).
>
> I noticed that if we move shift_n calculation early
> before the adjustments for widening loads (container type
> change), it's based on all the stuffs of the original
> container, the shift_n calculated there is exactly what
> we want, it can be independent of widening.  Besides, I
> add prec adjustment together with the current adjustments
> for widening loads, although prec's subsequent uses don't
> require this change for now, since the container type gets
> changed, we should keep the corresponding prec consistent.
>
> Bootstrapped and regtested on x86_64-redhat-linux,
> aarch64-linux-gnu, powerpc64-linux-gnu P7 and P8 and
> powerpc64le-linux-gnu P9 and P10.
>
> Is it ok for trunk?

OK.

Richard.

> BR,
> Kewen
> -
>
> PR tree-optimization/107338
>
> gcc/ChangeLog:
>
> * tree-vect-patterns.cc (vect_recog_bitfield_ref_pattern): Move
> shift_n calculation before the adjustments for widening loads.
> ---
>  gcc/tree-vect-patterns.cc | 17 +++--
>  1 file changed, 11 insertions(+), 6 deletions(-)
>
> diff --git a/gcc/tree-vect-patterns.cc b/gcc/tree-vect-patterns.cc
> index 777ba2f5903..01094e8cb86 100644
> --- a/gcc/tree-vect-patterns.cc
> +++ b/gcc/tree-vect-patterns.cc
> @@ -1925,6 +1925,16 @@ vect_recog_bitfield_ref_pattern (vec_info *vinfo, 
> stmt_vec_info stmt_info,
>tree container_type = TREE_TYPE (container);
>tree vectype = get_vectype_for_scalar_type (vinfo, container_type);
>
> +  /* Calculate shift_n before the adjustments for widening loads, otherwise
> + the container may change and we have to consider offset change for
> + widening loads on big endianness.  The shift_n calculated here can be
> + independent of widening.  */
> +  unsigned HOST_WIDE_INT shift_n = bit_field_offset (bf_ref).to_constant ();
> +  unsigned HOST_WIDE_INT mask_width = bit_field_size (bf_ref).to_constant ();
> +  unsigned HOST_WIDE_INT prec = tree_to_uhwi (TYPE_SIZE (container_type));
> +  if (BYTES_BIG_ENDIAN)
> +shift_n = prec - shift_n - mask_width;
> +
>/* We move the conversion earlier if the loaded type is smaller than the
>   return type to enable the use of widening loads.  */
>if (TYPE_PRECISION (TREE_TYPE (container)) < TYPE_PRECISION (ret_type)
> @@ -1935,6 +1945,7 @@ vect_recog_bitfield_ref_pattern (vec_info *vinfo, 
> stmt_vec_info stmt_info,
>NOP_EXPR, container);
>container = gimple_get_lhs (pattern_stmt);
>container_type = TREE_TYPE (container);
> +  prec = tree_to_uhwi (TYPE_SIZE (container_type));
>vectype = get_vectype_for_scalar_type (vinfo, container_type);
>append_pattern_def_seq (vinfo, stmt_info, pattern_stmt, vectype);
>  }
> @@ -1953,12 +1964,6 @@ vect_recog_bitfield_ref_pattern (vec_info *vinfo, 
> stmt_vec_info stmt_info,
> shift_first = false;
>  }
>
> -  unsigned HOST_WIDE_INT shift_n = bit_field_offset (bf_ref).to_constant ();
> -  unsigned HOST_WIDE_INT mask_width = bit_field_size (bf_ref).to_constant ();
> -  unsigned HOST_WIDE_INT prec = tree_to_uhwi (TYPE_SIZE (container_type));
> -  if (BYTES_BIG_ENDIAN)
> -shift_n = prec - shift_n - mask_width;
> -
>/* If we don't have to shift we only generate the mask, so just fix the
>   code-path to shift_first.  */
>if (shift_n == 0)
> --
> 2.35.4

Re: vect: Make vect_check_gather_scatter reject offsets that aren't multiples of BITS_PER_UNIT [PR107346]

On Mon, 24 Oct 2022, Andre Vieira (lists) wrote:

> 
> On 24/10/2022 08:17, Richard Biener wrote:
> >
> > Can you check why vect_find_stmt_data_reference doesn't trip on the
> >
> >if (TREE_CODE (DR_REF (dr)) == COMPONENT_REF
> >&& DECL_BIT_FIELD (TREE_OPERAND (DR_REF (dr), 1)))
> >  {
> >free_data_ref (dr);
> >return opt_result::failure_at (stmt,
> >   "not vectorized:"
> >   " statement is an unsupported"
> >   " bitfield access %G", stmt);
> >  }
> 
> It used to, which is why this test didn't trigger the error before my patch,
> but we lower it to BIT_FIELD_REFs in ifcvt now so it is no longer a
> DECL_BIT_FIELD.
> 
> But that is a red-herring, if you change the test structure's 'type Int24 is
> mod 2**24;' to 'type Int24 is mod 2**32;', thus making the field we access a
> normal 32-bit integer, the field no longer is a DECL_BIT_FIELD and thus my
> lowering does nothing. However, you will still get the failure because the
> field before it is a packed 4-bit field, making the offset to the field we are
> accessing less than BITS_PER_UNIT.

Hmm, so the _intent_ of DECL_BIT_FIELD_REPRESENTATIVE is to definitely
_not_ be a DECL_BIT_FIELD (well, that's the whole point!).   So this
shows an issue with setting up DECL_BIT_FIELD_REPRESENTATIVE?  Of course
for a type with an alignment less than BITS_PER_UNIT (is StructB actually
such a type?) there cannot be a representative that isn't, so maybe
we should then set DECL_BIT_FIELD on it with a condition like Eric
mentions?

> > ?  I think we should amend this check and I guess that
> > checking multiple_p on DECL_FIELD_BIT_OFFSET should be enough?
> That won't work either, unless we do the same walk-through the full access as
> we do in get_inner_reference.

I suppose we should not "if-convert" bit field accesses with a
DECL_BIT_FIELD representative.  There isn't any benefit doing that
(not for general bitfield lowering either).

Richard.

> Let me elaborate, the 'offending' stmt here is:
> _ifc__23 = (*x_7(D))[_1].b.D.3707;
> 
> And the struct in question is:
> package Loop_Optimization23_Pkg is
>   type Nibble is mod 2**4;
>   type Int24  is mod 2**24;
>   type StructA is record
>     a : Nibble;
>     b : Int24;
>   end record;
>   pragma Pack(StructA);
>   type StructB is record
>     a : Nibble;
>     b : StructA;
>   end record;
>   pragma Pack(StructB);
>   type ArrayOfStructB is array(0..100) of StructB;
>   procedure Foo (X : in out ArrayOfStructB);
> end Loop_Optimization23_Pkg;
> 
> That D.3707 is the 'container', i.e. the DECL_BIT_FIELD_REPRESENTATIVE of the
> original bitfield of type Int24.
> So in vect_find_stmt_data_reference , the dr is: (*x_7(D))[_1].b.D.3707 and
> TREE_OPERAND (DR_REF (dr), 1): D.3707,
> which has DECL_FIELD_BIT_OFFSET: 0
> 
> So that check would also pass. However, get_inner_reference, walks the full
> access and comes across '.b', the member access for StructA inside StructB,
> that has DECL_FIELD_BIT_OFFSET: 4
> Which is where we get into trouble. So to catch that here, we would need to do
> the same type of walking through all the member accesses, like
> get_inner_reference does.
> 
> 
> 

-- 
Richard Biener 
SUSE Software Solutions Germany GmbH, Frankenstrasse 146, 90461 Nuernberg,
Germany; GF: Ivo Totev, Andrew Myers, Andrew McDonald, Boudien Moerman;
HRB 36809 (AG Nuernberg)

Re: [PATCH] c++ modules: verify_type failure with typedef enum [PR106848]

2022-10-24 Thread Nathan Sidwell via Gcc-patches


On 10/21/22 09:11, Patrick Palka wrote:

On Fri, 21 Oct 2022, Nathan Sidwell wrote:




Thanks for the explanation, it's a situation I didn't anticipate and your fix
is good.  Could you add a comment about why you need to propagate the values
though?


Thanks a lot, will do.  Just to make sure since there are multiple
solutions proposed, do you prefer to go with
https://gcc.gnu.org/pipermail/gcc-patches/2022-October/603487.html
or
https://gcc.gnu.org/pipermail/gcc-patches/2022-October/603831.html ?

Both solutions fix the PR106848 issue (empty TYPE_MIN/MAX_VALUE on an
enum type variant), but the latter also fixes the related PR102600
(empty TYPE_MIN/MAX_VALUE on the main variant of an enum with no
enumerators).  (We could maybe even combine the two solutions: stream
TYPE_MIN/MAX_VALUE as part of ENUMERAL_TYPE, and also update TYPE_VALUES
of each variant during trees_in::read_enum_def)



Oh, let's go with the latter:

* module.cc (trees_out::core_vals): Stream TYPE_MAX_VALUE and
TYPE_MIN_VALUE of ENUMERAL_TYPE.
(trees_in::core_vals): Likewise.
(trees_out::write_enum_def): Don't stream them here.
(trees_in::read_enum_def): Likewise.

but, again, some comments -- at the new streaming point, and in the defn 
streamer where we no longer stream them.


thanks.





nathan






A somewhat orthogonal issue (that incidentally fixes this testcase) is
that we stream TYPE_MIN/MAX_VALUE only for enums with a definition, but
the frontend sets these fields even for opaque enums.  If we make sure
to stream these fields for all ENUMERAL_TYPEs, then we won't have to
worry about these fields being stale for variants that may have been
created before reading in the enum definition (their TYPE_VALUES field
will still be stale I guess, but verify_type doesn't worry about that
it seems, so we avoid the ICE).

patch to that effect is at
https://gcc.gnu.org/pipermail/gcc-patches/2022-October/603831.html



Richard.



 rest_of_type_compilation (type, DECL_NAMESPACE_SCOPE_P
(defn));
   }
diff --git a/gcc/testsuite/g++.dg/modules/enum-9_a.H
b/gcc/testsuite/g++.dg/modules/enum-9_a.H
new file mode 100644
index 000..fb7d10ad3b6
--- /dev/null
+++ b/gcc/testsuite/g++.dg/modules/enum-9_a.H
@@ -0,0 +1,5 @@
+// PR c++/106848
+// { dg-additional-options -fmodule-header }
+// { dg-module-cmi {} }
+
+typedef enum memory_order { memory_order_seq_cst } memory_order;
diff --git a/gcc/testsuite/g++.dg/modules/enum-9_b.C
b/gcc/testsuite/g++.dg/modules/enum-9_b.C
new file mode 100644
index 000..63e81675d0a
--- /dev/null
+++ b/gcc/testsuite/g++.dg/modules/enum-9_b.C
@@ -0,0 +1,6 @@
+// PR c++/106848
+// { dg-additional-options "-fmodules-ts -g" }
+
+import "enum-9_a.H";
+
+memory_order x = memory_order_seq_cst;
--
2.38.0.68.ge85701b4af













--
Nathan Sidwell






--
Nathan Sidwell

Re: [PATCH] Relax assertion in profile.cc

On Mon, Oct 24, 2022 at 10:26 AM Eric Botcazou via Gcc-patches
 wrote:
>
> Hi,
>
> this assertion in branch_prob:
>
>   if (bb == ENTRY_BLOCK_PTR_FOR_FN (cfun)->next_bb)
> {
>   location_t loc = DECL_SOURCE_LOCATION (current_function_decl);
>   gcc_checking_assert (!RESERVED_LOCATION_P (loc));
>
> had been correct until:
>
> 2021-08-11  Bernd Edlinger  
>
> PR debug/101598
> * gcc-interface/trans.c (Subprogram_Body_to_gnu): Set the
> DECL_SOURCE_LOCATION of DECL_IGNORED_P gnu_subprog_decl to
> UNKNOWN_LOCATION.
>
> was installed.
>
> Tested on x86-64/Linux, OK for mainline and 12 branch?

OK.

>
> 2022-10-24  Eric Botcazou  
>
> * profile.cc (branch_prob): Be prepared for ignored functions with
> DECL_SOURCE_LOCATION set to UNKNOWN_LOCATION.
>
>
> 2022-10-24  Eric Botcazou  
>
> * gnat.dg/specs/coverage1.ads: New test.
>
> --
> Eric Botcazou

Re: [PATCH] Add -gcodeview option

2022-10-24 Thread Martin Storsjö


On Mon, 24 Oct 2022, Mark Harmstone wrote:


Both current lld and the next version of ld have an option -pdb, which
creates a PDB file which Microsoft's debuggers can use. This patch adds
a -gcodeview option, which passes this to the linker.

I do intend to expand this so it also creates the .debug$S and .debug$T
sections which would make this useful - I submitted patches for this a
while back, but they need to be rewritten to parse the DWARF DIEs rather
than using debug_hooks.

Clang also has -gcodeview, but AFAICS only uses it for .debug$S and
.debug$T, and doesn't use it for linker options (though IMO it probably
should).


That's true - in Clang, this option doesn't affect linking, it only 
affects code generation.


(FWIW, if I understand it correctly, Clang also does support generating 
both DWARF and CodeView at the same time - I think that would require 
passing something like "-g -gdwarf-4 -gcodeview" at the same time - but I 
don't have experience with playing with such setups.)


Another vague oddity in how this option is handled in Clang, is that if I 
only pass "-gcodeview" to the compiler, it doesn't actually generate any 
debug info (it just changes preference, in case I would request debug info 
separately), while one has to pass e.g. "-g -gcodeview" for it to do 
what's expected. I'm not sure if this is the same with dwarf, or if 
passing "-gdwarf-4" is enough for actually enabling generating dwarf debug 
info too. In any case, I don't think this aspect needs to be matched 
closely (unless dwarf does the same), as any existing users of PDB 
generation do use "-g -gcodeview", so as long as that case works, there 
shouldn't be any interop issues.



---
gcc/common.opt  | 4 
gcc/doc/invoke.texi | 7 +++
gcc/gcc.cc  | 4 
gcc/opts.cc | 3 +++
4 files changed, 18 insertions(+)

@@ -4608,6 +4608,10 @@ driver_handle_option (struct gcc_options *opts,
  do_save = false;
  break;

+case OPT_gcodeview:
+  add_infile ("-pdb=", "*");
+  break;


Hmm, what does this end up passing to the linker in the end - does it just 
pass "-pdb="? (What does the "*" parameter do here?) If that's the case - 
that sounds reasonable - assuming that if a user passes an extra 
-Wl,--pdb,myspecificname.pdb, that would take precedence (i.e. be passed 
after the compiler's default one).


// Martin

vect: Fix wrong shift_n after widening on BE [PR107338]

2022-10-24 Thread Kewen.Lin via Gcc-patches

Hi,

As PR107338 shows, with the use of widening loads, the
container_type can become a wider type, it causes us to
get wrong shift_n since the BIT_FIELD_REF offset actually
becomes bigger on BE.  Taking the case in PR107338 as
example, at the beginning the container type is short and
BIT_FIELD_REF offset is 8 and size is 4, with unpacking to
wider type int, the high 16 bits are zero, by viewing it
as type int, its offset actually becomes 24.  So the
shift_n should be 4 (32 - 24 - 4) instead of 20 (32 - 8
- 4).

I noticed that if we move shift_n calculation early
before the adjustments for widening loads (container type
change), it's based on all the stuffs of the original
container, the shift_n calculated there is exactly what
we want, it can be independent of widening.  Besides, I
add prec adjustment together with the current adjustments
for widening loads, although prec's subsequent uses don't
require this change for now, since the container type gets
changed, we should keep the corresponding prec consistent.

Bootstrapped and regtested on x86_64-redhat-linux,
aarch64-linux-gnu, powerpc64-linux-gnu P7 and P8 and
powerpc64le-linux-gnu P9 and P10.

Is it ok for trunk?

BR,
Kewen
-

PR tree-optimization/107338

gcc/ChangeLog:

* tree-vect-patterns.cc (vect_recog_bitfield_ref_pattern): Move
shift_n calculation before the adjustments for widening loads.
---
 gcc/tree-vect-patterns.cc | 17 +++--
 1 file changed, 11 insertions(+), 6 deletions(-)

diff --git a/gcc/tree-vect-patterns.cc b/gcc/tree-vect-patterns.cc
index 777ba2f5903..01094e8cb86 100644
--- a/gcc/tree-vect-patterns.cc
+++ b/gcc/tree-vect-patterns.cc
@@ -1925,6 +1925,16 @@ vect_recog_bitfield_ref_pattern (vec_info *vinfo, 
stmt_vec_info stmt_info,
   tree container_type = TREE_TYPE (container);
   tree vectype = get_vectype_for_scalar_type (vinfo, container_type);

+  /* Calculate shift_n before the adjustments for widening loads, otherwise
+ the container may change and we have to consider offset change for
+ widening loads on big endianness.  The shift_n calculated here can be
+ independent of widening.  */
+  unsigned HOST_WIDE_INT shift_n = bit_field_offset (bf_ref).to_constant ();
+  unsigned HOST_WIDE_INT mask_width = bit_field_size (bf_ref).to_constant ();
+  unsigned HOST_WIDE_INT prec = tree_to_uhwi (TYPE_SIZE (container_type));
+  if (BYTES_BIG_ENDIAN)
+shift_n = prec - shift_n - mask_width;
+
   /* We move the conversion earlier if the loaded type is smaller than the
  return type to enable the use of widening loads.  */
   if (TYPE_PRECISION (TREE_TYPE (container)) < TYPE_PRECISION (ret_type)
@@ -1935,6 +1945,7 @@ vect_recog_bitfield_ref_pattern (vec_info *vinfo, 
stmt_vec_info stmt_info,
   NOP_EXPR, container);
   container = gimple_get_lhs (pattern_stmt);
   container_type = TREE_TYPE (container);
+  prec = tree_to_uhwi (TYPE_SIZE (container_type));
   vectype = get_vectype_for_scalar_type (vinfo, container_type);
   append_pattern_def_seq (vinfo, stmt_info, pattern_stmt, vectype);
 }
@@ -1953,12 +1964,6 @@ vect_recog_bitfield_ref_pattern (vec_info *vinfo, 
stmt_vec_info stmt_info,
shift_first = false;
 }

-  unsigned HOST_WIDE_INT shift_n = bit_field_offset (bf_ref).to_constant ();
-  unsigned HOST_WIDE_INT mask_width = bit_field_size (bf_ref).to_constant ();
-  unsigned HOST_WIDE_INT prec = tree_to_uhwi (TYPE_SIZE (container_type));
-  if (BYTES_BIG_ENDIAN)
-shift_n = prec - shift_n - mask_width;
-
   /* If we don't have to shift we only generate the mask, so just fix the
  code-path to shift_first.  */
   if (shift_n == 0)
--
2.35.4

Re: vect: Make vect_check_gather_scatter reject offsets that aren't multiples of BITS_PER_UNIT [PR107346]

2022-10-24 Thread Andre Vieira (lists) via Gcc-patches




On 24/10/2022 08:17, Richard Biener wrote:


Can you check why vect_find_stmt_data_reference doesn't trip on the

   if (TREE_CODE (DR_REF (dr)) == COMPONENT_REF
   && DECL_BIT_FIELD (TREE_OPERAND (DR_REF (dr), 1)))
 {
   free_data_ref (dr);
   return opt_result::failure_at (stmt,
  "not vectorized:"
  " statement is an unsupported"
  " bitfield access %G", stmt);
 }


It used to, which is why this test didn't trigger the error before my 
patch, but we lower it to BIT_FIELD_REFs in ifcvt now so it is no longer 
a DECL_BIT_FIELD.


But that is a red-herring, if you change the test structure's 'type 
Int24 is mod 2**24;' to 'type Int24 is mod 2**32;', thus making the 
field we access a normal 32-bit integer, the field no longer is a 
DECL_BIT_FIELD and thus my lowering does nothing. However, you will 
still get the failure because the field before it is a packed 4-bit 
field, making the offset to the field we are accessing less than 
BITS_PER_UNIT.



?  I think we should amend this check and I guess that
checking multiple_p on DECL_FIELD_BIT_OFFSET should be enough?
That won't work either, unless we do the same walk-through the full 
access as we do in get_inner_reference.


Let me elaborate, the 'offending' stmt here is:
_ifc__23 = (*x_7(D))[_1].b.D.3707;

And the struct in question is:
package Loop_Optimization23_Pkg is
  type Nibble is mod 2**4;
  type Int24  is mod 2**24;
  type StructA is record
    a : Nibble;
    b : Int24;
  end record;
  pragma Pack(StructA);
  type StructB is record
    a : Nibble;
    b : StructA;
  end record;
  pragma Pack(StructB);
  type ArrayOfStructB is array(0..100) of StructB;
  procedure Foo (X : in out ArrayOfStructB);
end Loop_Optimization23_Pkg;

That D.3707 is the 'container', i.e. the DECL_BIT_FIELD_REPRESENTATIVE 
of the original bitfield of type Int24.

So in vect_find_stmt_data_reference , the dr is: (*x_7(D))[_1].b.D.3707 and
TREE_OPERAND (DR_REF (dr), 1): D.3707,
which has DECL_FIELD_BIT_OFFSET: 0

So that check would also pass. However, get_inner_reference walks the 
full access and comes across '.b', the member access for StructA inside 
StructB, which has DECL_FIELD_BIT_OFFSET: 4.
That is where we get into trouble. So to catch that here, we would need 
to do the same type of walking through all the member accesses, like 
get_inner_reference does.

Re: [PATCH] RISC-V: Support --target-help for -mcpu/-mtune

2022-10-24 Thread Kito Cheng via Gcc-patches

committed.

On Fri, Sep 30, 2022 at 10:06 AM Kito Cheng  wrote:
>
> gcc/ChangeLog:
>
> * common/config/riscv/riscv-common.cc (riscv_tunes): New.
> (riscv_get_valid_option_values): New.
> (TARGET_GET_VALID_OPTION_VALUES): New.
> * config/riscv/riscv-cores.def (RISCV_TUNE): New, define options
> for tune here.
> (RISCV_CORE): Fix comment.
> * config/riscv/riscv.cc (riscv_tune_info_table): Move definition to
> riscv-cores.def.
> ---
>  gcc/common/config/riscv/riscv-common.cc | 46 +
>  gcc/config/riscv/riscv-cores.def| 35 ---
>  gcc/config/riscv/riscv.cc   |  9 ++---
>  3 files changed, 80 insertions(+), 10 deletions(-)
>
> diff --git a/gcc/common/config/riscv/riscv-common.cc 
> b/gcc/common/config/riscv/riscv-common.cc
> index c39ed2e2696..697bfe435c8 100644
> --- a/gcc/common/config/riscv/riscv-common.cc
> +++ b/gcc/common/config/riscv/riscv-common.cc
> @@ -224,6 +224,14 @@ static const riscv_cpu_info riscv_cpu_tables[] =
>  {NULL, NULL, NULL}
>  };
>
> +static const char *riscv_tunes[] =
> +{
> +#define RISCV_TUNE(TUNE_NAME, PIPELINE_MODEL, TUNE_INFO) \
> +TUNE_NAME,
> +#include "../../../config/riscv/riscv-cores.def"
> +NULL
> +};
> +
>  static const char *riscv_supported_std_ext (void);
>
>  static riscv_subset_list *current_subset_list = NULL;
> @@ -1683,6 +1691,41 @@ riscv_compute_multilib (
>  return xstrdup (multilib_infos[best_match_multi_lib].path.c_str ());
>  }
>
> +vec
> +riscv_get_valid_option_values (int option_code,
> +  const char *prefix ATTRIBUTE_UNUSED)
> +{
> +  vec v;
> +  v.create (0);
> +  opt_code opt = (opt_code) option_code;
> +
> +  switch (opt)
> +{
> +case OPT_mtune_:
> +  {
> +   const char **tune = &riscv_tunes[0];
> +   for (;*tune; ++tune)
> + v.safe_push (*tune);
> +
> +   const riscv_cpu_info *cpu_info = &riscv_cpu_tables[0];
> +   for (;cpu_info->name; ++cpu_info)
> + v.safe_push (cpu_info->name);
> +  }
> +  break;
> +case OPT_mcpu_:
> +  {
> +   const riscv_cpu_info *cpu_info = &riscv_cpu_tables[0];
> +   for (;cpu_info->name; ++cpu_info)
> + v.safe_push (cpu_info->name);
> +  }
> +  break;
> +default:
> +  break;
> +}
> +
> +  return v;
> +}
> +
>  #undef TARGET_COMPUTE_MULTILIB
>  #define TARGET_COMPUTE_MULTILIB riscv_compute_multilib
>  #endif
> @@ -1701,4 +1744,7 @@ static const struct default_options 
> riscv_option_optimization_table[] =
>  #undef TARGET_HANDLE_OPTION
>  #define TARGET_HANDLE_OPTION riscv_handle_option
>
> +#undef  TARGET_GET_VALID_OPTION_VALUES
> +#define TARGET_GET_VALID_OPTION_VALUES riscv_get_valid_option_values
> +
>  struct gcc_targetm_common targetm_common = TARGETM_COMMON_INITIALIZER;
> diff --git a/gcc/config/riscv/riscv-cores.def 
> b/gcc/config/riscv/riscv-cores.def
> index ecb5e213d98..b84ad999ac1 100644
> --- a/gcc/config/riscv/riscv-cores.def
> +++ b/gcc/config/riscv/riscv-cores.def
> @@ -17,19 +17,46 @@
> along with GCC; see the file COPYING3.  If not see
> .  */
>
> +/* This is a list of tune that implement RISC-V.
> +
> +   Before using #include to read this file, define a macro:
> +
> +  RISCV_TUNE(TUNE_NAME, PIPELINE_MODEL, TUNE_INFO)
> +
> +   The TUNE_NAME is the name of the micro-arch, represented as a string.
> +   The PIPELINE_MODEL is the pipeline model of the micro-arch, represented 
> as a
> +   string, defined in riscv.md.
> +   The TUNE_INFO is the detail cost model for this core, represented as an
> +   identifier, reference to riscv.cc.  */
> +
> +#ifndef RISCV_TUNE
> +#define RISCV_TUNE(TUNE_NAME, PIPELINE_MODEL, TUNE_INFO)
> +#endif
> +
> +RISCV_TUNE("rocket", generic, rocket_tune_info)
> +RISCV_TUNE("sifive-3-series", generic, rocket_tune_info)
> +RISCV_TUNE("sifive-5-series", generic, rocket_tune_info)
> +RISCV_TUNE("sifive-7-series", generic, sifive_7_tune_info)
> +RISCV_TUNE("thead-c906", generic, thead_c906_tune_info)
> +RISCV_TUNE("size", generic, optimize_size_tune_info)
> +
> +#undef RISCV_TUNE
> +
>  /* This is a list of cores that implement RISC-V.
>
> Before using #include to read this file, define a macro:
>
> -  RISCV_CORE(CORE_NAME, ARCH, MICRO_ARCH, TUNE_INFO)
> +  RISCV_CORE(CORE_NAME, ARCH, MICRO_ARCH)
>
> The CORE_NAME is the name of the core, represented as a string.
> The ARCH is the default arch of the core, represented as a string,
> can be NULL if no default arch.
> The MICRO_ARCH is the name of the core for which scheduling decisions
> -   will be made, represented as an identifier.
> -   The TUNE_INFO is the detail cost model for this core, represented as an
> -   identifier, reference to riscv-tunes.def.  */
> +   will be made, represented as an identifier.  */
> +
> +#ifndef RISCV_CORE
> +#define RISCV_CORE(CORE_NAME, ARCH, MICRO_ARCH)
> +#endif
>
>  RISCV_CORE("sifive-e20",

Re: [PATCH 1/2] Add a parameter for the builtin function of prefetch to align with LLVM

2022-10-24 Thread Richard Sandiford via Gcc-patches

Segher Boessenkool  writes:
> On Thu, Oct 20, 2022 at 07:34:13AM +, Jiang, Haochen wrote:
>> > > +  /* Argument 3 must be either zero or one.  */
>> > > +  if (INTVAL (op3) != 0 && INTVAL (op3) != 1)
>> > > +{
>> > > +  warning (0, "invalid fourth argument to %<__builtin_prefetch%>;"
>> > > +" using one");
>> > 
>> > "using 1" makes sense maybe, but "using one" reads as "using an
>> > argument", not very sane.
>> > 
>> > An error would be better here anyway?
>> 
>> Will change to 1 to avoid confusion in that. The reason why this is a warning
>> is because previous ones related to constant arguments out of range in 
>> prefetch
>> are also using warning.
>
> Please don't repeat historical mistakes.  You might not want to fix the
> existing code (since that can in theory break existing user code), but
> that is not a reason to punish users of a new feature as well ;-)

I agree an error would be appropriate for something like
__builtin_clear_cache.  But __builtin_prefetch is a hint only.
Nothing should break if the compiler simply evaluates the arguments
and does nothing else.

Using a warning in that situation means that, if the ranges of
parameters are increased in future, older compilers won't needlessly
reject new code.

So personally I think we should stick with the current choice
of a default-on warning.

Thanks,
Richard

[PATCH] RISC-V: Add h extension support

2022-10-24 Thread Kito Cheng

`h` was the prefix of a multi-letter extension name, but it became an
extension in a later RISC-V ISA spec.

Fortunately we don't have any extension actually defined that is prefixed
with `h`, so we can just change that.

gcc/ChangeLog:

* common/config/riscv/riscv-common.cc (riscv_ext_version_table):
Add `h`.
(riscv_supported_std_ext): Ditto.
(multi_letter_subset_rank): Remove `h`.
(riscv_subset_list::parse_std_ext): Handle `h` as single letter
extension.
(riscv_subset_list::parse): Ditto.

gcc/testsuite/ChangeLog:

* testsuite/gcc.target/riscv/arch-18.c: New.
* testsuite/gcc.target/riscv/arch-5.c: Remove test for prefixed
with `h`.
* testsuite/gcc.target/riscv/predef-23.c: New.
---
 gcc/common/config/riscv/riscv-common.cc| 23 +++-
 gcc/testsuite/gcc.target/riscv/arch-18.c   |  5 ++
 gcc/testsuite/gcc.target/riscv/arch-5.c|  2 +-
 gcc/testsuite/gcc.target/riscv/predef-23.c | 63 ++
 4 files changed, 77 insertions(+), 16 deletions(-)
 create mode 100644 gcc/testsuite/gcc.target/riscv/arch-18.c
 create mode 100644 gcc/testsuite/gcc.target/riscv/predef-23.c

diff --git a/gcc/common/config/riscv/riscv-common.cc 
b/gcc/common/config/riscv/riscv-common.cc
index c39ed2e2696..595cff03cdc 100644
--- a/gcc/common/config/riscv/riscv-common.cc
+++ b/gcc/common/config/riscv/riscv-common.cc
@@ -145,6 +145,8 @@ static const struct riscv_ext_version 
riscv_ext_version_table[] =
   {"c", ISA_SPEC_CLASS_20190608, 2, 0},
   {"c", ISA_SPEC_CLASS_2P2,  2, 0},
 
+  {"h",   ISA_SPEC_CLASS_NONE, 1, 0},
+
   {"v",   ISA_SPEC_CLASS_NONE, 1, 0},
 
   {"zicsr", ISA_SPEC_CLASS_20191213, 2, 0},
@@ -353,21 +355,18 @@ multi_letter_subset_rank (const std::string )
   gcc_assert (subset.length () >= 2);
   int high_order = -1;
   int low_order = 0;
-  /* The order between multi-char extensions: s -> h -> z -> x.  */
+  /* The order between multi-char extensions: s -> z -> x.  */
   char multiletter_class = subset[0];
   switch (multiletter_class)
 {
 case 's':
   high_order = 0;
   break;
-case 'h':
-  high_order = 1;
-  break;
 case 'z':
-  high_order = 2;
+  high_order = 1;
   break;
 case 'x':
-  high_order = 3;
+  high_order = 2;
   break;
 default:
   gcc_unreachable ();
@@ -663,7 +662,7 @@ riscv_subset_list::lookup (const char *subset, int 
major_version,
 static const char *
 riscv_supported_std_ext (void)
 {
-  return "mafdqlcbkjtpvn";
+  return "mafdqlcbkjtpvnh";
 }
 
 /* Parsing subset version.
@@ -822,7 +821,7 @@ riscv_subset_list::parse_std_ext (const char *p)
 {
   char subset[2] = {0, 0};
 
-  if (*p == 'x' || *p == 's' || *p == 'h' || *p == 'z')
+  if (*p == 'x' || *p == 's' || *p == 'z')
break;
 
   if (*p == '_')
@@ -947,7 +946,7 @@ riscv_subset_list::handle_combine_ext ()
 
Arguments:
  `p`: Current parsing position.
- `ext_type`: What kind of extensions, 's', 'h', 'z' or 'x'.
+ `ext_type`: What kind of extensions, 's', 'z' or 'x'.
  `ext_type_str`: Full name for kind of extension.  */
 
 const char *
@@ -1086,12 +1085,6 @@ riscv_subset_list::parse (const char *arch, location_t 
loc)
   /* Parsing supervisor extension.  */
   p = subset_list->parse_multiletter_ext (p, "s", "supervisor extension");
 
-  if (p == NULL)
-goto fail;
-
-  /* Parsing hypervisor extension.  */
-  p = subset_list->parse_multiletter_ext (p, "h", "hypervisor extension");
-
   if (p == NULL)
 goto fail;
 
diff --git a/gcc/testsuite/gcc.target/riscv/arch-18.c 
b/gcc/testsuite/gcc.target/riscv/arch-18.c
new file mode 100644
index 000..bb045360ce1
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/arch-18.c
@@ -0,0 +1,5 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -march=rv32gcvh -mabi=ilp32 -mcmodel=medlow" } */
+int foo()
+{
+}
diff --git a/gcc/testsuite/gcc.target/riscv/arch-5.c 
b/gcc/testsuite/gcc.target/riscv/arch-5.c
index 2a0f3b782a8..b945a643cc1 100644
--- a/gcc/testsuite/gcc.target/riscv/arch-5.c
+++ b/gcc/testsuite/gcc.target/riscv/arch-5.c
@@ -1,5 +1,5 @@
 /* { dg-do compile } */
-/* { dg-options "-march=rv32isabc_hghi_zfoo_xbar -mabi=ilp32" } */
+/* { dg-options "-march=rv32isabc_zfoo_xbar -mabi=ilp32" } */
 int foo()
 {
 }
diff --git a/gcc/testsuite/gcc.target/riscv/predef-23.c 
b/gcc/testsuite/gcc.target/riscv/predef-23.c
new file mode 100644
index 000..676023f2a75
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/predef-23.c
@@ -0,0 +1,63 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -march=rv64ih_zfhmin -mabi=lp64f -mcmodel=medlow 
-misa-spec=20191213" } */
+
+int main () {
+
+#ifndef __riscv_arch_test
+#error "__riscv_arch_test"
+#endif
+
+#if __riscv_xlen != 64
+#error "__riscv_xlen"
+#endif
+
+#if !defined(__riscv_i)
+#error "__riscv_i"
+#endif
+
+#if defined(__riscv_c)
+#error "__riscv_c"
+#endif
+
+#if defined(__riscv_e)
+#error "__riscv_e"
+#endif
+
+#if

Re: [PATCH] Support Intel CMPccXADD

2022-10-24 Thread Uros Bizjak via Gcc-patches

On Mon, Oct 24, 2022 at 11:01 AM Haochen Jiang  wrote:
>
> Hi all,
>
> I just refined CMPccXADD patch to make the enum in order intrin file
> aligned with how opcode does.
>
> Ok for trunk?
>
> BRs,
> Haochen
>
> gcc/ChangeLog:
>
> * common/config/i386/cpuinfo.h (get_available_features):
> Detect cmpccxadd.
> * common/config/i386/i386-common.cc
> (OPTION_MASK_ISA2_CMPCCXADD_SET,
> OPTION_MASK_ISA2_CMPCCXADD_UNSET): New.
> (ix86_handle_option): Handle -mcmpccxadd, unset cmpccxadd when avx2
> is disabled.
> * common/config/i386/i386-cpuinfo.h (enum processor_features):
> Add FEATURE_CMPCCXADD.
> * common/config/i386/i386-isas.h: Add ISA_NAME_TABLE_ENTRY for
> cmpccxadd.
> * config.gcc: Add cmpccxaddintrin.h.
> * config/i386/cpuid.h (bit_CMPCCXADD): New.
> * config/i386/i386-builtin-types.def:
> Add DEF_FUNCTION_TYPE(INT, PINT, INT, INT, INT)
> and DEF_FUNCTION_TYPE(LONGLONG, PLONGLONG, LONGLONG, LONGLONG, INT).
> * config/i386/i386-builtin.def (BDESC): Add new builtins.
> * config/i386/i386-c.cc (ix86_target_macros_internal): Define
> __CMPCCXADD__.
> * config/i386/i386-expand.cc (ix86_expand_special_args_builtin):
> Add new parameter to indicate constant position.
> Handle INT_FTYPE_PINT_INT_INT_INT
> and LONGLONG_FTYPE_PLONGLONG_LONGLONG_LONGLONG_INT.
> * config/i386/i386-isa.def (CMPCCXADD): Add DEF_PTA(CMPCCXADD).
> * config/i386/i386-options.cc (isa2_opts): Add -mcmpccxadd.
> (ix86_valid_target_attribute_inner_p): Handle cmpccxadd.
> * config/i386/i386.opt: Add option -mcmpccxadd.
> * config/i386/sync.md (cmpccxadd_): New define insn.
> * config/i386/x86gprintrin.h: Include cmpccxaddintrin.h.
> * doc/extend.texi: Document cmpccxadd.
> * doc/invoke.texi: Document -mcmpccxadd.
> * doc/sourcebuild.texi: Document target cmpccxadd.
> * config/i386/cmpccxaddintrin.h: New file.
>
> gcc/testsuite/ChangeLog:
>
> * g++.dg/other/i386-2.C: Add -mcmpccxadd.
> * g++.dg/other/i386-3.C: Ditto.
> * gcc.target/i386/avx-1.c: Add builtin define for enum.
> * gcc.target/i386/funcspec-56.inc: Add new target attribute.
> * gcc.target/i386/sse-13.c: Add builtin define for enum.
> * gcc.target/i386/sse-23.c: Ditto.
> * gcc.target/i386/x86gprintrin-1.c: Add -mcmpccxadd for 64 bit target.
> * gcc.target/i386/x86gprintrin-2.c: Add -mcmpccxadd for 64 bit target.
> Add builtin define for enum.
> * gcc.target/i386/x86gprintrin-3.c: Add -mcmpccxadd for 64 bit target.
> * gcc.target/i386/x86gprintrin-4.c: Add -mcmpccxadd for 64 bit target.
> * gcc.target/i386/x86gprintrin-5.c: Add -mcmpccxadd for 64 bit target.
> Add builtin define for enum.
> * gcc.target/i386/cmpccxadd-1.c: New test.
> * gcc.target/i386/cmpccxadd-2.c: New test.
> ---
>  gcc/common/config/i386/cpuinfo.h  |   2 +
>  gcc/common/config/i386/i386-common.cc |  15 ++
>  gcc/common/config/i386/i386-cpuinfo.h |   1 +
>  gcc/common/config/i386/i386-isas.h|   1 +
>  gcc/config.gcc|   3 +-
>  gcc/config/i386/cmpccxaddintrin.h |  89 +++
>  gcc/config/i386/cpuid.h   |   1 +
>  gcc/config/i386/i386-builtin-types.def|   4 +
>  gcc/config/i386/i386-builtin.def  |   4 +
>  gcc/config/i386/i386-c.cc |   2 +
>  gcc/config/i386/i386-expand.cc|  22 ++-
>  gcc/config/i386/i386-isa.def  |   1 +
>  gcc/config/i386/i386-options.cc   |   4 +-
>  gcc/config/i386/i386.opt  |   5 +
>  gcc/config/i386/sync.md   |  42 ++
>  gcc/config/i386/x86gprintrin.h|   2 +
>  gcc/doc/extend.texi   |   5 +
>  gcc/doc/invoke.texi   |  10 +-
>  gcc/doc/sourcebuild.texi  |   3 +
>  gcc/testsuite/g++.dg/other/i386-2.C   |   2 +-
>  gcc/testsuite/g++.dg/other/i386-3.C   |   2 +-
>  gcc/testsuite/gcc.target/i386/avx-1.c |   4 +
>  gcc/testsuite/gcc.target/i386/cmpccxadd-1.c   |  61 
>  gcc/testsuite/gcc.target/i386/cmpccxadd-2.c   | 138 ++
>  gcc/testsuite/gcc.target/i386/funcspec-56.inc |   2 +
>  gcc/testsuite/gcc.target/i386/sse-13.c|   6 +-
>  gcc/testsuite/gcc.target/i386/sse-23.c|   6 +-
>  .../gcc.target/i386/x86gprintrin-1.c  |   2 +-
>  .../gcc.target/i386/x86gprintrin-2.c  |   6 +-
>  .../gcc.target/i386/x86gprintrin-3.c  |   2 +-
>  .../gcc.target/i386/x86gprintrin-4.c  |   2 +-
>  .../gcc.target/i386/x86gprintrin-5.c  |   6 +-
>  gcc/testsuite/lib/target-supports.exp |  10 ++
>  33 files changed, 450

RE: [PATCH] Support Intel CMPccXADD

2022-10-24 Thread Jiang, Haochen via Gcc-patches

> -Original Message-
> From: Gcc-patches  bounces+haochen.jiang=intel@gcc.gnu.org> On Behalf Of Haochen Jiang
> via Gcc-patches
> Sent: Monday, October 24, 2022 5:01 PM
> To: gcc-patches@gcc.gnu.org
> Cc: Liu, Hongtao 
> Subject: [PATCH] Support Intel CMPccXADD
> 
> Hi all,
> 
> I just refined CMPccXADD patch to make the enum in order intrin file aligned
> with how opcode does.
> 

I just found a testcase issue not fixed for the enum, will send the fixed patch
soon.

> Ok for trunk?
> 
> BRs,
> Haochen
> 
> gcc/ChangeLog:
> 
> * common/config/i386/cpuinfo.h (get_available_features):
>   Detect cmpccxadd.
>   * common/config/i386/i386-common.cc
>   (OPTION_MASK_ISA2_CMPCCXADD_SET,
>   OPTION_MASK_ISA2_CMPCCXADD_UNSET): New.
>   (ix86_handle_option): Handle -mcmpccxadd, unset cmpccxadd when
> avx2
>   is disabled.
> * common/config/i386/i386-cpuinfo.h (enum processor_features):
>   Add FEATURE_CMPCCXADD.
> * common/config/i386/i386-isas.h: Add ISA_NAME_TABLE_ENTRY for
>   cmpccxadd.
>   * config.gcc: Add cmpccxaddintrin.h.
>   * config/i386/cpuid.h (bit_CMPCCXADD): New.
>   * config/i386/i386-builtin-types.def:
>   Add DEF_FUNCTION_TYPE(INT, PINT, INT, INT, INT)
>   and DEF_FUNCTION_TYPE(LONGLONG, PLONGLONG, LONGLONG,
> LONGLONG, INT).
>   * config/i386/i386-builtin.def (BDESC): Add new builtins.
>   * config/i386/i386-c.cc (ix86_target_macros_internal): Define
>   __CMPCCXADD__.
>   * config/i386/i386-expand.cc (ix86_expand_special_args_builtin):
>   Add new parameter to indicate constant position.
>   Handle INT_FTYPE_PINT_INT_INT_INT
>   and LONGLONG_FTYPE_PLONGLONG_LONGLONG_LONGLONG_INT.
>   * config/i386/i386-isa.def (CMPCCXADD): Add DEF_PTA(CMPCCXADD).
>   * config/i386/i386-options.cc (isa2_opts): Add -mcmpccxadd.
>   (ix86_valid_target_attribute_inner_p): Handle cmpccxadd.
>   * config/i386/i386.opt: Add option -mcmpccxadd.
>   * config/i386/sync.md (cmpccxadd_): New define insn.
>   * config/i386/x86gprintrin.h: Include cmpccxaddintrin.h.
>   * doc/extend.texi: Document cmpccxadd.
>   * doc/invoke.texi: Document -mcmpccxadd.
>   * doc/sourcebuild.texi: Document target cmpccxadd.
>   * config/i386/cmpccxaddintrin.h: New file.
> 
> gcc/testsuite/ChangeLog:
> 
>   * g++.dg/other/i386-2.C: Add -mcmpccxadd.
>   * g++.dg/other/i386-3.C: Ditto.
>   * gcc.target/i386/avx-1.c: Add builtin define for enum.
>   * gcc.target/i386/funcspec-56.inc: Add new target attribute.
>   * gcc.target/i386/sse-13.c: Add builtin define for enum.
>   * gcc.target/i386/sse-23.c: Ditto.
>   * gcc.target/i386/x86gprintrin-1.c: Add -mcmpccxadd for 64 bit target.
>   * gcc.target/i386/x86gprintrin-2.c: Add -mcmpccxadd for 64 bit target.
>   Add builtin define for enum.
>   * gcc.target/i386/x86gprintrin-3.c: Add -mcmpccxadd for 64 bit target.
>   * gcc.target/i386/x86gprintrin-4.c: Add mcmpccxadd for 64 bit target.
>   * gcc.target/i386/x86gprintrin-5.c: Add mcpmccxadd for 64 bit target.
>   Add builtin define for enum.
>   * gcc.target/i386/cmpccxadd-1.c: New test.
>   * gcc.target/i386/cmpccxadd-2.c: New test.
> ---
>  gcc/common/config/i386/cpuinfo.h  |   2 +
>  gcc/common/config/i386/i386-common.cc |  15 ++
>  gcc/common/config/i386/i386-cpuinfo.h |   1 +
>  gcc/common/config/i386/i386-isas.h|   1 +
>  gcc/config.gcc|   3 +-
>  gcc/config/i386/cmpccxaddintrin.h |  89 +++
>  gcc/config/i386/cpuid.h   |   1 +
>  gcc/config/i386/i386-builtin-types.def|   4 +
>  gcc/config/i386/i386-builtin.def  |   4 +
>  gcc/config/i386/i386-c.cc |   2 +
>  gcc/config/i386/i386-expand.cc|  22 ++-
>  gcc/config/i386/i386-isa.def  |   1 +
>  gcc/config/i386/i386-options.cc   |   4 +-
>  gcc/config/i386/i386.opt  |   5 +
>  gcc/config/i386/sync.md   |  42 ++
>  gcc/config/i386/x86gprintrin.h|   2 +
>  gcc/doc/extend.texi   |   5 +
>  gcc/doc/invoke.texi   |  10 +-
>  gcc/doc/sourcebuild.texi  |   3 +
>  gcc/testsuite/g++.dg/other/i386-2.C   |   2 +-
>  gcc/testsuite/g++.dg/other/i386-3.C   |   2 +-
>  gcc/testsuite/gcc.target/i386/avx-1.c |   4 +
>  gcc/testsuite/gcc.target/i386/cmpccxadd-1.c   |  61 
>  gcc/testsuite/gcc.target/i386/cmpccxadd-2.c   | 138 ++
>  gcc/testsuite/gcc.target/i386/funcspec-56.inc |   2 +
>  gcc/testsuite/gcc.target/i386/sse-13.c|   6 +-
>  gcc/testsuite/gcc.target/i386/sse-23.c|   6 +-
>  .../gcc.target/i386/x86gprintrin-1.c  |   2 +-
>  .../gcc.target/i386/x86gprintrin-2.c  |   6 +-
>

[PATCH] Support Intel CMPccXADD

2022-10-24 Thread Haochen Jiang via Gcc-patches

Hi all,

I just refined CMPccXADD patch to make the enum in order intrin file
aligned with how opcode does.

Ok for trunk?

BRs,
Haochen

gcc/ChangeLog:

* common/config/i386/cpuinfo.h (get_available_features):
Detect cmpccxadd.
* common/config/i386/i386-common.cc
(OPTION_MASK_ISA2_CMPCCXADD_SET,
OPTION_MASK_ISA2_CMPCCXADD_UNSET): New.
(ix86_handle_option): Handle -mcmpccxadd, unset cmpccxadd when avx2
is disabled.
* common/config/i386/i386-cpuinfo.h (enum processor_features):
Add FEATURE_CMPCCXADD.
* common/config/i386/i386-isas.h: Add ISA_NAME_TABLE_ENTRY for
cmpccxadd.
* config.gcc: Add cmpccxaddintrin.h.
* config/i386/cpuid.h (bit_CMPCCXADD): New.
* config/i386/i386-builtin-types.def:
Add DEF_FUNCTION_TYPE(INT, PINT, INT, INT, INT)
and DEF_FUNCTION_TYPE(LONGLONG, PLONGLONG, LONGLONG, LONGLONG, INT).
* config/i386/i386-builtin.def (BDESC): Add new builtins.
* config/i386/i386-c.cc (ix86_target_macros_internal): Define
__CMPCCXADD__.
* config/i386/i386-expand.cc (ix86_expand_special_args_builtin):
Add new parameter to indicate constant position.
Handle INT_FTYPE_PINT_INT_INT_INT
and LONGLONG_FTYPE_PLONGLONG_LONGLONG_LONGLONG_INT.
* config/i386/i386-isa.def (CMPCCXADD): Add DEF_PTA(CMPCCXADD).
* config/i386/i386-options.cc (isa2_opts): Add -mcmpccxadd.
(ix86_valid_target_attribute_inner_p): Handle cmpccxadd.
* config/i386/i386.opt: Add option -mcmpccxadd.
* config/i386/sync.md (cmpccxadd_): New define insn.
* config/i386/x86gprintrin.h: Include cmpccxaddintrin.h.
* doc/extend.texi: Document cmpccxadd.
* doc/invoke.texi: Document -mcmpccxadd.
* doc/sourcebuild.texi: Document target cmpccxadd.
* config/i386/cmpccxaddintrin.h: New file.

gcc/testsuite/ChangeLog:

* g++.dg/other/i386-2.C: Add -mcmpccxadd.
* g++.dg/other/i386-3.C: Ditto.
* gcc.target/i386/avx-1.c: Add builtin define for enum.
* gcc.target/i386/funcspec-56.inc: Add new target attribute.
* gcc.target/i386/sse-13.c: Add builtin define for enum.
* gcc.target/i386/sse-23.c: Ditto.
* gcc.target/i386/x86gprintrin-1.c: Add -mcmpccxadd for 64 bit target.
* gcc.target/i386/x86gprintrin-2.c: Add -mcmpccxadd for 64 bit target.
Add builtin define for enum.
* gcc.target/i386/x86gprintrin-3.c: Add -mcmpccxadd for 64 bit target.
* gcc.target/i386/x86gprintrin-4.c: Add -mcmpccxadd for 64 bit target.
* gcc.target/i386/x86gprintrin-5.c: Add -mcmpccxadd for 64 bit target.
Add builtin define for enum.
* gcc.target/i386/cmpccxadd-1.c: New test.
* gcc.target/i386/cmpccxadd-2.c: New test.
---
 gcc/common/config/i386/cpuinfo.h  |   2 +
 gcc/common/config/i386/i386-common.cc |  15 ++
 gcc/common/config/i386/i386-cpuinfo.h |   1 +
 gcc/common/config/i386/i386-isas.h|   1 +
 gcc/config.gcc|   3 +-
 gcc/config/i386/cmpccxaddintrin.h |  89 +++
 gcc/config/i386/cpuid.h   |   1 +
 gcc/config/i386/i386-builtin-types.def|   4 +
 gcc/config/i386/i386-builtin.def  |   4 +
 gcc/config/i386/i386-c.cc |   2 +
 gcc/config/i386/i386-expand.cc|  22 ++-
 gcc/config/i386/i386-isa.def  |   1 +
 gcc/config/i386/i386-options.cc   |   4 +-
 gcc/config/i386/i386.opt  |   5 +
 gcc/config/i386/sync.md   |  42 ++
 gcc/config/i386/x86gprintrin.h|   2 +
 gcc/doc/extend.texi   |   5 +
 gcc/doc/invoke.texi   |  10 +-
 gcc/doc/sourcebuild.texi  |   3 +
 gcc/testsuite/g++.dg/other/i386-2.C   |   2 +-
 gcc/testsuite/g++.dg/other/i386-3.C   |   2 +-
 gcc/testsuite/gcc.target/i386/avx-1.c |   4 +
 gcc/testsuite/gcc.target/i386/cmpccxadd-1.c   |  61 
 gcc/testsuite/gcc.target/i386/cmpccxadd-2.c   | 138 ++
 gcc/testsuite/gcc.target/i386/funcspec-56.inc |   2 +
 gcc/testsuite/gcc.target/i386/sse-13.c|   6 +-
 gcc/testsuite/gcc.target/i386/sse-23.c|   6 +-
 .../gcc.target/i386/x86gprintrin-1.c  |   2 +-
 .../gcc.target/i386/x86gprintrin-2.c  |   6 +-
 .../gcc.target/i386/x86gprintrin-3.c  |   2 +-
 .../gcc.target/i386/x86gprintrin-4.c  |   2 +-
 .../gcc.target/i386/x86gprintrin-5.c  |   6 +-
 gcc/testsuite/lib/target-supports.exp |  10 ++
 33 files changed, 450 insertions(+), 15 deletions(-)
 create mode 100644 gcc/config/i386/cmpccxaddintrin.h
 create mode 100644 gcc/testsuite/gcc.target/i386/cmpccxadd-1.c
 create mode 100644 gcc/testsuite/gcc.target/i386/cmpccxadd-2.c

diff --git

[PATCH] Aarch64: Do not define DONT_USE_BUILTIN_SETJMP

Hi,

we have been using an Ada compiler for the Aarch64 architecture configured 
with SJLJ exceptions as for the other architectures for some time, and have 
not run into any problems so far so the setting looks obsolete now.

OK for the mainline?


2022-10-24  Eric Botcazou  

* config/aarch64/aarch64.h (DONT_USE_BUILTIN_SETJMP): Delete.

-- 
Eric Botcazou
diff --git a/gcc/config/aarch64/aarch64.h b/gcc/config/aarch64/aarch64.h
index 05da9af0367..e60f9bce023 100644
--- a/gcc/config/aarch64/aarch64.h
+++ b/gcc/config/aarch64/aarch64.h
@@ -571,10 +571,6 @@ enum class aarch64_feature : unsigned char {
 #define EH_RETURN_STACKADJ_RTX	gen_rtx_REG (Pmode, R4_REGNUM)
 #define EH_RETURN_HANDLER_RTX  aarch64_eh_return_handler_rtx ()
 
-/* Don't use __builtin_setjmp until we've defined it.  */
-#undef DONT_USE_BUILTIN_SETJMP
-#define DONT_USE_BUILTIN_SETJMP 1
-
 #undef TARGET_COMPUTE_FRAME_LAYOUT
 #define TARGET_COMPUTE_FRAME_LAYOUT aarch64_layout_frame

[PATCH] ARM: Make ARMv8-M attribute cmse_nonsecure_call work in Ada

Hi,

unlike most other machine attributes, this one does not work in Ada because,
while it applies to pointer-to-function types, it is explicitly marked as
requiring declarations in the implementation.

Now, in Ada, machine attributes are specified like this:

  type Non_Secure is access procedure;
  pragma Machine_Attribute (Non_Secure, "cmse_nonsecure_call");

i.e. not attached to the declaration of Non_Secure (testcase attached).

So the attached patch extends the support to Ada by also accepting
pointer-to-function types in the handler.

Tested on arm-eabi, OK for the mainline?


2022-10-24  Eric Botcazou  

* config/arm/arm.cc (arm_attribute_table) : Change
decl_required field to false.
(arm_handle_cmse_nonsecure_call): Deal with a TYPE node.


-- 
Eric Botcazou
diff --git a/gcc/config/arm/arm.cc b/gcc/config/arm/arm.cc
index ee8f1babf8a..fc96ed9cce4 100644
--- a/gcc/config/arm/arm.cc
+++ b/gcc/config/arm/arm.cc
@@ -375,7 +375,7 @@ static const struct attribute_spec arm_attribute_table[] =
   /* ARMv8-M Security Extensions support.  */
   { "cmse_nonsecure_entry", 0, 0, true, false, false, false,
 arm_handle_cmse_nonsecure_entry, NULL },
-  { "cmse_nonsecure_call", 0, 0, true, false, false, true,
+  { "cmse_nonsecure_call", 0, 0, false, false, false, true,
 arm_handle_cmse_nonsecure_call, NULL },
   { "Advanced SIMD type", 1, 1, false, true, false, true, NULL, NULL },
   { NULL, 0, 0, false, false, false, false, NULL, NULL }
@@ -7605,8 +7605,8 @@ arm_handle_cmse_nonsecure_call (tree *node, tree name,
  int /* flags */,
  bool *no_add_attrs)
 {
-  tree decl = NULL_TREE, fntype = NULL_TREE;
-  tree type;
+  tree decl = NULL_TREE;
+  tree fntype, type;
 
   if (!use_cmse)
 {
@@ -7616,16 +7616,20 @@ arm_handle_cmse_nonsecure_call (tree *node, tree name,
   return NULL_TREE;
 }
 
-  if (TREE_CODE (*node) == VAR_DECL || TREE_CODE (*node) == TYPE_DECL)
+  if (DECL_P (*node))
 {
-  decl = *node;
-  fntype = TREE_TYPE (decl);
+  fntype = TREE_TYPE (*node);
+
+  if (TREE_CODE (*node) == VAR_DECL || TREE_CODE (*node) == TYPE_DECL)
+	decl = *node;
 }
+  else
+fntype = *node;
 
-  while (fntype != NULL_TREE && TREE_CODE (fntype) == POINTER_TYPE)
+  while (fntype && TREE_CODE (fntype) == POINTER_TYPE)
 fntype = TREE_TYPE (fntype);
 
-  if (!decl || TREE_CODE (fntype) != FUNCTION_TYPE)
+  if ((DECL_P (*node) && !decl) || TREE_CODE (fntype) != FUNCTION_TYPE)
 {
 	warning (OPT_Wattributes, "%qE attribute only applies to base type of a "
 		 "function pointer", name);
@@ -7640,10 +7644,17 @@ arm_handle_cmse_nonsecure_call (tree *node, tree name,
 
   /* Prevent trees being shared among function types with and without
  cmse_nonsecure_call attribute.  */
-  type = TREE_TYPE (decl);
+  if (decl)
+{
+  type = build_distinct_type_copy (TREE_TYPE (decl));
+  TREE_TYPE (decl) = type;
+}
+  else
+{
+  type = build_distinct_type_copy (*node);
+  *node = type;
+}
 
-  type = build_distinct_type_copy (type);
-  TREE_TYPE (decl) = type;
   fntype = type;
 
   while (TREE_CODE (fntype) != FUNCTION_TYPE)
package P is

  type Non_Secure is access procedure;
  pragma Machine_Attribute (Non_Secure, "cmse_nonsecure_call");

  procedure Call (Proc : Non_Secure);

  procedure Foo;
  pragma Machine_Attribute (Foo, "cmse_nonsecure_call");

end P;
package body P is

  procedure Call (Proc : Non_Secure) is
  begin
Proc.all;
  end;

  procedure Foo is null;

end P;

Re: [PATCH] lto: Always quote path to touch

2022-10-24 Thread Torbjorn SVENSSON via Gcc-patches





On 2022-10-24 10:07, Richard Biener wrote:

On Fri, 21 Oct 2022, Torbjörn SVENSSON wrote:


When generating the makefile, make sure that the paths are quoted so
that a native Windows path works within Cygwin.

Without this patch, this error is reported by the DejaGNU test suite:

make: [T:\ccMf0kI3.mk:3: T:\ccGEvdDp.ltrans0.ltrans.o] Error 1 (ignored)

The generated makefile fragment without the patch:

T:\ccGEvdDp.ltrans0.ltrans.o:
   @T:\build\bin\arm-none-eabi-g++.exe '-xlto' ... '-o' 
'T:\ccGEvdDp.ltrans0.ltrans.o' 'T:\ccGEvdDp.ltrans0.o'
   @-touch -r T:\ccGEvdDp.ltrans0.o T:\ccGEvdDp.ltrans0.o.tem > /dev/null 2>&1 
&& mv T:\ccGEvdDp.ltrans0.o.tem T:\ccGEvdDp.ltrans0.o
.PHONY: all
all: \
   T:\ccGEvdDp.ltrans0.ltrans.o

With the patch, the touch line would be replaced with:

   @-touch -r "T:\ccGEvdDp.ltrans0.o" "T:\ccGEvdDp.ltrans0.o.tem" > /dev/null 2>&1 && mv 
"T:\ccGEvdDp.ltrans0.o.tem" "T:\ccGEvdDp.ltrans0.o"

gcc/ChangeLog:


OK.

Thanks,
Richard.


* lto-wrapper.cc: Quote paths in makefile.

Co-Authored-By: Yvan ROUX 
Signed-off-by: Torbjörn SVENSSON 
---
  gcc/lto-wrapper.cc | 4 ++--
  1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/gcc/lto-wrapper.cc b/gcc/lto-wrapper.cc
index 9a764702ffc..b12bcc1ad27 100644
--- a/gcc/lto-wrapper.cc
+++ b/gcc/lto-wrapper.cc
@@ -2010,8 +2010,8 @@ cont:
 truncate them as soon as we have processed it.  This
 reduces temporary disk-space usage.  */
  if (! save_temps)
-   fprintf (mstream, "\t@-touch -r %s %s.tem > /dev/null 2>&1 "
-"&& mv %s.tem %s\n",
+   fprintf (mstream, "\t@-touch -r \"%s\" \"%s.tem\" > /dev/null "
+"2>&1 && mv \"%s.tem\" \"%s\"\n",
 input_name, input_name, input_name, input_name);
}
  else






Pushed.

Re: vect: Make vect_check_gather_scatter reject offsets that aren't multiples of BITS_PER_UNIT [PR107346]

> Eric - the docs of DECL_BIT_FIELD are vague enough "must be accessed
> specially" but ISTR it might eventually only apply to the fields
> (bit) size and not it's position.  OTOH the Ada frontend might not
> be too careful in setting this flag for bit-packed structs?

It sets the flag when the alignment or size of the field does not match that 
of its type, which indeed means that it needs to be accessed specially.

-- 
Eric Botcazou

[PATCH] Relax assertion in profile.cc

Hi,

this assertion in branch_prob:

  if (bb == ENTRY_BLOCK_PTR_FOR_FN (cfun)->next_bb)
{
  location_t loc = DECL_SOURCE_LOCATION (current_function_decl);
  gcc_checking_assert (!RESERVED_LOCATION_P (loc));

had been correct until:

2021-08-11  Bernd Edlinger  

PR debug/101598
* gcc-interface/trans.c (Subprogram_Body_to_gnu): Set the
DECL_SOURCE_LOCATION of DECL_IGNORED_P gnu_subprog_decl to
UNKNOWN_LOCATION.

was installed.

Tested on x86-64/Linux, OK for mainline and 12 branch?


2022-10-24  Eric Botcazou  

* profile.cc (branch_prob): Be prepared for ignored functions with
DECL_SOURCE_LOCATION set to UNKNOWN_LOCATION.


2022-10-24  Eric Botcazou  

* gnat.dg/specs/coverage1.ads: New test.

-- 
Eric Botcazou
diff --git a/gcc/profile.cc b/gcc/profile.cc
index 96121d60711..1527a04124f 100644
--- a/gcc/profile.cc
+++ b/gcc/profile.cc
@@ -1457,11 +1457,13 @@ branch_prob (bool thunk)
 	  if (bb == ENTRY_BLOCK_PTR_FOR_FN (cfun)->next_bb)
 	{
 	  location_t loc = DECL_SOURCE_LOCATION (current_function_decl);
-	  gcc_checking_assert (!RESERVED_LOCATION_P (loc));
-	  seen_locations.add (loc);
-	  expanded_location curr_location = expand_location (loc);
-	  output_location (&seen_locations, curr_location.file,
-			   MAX (1, curr_location.line), &offset, bb);
+	  if (!RESERVED_LOCATION_P (loc))
+		{
+		  seen_locations.add (loc);
+		  expanded_location curr_location = expand_location (loc);
+		  output_location (&seen_locations, curr_location.file,
+   MAX (1, curr_location.line), &offset, bb);
+		}
 	}
 
 	  for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
-- { dg-do compile }
-- { dg-options "-ftest-coverage" }

package Coverage1 is

  type Rec is record
I : Integer := 0;
  end record;

end Coverage1;

Re: Adding a new thread model to GCC

2022-10-24 Thread i.nixman--- via Gcc-patches


On 2022-10-24 08:15, Eric Botcazou wrote:

could you please refresh/recheck your patch for the current gcc master
and solve the objections noted in the thread? is it possible?





Hi,


I can do the former, but not the latter as my development setup (mostly
testing) on Windows has nearly vanished in the meantime.  But this 
rewritten

implementation is the one used by the C/C++/Ada compilers from AdaCore.



ah, it's great!


thank you very much!




best!

Re: Adding a new thread model to GCC

> could you please refresh/recheck your patch for the current gcc master
> and solve the objections noted in the thread? is it possible?

I can do the former, but not the latter as my development setup (mostly 
testing) on Windows has nearly vanished in the meantime.  But this rewritten 
implementation is the one used by the C/C++/Ada compilers from AdaCore.

-- 
Eric Botcazou

Re: Extend fold_vec_perm to fold VEC_PERM_EXPR in VLA manner

2022-10-24 Thread Prathamesh Kulkarni via Gcc-patches

On Mon, 17 Oct 2022 at 16:02, Prathamesh Kulkarni
 wrote:
>
> On Mon, 10 Oct 2022 at 16:18, Prathamesh Kulkarni
>  wrote:
> >
> > On Fri, 30 Sept 2022 at 21:38, Richard Sandiford
> >  wrote:
> > >
> > > Richard Sandiford via Gcc-patches  writes:
> > > > Prathamesh Kulkarni  writes:
> > > >> Sorry to ask a silly question but in which case shall we select 2nd 
> > > >> vector ?
> > > >> For num_poly_int_coeffs == 2,
> > > >> a1 /trunc n1 == (a1 + 0x) / (n1.coeffs[0] + n1.coeffs[1]*x)
> > > >> If a1/trunc n1 succeeds,
> > > >> 0 / n1.coeffs[1] == a1/n1.coeffs[0] == 0.
> > > >> So, a1 has to be < n1.coeffs[0] ?
> > > >
> > > > Remember that a1 is itself a poly_int.  It's not necessarily a constant.
> > > >
> > > > E.g. the TRN1 .D instruction maps to a VEC_PERM_EXPR with the selector:
> > > >
> > > >   { 0, 2 + 2x, 1, 4 + 2x, 2, 6 + 2x, ... }
> > >
> > > Sorry, should have been:
> > >
> > >   { 0, 2 + 2x, 2, 4 + 2x, 4, 6 + 2x, ... }
> > Hi Richard,
> > Thanks for the clarifications, and sorry for late reply.
> > I have attached POC patch that tries to implement the above approach.
> > Passes bootstrap+test on x86_64-linux-gnu and aarch64-linux-gnu for VLS 
> > vectors.
> >
> > For VLA vectors, I have only done limited testing so far.
> > It seems to pass couple of tests written in the patch for
> > nelts_per_pattern == 3,
> > and folds the following svld1rq test:
> > int32x4_t v = {1, 2, 3, 4};
> > > return svld1rq_s32 (svptrue_b8 (), &v[0])
> > into:
> > return {1, 2, 3, 4, ...};
> > I will try to bootstrap+test it on SVE machine to test further for VLA 
> > folding.
> With the attached patch it seems to pass bootstrap+test with SVE enabled.
> The only difference w.r.t previous patch is it adds check in
> get_vector_for_pattern
> if S is constant otherwise returns NULL_TREE.
>
> I added this check because 930325-1.c ICE'd with previous patch
> because it had following vec_perm_expr,
> where S was non-constant:
> vect__16.13_70 = VEC_PERM_EXPR  POLY_INT_CST [3, 4], POLY_INT_CST [6, 8], POLY_INT_CST [9, 12], ...
> }>;
> I am not sure how to proceed in this case, so chose to bail out.
Hi Richard,
ping https://gcc.gnu.org/pipermail/gcc-patches/2022-October/603717.html

Thanks,
Prathamesh
>
> Thanks,
> Prathamesh
>
> >
> > I have a couple of questions:
> > 1] When mask selects elements from same vector but from different patterns:
> > For eg:
> > arg0 = {1, 11, 2, 12, 3, 13, ...},
> > arg1 = {21, 31, 22, 32, 23, 33, ...},
> > mask = {0, 0, 0, 1, 0, 2, ... },
> > All have npatterns = 2, nelts_per_pattern = 3.
> >
> > With above mask,
> > Pattern {0, ...} selects arg0[0], ie {1, ...}
> > Pattern {0, 1, 2, ...} selects arg0[0], arg0[1], arg0[2], ie {1, 11, 2, ...}
> > While arg0[0] and arg0[2] belong to same pattern, arg0[1] belongs to 
> > different
> > pattern in arg0.
> > The result is:
> > res = {1, 1, 1, 11, 1, 2, ...}
> > In this case, res's 2nd pattern {1, 11, 2, ...} is encoded with:
> > with a0 = 1, a1 = 11, S = -9.
> > Is that expected tho ? It seems to create a new encoding which
> > wasn't present in the input vector. For instance, the next elem in
> > sequence would be -7,
> > which is not present originally in arg0.
> > I suppose it's fine since if the user defines mask to have pattern {0,
> > 1, 2, ...}
> > they intended result to have pattern with above encoding.
> > Just wanted to confirm if this is correct ?
> >
> > 2] Could you please suggest a test-case for S < 0 ?
> > I am not able to come up with one :/
> >
> > Thanks,
> > Prathamesh
> > >
> > > > which is an interleaving of the two patterns:
> > > >
> > > >   { 0, 2, 4, ... }  a0 = 0, a1 = 2, S = 2
> > > >   { 2 + 2x, 4 + 2x, 6 + 2x }a0 = 2 + 2x, a1 = 4 + 2x, S = 2

Re: [PATCH] lto: Always quote path to touch

On Fri, 21 Oct 2022, Torbjörn SVENSSON wrote:

> When generating the makefile, make sure that the paths are quoted so
> that a native Windows path works within Cygwin.
> 
> Without this patch, this error is reported by the DejaGNU test suite:
> 
> make: [T:\ccMf0kI3.mk:3: T:\ccGEvdDp.ltrans0.ltrans.o] Error 1 (ignored)
> 
> The generated makefile fragment without the patch:
> 
> T:\ccGEvdDp.ltrans0.ltrans.o:
>   @T:\build\bin\arm-none-eabi-g++.exe '-xlto' ... '-o' 
> 'T:\ccGEvdDp.ltrans0.ltrans.o' 'T:\ccGEvdDp.ltrans0.o'
>   @-touch -r T:\ccGEvdDp.ltrans0.o T:\ccGEvdDp.ltrans0.o.tem > /dev/null 2>&1 
> && mv T:\ccGEvdDp.ltrans0.o.tem T:\ccGEvdDp.ltrans0.o
> .PHONY: all
> all: \
>   T:\ccGEvdDp.ltrans0.ltrans.o
> 
> With the patch, the touch line would be replaced with:
> 
>   @-touch -r "T:\ccGEvdDp.ltrans0.o" "T:\ccGEvdDp.ltrans0.o.tem" > /dev/null 
> 2>&1 && mv "T:\ccGEvdDp.ltrans0.o.tem" "T:\ccGEvdDp.ltrans0.o"
> 
> gcc/ChangeLog:

OK.

Thanks,
Richard.

>   * lto-wrapper.cc: Quote paths in makefile.
> 
> Co-Authored-By: Yvan ROUX 
> Signed-off-by: Torbjörn SVENSSON 
> ---
>  gcc/lto-wrapper.cc | 4 ++--
>  1 file changed, 2 insertions(+), 2 deletions(-)
> 
> diff --git a/gcc/lto-wrapper.cc b/gcc/lto-wrapper.cc
> index 9a764702ffc..b12bcc1ad27 100644
> --- a/gcc/lto-wrapper.cc
> +++ b/gcc/lto-wrapper.cc
> @@ -2010,8 +2010,8 @@ cont:
>truncate them as soon as we have processed it.  This
>reduces temporary disk-space usage.  */
> if (! save_temps)
> - fprintf (mstream, "\t@-touch -r %s %s.tem > /dev/null 2>&1 "
> -  "&& mv %s.tem %s\n",
> + fprintf (mstream, "\t@-touch -r \"%s\" \"%s.tem\" > /dev/null "
> +  "2>&1 && mv \"%s.tem\" \"%s\"\n",
>input_name, input_name, input_name, input_name); 
>   }
> else
> 

-- 
Richard Biener 
SUSE Software Solutions Germany GmbH, Frankenstrasse 146, 90461 Nuernberg,
Germany; GF: Ivo Totev, Andrew Myers, Andrew McDonald, Boudien Moerman;
HRB 36809 (AG Nuernberg)

ping Re: [Patch] OpenMP: Fix reverse offload GOMP_TARGET_REV IFN corner cases [PR107236]

2022-10-24 Thread Tobias Burnus


Ping this patch – and also "Re: [Patch][v5] libgomp/nvptx: Prepare for
reverse-offload callback handling".

For the latter cf. Alexander's code approval
https://gcc.gnu.org/pipermail/gcc-patches/2022-October/603908.html – and
his concerns regarding the generic feature in
https://gcc.gnu.org/pipermail/gcc-patches/2022-September/601959.html (I
think 'target nowait' permits what he thinks is the better way for GPUs.)

Tobias

On 18.10.22 21:27, Tobias Burnus wrote:

Found when playing around with reverse offload once I used 'omp target
parallel'.
The other issue showed up when running the testsuite (which is done
with -O2).

In all cases, the ICE is in expand_GOMP_TARGET_REV of this IFN, which
should
be unreachable

Note: ENABLE_OFFLOADING inside the compiler must evaluate to true to
show up
as ICE - otherwise, the IFN is not even generated.

I did not see a good reason for DECL_CONTEXT = NULL, thus, I now set
it to
the same as was set for child_fn - for no good reason.

Tested on x86-64 with ENABLE_OFFLOADING albeit without true offloading.
OK for mainline?

Tobias

-
Siemens Electronic Design Automation GmbH; Anschrift: Arnulfstraße 201, 80634 
München; Gesellschaft mit beschränkter Haftung; Geschäftsführer: Thomas 
Heurung, Frank Thürauf; Sitz der Gesellschaft: München; Registergericht 
München, HRB 106955

Re: [PATCH] [PR tree-optimization/107365] Check HONOR_NANS instead of flag_finite_math_only in frange:verify_range.

On Sun, Oct 23, 2022 at 4:57 PM Aldy Hernandez via Gcc-patches
 wrote:
>
> [Jakub and other FP experts, would this be OK, or am I missing
> something?]
>
> Vax does not seem to have !flag_finite_math_only, but float_type_node
> does not HONOR_NANS.  The check in frange::verify_range depended on
> flag_finite_math_only, which is technically not correct since
> frange::set_varying() checks HONOR_NANS instead of
> flag_finite_math_only.
>
> I'm actually getting tired of flag_finite_math_only and
> !flag_finite_math_only discrepancies in the selftests (Vax and rx-elf
> come to mind).  I think we should just test both alternatives in the
> selftests as in this patch.
>
> We could also check flag_finite_math_only=0 with a float_type_node
> that does not HONOR_NANs, but I have no idea how to twiddle
> FLOAT_MODE_FORMAT temporarily, and that may be over thinking it.
>
> How does this look?

OK.  As said elsewhere checking flag_* is never correct for any of the
FP type features, always use HONOR_*.

Thanks,
Richard.

> PR tree-optimization/107365
>
> gcc/ChangeLog:
>
> * value-range.cc (frange::verify_range): Predicate NAN check in
> VARYING range on HONOR_NANS instead of flag_finite_math_only.
> (range_tests_floats): Same.
> (range_tests_floats_various): New.
> (range_tests): Call range_tests_floats_various.
> ---
>  gcc/value-range.cc | 33 +
>  1 file changed, 25 insertions(+), 8 deletions(-)
>
> diff --git a/gcc/value-range.cc b/gcc/value-range.cc
> index d779e9819e2..d8ee6ec0d0f 100644
> --- a/gcc/value-range.cc
> +++ b/gcc/value-range.cc
> @@ -720,13 +720,13 @@ frange::verify_range ()
>gcc_checking_assert (!m_type);
>return;
>  case VR_VARYING:
> -  if (flag_finite_math_only)
> -   gcc_checking_assert (!m_pos_nan && !m_neg_nan);
> -  else
> -   gcc_checking_assert (m_pos_nan && m_neg_nan);
>gcc_checking_assert (m_type);
>gcc_checking_assert (frange_val_is_min (m_min, m_type));
>gcc_checking_assert (frange_val_is_max (m_max, m_type));
> +  if (HONOR_NANS (m_type))
> +   gcc_checking_assert (m_pos_nan && m_neg_nan);
> +  else
> +   gcc_checking_assert (!m_pos_nan && !m_neg_nan);
>return;
>  case VR_RANGE:
>gcc_checking_assert (m_type);
> @@ -3957,10 +3957,9 @@ range_tests_floats ()
>// A range of [-INF,+INF] is actually VARYING if no other properties
>// are set.
>r0 = frange_float ("-Inf", "+Inf");
> -  if (r0.maybe_isnan ())
> -ASSERT_TRUE (r0.varying_p ());
> +  ASSERT_TRUE (r0.varying_p ());
>// ...unless it has some special property...
> -  if (!flag_finite_math_only)
> +  if (HONOR_NANS (r0.type ()))
>  {
>r0.clear_nan ();
>ASSERT_FALSE (r0.varying_p ());
> @@ -4041,6 +4040,24 @@ range_tests_floats ()
>  }
>  }
>
> +// Run floating range tests for various combinations of NAN and INF
> +// support.
> +
> +static void
> +range_tests_floats_various ()
> +{
> +  int save_finite_math_only = flag_finite_math_only;
> +
> +  // Test -ffinite-math-only.
> +  flag_finite_math_only = 1;
> +  range_tests_floats ();
> +  // Test -fno-finite-math-only.
> +  flag_finite_math_only = 0;
> +  range_tests_floats ();
> +
> +  flag_finite_math_only = save_finite_math_only;
> +}
> +
>  void
>  range_tests ()
>  {
> @@ -4049,7 +4066,7 @@ range_tests ()
>range_tests_int_range_max ();
>range_tests_strict_enum ();
>range_tests_nonzero_bits ();
> -  range_tests_floats ();
> +  range_tests_floats_various ();
>range_tests_misc ();
>  }
>
> --
> 2.37.3
>

Re: [RFC] how to handle the combination of -fstrict-flex-arrays + -Warray-bounds

On Sat, 22 Oct 2022, Martin Sebor wrote:

> On 10/21/22 09:29, Qing Zhao wrote:
> > Hi,
> > 
> > (FAM below refers to Flexible Array Members):
> > 
> > I need inputs on  how to handle the combination of -fstrict-flex-arrays +
> > -Warray-bounds.
> > 
> > Our initial goal is to update -Warray-bounds with multiple levels of
> > -fstrict-flex-arrays=N
> > to issue warnings according to the different levels of "N".
> > However, after detailed study, I found that this goal was very hard to be
> > achieved.
> > 
> > 1. -fstrict-flex-arrays and its levels
> > 
> > The new option -fstrict-flex-arrays has 4 levels:
> > 
> > level   trailing arrays
> >  treated as FAM
> > 
> >0 [],[0],[1],[n] the default without option
> >1 [],[0],[1]
> >2 [],[0]
> >3 [] the default when option specified
> >without value
> > 
> > 2. -Warray-bounds and its levels
> > 
> > The option -Warray-bounds currently has 2 levels:
> > 
> > level   trailing arrays
> >  treated as FAM
> > 
> >1 [],[0],[1]  the default when option specified
> >without value
> >2 [] 
> > 
> > i.e,
> > When -Warray-bounds=1, it treats [],[0],[1] as FAM, the same level as
> > -fstrict-flex-arrays=1;
> > When -Warray-bounds=2, it only treat [] as FAM, the same level as
> > -fstrict-flex-arrays=3;
> > 
> > 3. How to handle the combination of  -fstrict-flex-arrays and
> > -Warray-bounds?
> > 
> > Question 1:  when -fstrict-flex-arrays does not present, the default is
> > -fstrict-flex-arrays=0,
> >  which treats [],[0],[1],[n] as FAM, so should we update
> >  the default behavior
> >  of -Warray-bounds to treat any trailing array [n] as
> >  FAMs?
> > 
> > My immediate answer to Q1 is NO, we shouldn't, that will be a big regression
> > on -Warray-bounds, right?
> 
> Yes, it would disable -Warray-bounds in the cases where it warns
> for past-the-end accesses to trailing arrays with two or more
> elements.  Diagnosing those has historically (i.e., before recent
> changes) been a design goal.
> 
> > 
> > Question 2:  when -fstrict-flex-arrays=N1 and -Warray-bounds=N2 present at
> > the same time,
> >   Which one has higher priority? N1 or N2?
> > 
> > -fstrict-flex-arrays=N1 controls how the compiler code generation treats the
> > trailing arrays as FAMs, it seems
> > reasonable to give higher priority to N1,
> 
> I tend to agree.  In other words, set N2' = min(N1, N2).

Yes.  Or do nothing and treat them independently.  Can you check whether
it's possible to distinguish -Warray-bounds from -Warray-bounds=N?  I'd
say that explicit -Warray-bounds=N should exactly get the documented
set of diagnostics, independent of -fstrict-flex-arrays=N.

> > However, then should we completely disable the level of -Warray-bounds
> > N2 under such situation?
> > 
> > I really don't know what's the best way to handle the conflict  between N1
> > and N2.
> > 
> > Can we completely cancel the 2 levels of -Warray-bounds, and always honor
> > the level of -fstrict-flex-arrays?
> > 
> > Any comments or suggestion will be helpful.
> 
> The recent -fstrict-flex-array changes aside, IIRC, there's only
> a subtle distinction between the two -Warray-bounds levels (since
> level 1 started warning on a number of instances that only level
> 2 used to diagnose a few releases ago).  I think that subset of
> level 2 could be merged into level 1 without increasing the rate
> of false positives.  Then level 2 could be assigned a new set of
> potential problems to detect (such as past-the-end accesses to
> trailing one-element arrays).
> 
> Martin
> 
> 

-- 
Richard Biener 
SUSE Software Solutions Germany GmbH, Frankenstrasse 146, 90461 Nuernberg,
Germany; GF: Ivo Totev, Andrew Myers, Andrew McDonald, Boudien Moerman;
HRB 36809 (AG Nuernberg)

[PATCH] c++: Fix up constexpr handling of char/signed char/short pre/post inc/decrement [PR105774]

Hi!

signed char, char or short int pre/post inc/decrement are represented by
normal {PRE,POST}_{INC,DEC}REMENT_EXPRs in the FE and only gimplification
ensures that the {PLUS,MINUS}_EXPR is done in unsigned version of those
types:
case PREINCREMENT_EXPR:
case PREDECREMENT_EXPR:
case POSTINCREMENT_EXPR:
case POSTDECREMENT_EXPR:
  {
tree type = TREE_TYPE (TREE_OPERAND (*expr_p, 0));
if (INTEGRAL_TYPE_P (type) && c_promoting_integer_type_p (type))
  {
if (!TYPE_OVERFLOW_WRAPS (type))
  type = unsigned_type_for (type);
return gimplify_self_mod_expr (expr_p, pre_p, post_p, 1, type);
  }
break;
  }
This means during constant evaluation we need to do it similarly (either
using unsigned_type_for or using widening to integer_type_node).
The following patch does the latter.

Bootstrapped/regtested on x86_64-linux and i686-linux, ok for trunk?

2022-10-24  Jakub Jelinek  

PR c++/105774
* constexpr.cc (cxx_eval_increment_expression): For signed types
that promote to int, evaluate PLUS_EXPR or MINUS_EXPR in int type.

* g++.dg/cpp1y/constexpr-105774.C: New test.

--- gcc/cp/constexpr.cc.jj  2022-10-21 18:04:47.869797312 +0200
+++ gcc/cp/constexpr.cc 2022-10-23 18:43:27.003390282 +0200
@@ -6234,6 +6234,18 @@ cxx_eval_increment_expression (const con
offset = fold_build1 (NEGATE_EXPR, TREE_TYPE (offset), offset);
   mod = fold_build2 (POINTER_PLUS_EXPR, type, val, offset);
 }
+  else if (c_promoting_integer_type_p (type)
+  && !TYPE_UNSIGNED (type)
+  && TYPE_PRECISION (type) < TYPE_PRECISION (integer_type_node))
+{
+  offset = fold_convert (integer_type_node, offset);
+  mod = fold_convert (integer_type_node, val);
+  tree t = fold_build2 (inc ? PLUS_EXPR : MINUS_EXPR, integer_type_node,
+   mod, offset);
+  mod = fold_convert (type, t);
+  if (TREE_OVERFLOW_P (mod) && !TREE_OVERFLOW_P (t))
+   TREE_OVERFLOW (mod) = false;
+}
   else
 mod = fold_build2 (inc ? PLUS_EXPR : MINUS_EXPR, type, val, offset);
   if (!ptr)
--- gcc/testsuite/g++.dg/cpp1y/constexpr-105774.C.jj2022-10-23 
18:44:15.587729613 +0200
+++ gcc/testsuite/g++.dg/cpp1y/constexpr-105774.C   2022-10-23 
18:33:54.754170726 +0200
@@ -0,0 +1,15 @@
+// PR c++/105774
+// { dg-do compile { target c++14 } }
+
+constexpr signed char
+foo ()
+{
+#if __SCHAR_MAX__ < __INT_MAX__
+  signed char x = __SCHAR_MAX__;
+#else
+  signed char x = 0;
+#endif
+  return ++x;
+}
+
+constexpr auto a = foo ();

Jakub

Re: [PATCH] Rename nonzero_bits to known_zero_bits.

On Fri, Oct 21, 2022 at 3:15 PM Aldy Hernandez via Gcc-patches
 wrote:
>
> The name nonzero_bits is confusing.  We're not tracking nonzero bits.
> We're tracking known-zero bits, or at the worst we're tracking "maybe
> nonzero bits".  But really, the only thing we're sure about in the
> "nonzero" bits are the bits that are zero, which are known to be 0.
> We're not tracking nonzero bits.
>
> I know we've been carrying around this name forever, but the fact that
> both of the maintainers of the code *HATE* it, should be telling.
> Also, we'd also like to track known-one bits in the irange, so it's
> best to keep the nomenclature consistent.
>
> Andrew, are you ok with this naming, or would you prefer something
> else?

But it's the same as on RTL.  And on release branches.  But yes,
it's maybe_nonzero_bits.  Ideally we'd track known/unknown_bits
(both zero and one) instead.  bit-CCP already computes that but throws
away the ones:

  unsigned int precision = TYPE_PRECISION (TREE_TYPE (val->value));
  wide_int nonzero_bits
= (wide_int::from (val->mask, precision, UNSIGNED)
   | wi::to_wide (val->value));
  nonzero_bits &= get_nonzero_bits (name);
  set_nonzero_bits (name, nonzero_bits);

so I think instead of renaming can you see what it takes to also record known
set bits?  (yeah, needs two masks instead of one in the storage)

> gcc/ChangeLog:
>
> * asan.cc (handle_builtin_alloca): Rename *nonzero* to *known_zero*.
> * fold-const.cc (expr_not_equal_to): Same.
> (tree_nonzero_bits): Same.
> * gimple-range-op.cc: Same.
> * ipa-cp.cc (ipcp_bits_lattice::get_value_and_mask): Same.
> * ipa-prop.cc (ipa_compute_jump_functions_for_edge): Same.
> (ipcp_update_bits): Same.
> * match.pd: Same.
> * range-op.cc (operator_lt::fold_range): Same.
> (operator_cast::fold_range): Same.
> (operator_bitwise_and::fold_range): Same.
> (set_nonzero_range_from_mask): Same.
> (set_known_zero_range_from_mask): Same.
> (operator_bitwise_and::simple_op1_range_solver): Same.
> (operator_bitwise_and::op1_range): Same.
> (range_op_cast_tests): Same.
> (range_op_bitwise_and_tests): Same.
> * tree-data-ref.cc (split_constant_offset): Same.
> * tree-ssa-ccp.cc (get_default_value): Same.
> (ccp_finalize): Same.
> (evaluate_stmt): Same.
> * tree-ssa-dom.cc
> (dom_opt_dom_walker::set_global_ranges_from_unreachable_edges): Same.
> * tree-ssa-reassoc.cc (optimize_range_tests_var_bound): Same.
> * tree-ssanames.cc (set_nonzero_bits): Same.
> (set_known_zero_bits): Same.
> (get_nonzero_bits): Same.
> (get_known_zero_bits): Same.
> (ssa_name_has_boolean_range): Same.
> * tree-ssanames.h (set_nonzero_bits): Same.
> (get_nonzero_bits): Same.
> (set_known_zero_bits): Same.
> (get_known_zero_bits): Same.
> * tree-vect-patterns.cc (vect_get_range_info): Same.
> * tree-vrp.cc (maybe_set_nonzero_bits): Same.
> (maybe_set_known_zero_bits): Same.
> (vrp_asserts::remove_range_assertions): Same.
> * tree-vrp.h (maybe_set_nonzero_bits): Same.
> (maybe_set_known_zero_bits): Same.
> * tree.cc (tree_ctz): Same.
> * value-range-pretty-print.cc
> (vrange_printer::print_irange_bitmasks): Same.
> * value-range-storage.cc (irange_storage_slot::set_irange): Same.
> (irange_storage_slot::get_irange): Same.
> (irange_storage_slot::dump): Same.
> * value-range-storage.h: Same.
> * value-range.cc (irange::operator=): Same.
> (irange::copy_to_legacy): Same.
> (irange::irange_set): Same.
> (irange::irange_set_anti_range): Same.
> (irange::set): Same.
> (irange::verify_range): Same.
> (irange::legacy_equal_p): Same.
> (irange::operator==): Same.
> (irange::contains_p): Same.
> (irange::irange_single_pair_union): Same.
> (irange::irange_union): Same.
> (irange::irange_intersect): Same.
> (irange::invert): Same.
> (irange::get_nonzero_bits_from_range): Same.
> (irange::get_known_zero_bits_from_range): Same.
> (irange::set_range_from_nonzero_bits): Same.
> (irange::set_range_from_known_zero_bits): Same.
> (irange::set_nonzero_bits): Same.
> (irange::set_known_zero_bits): Same.
> (irange::get_nonzero_bits): Same.
> (irange::get_known_zero_bits): Same.
> (irange::intersect_nonzero_bits): Same.
> (irange::intersect_known_zero_bits): Same.
> (irange::union_nonzero_bits): Same.
> (irange::union_known_zero_bits): Same.
> (range_tests_nonzero_bits): Same.
> * value-range.h (irange::varying_compatible_p): Same.
> (gt_ggc_mx): Same.
>

[PATCH] c, c++: Fix up excess precision handling of scalar_to_vector conversion [PR107358]

Hi!

As mentioned earlier in the C++ excess precision support mail, the following
testcase is broken with excess precision both in C and C++ (though just in C++
it was triggered in real-world code).
scalar_to_vector is called in both FEs after the excess precision promotions
(or stripping of EXCESS_PRECISION_EXPR), so we can then get invalid
diagnostics that say float vector + float involves truncation (on ia32
from long double to float).

The following patch fixes that by calling scalar_to_vector on the operands
before the excess precision promotions, let scalar_to_vector just do the
diagnostics (it does e.g. fold_for_warn so it will fold
EXCESS_PRECISION_EXPR around REAL_CST to constants etc.) but will then
do the actual conversions using the excess precision promoted operands
(so say if we have vector double + (float + float) we don't actually do
vector double + (float) ((long double) float + (long double) float)
but
vector double + (double) ((long double) float + (long double) float)

Bootstrapped/regtested on x86_64-linux and i686-linux, ok for trunk?

2022-10-24  Jakub Jelinek  

PR c++/107358
c/
* c-typeck.cc (build_binary_op): Pass operands before excess precision
promotions to scalar_to_vector call.
cp/
* typeck.cc (cp_build_binary_op): Pass operands before excess precision
promotions to scalar_to_vector call.
testsuite/
* c-c++-common/pr107358.c: New test.
* g++.dg/cpp1y/pr68180.C: Remove -fexcess-precision=fast from
dg-options.

--- gcc/c/c-typeck.cc.jj2022-10-14 09:35:56.10261 +0200
+++ gcc/c/c-typeck.cc   2022-10-22 17:54:24.378839301 +0200
@@ -11995,8 +11995,8 @@ build_binary_op (location_t location, en
   if ((gnu_vector_type_p (type0) && code1 != VECTOR_TYPE)
   || (gnu_vector_type_p (type1) && code0 != VECTOR_TYPE))
 {
-  enum stv_conv convert_flag = scalar_to_vector (location, code, op0, op1,
-true);
+  enum stv_conv convert_flag = scalar_to_vector (location, code, orig_op0,
+orig_op1, true);
 
   switch (convert_flag)
{
--- gcc/cp/typeck.cc.jj 2022-10-20 13:54:22.535670240 +0200
+++ gcc/cp/typeck.cc2022-10-22 17:56:58.589715301 +0200
@@ -5191,6 +5191,8 @@ cp_build_binary_op (const op_location_t
 
   orig_type0 = type0 = TREE_TYPE (op0);
   orig_type1 = type1 = TREE_TYPE (op1);
+  tree non_ep_op0 = op0;
+  tree non_ep_op1 = op1;
 
   /* The expression codes of the data types of the arguments tell us
  whether the arguments are integers, floating, pointers, etc.  */
@@ -5303,8 +5305,9 @@ cp_build_binary_op (const op_location_t
   if ((gnu_vector_type_p (type0) && code1 != VECTOR_TYPE)
   || (gnu_vector_type_p (type1) && code0 != VECTOR_TYPE))
 {
-  enum stv_conv convert_flag = scalar_to_vector (location, code, op0, op1,
-complain & tf_error);
+  enum stv_conv convert_flag
+   = scalar_to_vector (location, code, non_ep_op0, non_ep_op1,
+   complain & tf_error);
 
   switch (convert_flag)
 {
--- gcc/testsuite/c-c++-common/pr107358.c.jj2022-10-22 18:46:59.390375310 
+0200
+++ gcc/testsuite/c-c++-common/pr107358.c   2022-10-22 18:01:52.973660719 
+0200
@@ -0,0 +1,30 @@
+/* PR c++/107358 */
+/* { dg-do compile } */
+/* { dg-options "-O2 -fexcess-precision=standard" } */
+
+typedef float __attribute__((vector_size (4 * sizeof (float)))) A;
+typedef double __attribute__((vector_size (2 * sizeof (double)))) B;
+
+void
+foo (A *x)
+{
+  *x = *x - 124.225514990f;
+}
+
+void
+bar (A *x, float y)
+{
+  *x = *x - y;
+}
+
+void
+baz (B *x)
+{
+  *x = *x + 124.225514990f;
+}
+
+void
+qux (B *x, double y)
+{
+  *x = *x + y;
+}
--- gcc/testsuite/g++.dg/cpp1y/pr68180.C.jj 2022-10-14 09:28:28.339159477 
+0200
+++ gcc/testsuite/g++.dg/cpp1y/pr68180.C2022-10-22 17:59:07.012946513 
+0200
@@ -1,6 +1,6 @@
 // PR c++/68180
 // { dg-do compile { target c++14 } }
-// { dg-additional-options "-Wno-psabi -fexcess-precision=fast" }
+// { dg-additional-options "-Wno-psabi" }
 
 typedef float __attribute__( ( vector_size( 16 ) ) ) float32x4_t;
 constexpr float32x4_t fill(float x) {

Jakub

Re: vect: Make vect_check_gather_scatter reject offsets that aren't multiples of BITS_PER_UNIT [PR107346]

On Fri, 21 Oct 2022, Andre Vieira (lists) wrote:

> Hi,
> 
> The ada failure reported in the PR was being caused by
> vect_check_gather_scatter failing to deal with bit offsets that weren't
> multiples of BITS_PER_UNIT. This patch makes vect_check_gather_scatter reject
> memory accesses with such offsets.
> 
> Bootstrapped and regression tested on aarch64 and x86_64.
> 
> I wasn't sure whether I should add a new Ada test that shows the same failure
> without the bitfield lowering, I suspect this is such a rare form of
> data-structure that is why no other tests have highlighted the failure. Let me
> know if you would like me to add it still, the change is quite simple, just
> change the Int24 -> Int32 type in the structure. The 'thing' that causes the
> failure is the 4-bit member inside the packed structure before the field we
> access, giving it a 4-bit offset. I attempted but failed to create a C test
> using __attribute__((packed)).

Can you check why vect_find_stmt_data_reference doesn't trip on the

  if (TREE_CODE (DR_REF (dr)) == COMPONENT_REF
  && DECL_BIT_FIELD (TREE_OPERAND (DR_REF (dr), 1)))
{
  free_data_ref (dr);
  return opt_result::failure_at (stmt,
 "not vectorized:"
 " statement is an unsupported"
 " bitfield access %G", stmt);
}

?  I think we should amend this check and I guess that
checking multiple_p on DECL_FIELD_BIT_OFFSET should be enough?

Eric - the docs of DECL_BIT_FIELD are vague enough "must be accessed
specially" but ISTR it might eventually only apply to the fields
(bit) size and not its position.  OTOH the Ada frontend might not
be too careful in setting this flag for bit-packed structs?

Richard.

> Kind Regards,
> Andre
> 
> gcc/ChangeLog:
> 
>     PR tree-optimization/107346
>     * tree-vect-data-refs.cc (vect_check_gather_scatter): Reject 
> offsets that aren't
>     multiples of BITS_PER_UNIT.
> 

-- 
Richard Biener 
SUSE Software Solutions Germany GmbH, Frankenstrasse 146, 90461 Nuernberg,
Germany; GF: Ivo Totev, Andrew Myers, Andrew McDonald, Boudien Moerman;
HRB 36809 (AG Nuernberg)

Re: [PATCH] Always use TYPE_MODE instead of DECL_MODE for vector field

On Fri, Oct 21, 2022 at 6:18 PM H.J. Lu  wrote:
>
> On Fri, Oct 21, 2022 at 2:33 AM Richard Biener
>  wrote:
> >
> > On Thu, Oct 20, 2022 at 6:58 PM H.J. Lu via Gcc-patches
> >  wrote:
> > >
> > > commit e034c5c895722e0092d2239cd8c2991db77d6d39
> > > Author: Jakub Jelinek 
> > > Date:   Sat Dec 2 08:54:47 2017 +0100
> > >
> > > PR target/78643
> > > PR target/80583
> > > * expr.c (get_inner_reference): If DECL_MODE of a non-bitfield
> > > is BLKmode for vector field with vector raw mode, use TYPE_MODE
> > > instead of DECL_MODE.
> > >
> > > fixed the case where DECL_MODE of a vector field is BLKmode and its
> > > TYPE_MODE is a vector mode because of target attribute.  Remove the
> > > BLKmode check for the case where DECL_MODE of a vector field is a vector
> > > mode and its TYPE_MODE is BLKmode because of target attribute.
> > >
> > > gcc/
> > >
> > > PR target/107304
> > > * expr.c (get_inner_reference): Always use TYPE_MODE for vector
> > > field with vector raw mode.
> > >
> > > gcc/testsuite/
> > >
> > > PR target/107304
> > > * gcc.target/i386/pr107304.c: New test.
> > > ---
> > >  gcc/expr.cc  |  3 +-
> > >  gcc/testsuite/gcc.target/i386/pr107304.c | 39 
> > >  2 files changed, 40 insertions(+), 2 deletions(-)
> > >  create mode 100644 gcc/testsuite/gcc.target/i386/pr107304.c
> > >
> > > diff --git a/gcc/expr.cc b/gcc/expr.cc
> > > index efe387e6173..9145193c2c1 100644
> > > --- a/gcc/expr.cc
> > > +++ b/gcc/expr.cc
> > > @@ -7905,8 +7905,7 @@ get_inner_reference (tree exp, poly_int64_pod 
> > > *pbitsize,
> > >   /* For vector fields re-check the target flags, as DECL_MODE
> > >  could have been set with different target flags than
> > >  the current function has.  */
> > > - if (mode == BLKmode
> > > - && VECTOR_TYPE_P (TREE_TYPE (field))
> > > + if (VECTOR_TYPE_P (TREE_TYPE (field))
> > > >   && VECTOR_MODE_P (TYPE_MODE_RAW (TREE_TYPE (field))))
> >
> > Isn't the check on TYPE_MODE_RAW also wrong then?  Btw, the mode could
>
> TYPE_MODE_RAW is always set to a vector mode for a vector type:
>
>/* Find an appropriate mode for the vector type.  */
> if (TYPE_MODE (type) == VOIDmode)
>   SET_TYPE_MODE (type,
>  mode_for_vector (SCALAR_TYPE_MODE (innertype),
>   nunits).else_blk ());

But mode_for_vector can return a MODE_INT!

  /* For integers, try mapping it to a same-sized scalar mode.  */
  if (GET_MODE_CLASS (innermode) == MODE_INT)
{
  poly_uint64 nbits = nunits * GET_MODE_BITSIZE (innermode);
  if (int_mode_for_size (nbits, 0).exists (&mode)
  && have_regs_of_mode[mode])
return mode;

> But TYPE_MODE returns BLKmode if the vector mode is unsupported.
>
> > also be an integer mode.
>
> For a vector field, mode is either BLK mode or the vector mode.  Jakub,
> can you comment on it?

I think that for

typedef int v2si __attribute__((vector_size(8)));

struct X { int i; v2si j; };

v2si should get DImode with -mno-sse?

> >
> > > mode = TYPE_MODE (TREE_TYPE (field));
> > > }
> > > diff --git a/gcc/testsuite/gcc.target/i386/pr107304.c 
> > > b/gcc/testsuite/gcc.target/i386/pr107304.c
> > > new file mode 100644
> > > > index 00000000000..24d68795e7f
> > > --- /dev/null
> > > +++ b/gcc/testsuite/gcc.target/i386/pr107304.c
> > > @@ -0,0 +1,39 @@
> > > +/* { dg-do compile } */
> > > +/* { dg-options "-O0 -march=tigerlake" } */
> > > +
> > > > +#include <stdint.h>
> > > +
> > > +typedef union {
> > > +  uint8_t v __attribute__((aligned(256))) __attribute__ ((vector_size(64 
> > > > * sizeof(uint8_t))));
> > > +  uint8_t i[64] __attribute__((aligned(256)));
> > > +} stress_vec_u8_64_t;
> > > +
> > > +typedef struct {
> > > + struct {
> > > +  stress_vec_u8_64_t s;
> > > +  stress_vec_u8_64_t o;
> > > +  stress_vec_u8_64_t mask1;
> > > +  stress_vec_u8_64_t mask2;
> > > + } u8_64;
> > > +} stress_vec_data_t;
> > > +
> > > +__attribute__((target_clones("arch=alderlake", "default")))
> > > +void
> > > +stress_vecshuf_u8_64(stress_vec_data_t *data)
> > > +{
> > > +  stress_vec_u8_64_t *__restrict s;
> > > +  stress_vec_u8_64_t *__restrict mask1;
> > > +  stress_vec_u8_64_t *__restrict mask2;
> > > +  register int i;
> > > +
> > > > +  s = &data->u8_64.s;
> > > > +  mask1 = &data->u8_64.mask1;
> > > > +  mask2 = &data->u8_64.mask2;
> > > +
> > > +  for (i = 0; i < 256; i++) {  /* was i < 65536 */
> > > +  stress_vec_u8_64_t tmp;
> > > +
> > > +  tmp.v = __builtin_shuffle(s->v, mask1->v);
> > > +  s->v = __builtin_shuffle(tmp.v, mask2->v);
> > > +  }
> > > +}
> > > --
> > > 2.37.3
> > >
>
>
>
> --
> H.J.

Re: Adding a new thread model to GCC

2022-10-24 Thread i.nixman--- via Gcc-patches


On 2022-10-21 11:44, Eric Botcazou via Libstdc++ wrote:

How does this compare with Eric B's proposal at
https://gcc.gnu.org/legacy-ml/gcc-patches/2019-06/msg01840.html ?


My proposal was to reimplement (and extend) the native thread model 
(win32)
instead of adding a new one, the advantage being that you don't need an 
extra

threading layer between GCC and Windows.


Hello Eric,

Could you please refresh/recheck your patch against the current gcc master 
and address the objections noted in the thread? Is that possible?




best!

Re: [PATCH] d: Remove D-specific version definitions from target headers