[PATCH] RISC-V: Fix PR109615

2023-05-04 Thread juzhe . zhong
From: Juzhe-Zhong 

Before this patch:

...
.L2:
addi a4,a1,100
add t1,a0,a2
mv  t0,a0
beq a2,zero,.L1
vsetvli zero,a3,e8,mf8,tu,mu
.L4:
addi a6,t0,100
addi a7,a4,-100
vle8.v  v1,0(t0)
addi t0,t0,1
vse8.v  v1,0(a7)
vlm.v   v0,0(a6)
vle8.v  v1,0(a6),v0.t
vse8.v  v1,0(a4)
addi a4,a4,1
bne t0,t1,.L4
addi a0,a0,300
addi a1,a1,300
add a2,a0,a2
vsetvli zero,a3,e8,mf8,ta,ma
.L5:
vle8.v  v2,0(a0)
addi a0,a0,1
vse8.v  v2,0(a1)
addi a1,a1,1
bne a2,a0,.L5
.L1:
ret

After this patch:

...
.L2:
addi a4,a1,100
add t1,a0,a2
mv  t0,a0
beq a2,zero,.L1
vsetvli zero,a3,e8,mf8,tu,mu
.L4:
addi a6,t0,100
addi a7,a4,-100
vle8.v  v1,0(t0)
addi t0,t0,1
vse8.v  v1,0(a7)
vlm.v   v0,0(a6)
vle8.v  v1,0(a6),v0.t
vse8.v  v1,0(a4)
addi a4,a4,1
bne t0,t1,.L4
addi a0,a0,300
addi a1,a1,300
add a2,a0,a2
.L5:
vle8.v  v2,0(a0)
addi a0,a0,1
vse8.v  v2,0(a1)
addi a1,a1,1
bne a2,a0,.L5
.L1:
ret

PR target/109615

gcc/ChangeLog:

* config/riscv/riscv-vsetvl.cc (avl_info::multiple_source_equal_p): Add
degenerate PHI optimization.

gcc/testsuite/ChangeLog:

* gcc.target/riscv/rvv/vsetvl/avl_single-74.c: Adapt testcase.
* gcc.target/riscv/rvv/vsetvl/vsetvl-11.c: Ditto.
* gcc.target/riscv/rvv/vsetvl/pr109615.c: New test.

---
 gcc/config/riscv/riscv-vsetvl.cc  | 81 +--
 .../riscv/rvv/vsetvl/avl_single-74.c  |  4 +-
 .../gcc.target/riscv/rvv/vsetvl/pr109615.c| 33 
 .../gcc.target/riscv/rvv/vsetvl/vsetvl-11.c   |  2 +-
 4 files changed, 54 insertions(+), 66 deletions(-)
 create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/vsetvl/pr109615.c

diff --git a/gcc/config/riscv/riscv-vsetvl.cc b/gcc/config/riscv/riscv-vsetvl.cc
index 609f86d8704..39b4d21210b 100644
--- a/gcc/config/riscv/riscv-vsetvl.cc
+++ b/gcc/config/riscv/riscv-vsetvl.cc
@@ -1676,72 +1676,27 @@ avl_info::single_source_equal_p (const avl_info ) 
const
 bool
 avl_info::multiple_source_equal_p (const avl_info ) const
 {
-  /* TODO: We don't do too much optimization here since it's
- too complicated in case of analyzing the PHI node.
+  /* When the def info is same in RTL_SSA namespace, it's safe
+ to consider they are avl compatible.  */
+  if (m_source == other.get_source ())
+return true;
 
- For example:
-   void f (void * restrict in, void * restrict out, int n, int m, int cond)
-   {
- size_t vl;
- switch (cond)
- {
- case 1:
-   vl = 100;
-   break;
- case 2:
-   vl = *(size_t*)(in + 100);
-   break;
- case 3:
-   {
- size_t new_vl = *(size_t*)(in + 500);
- size_t new_vl2 = *(size_t*)(in + 600);
- vl = new_vl + new_vl2 + 777;
- break;
-   }
- default:
-   vl = 4000;
-   break;
- }
- for (size_t i = 0; i < n; i++)
-   {
- vint8mf8_t v = __riscv_vle8_v_i8mf8 (in + i, vl);
- __riscv_vse8_v_i8mf8 (out + i, v, vl);
+  /* We only consider handle PHI node.  */
+  if (!m_source->insn ()->is_phi () || !other.get_source ()->insn ()->is_phi 
())
+return false;
 
- vint8mf8_t v2 = __riscv_vle8_v_i8mf8_tu (v, in + i + 100, vl);
- __riscv_vse8_v_i8mf8 (out + i + 100, v2, vl);
-   }
+  phi_info *phi1 = as_a (m_source);
+  phi_info *phi2 = as_a (other.get_source ());
 
- size_t vl2;
- switch (cond)
- {
- case 1:
-   vl2 = 100;
-   break;
- case 2:
-   vl2 = *(size_t*)(in + 100);
-   break;
- case 3:
-   {
- size_t new_vl = *(size_t*)(in + 500);
- size_t new_vl2 = *(size_t*)(in + 600);
- vl2 = new_vl + new_vl2 + 777;
- break;
-   }
- default:
-   vl2 = 4000;
-   break;
- }
- for (size_t i = 0; i < m; i++)
-   {
- vint8mf8_t v = __riscv_vle8_v_i8mf8 (in + i + 300, vl2);
- __riscv_vse8_v_i8mf8 (out + i + 300, v, vl2);
- vint8mf8_t v2 = __riscv_vle8_v_i8mf8_tu (v, in + i + 200, vl2);
- __riscv_vse8_v_i8mf8 (out + i + 200, v2, vl2);
-   }
-   }
- Such case may not be necessary to optimize since the codes of defining
- vl and vl2 are redundant.  */
-  return m_source == other.get_source ();
+  if (phi1->is_degenerate () && phi2->is_degenerate ())
+{
+  /* Case 1: If both PHI nodes have the same single 

Re: [PATCH] MATCH: Add ABSU == 0 to a == 0 simplification

2023-05-04 Thread Richard Biener via Gcc-patches



> Am 05.05.2023 um 01:41 schrieb Andrew Pinski via Gcc-patches 
> :
> 
> There is already an `ABS == 0` to `a == 0` pattern,
> this just extends that to ABSU too.
> 
> OK? Bootstrapped and tested on x86_64-linux-gnu with no regressions.

Ok

Richard 

>PR tree-optimization/109722
> 
> gcc/ChangeLog:
> 
>* match.pd: Extend the `ABS == 0` pattern
>to cover `ABSU == 0` too.
> 
> gcc/testsuite/ChangeLog:
> 
>* gcc.dg/tree-ssa/abs-1.c: New test.
> ---
> gcc/match.pd  | 11 ++-
> gcc/testsuite/gcc.dg/tree-ssa/abs-1.c | 12 
> 2 files changed, 18 insertions(+), 5 deletions(-)
> create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/abs-1.c
> 
> diff --git a/gcc/match.pd b/gcc/match.pd
> index 08a4f8ebdc1..ceae1c34abc 100644
> --- a/gcc/match.pd
> +++ b/gcc/match.pd
> @@ -5807,11 +5807,12 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
> (if (tem && !TREE_OVERFLOW (tem))
>  (scmp @0 { tem; }))
> 
> -/* Convert ABS_EXPR == 0 or ABS_EXPR != 0 to x == 0 or x != 0.  */
> -(for op (eq ne)
> - (simplify
> -  (op (abs @0) zerop@1)
> -  (op @0 @1)))
> +/* Convert ABS[U]_EXPR == 0 or ABS[U]_EXPR != 0 to x == 0 or x != 0.  
> */
> +(for op (abs absu)
> + (for eqne (eq ne)
> +  (simplify
> +   (eqne (op @0) zerop@1)
> +   (eqne @0 { build_zero_cst (TREE_TYPE (@0)); }
> 
> /* From fold_sign_changed_comparison and fold_widened_comparison.
>FIXME: the lack of symmetry is disturbing.  */
> diff --git a/gcc/testsuite/gcc.dg/tree-ssa/abs-1.c 
> b/gcc/testsuite/gcc.dg/tree-ssa/abs-1.c
> new file mode 100644
> index 000..ce40403
> --- /dev/null
> +++ b/gcc/testsuite/gcc.dg/tree-ssa/abs-1.c
> @@ -0,0 +1,12 @@
> +/* PR tree-optimization/109722 */
> +/* { dg-do compile } */
> +/* { dg-options "-O -fdump-tree-gimple -fdump-tree-optimized" } */
> +
> +int g(signed char x){
> +x = x < 0 ? -x : x;
> +return x == 0;
> +}
> +
> +/* This should work even if int is 16bits. */
> +/* { dg-final { scan-tree-dump "ABSU_EXPR" "gimple"} } */
> +/* { dg-final { scan-tree-dump-not "ABSU_EXPR" "optimized"} } */
> -- 
> 2.31.1
> 


RE: Re: [PATCH] machine_mode type size: Extend enum size from 8-bit to 16-bit

2023-05-04 Thread Li, Pan2 via Gcc-patches
I tried memory profiling with valgrind --tool=memcheck --trace-children=yes
for this change, targeting the SPEC 2006 INT part with rv64gcv. Note that we only
count the bytes allocated as reported in the valgrind log, like this:
"==2832896==   total heap usage: 208 allocs, 165 frees, 123,204 bytes allocated".

Allowing for some variance in valgrind, it looks like the impact on bytes
allocated may be limited. However, I am still running this for x86; it will take
more than 30 hours for each iteration...

RISC-V GCC Version:
>> ~/bin/test-gnu-8-bits/bin/riscv64-unknown-linux-gnu-gcc --version
riscv64-unknown-linux-gnu-gcc (gd7cb9720ed5) 14.0.0 20230503 (experimental)
Copyright (C) 2023 Free Software Foundation, Inc.
This is free software; see the source for copying conditions.  There is NO
warranty; not even for MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.

Bytes allocated with O2:
-
Benchmark   |  upstream | with this PATCH   
-
400.perlbench   | 29699642875   | 29949876269 ~0.0%
401.bzip2   | 1641041659| 1755563972 +6.95%
403.gcc | 68447500516   | 68900883291 ~0.0%
429.mcf | 1433156462| 1433253373 ~0.0%
445.gobmk   | 14239225210   | 14463438465 ~0.0%
456.hmmer   | 9635955623| 9808534948 +1.8%
458.sjeng   | 2419478204| 2545478940 +5.4%
462.libquantum  | 1686404489| 1800884197 +6.8%
464.h264ref 8j1 | 10190413900   | 10351134161 +1.6%
471.omnetpp | 40814627684   | 41185864529 ~0.0%
473.astar   | 3807097529| 3928428183 +3.2%
483.xalancbmk   | 152959418167  | 154201738843 ~0.0%

Bytes allocated with Ofast + funroll-loops:
--
Benchmark   |  upstream | with this PATCH
--
400.perlbench   |  39491184733  | 39223020267 ~0.0% 
401.bzip2   |  2843871517   | 2730383463 ~0%
403.gcc |  84195991898  | 83730632955 -4.0% 
429.mcf |  1481381164   | 1367309565 -7.7%
445.gobmk   |  20123943663  | 19886116394 -1.2%
456.hmmer   |  12302445139  | 12121745383 -1.5%
458.sjeng   |  3884712615   | 3755481930  -3.3%
462.libquantum  |  1966619940   | 1852274342  -5.8%
464.h264ref |  19219365552  | 19050288201 ~0.0%
471.omnetpp |  45701008325  | 45327805079 ~0.0%
473.astar   |  4118600354   | 3995943705 -3.0%
483.xalancbmk   |  179481305182 | 178160306301 ~0.0%

Pan


-Original Message-
From: Gcc-patches  On Behalf 
Of ???
Sent: Thursday, April 13, 2023 7:23 AM
To: kito.cheng ; rguenther 
Cc: richard.sandiford ; Jeff Law 
; gcc-patches ; palmer 
; jakub 
Subject: Re: Re: [PATCH] machine_mode type size: Extend enum size from 8-bit to 
16-bit

Yeah, like kito said.
Turns out the tuple type model in ARM SVE is the optimal solution for RVV.
And we like the ARM SVE style implementation.

And now we see that swapping rtx_code and mode in rtx_def can keep rtx_def
overall from exceeding 64 bits.
But it seems that there is still a problem in tree_type_common and
tree_decl_common, is that right?

After several tries (removing all redundant TI/TF vector modes and FP16 vector
mode), there are now 252 modes in the RISC-V port. Basically, I can keep
supporting the recent new RVV intrinsic features.
However, we can't support more in the future, for example, FP16 vector, BF16
vector, matrix modes, VLS modes, etc.

From the RVV side, I think extending the machine mode by 1 more bit should be
enough for RVV (512 modes overall).
Is it possible to make that happen in tree_type_common and tree_decl_common,
Richards?

Thank you so much for all comments.


juzhe.zh...@rivai.ai
 
From: Kito Cheng
Date: 2023-04-12 17:31
To: Richard Biener
CC: juzhe.zh...@rivai.ai; richard.sandiford; jeffreyalaw; gcc-patches; palmer; 
jakub
Subject: Re: Re: [PATCH] machine_mode type size: Extend enum size from 8-bit to 
16-bit
> > The concept of fractional LMUL is the same as the concept of 
> > AArch64's partial SVE vectors, so they can only access the lowest 
> > part, like SVE's partial vector.
> >
> > We want to spill/restore the exact size of those modes (1/2, 1/4, 
> > 1/8), so adding dedicated modes for those partial vector modes 
> > should be unavoidable IMO.
> >
> > And even if we use sub-vector, we still need to define those partial 
> > vector types.
>
> Could you use integer modes for the fractional vectors?
 
You mean using the 

[pushed] Revert "c++: restore instantiate_decl assert"

2023-05-04 Thread Jason Merrill via Gcc-patches
Tested x86_64-pc-linux-gnu, applying to trunk.

-- 8< --

In the testcase the assert fails because we use one member function from
another while we're in the middle of instantiating them all, which is
perfectly fine.  It seems complicated to detect this situation, so let's
remove the assert again.

PR c++/109658

This reverts commit 95d4c0d2e6318aef88ba0bc607dfc1ec6b7a612f.

gcc/testsuite/ChangeLog:

* g++.dg/template/local10.C: New test.
---
 gcc/cp/pt.cc|  6 --
 gcc/testsuite/g++.dg/template/local10.C | 10 ++
 2 files changed, 10 insertions(+), 6 deletions(-)
 create mode 100644 gcc/testsuite/g++.dg/template/local10.C

diff --git a/gcc/cp/pt.cc b/gcc/cp/pt.cc
index 5446b5058b7..0f4fb258f9e 100644
--- a/gcc/cp/pt.cc
+++ b/gcc/cp/pt.cc
@@ -27061,12 +27061,6 @@ instantiate_decl (tree d, bool defer_ok, bool 
expl_inst_class_mem_p)
pattern_defined = ! DECL_EXTERNAL (code_pattern);
 }
 
-  /* Any local class members should be instantiated from the TAG_DEFN
- with defer_ok == 0.  */
-  gcc_checking_assert (!defer_ok || !pattern_defined
-  || !decl_function_context (d)
-  || LAMBDA_TYPE_P (DECL_CONTEXT (d)));
-
   /* We may be in the middle of deferred access check.  Disable it now.  */
   push_deferring_access_checks (dk_no_deferred);
 
diff --git a/gcc/testsuite/g++.dg/template/local10.C 
b/gcc/testsuite/g++.dg/template/local10.C
new file mode 100644
index 000..9a70b846ff3
--- /dev/null
+++ b/gcc/testsuite/g++.dg/template/local10.C
@@ -0,0 +1,10 @@
+// PR c++/109658
+
+template  void encode(OutputStream, int *) {
+  struct ValueBaseVisitor {
+void visit() { encodeString(); }
+void encodeString() {}
+  };
+}
+int encode_json;
+void encode_out() { encode(encode_out, _json); }

base-commit: 4657977541de1056a1cb651d6e2ba22472f62d04
-- 
2.31.1



[PATCH] MATCH: Add ABSU == 0 to a == 0 simplification

2023-05-04 Thread Andrew Pinski via Gcc-patches
There is already an `ABS == 0` to `a == 0` pattern,
this just extends that to ABSU too.

OK? Bootstrapped and tested on x86_64-linux-gnu with no regressions.

PR tree-optimization/109722

gcc/ChangeLog:

* match.pd: Extend the `ABS == 0` pattern
to cover `ABSU == 0` too.

gcc/testsuite/ChangeLog:

* gcc.dg/tree-ssa/abs-1.c: New test.
---
 gcc/match.pd  | 11 ++-
 gcc/testsuite/gcc.dg/tree-ssa/abs-1.c | 12 
 2 files changed, 18 insertions(+), 5 deletions(-)
 create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/abs-1.c

diff --git a/gcc/match.pd b/gcc/match.pd
index 08a4f8ebdc1..ceae1c34abc 100644
--- a/gcc/match.pd
+++ b/gcc/match.pd
@@ -5807,11 +5807,12 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
 (if (tem && !TREE_OVERFLOW (tem))
  (scmp @0 { tem; }))
 
-/* Convert ABS_EXPR == 0 or ABS_EXPR != 0 to x == 0 or x != 0.  */
-(for op (eq ne)
- (simplify
-  (op (abs @0) zerop@1)
-  (op @0 @1)))
+/* Convert ABS[U]_EXPR == 0 or ABS[U]_EXPR != 0 to x == 0 or x != 0.  */
+(for op (abs absu)
+ (for eqne (eq ne)
+  (simplify
+   (eqne (op @0) zerop@1)
+   (eqne @0 { build_zero_cst (TREE_TYPE (@0)); }
 
 /* From fold_sign_changed_comparison and fold_widened_comparison.
FIXME: the lack of symmetry is disturbing.  */
diff --git a/gcc/testsuite/gcc.dg/tree-ssa/abs-1.c 
b/gcc/testsuite/gcc.dg/tree-ssa/abs-1.c
new file mode 100644
index 000..ce40403
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/tree-ssa/abs-1.c
@@ -0,0 +1,12 @@
+/* PR tree-optimization/109722 */
+/* { dg-do compile } */
+/* { dg-options "-O -fdump-tree-gimple -fdump-tree-optimized" } */
+
+int g(signed char x){
+x = x < 0 ? -x : x;
+return x == 0;
+}
+
+/* This should work even if int is 16bits. */
+/* { dg-final { scan-tree-dump "ABSU_EXPR" "gimple"} } */
+/* { dg-final { scan-tree-dump-not "ABSU_EXPR" "optimized"} } */
-- 
2.31.1



Re: [PATCH 5/5] match.pd: Use splits in makefile and make configurable.

2023-05-04 Thread Jeff Law via Gcc-patches




On 5/4/23 03:56, Tamar Christina wrote:

-Original Message-
From: Kyrylo Tkachov 
Sent: Wednesday, May 3, 2023 4:19 PM
To: Tamar Christina ; Jeff Law
; gcc-patches@gcc.gnu.org
Cc: nd ; bonz...@gnu.org; nero...@gcc.gnu.org;
aol...@gcc.gnu.org; ralf.wildenh...@gmx.de
Subject: RE: [PATCH 5/5] match.pd: Use splits in makefile and make
configurable.




-Original Message-
From: Gcc-patches  On Behalf Of Tamar
Christina via Gcc-patches
Sent: Tuesday, May 2, 2023 8:08 AM
To: Jeff Law ; gcc-patches@gcc.gnu.org
Cc: nd ; bonz...@gnu.org; nero...@gcc.gnu.org;
aol...@gcc.gnu.org; ralf.wildenh...@gmx.de
Subject: RE: [PATCH 5/5] match.pd: Use splits in makefile and make
configurable.


-Original Message-
From: Jeff Law 
Sent: Sunday, April 30, 2023 8:46 PM
To: Tamar Christina ;
gcc-patches@gcc.gnu.org
Cc: nd ; bonz...@gnu.org; nero...@gcc.gnu.org;
aol...@gcc.gnu.org; ralf.wildenh...@gmx.de
Subject: Re: [PATCH 5/5] match.pd: Use splits in makefile and make
configurable.



On 4/28/23 04:44, Tamar Christina via Gcc-patches wrote:

Hi All,

This updates the build system to split up match.pd files into chunks of

10.

This also introduces a new flag --with-matchpd-partitions which
can be used to change the number of partitions.

For the analysis of why 10 please look at the previous patch in the series.

Bootstrapped Regtested on aarch64-none-linux-gnu and no issues.

Ok for master?

Thanks,
Tamar

gcc/ChangeLog:

PR bootstrap/84402
* Makefile.in (NUM_MATCH_SPLITS, MATCH_SPLITS_SEQ,
GIMPLE_MATCH_PD_SEQ_SRC, GIMPLE_MATCH_PD_SEQ_O,
GENERIC_MATCH_PD_SEQ_SRC, GENERIC_MATCH_PD_SEQ_O): New.
(OBJS, MOSTLYCLEANFILES, .PRECIOUS): Use them.
(s-match): Split into s-generic-match and s-gimple-match.
* configure.ac (with-matchpd-partitions,
DEFAULT_MATCHPD_PARTITIONS): New.
* configure: Regenerate.

This looks pretty reasonable to me.  Are there any patches left in
this series that need review?  I'm very much looking forward to
build time improvements related to this patch, particularly for
targets that I bootstrap with qemu emulation -- we take multiple
hours to build gimple-match and the ability to parallelize those component

builds should be a significant win.


Hi,

No this is the last one, Richi already approved the rest but he didn't
feel he had enough knowledge about the build system to say if this
code was portable enough.


I'm looking forward to this going as well for improved bootstrap times, thanks
for working on this!



So just waiting on this one and can commit the series.


Can we treat Jeff's LGTM above as an ok given his global reviewer position?


Ah I didn't treat it as such as it wasn't in reply to the "ok for master" part. 
But
perhaps I misunderstood.  In case it wasn't, this is also a PING for the *.in 
files
maintainers.
My message was fairly ambiguous.   I just gave it another once over 
and I'll give an explicit OK for the trunk.


Jeff


[PATCH] PHIOPT: Fix diamond case of match_simplify_replacement

2023-05-04 Thread Andrew Pinski via Gcc-patches
So it turns out I messed up checking which edge was true/false for the diamond
form. The edges e0 and e1 here are edges from the merge block, but the
true/false edges are from the conditional block, and with diamond/threeway
there is a bb in between on both edges.
Most of the time, the check that was in match_simplify_replacement would
happen to be correct for diamond form as most of the time the first edge in
the conditional is the edge for the true side of the conditional.
This is why I didn't see the issue during bootstrap/testing.

I added a fragile gimple testcase which exposed the issue. Since there is
no way to specify the order of the edges in the gimple fe, we have to
have forwprop swap the false/true edges (not the order of them, just swapping
the true/false flags) and hope not to do cleanupcfg in between forwprop and the
first phiopt pass. This is the fragile part really; it is not that we will
produce wrong code, just that we won't hit what was the failing case.

OK? Bootstrapped and tested on x86_64-linux-gnu.

PR tree-optimization/109732

gcc/ChangeLog:

* tree-ssa-phiopt.cc (match_simplify_replacement): Fix the selection
of the argtrue/argfalse.

gcc/testsuite/ChangeLog:

* gcc.dg/pr109732.c: New test.
---
 gcc/testsuite/gcc.dg/pr109732.c | 40 +
 gcc/tree-ssa-phiopt.cc  | 29 +---
 2 files changed, 66 insertions(+), 3 deletions(-)
 create mode 100644 gcc/testsuite/gcc.dg/pr109732.c

diff --git a/gcc/testsuite/gcc.dg/pr109732.c b/gcc/testsuite/gcc.dg/pr109732.c
new file mode 100644
index 000..d8374705cd8
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/pr109732.c
@@ -0,0 +1,40 @@
+/* { dg-do run } */
+/* We need to disable passes which might cause cfg cleanup */
+/* { dg-options "-O1 -fgimple -fdisable-tree-ethread -fdisable-tree-fre1" } */
+
+/* This code is done this way to have the false edge as 1st
+   successor edge of BB2. Normally the true edge would be
+   the first and you would not hit the bug.  */
+[[gnu::noipa]]
+_Bool __GIMPLE (ssa, startwith("forwprop1"))
+f3 (_Bool a)
+{
+  _Bool i;
+  _Bool tt;
+
+  __BB(2):
+  tt_4 = a_1(D) == _Literal (_Bool)0;
+  if (tt_4 != _Literal (_Bool)0)
+goto __BB3;
+  else
+goto __BB4;
+
+  __BB(3):
+goto __BB5;
+
+  __BB(4):
+goto __BB5;
+
+  __BB(5):
+  i_2 = __PHI (__BB4: a_1(D), __BB3: _Literal (_Bool)0);
+
+  return i_2;
+}
+
+int main()
+{
+  if (f3(0))
+__builtin_abort();
+  if (!f3(1))
+__builtin_abort();
+}
diff --git a/gcc/tree-ssa-phiopt.cc b/gcc/tree-ssa-phiopt.cc
index 14aeaadd6f6..2fb28b4e60e 100644
--- a/gcc/tree-ssa-phiopt.cc
+++ b/gcc/tree-ssa-phiopt.cc
@@ -726,6 +726,7 @@ match_simplify_replacement (basic_block cond_bb, 
basic_block middle_bb,
   gimple *stmt_to_move = NULL;
   gimple *stmt_to_move_alt = NULL;
   auto_bitmap inserted_exprs;
+  tree arg_true, arg_false;
 
   /* Special case A ? B : B as this will always simplify to B. */
   if (operand_equal_for_phi_arg_p (arg0, arg1))
@@ -756,12 +757,34 @@ match_simplify_replacement (basic_block cond_bb, 
basic_block middle_bb,
   /* We need to know which is the true edge and which is the false
  edge so that we know when to invert the condition below.  */
   extract_true_false_edges_from_block (cond_bb, _edge, _edge);
-  if (e1 == true_edge || e0 == false_edge)
-std::swap (arg0, arg1);
+
+  /* Forward the edges over the middle basic block.  */
+  if (true_edge->dest == middle_bb)
+true_edge = EDGE_SUCC (true_edge->dest, 0);
+  if (false_edge->dest == middle_bb)
+false_edge = EDGE_SUCC (false_edge->dest, 0);
+
+  /* When THREEWAY_P then e1 will point to the edge of the final transition
+ from middle-bb to end.  */
+  if (true_edge == e0)
+{
+  if (!threeway_p)
+   gcc_assert (false_edge == e1);
+  arg_true = arg0;
+  arg_false = arg1;
+}
+  else
+{
+  gcc_assert (false_edge == e0);
+  if (!threeway_p)
+   gcc_assert (true_edge == e1);
+  arg_true = arg1;
+  arg_false = arg0;
+}
 
   tree type = TREE_TYPE (gimple_phi_result (phi));
   result = gimple_simplify_phiopt (early_p, type, stmt,
-  arg0, arg1,
+  arg_true, arg_false,
   );
   if (!result)
 return false;
-- 
2.39.1



Re: [PATCH V2, rs6000] Disable generation of scalar modulo instructions

2023-05-04 Thread Pat Haugen via Gcc-patches

Ping.

On 4/18/23 7:22 AM, Pat Haugen via Gcc-patches wrote:

Updated from prior patch to also disable for int128.


Disable generation of scalar modulo instructions.

It was recently discovered that the scalar modulo instructions can suffer
noticeable performance issues for certain input values. This patch disables
their generation since the equivalent div/mul/sub sequence does not suffer
the same problem.

Bootstrapped and regression tested on powerpc64/powerpc64le.
Ok for master and backports after burn in?

-Pat


2023-04-18  Pat Haugen  

gcc/
 * config/rs6000/rs6000.h (RS6000_DISABLE_SCALAR_MODULO): New.
 * config/rs6000/rs6000.md (mod3, *mod3): Disable.
 (define_expand umod3): New.
 (define_insn umod3): Rename to *umod3 and disable.
 (umodti3, modti3): Disable.

gcc/testsuite/
 * gcc.target/powerpc/clone1.c: Add xfails.
 * gcc.target/powerpc/clone3.c: Likewise.
 * gcc.target/powerpc/mod-1.c: Likewise.
 * gcc.target/powerpc/mod-2.c: Likewise.
 * gcc.target/powerpc/p10-vdivq-vmodq.c: Likewise.


diff --git a/gcc/config/rs6000/rs6000.h b/gcc/config/rs6000/rs6000.h
index 3503614efbd..1cf0a0013c0 100644
--- a/gcc/config/rs6000/rs6000.h
+++ b/gcc/config/rs6000/rs6000.h
@@ -2492,3 +2492,9 @@ while (0)
     rs6000_asm_output_opcode (STREAM);    \
  }    \
    while (0)
+
+/* Disable generation of scalar modulo instructions due to performance 
issues

+   with certain input values. This can be removed in the future when the
+   issues have been resolved.  */
+#define RS6000_DISABLE_SCALAR_MODULO 1
+
diff --git a/gcc/config/rs6000/rs6000.md b/gcc/config/rs6000/rs6000.md
index 44f7dd509cb..4f397bc9179 100644
--- a/gcc/config/rs6000/rs6000.md
+++ b/gcc/config/rs6000/rs6000.md
@@ -3421,6 +3421,17 @@ (define_expand "mod3"
  FAIL;

    operands[2] = force_reg (mode, operands[2]);
+
+  if (RS6000_DISABLE_SCALAR_MODULO)
+    {
+  temp1 = gen_reg_rtx (mode);
+  temp2 = gen_reg_rtx (mode);
+
+  emit_insn (gen_div3 (temp1, operands[1], operands[2]));
+  emit_insn (gen_mul3 (temp2, temp1, operands[2]));
+  emit_insn (gen_sub3 (operands[0], operands[1], temp2));
+  DONE;
+    }
  }
    else
  {
@@ -3440,17 +3451,42 @@ (define_insn "*mod3"
    [(set (match_operand:GPR 0 "gpc_reg_operand" "=,r")
  (mod:GPR (match_operand:GPR 1 "gpc_reg_operand" "r,r")
   (match_operand:GPR 2 "gpc_reg_operand" "r,r")))]
-  "TARGET_MODULO"
+  "TARGET_MODULO && !RS6000_DISABLE_SCALAR_MODULO"
    "mods %0,%1,%2"
    [(set_attr "type" "div")
     (set_attr "size" "")])

+;; This define_expand can be removed when RS6000_DISABLE_SCALAR_MODULO is
+;; removed.
+(define_expand "umod3"
+  [(set (match_operand:GPR 0 "gpc_reg_operand")
+    (umod:GPR (match_operand:GPR 1 "gpc_reg_operand")
+  (match_operand:GPR 2 "gpc_reg_operand")))]
+  ""
+{
+  rtx temp1;
+  rtx temp2;
+
+  if (!TARGET_MODULO)
+    FAIL;

-(define_insn "umod3"
+  if (RS6000_DISABLE_SCALAR_MODULO)
+    {
+  temp1 = gen_reg_rtx (mode);
+  temp2 = gen_reg_rtx (mode);
+
+  emit_insn (gen_udiv3 (temp1, operands[1], operands[2]));
+  emit_insn (gen_mul3 (temp2, temp1, operands[2]));
+  emit_insn (gen_sub3 (operands[0], operands[1], temp2));
+  DONE;
+    }
+})
+
+(define_insn "*umod3"
    [(set (match_operand:GPR 0 "gpc_reg_operand" "=,r")
  (umod:GPR (match_operand:GPR 1 "gpc_reg_operand" "r,r")
    (match_operand:GPR 2 "gpc_reg_operand" "r,r")))]
-  "TARGET_MODULO"
+  "TARGET_MODULO && !RS6000_DISABLE_SCALAR_MODULO"
    "modu %0,%1,%2"
    [(set_attr "type" "div")
     (set_attr "size" "")])
@@ -3507,7 +3543,7 @@ (define_insn "umodti3"
    [(set (match_operand:TI 0 "altivec_register_operand" "=v")
  (umod:TI (match_operand:TI 1 "altivec_register_operand" "v")
   (match_operand:TI 2 "altivec_register_operand" "v")))]
-  "TARGET_POWER10 && TARGET_POWERPC64"
+  "TARGET_POWER10 && TARGET_POWERPC64 && !RS6000_DISABLE_SCALAR_MODULO"
    "vmoduq %0,%1,%2"
    [(set_attr "type" "vecdiv")
     (set_attr "size" "128")])
@@ -3516,7 +3552,7 @@ (define_insn "modti3"
    [(set (match_operand:TI 0 "altivec_register_operand" "=v")
  (mod:TI (match_operand:TI 1 "altivec_register_operand" "v")
  (match_operand:TI 2 "altivec_register_operand" "v")))]
-  "TARGET_POWER10 && TARGET_POWERPC64"
+  "TARGET_POWER10 && TARGET_POWERPC64 && !RS6000_DISABLE_SCALAR_MODULO"
    "vmodsq %0,%1,%2"
    [(set_attr "type" "vecdiv")
     (set_attr "size" "128")])
diff --git a/gcc/testsuite/gcc.target/powerpc/clone1.c 
b/gcc/testsuite/gcc.target/powerpc/clone1.c

index c69fd2aa1b8..74323ca0e8c 100644
--- a/gcc/testsuite/gcc.target/powerpc/clone1.c
+++ b/gcc/testsuite/gcc.target/powerpc/clone1.c
@@ -21,6 +21,7 @@ long mod_func_or (long a, long b, long c)
    return mod_func (a, b) | c;
  }

-/* { dg-final { scan-assembler-times {\mdivd\M}  1 } } */
-/* { dg-final { scan-assembler-times {\mmulld\M} 1 } } 

Re: [PATCH][RFC] tree-optimization/104475 - bogus -Wstringop-overflow

2023-05-04 Thread Jason Merrill via Gcc-patches

On 5/4/23 09:59, Richard Biener wrote:


I've previously sent
https://gcc.gnu.org/pipermail/gcc-patches/2022-December/608077.html
adding ADDR_EXPR_NONZERO and there were comments from Jason
where I just realized I ignored ARRAY_REF for the following.
Anyway, here's a more aggressive variant not going for an extra
flag set by the frontend but instead have the middle-end treat
all &*.component as non-NULL (all handled_component_p).

This passes bootstrap for all languages, testing there isn't
complete but it already shows for example
gcc.c-torture/execute/pr44555.c explicitly testing that
we keep >z NULL when p is NULL and z is at offset zero.

There's also execute FAILs for gfortran.dg/class_optional_2.f90
and some optimization dump scan fails I did not yet investigate.

Nevertheless I'd like to hear opinions on whether a middle-end
implementation without frontend help is the way to go and
what the reasonable restrictions should be there?  Is
gcc.c-torture/execute/pr44555.c sanctioned by the C standard?
If so I think we have a lost cause without some help from
the frontend?


The relevant C++ rule is https://eel.is/c++draft/expr.ref#8

The corresponding C clause doesn't have as explicit a rule that I can 
see, I don't know what the sense of the C committee is about this.  The 
special allowance for the common initial sequence suggests that it 
is an exception to such a rule, but I'm not sure where that rule is, 
exactly.


I imagine that not all languages are as strict about this, so an 
unconditional rule like this may not be what we want.


And as I think I commented before, this kind of assumption based on 
undefined behavior ought to have a -fsanitize=undefined check.



Thanks,
Richard.


--

The following avoids a bogus -Wstringop-overflow diagnostic by
properly recognizing that >m_mutex cannot be nullptr
even if m_mutex is at offset zero.  The C++ frontend already diagnoses
a >m_mutex != nullptr comparison and the following transfers
this knowledge to the middle-end in the most general way.

To avoid the bogus diagnostic this avoids separating the nullptr
path via jump-threading by eliminating the nullptr check.

PR tree-optimization/104475
* fold-const.cc (tree_single_nonzero_warnv_p): An ADDR_EXPR
of a component reference can never be null.

* g++.dg/opt/pr104475.C: New testcase.
---
  gcc/fold-const.cc   | 11 ++-
  gcc/testsuite/g++.dg/opt/pr104475.C | 12 
  2 files changed, 22 insertions(+), 1 deletion(-)
  create mode 100644 gcc/testsuite/g++.dg/opt/pr104475.C

diff --git a/gcc/fold-const.cc b/gcc/fold-const.cc
index db54bfc5662..c5c923e059d 100644
--- a/gcc/fold-const.cc
+++ b/gcc/fold-const.cc
@@ -15368,7 +15368,16 @@ tree_single_nonzero_warnv_p (tree t, bool 
*strict_overflow_p)
tree base = TREE_OPERAND (t, 0);
  
  	if (!DECL_P (base))

- base = get_base_address (base);
+ {
+   gcc_checking_assert (TREE_CODE (base) != WITH_SIZE_EXPR);
+   /* Any component reference, even if at offset zero, requires
+  a non-null base.  */
+   if (handled_component_p (base)
+   && !targetm.addr_space.zero_address_valid
+ (TYPE_ADDR_SPACE (TREE_TYPE (TREE_TYPE (t)
+ return true;
+   base = get_base_address (base);
+ }
  
  	if (base && TREE_CODE (base) == TARGET_EXPR)

  base = TARGET_EXPR_SLOT (base);
diff --git a/gcc/testsuite/g++.dg/opt/pr104475.C 
b/gcc/testsuite/g++.dg/opt/pr104475.C
new file mode 100644
index 000..013c70302c6
--- /dev/null
+++ b/gcc/testsuite/g++.dg/opt/pr104475.C
@@ -0,0 +1,12 @@
+// { dg-do compile }
+// { dg-require-effective-target c++11 }
+// { dg-options "-O -Waddress -fdump-tree-original" }
+
+struct X { int i; };
+
+bool foo (struct X *p)
+{
+  return >i != nullptr; /* { dg-warning "never be NULL" } */
+}
+
+/* { dg-final { scan-tree-dump "return  = 1;" "original" } } */




Re: [PATCH] c++: outer args for level-lowered ttp [PR109651]

2023-05-04 Thread Jason Merrill via Gcc-patches

On 4/28/23 08:54, Patrick Palka wrote:

On Thu, 27 Apr 2023, Patrick Palka wrote:


On Thu, Apr 27, 2023 at 4:46 PM Patrick Palka  wrote:


Now that r14-11-g2245459c85a3f4 made us coerce the template
arguments of a bound ttp again after level-lowering, this unfortunately
causes a crash from coerce_template_args_for_ttp in the below testcase.

During the level-lowering substitution T=int into the bound ttp TT
as part of substitution into the lambda signature, current_template_parms
is just U=U rather than the ideal TT=TT, U=U.  And because we don't
consistently set DECL_CONTEXT for level-lowered ttps (it's kind of a
chicken-and-egg problem in this case), we attempt to use
current_template_parms to obtain the outer arguments during
coerce_template_args_for_ttp.  But the depth 1 of c_t_p
current_template_parms is less than the depth 2 of the level-lowered TT,
and we end up segfaulting from there.

So for level-lowered ttps it seems we need to get the outer arguments a
different way -- namely, we can look at the trailing parms of its
DECL_TEMPLATE_PARMS.


Note this is not an ideal solution because TREE_CHAIN of
DECL_TEMPLATE_PARMS in this case is just "2 , 1 U", so we're
missing tparm information for the level that the ttp belongs to :/ So
the only difference compared to using current_template_parms in this
case is the extra empty level of args corresponding to the ttp's
level.


And on the other hand, this issue seems specific to lambdas because
it's in tsubst_lambda_expr that we substitute the function type _before_
substituting and installing the template parameters, which is opposite
to the typical order that tsubst_template_decl does things in.  And
that's ultimately the reason the current_template_parms fallback in
coerce_template_args_for_ttp misbehaves in this testcase.

So the following seems to be a better fix.  With it, current_template_parms
is correctly 2 TT, 1 U during substitution of the lambda's function type,
which makes coerce_template_args_for_ttp happy when level lowering
the bound ttp within the function type.


OK.


-- >8 --

Subject: [PATCH] c++: bound ttp in lambda function type [PR109651]

PR c++/109651

gcc/cp/ChangeLog:

* pt.cc (tsubst_template_decl): Add default argument to
lambda_fntype parameter.  Add defaulted lambda_tparms parameter.
Prefer to use lambda_tparms instead of substituting
DECL_TEMPLATE_PARMS.
(tsubst_decl) : Adjust tsubst_template_decl
call.
(tsubst_lambda_expr): For a generic lambda, substitute
DECL_TEMPLATE_PARMS and update current_template_parms
before substituting the function type.  Pass the substituted
DECL_TEMPLATE_PARMS to tsubst_template_decl.

gcc/testsuite/ChangeLog:

* g++.dg/cpp2a/lambda-generic-ttp1.C: New test.
* g++.dg/cpp2a/lambda-generic-ttp2.C: New test.
---
  gcc/cp/pt.cc  | 30 ++-
  .../g++.dg/cpp2a/lambda-generic-ttp1.C| 11 +++
  .../g++.dg/cpp2a/lambda-generic-ttp2.C| 13 
  3 files changed, 47 insertions(+), 7 deletions(-)
  create mode 100644 gcc/testsuite/g++.dg/cpp2a/lambda-generic-ttp1.C
  create mode 100644 gcc/testsuite/g++.dg/cpp2a/lambda-generic-ttp2.C

diff --git a/gcc/cp/pt.cc b/gcc/cp/pt.cc
index 678cb7930e3..43713d9ab72 100644
--- a/gcc/cp/pt.cc
+++ b/gcc/cp/pt.cc
@@ -14629,7 +14629,8 @@ tsubst_function_decl (tree t, tree args, tsubst_flags_t 
complain,
  
  static tree

  tsubst_template_decl (tree t, tree args, tsubst_flags_t complain,
- tree lambda_fntype)
+ tree lambda_fntype = NULL_TREE,
+ tree lambda_tparms = NULL_TREE)
  {
/* We can get here when processing a member function template,
   member class template, or template template parameter.  */
@@ -14719,8 +14720,10 @@ tsubst_template_decl (tree t, tree args, 
tsubst_flags_t complain,
auto tparm_guard = make_temp_override (current_template_parms);
DECL_TEMPLATE_PARMS (r)
  = current_template_parms
-= tsubst_template_parms (DECL_TEMPLATE_PARMS (t), args,
-complain);
+= (lambda_tparms
+   ? lambda_tparms
+   : tsubst_template_parms (DECL_TEMPLATE_PARMS (t), args,
+   complain));
  
bool class_p = false;

tree inner = decl;
@@ -14888,7 +14891,7 @@ tsubst_decl (tree t, tree args, tsubst_flags_t complain)
switch (TREE_CODE (t))
  {
  case TEMPLATE_DECL:
-  r = tsubst_template_decl (t, args, complain, /*lambda*/NULL_TREE);
+  r = tsubst_template_decl (t, args, complain);
break;
  
  case FUNCTION_DECL:

@@ -20130,12 +20133,24 @@ tsubst_lambda_expr (tree t, tree args, tsubst_flags_t 
complain, tree in_decl)
  ? DECL_TI_TEMPLATE (oldfn)
  : NULL_TREE);
  
+  tree tparms = NULL_TREE;

+  if (oldtmpl)
+tparms = tsubst_template_parms (DECL_TEMPLATE_PARMS (oldtmpl), args, 

[PATCH] libffi: fix handling of homogeneous float128 structs [PR109447]

2023-05-04 Thread Peter Bergner via Gcc-patches
I'd like to pull in Dan's upstream libffi commit into trunk to fix a
wrong code bug/testsuite failure on powerpc64le-linux with long double
defaulting to ieee128.  This passed bootstrap and regtesting with no
regressions.  Ok for trunk?

This bug is also on the GCC 12 and GCC 11 release branches. Ok there too
assuming testing is clean?  I can wait to push the gcc12 backport until
after the release.

Peter


If there is a homogeneous struct with float128 members, they should be
copied to the vector register save area. The current code incorrectly copies
only the value of the first member, not increasing the pointer with each
iteration. Fix this.

Merged from upstream libffi commit: 464b4b66e3cf3b5489e730c1466ee1bf825560e0

2023-05-03  Dan Horák 

libffi/
PR libffi/109447
* src/powerpc/ffi_linux64.c (ffi_prep_args64): Update arg.f128 pointer.

diff --git a/libffi/src/powerpc/ffi_linux64.c b/libffi/src/powerpc/ffi_linux64.c
index 4d50878e402..3454dacd3d6 100644
--- a/libffi/src/powerpc/ffi_linux64.c
+++ b/libffi/src/powerpc/ffi_linux64.c
@@ -680,7 +680,7 @@ ffi_prep_args64 (extended_cif *ecif, unsigned long *const 
stack)
 {
   if (vecarg_count < NUM_VEC_ARG_REGISTERS64
   && i < nfixedargs)
-   memcpy (vec_base.f128++, arg.f128, sizeof (float128));
+   memcpy (vec_base.f128++, arg.f128++, sizeof (float128));
   else
memcpy (next_arg.f128, arg.f128++, sizeof (float128));
   if (++next_arg.f128 == gpr_end.f128)



[PATCH] i386: Tighten ashift to lea splitter operand predicates [PR109733]

2023-05-04 Thread Uros Bizjak via Gcc-patches
The predicates of the ashift to lea post-reload splitter were too broad,
so the splitter tried to convert the mask shift instruction.  Tighten
operand predicates to match only general registers.

gcc/ChangeLog:

PR target/109733
* config/i386/predicates.md (index_reg_operand): New predicate.
* config/i386/i386.md (ashift to lea splitter): Use
general_reg_operand and index_reg_operand predicates.

Bootstrapped and regression tested on x86_64-linux-gnu {,-m32}.

Pushed to master.

Uros.
diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md
index d49f1cdc3fe..63207fc9305 100644
--- a/gcc/config/i386/i386.md
+++ b/gcc/config/i386/i386.md
@@ -13331,8 +13331,8 @@ (define_insn_and_split "*ashl3_1_slp"
 
 ;; Convert ashift to the lea pattern to avoid flags dependency.
 (define_split
-  [(set (match_operand:SWI 0 "register_operand")
-   (ashift:SWI (match_operand:SWI 1 "index_register_operand")
+  [(set (match_operand:SWI 0 "general_reg_operand")
+   (ashift:SWI (match_operand:SWI 1 "index_reg_operand")
(match_operand 2 "const_0_to_3_operand")))
(clobber (reg:CC FLAGS_REG))]
   "reload_completed
@@ -13350,9 +13350,9 @@ (define_split
 
 ;; Convert ashift to the lea pattern to avoid flags dependency.
 (define_split
-  [(set (match_operand:DI 0 "register_operand")
+  [(set (match_operand:DI 0 "general_reg_operand")
(zero_extend:DI
- (ashift:SI (match_operand:SI 1 "index_register_operand")
+ (ashift:SI (match_operand:SI 1 "index_reg_operand")
 (match_operand 2 "const_0_to_3_operand"
(clobber (reg:CC FLAGS_REG))]
   "TARGET_64BIT && reload_completed
diff --git a/gcc/config/i386/predicates.md b/gcc/config/i386/predicates.md
index 878b144b0fb..362266e1f6c 100644
--- a/gcc/config/i386/predicates.md
+++ b/gcc/config/i386/predicates.md
@@ -32,6 +32,11 @@ (define_predicate "general_reg_operand"
   (and (match_code "reg")
(match_test "GENERAL_REGNO_P (REGNO (op))")))
 
+;; True if the operand is an INDEX class register.
+(define_predicate "index_reg_operand"
+  (and (match_code "reg")
+   (match_test "INDEX_REGNO_P (REGNO (op))")))
+
 ;; True if the operand is a nonimmediate operand with GENERAL class register.
 (define_predicate "nonimmediate_gr_operand"
   (if_then_else (match_code "reg")


Re: [PATCH] c++: some assorted code improvements

2023-05-04 Thread Jason Merrill via Gcc-patches

On 5/4/23 12:33, Patrick Palka wrote:

* Harden some tree accessor macros and fix some incorrect uses of
   PLACEHOLDER_TYPE_CONSTRAINTS.
* Use strip_innermost_template_args in outer_template_args.
* Add !processing_template_decl early exit tests to some dependence
   predicates.


OK.


gcc/cp/ChangeLog:

* cp-tree.h (PLACEHOLDER_TYPE_CONSTRAINTS_INFO): Use
TEMPLATE_TYPE_PARM_CHECK.
(TPARMS_PRIMARY_TEMPLATE): Use TREE_VEC_CHECK.
(TEMPLATE_TEMPLATE_PARM_TEMPLATE_DECL): Use
TEMPLATE_TEMPLATE_PARM_CHECK.
* cxx-pretty-print.cc (cxx_pretty_printer::simple_type_specifier):
Only use PLACEHOLDER_TYPE_CONSTRAINTS on TEMPLATE_TYPE_PARM.
* error.cc (dump_type) : Use separate
variable for CLASS_PLACEHOLDER_TEMPLATE result.
* pt.cc (outer_template_args): Use strip_innermost_template_args.
(any_type_dependent_arguments_p): Return false if
!processing_template_decl.  Use range-based for.
(any_dependent_template_arguments_p): Likewise.
---
  gcc/cp/cp-tree.h   |  6 +++---
  gcc/cp/cxx-pretty-print.cc |  5 +++--
  gcc/cp/error.cc|  4 ++--
  gcc/cp/pt.cc   | 30 --
  4 files changed, 20 insertions(+), 25 deletions(-)

diff --git a/gcc/cp/cp-tree.h b/gcc/cp/cp-tree.h
index c9c4cd6f32f..a02461481a2 100644
--- a/gcc/cp/cp-tree.h
+++ b/gcc/cp/cp-tree.h
@@ -1636,7 +1636,7 @@ check_constraint_info (tree t)
 holds the set of template parameters that were in-scope when this 'auto'
 was formed.  */
  #define PLACEHOLDER_TYPE_CONSTRAINTS_INFO(NODE) \
-  DECL_SIZE_UNIT (TYPE_NAME (NODE))
+  DECL_SIZE_UNIT (TYPE_NAME (TEMPLATE_TYPE_PARM_CHECK (NODE)))
  
  /* The constraints on the 'auto' placeholder type NODE.  */

  #define PLACEHOLDER_TYPE_CONSTRAINTS(NODE)   \
@@ -5084,7 +5084,7 @@ get_vec_init_expr (tree t)
 templates are primary, too.  */
  
  /* Returns the primary template corresponding to these parameters.  */

-#define TPARMS_PRIMARY_TEMPLATE(NODE) (TREE_TYPE (NODE))
+#define TPARMS_PRIMARY_TEMPLATE(NODE) (TREE_TYPE (TREE_VEC_CHECK (NODE)))
  
  #define DECL_PRIMARY_TEMPLATE(NODE) \

(TPARMS_PRIMARY_TEMPLATE (DECL_INNERMOST_TEMPLATE_PARMS (NODE)))
@@ -6098,7 +6098,7 @@ const unsigned int STF_STRIP_DEPENDENT = 1U << 1;
  #define TEMPLATE_TEMPLATE_PARM_TEMPLATE_DECL(NODE)\
((TREE_CODE (NODE) == BOUND_TEMPLATE_TEMPLATE_PARM) \
 ? TYPE_TI_TEMPLATE (NODE)  \
-   : TYPE_NAME (NODE))
+   : TYPE_NAME (TEMPLATE_TEMPLATE_PARM_CHECK (NODE)))
  
  /* in lex.cc  */
  
diff --git a/gcc/cp/cxx-pretty-print.cc b/gcc/cp/cxx-pretty-print.cc

index 4cda27f2b30..950295effc6 100644
--- a/gcc/cp/cxx-pretty-print.cc
+++ b/gcc/cp/cxx-pretty-print.cc
@@ -1364,8 +1364,9 @@ cxx_pretty_printer::simple_type_specifier (tree t)
  case TEMPLATE_PARM_INDEX:
  case BOUND_TEMPLATE_TEMPLATE_PARM:
pp_cxx_unqualified_id (this, t);
-  if (tree c = PLACEHOLDER_TYPE_CONSTRAINTS (t))
-pp_cxx_constrained_type_spec (this, c);
+  if (TREE_CODE (t) == TEMPLATE_TYPE_PARM)
+   if (tree c = PLACEHOLDER_TYPE_CONSTRAINTS (t))
+ pp_cxx_constrained_type_spec (this, c);
break;
  
  case TYPENAME_TYPE:

diff --git a/gcc/cp/error.cc b/gcc/cp/error.cc
index a5d888926a6..1cfa4f1a240 100644
--- a/gcc/cp/error.cc
+++ b/gcc/cp/error.cc
@@ -639,8 +639,8 @@ dump_type (cxx_pretty_printer *pp, tree t, int flags)
pp_cxx_cv_qualifier_seq (pp, t);
if (template_placeholder_p (t))
{
- t = TREE_TYPE (CLASS_PLACEHOLDER_TEMPLATE (t));
- pp_cxx_tree_identifier (pp, TYPE_IDENTIFIER (t));
+ tree tmpl = TREE_TYPE (CLASS_PLACEHOLDER_TEMPLATE (t));
+ pp_cxx_tree_identifier (pp, TYPE_IDENTIFIER (tmpl));
  pp_string (pp, "<...auto...>");
}
else if (TYPE_IDENTIFIER (t))
diff --git a/gcc/cp/pt.cc b/gcc/cp/pt.cc
index 3f1cf139bbd..e62cca38195 100644
--- a/gcc/cp/pt.cc
+++ b/gcc/cp/pt.cc
@@ -4982,9 +4982,7 @@ outer_template_args (tree tmpl)
  return args;
if (TMPL_ARGS_DEPTH (args) == 1)
  return NULL_TREE;
-  args = copy_node (args);
-  --TREE_VEC_LENGTH (args);
-  return args;
+  return strip_innermost_template_args (args, 1);
  }
  
  /* Update the declared TYPE by doing any lookups which were thought to be

@@ -28635,14 +28633,13 @@ type_dependent_expression_p_push (tree expr)
  bool
  any_type_dependent_arguments_p (const vec *args)
  {
-  unsigned int i;
-  tree arg;
+  if (!processing_template_decl || !args)
+return false;
+
+  for (tree arg : *args)
+if (type_dependent_expression_p (arg))
+  return true;
  
-  FOR_EACH_VEC_SAFE_ELT (args, i, arg)

-{
-  if (type_dependent_expression_p (arg))
-   return true;
-}
return false;
  }
  
@@ -28805,19 +28802,16 @@ any_template_arguments_need_structural_equality_p (tree args)

  bool
  any_dependent_template_arguments_p (const_tree args)
  {
-  int i;
-  

Re: [PATCH] c++: fix pretty printing of 'alignof' vs '__alignof__' [PR85979]

2023-05-04 Thread Jason Merrill via Gcc-patches

On 5/4/23 12:33, Patrick Palka wrote:

Bootstrapped and regtested on x86_64-pc-linux-gnu, does this look OK for
trunk?


OK.


PR c++/85979

gcc/cp/ChangeLog:

* cxx-pretty-print.cc (cxx_pretty_printer::unary_expression)
: Consider ALIGNOF_EXPR_STD_P.
* error.cc (dump_expr) : Likewise.

gcc/testsuite/ChangeLog:

* g++.dg/diagnostic/alignof4.C: New test.
---
  gcc/cp/cxx-pretty-print.cc |  7 ++-
  gcc/cp/error.cc|  7 +++
  gcc/testsuite/g++.dg/diagnostic/alignof4.C | 21 +
  3 files changed, 30 insertions(+), 5 deletions(-)
  create mode 100644 gcc/testsuite/g++.dg/diagnostic/alignof4.C

diff --git a/gcc/cp/cxx-pretty-print.cc b/gcc/cp/cxx-pretty-print.cc
index 4cda27f2b30..4e9de3eff87 100644
--- a/gcc/cp/cxx-pretty-print.cc
+++ b/gcc/cp/cxx-pretty-print.cc
@@ -844,7 +844,12 @@ cxx_pretty_printer::unary_expression (tree t)
/* Fall through  */
  
  case ALIGNOF_EXPR:

-  pp_cxx_ws_string (this, code == SIZEOF_EXPR ? "sizeof" : "__alignof__");
+  if (code == SIZEOF_EXPR)
+   pp_cxx_ws_string (this, "sizeof");
+  else if (ALIGNOF_EXPR_STD_P (t))
+   pp_cxx_ws_string (this, "alignof");
+  else
+   pp_cxx_ws_string (this, "__alignof__");
pp_cxx_whitespace (this);
if (TREE_CODE (t) == SIZEOF_EXPR && SIZEOF_EXPR_TYPE_P (t))
{
diff --git a/gcc/cp/error.cc b/gcc/cp/error.cc
index a5d888926a6..7865f6518fc 100644
--- a/gcc/cp/error.cc
+++ b/gcc/cp/error.cc
@@ -2840,11 +2840,10 @@ dump_expr (cxx_pretty_printer *pp, tree t, int flags)
  case ALIGNOF_EXPR:
if (TREE_CODE (t) == SIZEOF_EXPR)
pp_cxx_ws_string (pp, "sizeof");
+  else if (ALIGNOF_EXPR_STD_P (t))
+   pp_cxx_ws_string (pp, "alignof");
else
-   {
- gcc_assert (TREE_CODE (t) == ALIGNOF_EXPR);
- pp_cxx_ws_string (pp, "__alignof__");
-   }
+   pp_cxx_ws_string (pp, "__alignof__");
op = TREE_OPERAND (t, 0);
if (PACK_EXPANSION_P (op))
{
diff --git a/gcc/testsuite/g++.dg/diagnostic/alignof4.C 
b/gcc/testsuite/g++.dg/diagnostic/alignof4.C
new file mode 100644
index 000..f6fc5c31563
--- /dev/null
+++ b/gcc/testsuite/g++.dg/diagnostic/alignof4.C
@@ -0,0 +1,21 @@
+// PR c++/85979
+// { dg-do compile { target c++11 } }
+
+template struct A { };
+
+template
+void f(A) { }
+
+#if __cpp_concepts
+template
+void g() requires (alignof(T) == 0);
+#endif
+
+int main() {
+  f(); // { dg-error "no match" }
+#if __cpp_concepts
+  g(); // { dg-error "no match" "" { target c++20 } }
+#endif
+}
+
+// { dg-bogus "__alignof__" "" { target *-*-* } 0 }




[PATCH] RISC-V: Fix CTZ unnecessary sign extension [PR #106888]

2023-05-04 Thread Raphael Moreira Zinsly
We were not able to match the CTZ sign extend pattern on RISC-V
because it gets optimized to zero extend and/or to ANDI patterns.
For the ANDI case, combine scrambles the RTL and generates the
extension by using subregs.

gcc/ChangeLog:
PR target/106888
* config/riscv/bitmanip.md
(disi2): Match with any_extend.
(disi2_sext): New pattern to match
with sign extend using an ANDI instruction.

gcc/testsuite/ChangeLog:
PR target/106888
* gcc.target/riscv/pr106888.c: New test.
* gcc.target/riscv/zbbw.c: Check for ANDI.
---
 gcc/config/riscv/bitmanip.md  | 14 +-
 gcc/testsuite/gcc.target/riscv/pr106888.c | 12 
 gcc/testsuite/gcc.target/riscv/zbbw.c |  1 +
 3 files changed, 26 insertions(+), 1 deletion(-)
 create mode 100644 gcc/testsuite/gcc.target/riscv/pr106888.c

diff --git a/gcc/config/riscv/bitmanip.md b/gcc/config/riscv/bitmanip.md
index a27fc3e34a1..8dc3e85a338 100644
--- a/gcc/config/riscv/bitmanip.md
+++ b/gcc/config/riscv/bitmanip.md
@@ -246,13 +246,25 @@
 
 (define_insn "*disi2"
   [(set (match_operand:DI 0 "register_operand" "=r")
-(sign_extend:DI
+(any_extend:DI
   (clz_ctz_pcnt:SI (match_operand:SI 1 "register_operand" "r"]
   "TARGET_64BIT && TARGET_ZBB"
   "w\t%0,%1"
   [(set_attr "type" "")
(set_attr "mode" "SI")])
 
+;; A SImode clz_ctz_pcnt may be extended to DImode via subreg.
+(define_insn "*disi2_sext"
+  [(set (match_operand:DI 0 "register_operand" "=r")
+(and:DI (subreg:DI
+  (clz_ctz_pcnt:SI (subreg:SI
+ (match_operand:DI 1 "register_operand" "r") 0)) 0)
+  (match_operand:DI 2 "const_int_operand")))]
+  "TARGET_64BIT && TARGET_ZBB && ((INTVAL (operands[2]) & 0x3f) == 0x3f)"
+  "w\t%0,%1"
+  [(set_attr "type" "bitmanip")
+   (set_attr "mode" "SI")])
+
 (define_insn "*di2"
   [(set (match_operand:DI 0 "register_operand" "=r")
 (clz_ctz_pcnt:DI (match_operand:DI 1 "register_operand" "r")))]
diff --git a/gcc/testsuite/gcc.target/riscv/pr106888.c 
b/gcc/testsuite/gcc.target/riscv/pr106888.c
new file mode 100644
index 000..77fb8e5b79c
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/pr106888.c
@@ -0,0 +1,12 @@
+/* { dg-do compile } */
+/* { dg-options "-march=rv64gc_zbb -mabi=lp64" } */
+
+int
+ctz (int i)
+{
+  int res = __builtin_ctz (i);
+  return res&0x;
+}
+
+/* { dg-final { scan-assembler-times "ctzw" 1 } } */
+/* { dg-final { scan-assembler-not "andi" } } */
diff --git a/gcc/testsuite/gcc.target/riscv/zbbw.c 
b/gcc/testsuite/gcc.target/riscv/zbbw.c
index 709743c3b68..f7b2b63853f 100644
--- a/gcc/testsuite/gcc.target/riscv/zbbw.c
+++ b/gcc/testsuite/gcc.target/riscv/zbbw.c
@@ -23,3 +23,4 @@ popcount (int i)
 /* { dg-final { scan-assembler-times "clzw" 1 } } */
 /* { dg-final { scan-assembler-times "ctzw" 1 } } */
 /* { dg-final { scan-assembler-times "cpopw" 1 } } */
+/* { dg-final { scan-assembler-not "andi\t" } } */
-- 
2.40.0



[PATCH] RISC-V: Add bext pattern for ZBS

2023-05-04 Thread Raphael Moreira Zinsly
When (a & (1 << bit_no)) is tested inside an IF we can use a bit extract.

gcc/ChangeLog:

* config/riscv/bitmanip.md
(bext): Rename one to avoid name clash.
(branch_bext): New split pattern.

gcc/testsuite/ChangeLog:
* gcc.target/riscv/zbs-bext-02.c: New test.
---
 gcc/config/riscv/bitmanip.md | 24 +++-
 gcc/testsuite/gcc.target/riscv/zbs-bext-02.c | 18 +++
 2 files changed, 41 insertions(+), 1 deletion(-)
 create mode 100644 gcc/testsuite/gcc.target/riscv/zbs-bext-02.c

diff --git a/gcc/config/riscv/bitmanip.md b/gcc/config/riscv/bitmanip.md
index a27fc3e34a1..e29e2d1fa53 100644
--- a/gcc/config/riscv/bitmanip.md
+++ b/gcc/config/riscv/bitmanip.md
@@ -595,7 +595,7 @@
 ;; When performing `(a & (1UL << bitno)) ? 0 : -1` the combiner
 ;; usually has the `bitno` typed as X-mode (i.e. no further
 ;; zero-extension is performed around the bitno).
-(define_insn "*bext"
+(define_insn "*bext_2"
   [(set (match_operand:X 0 "register_operand" "=r")
(zero_extract:X (match_operand:X 1 "register_operand" "r")
(const_int 1)
@@ -720,6 +720,28 @@
operands[9] = GEN_INT (clearbit);
 })
 
+;; IF_THEN_ELSE: test for (a & (1 << BIT_NO))
+(define_insn_and_split "*branch_bext"
+  [(set (pc)
+   (if_then_else
+ (match_operator 1 "equality_operator"
+[(zero_extract:X (match_operand:X 2 "register_operand" "r")
+(const_int 1)
+(zero_extend:X (match_operand:QI 3 "register_operand" "r")))
+   (const_int 0)])
+(label_ref (match_operand 0 "" ""))
+(pc)))
+   (clobber (match_scratch:X 4 "="))]
+  "TARGET_ZBS"
+  "#"
+  "&& reload_completed"
+  [(set (match_dup 4) (zero_extract:X (match_dup 2)
+ (const_int 1)
+ (zero_extend:X (match_dup 3
+   (set (pc) (if_then_else (match_op_dup 1 [(match_dup 4) (const_int 0)])
+  (label_ref (match_dup 0))
+  (pc)))])
+
 ;; ZBKC or ZBC extension
 (define_insn "riscv_clmul_"
   [(set (match_operand:X 0 "register_operand" "=r")
diff --git a/gcc/testsuite/gcc.target/riscv/zbs-bext-02.c 
b/gcc/testsuite/gcc.target/riscv/zbs-bext-02.c
new file mode 100644
index 000..3f3b8404eca
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/zbs-bext-02.c
@@ -0,0 +1,18 @@
+/* { dg-do compile } */
+/* { dg-options "-march=rv64gc_zbs -mabi=lp64" } */
+/* { dg-skip-if "" { *-*-* } { "-O0" "-Og" "-funroll-loops" } } */
+
+int
+foo(const long long B, int a)
+{
+  long long b = 1;
+  for (int sq = 0; sq < 64; sq++)
+if (B & (b << sq)) 
+  a++;
+
+  return a;
+}
+
+/* { dg-final { scan-assembler-times "bext\t" 1 } } */
+/* { dg-final { scan-assembler-not "bset" } } */
+/* { dg-final { scan-assembler-not "and" } } */
-- 
2.40.0



[PATCH] c++: fix pretty printing of 'alignof' vs '__alignof__' [PR85979]

2023-05-04 Thread Patrick Palka via Gcc-patches
Bootstrapped and regtested on x86_64-pc-linux-gnu, does this look OK for
trunk?

PR c++/85979

gcc/cp/ChangeLog:

* cxx-pretty-print.cc (cxx_pretty_printer::unary_expression)
: Consider ALIGNOF_EXPR_STD_P.
* error.cc (dump_expr) : Likewise.

gcc/testsuite/ChangeLog:

* g++.dg/diagnostic/alignof4.C: New test.
---
 gcc/cp/cxx-pretty-print.cc |  7 ++-
 gcc/cp/error.cc|  7 +++
 gcc/testsuite/g++.dg/diagnostic/alignof4.C | 21 +
 3 files changed, 30 insertions(+), 5 deletions(-)
 create mode 100644 gcc/testsuite/g++.dg/diagnostic/alignof4.C

diff --git a/gcc/cp/cxx-pretty-print.cc b/gcc/cp/cxx-pretty-print.cc
index 4cda27f2b30..4e9de3eff87 100644
--- a/gcc/cp/cxx-pretty-print.cc
+++ b/gcc/cp/cxx-pretty-print.cc
@@ -844,7 +844,12 @@ cxx_pretty_printer::unary_expression (tree t)
   /* Fall through  */
 
 case ALIGNOF_EXPR:
-  pp_cxx_ws_string (this, code == SIZEOF_EXPR ? "sizeof" : "__alignof__");
+  if (code == SIZEOF_EXPR)
+   pp_cxx_ws_string (this, "sizeof");
+  else if (ALIGNOF_EXPR_STD_P (t))
+   pp_cxx_ws_string (this, "alignof");
+  else
+   pp_cxx_ws_string (this, "__alignof__");
   pp_cxx_whitespace (this);
   if (TREE_CODE (t) == SIZEOF_EXPR && SIZEOF_EXPR_TYPE_P (t))
{
diff --git a/gcc/cp/error.cc b/gcc/cp/error.cc
index a5d888926a6..7865f6518fc 100644
--- a/gcc/cp/error.cc
+++ b/gcc/cp/error.cc
@@ -2840,11 +2840,10 @@ dump_expr (cxx_pretty_printer *pp, tree t, int flags)
 case ALIGNOF_EXPR:
   if (TREE_CODE (t) == SIZEOF_EXPR)
pp_cxx_ws_string (pp, "sizeof");
+  else if (ALIGNOF_EXPR_STD_P (t))
+   pp_cxx_ws_string (pp, "alignof");
   else
-   {
- gcc_assert (TREE_CODE (t) == ALIGNOF_EXPR);
- pp_cxx_ws_string (pp, "__alignof__");
-   }
+   pp_cxx_ws_string (pp, "__alignof__");
   op = TREE_OPERAND (t, 0);
   if (PACK_EXPANSION_P (op))
{
diff --git a/gcc/testsuite/g++.dg/diagnostic/alignof4.C 
b/gcc/testsuite/g++.dg/diagnostic/alignof4.C
new file mode 100644
index 000..f6fc5c31563
--- /dev/null
+++ b/gcc/testsuite/g++.dg/diagnostic/alignof4.C
@@ -0,0 +1,21 @@
+// PR c++/85979
+// { dg-do compile { target c++11 } }
+
+template struct A { };
+
+template
+void f(A) { }
+
+#if __cpp_concepts
+template
+void g() requires (alignof(T) == 0);
+#endif
+
+int main() {
+  f(); // { dg-error "no match" }
+#if __cpp_concepts
+  g(); // { dg-error "no match" "" { target c++20 } }
+#endif
+}
+
+// { dg-bogus "__alignof__" "" { target *-*-* } 0 }
-- 
2.40.1.476.g69c786637d



[PATCH] c++: some assorted code improvements

2023-05-04 Thread Patrick Palka via Gcc-patches
* Harden some tree accessor macros and fix some incorrect uses of
  PLACEHOLDER_TYPE_CONSTRAINTS.
* Use strip_innermost_template_args in outer_template_args.
* Add !processing_template_decl early exit tests to some dependence
  predicates.

gcc/cp/ChangeLog:

* cp-tree.h (PLACEHOLDER_TYPE_CONSTRAINTS_INFO): Use
TEMPLATE_TYPE_PARM_CHECK.
(TPARMS_PRIMARY_TEMPLATE): Use TREE_VEC_CHECK.
(TEMPLATE_TEMPLATE_PARM_TEMPLATE_DECL): Use
TEMPLATE_TEMPLATE_PARM_CHECK.
* cxx-pretty-print.cc (cxx_pretty_printer::simple_type_specifier):
Only use PLACEHOLDER_TYPE_CONSTRAINTS on TEMPLATE_TYPE_PARM.
* error.cc (dump_type) : Use separate
variable for CLASS_PLACEHOLDER_TEMPLATE result.
* pt.cc (outer_template_args): Use strip_innermost_template_args.
(any_type_dependent_arguments_p): Return false if
!processing_template_decl.  Use range-based for.
(any_dependent_template_arguments_p): Likewise.
---
 gcc/cp/cp-tree.h   |  6 +++---
 gcc/cp/cxx-pretty-print.cc |  5 +++--
 gcc/cp/error.cc|  4 ++--
 gcc/cp/pt.cc   | 30 --
 4 files changed, 20 insertions(+), 25 deletions(-)

diff --git a/gcc/cp/cp-tree.h b/gcc/cp/cp-tree.h
index c9c4cd6f32f..a02461481a2 100644
--- a/gcc/cp/cp-tree.h
+++ b/gcc/cp/cp-tree.h
@@ -1636,7 +1636,7 @@ check_constraint_info (tree t)
holds the set of template parameters that were in-scope when this 'auto'
was formed.  */
 #define PLACEHOLDER_TYPE_CONSTRAINTS_INFO(NODE) \
-  DECL_SIZE_UNIT (TYPE_NAME (NODE))
+  DECL_SIZE_UNIT (TYPE_NAME (TEMPLATE_TYPE_PARM_CHECK (NODE)))
 
 /* The constraints on the 'auto' placeholder type NODE.  */
 #define PLACEHOLDER_TYPE_CONSTRAINTS(NODE)\
@@ -5084,7 +5084,7 @@ get_vec_init_expr (tree t)
templates are primary, too.  */
 
 /* Returns the primary template corresponding to these parameters.  */
-#define TPARMS_PRIMARY_TEMPLATE(NODE) (TREE_TYPE (NODE))
+#define TPARMS_PRIMARY_TEMPLATE(NODE) (TREE_TYPE (TREE_VEC_CHECK (NODE)))
 
 #define DECL_PRIMARY_TEMPLATE(NODE) \
   (TPARMS_PRIMARY_TEMPLATE (DECL_INNERMOST_TEMPLATE_PARMS (NODE)))
@@ -6098,7 +6098,7 @@ const unsigned int STF_STRIP_DEPENDENT = 1U << 1;
 #define TEMPLATE_TEMPLATE_PARM_TEMPLATE_DECL(NODE) \
   ((TREE_CODE (NODE) == BOUND_TEMPLATE_TEMPLATE_PARM)  \
? TYPE_TI_TEMPLATE (NODE)   \
-   : TYPE_NAME (NODE))
+   : TYPE_NAME (TEMPLATE_TEMPLATE_PARM_CHECK (NODE)))
 
 /* in lex.cc  */
 
diff --git a/gcc/cp/cxx-pretty-print.cc b/gcc/cp/cxx-pretty-print.cc
index 4cda27f2b30..950295effc6 100644
--- a/gcc/cp/cxx-pretty-print.cc
+++ b/gcc/cp/cxx-pretty-print.cc
@@ -1364,8 +1364,9 @@ cxx_pretty_printer::simple_type_specifier (tree t)
 case TEMPLATE_PARM_INDEX:
 case BOUND_TEMPLATE_TEMPLATE_PARM:
   pp_cxx_unqualified_id (this, t);
-  if (tree c = PLACEHOLDER_TYPE_CONSTRAINTS (t))
-pp_cxx_constrained_type_spec (this, c);
+  if (TREE_CODE (t) == TEMPLATE_TYPE_PARM)
+   if (tree c = PLACEHOLDER_TYPE_CONSTRAINTS (t))
+ pp_cxx_constrained_type_spec (this, c);
   break;
 
 case TYPENAME_TYPE:
diff --git a/gcc/cp/error.cc b/gcc/cp/error.cc
index a5d888926a6..1cfa4f1a240 100644
--- a/gcc/cp/error.cc
+++ b/gcc/cp/error.cc
@@ -639,8 +639,8 @@ dump_type (cxx_pretty_printer *pp, tree t, int flags)
   pp_cxx_cv_qualifier_seq (pp, t);
   if (template_placeholder_p (t))
{
- t = TREE_TYPE (CLASS_PLACEHOLDER_TEMPLATE (t));
- pp_cxx_tree_identifier (pp, TYPE_IDENTIFIER (t));
+ tree tmpl = TREE_TYPE (CLASS_PLACEHOLDER_TEMPLATE (t));
+ pp_cxx_tree_identifier (pp, TYPE_IDENTIFIER (tmpl));
  pp_string (pp, "<...auto...>");
}
   else if (TYPE_IDENTIFIER (t))
diff --git a/gcc/cp/pt.cc b/gcc/cp/pt.cc
index 3f1cf139bbd..e62cca38195 100644
--- a/gcc/cp/pt.cc
+++ b/gcc/cp/pt.cc
@@ -4982,9 +4982,7 @@ outer_template_args (tree tmpl)
 return args;
   if (TMPL_ARGS_DEPTH (args) == 1)
 return NULL_TREE;
-  args = copy_node (args);
-  --TREE_VEC_LENGTH (args);
-  return args;
+  return strip_innermost_template_args (args, 1);
 }
 
 /* Update the declared TYPE by doing any lookups which were thought to be
@@ -28635,14 +28633,13 @@ type_dependent_expression_p_push (tree expr)
 bool
 any_type_dependent_arguments_p (const vec *args)
 {
-  unsigned int i;
-  tree arg;
+  if (!processing_template_decl || !args)
+return false;
+
+  for (tree arg : *args)
+if (type_dependent_expression_p (arg))
+  return true;
 
-  FOR_EACH_VEC_SAFE_ELT (args, i, arg)
-{
-  if (type_dependent_expression_p (arg))
-   return true;
-}
   return false;
 }
 
@@ -28805,19 +28802,16 @@ any_template_arguments_need_structural_equality_p 
(tree args)
 bool
 any_dependent_template_arguments_p (const_tree args)
 {
-  int i;
-  int j;
-
-  if (!args)
-return false;
   if (args == error_mark_node)
 return true;
+  if 

Re: [PATCH] c++: outer args for level-lowered ttp [PR109651]

2023-05-04 Thread Patrick Palka via Gcc-patches
On Fri, Apr 28, 2023 at 8:54 AM Patrick Palka  wrote:
>
> On Thu, 27 Apr 2023, Patrick Palka wrote:
>
> > On Thu, Apr 27, 2023 at 4:46 PM Patrick Palka  wrote:
> > >
> > > Now that r14-11-g2245459c85a3f4 made us coerce the template
> > > arguments of a bound ttp again after level-lowering, this unfortunately
> > > causes a crash from coerce_template_args_for_ttp in the below testcase.
> > >
> > > During the level-lowering substitution T=int into the bound ttp TT
> > > as part of substitution into the lambda signature, current_template_parms
> > > is just U=U rather than the ideal TT=TT, U=U.  And because we don't
> > > consistently set DECL_CONTEXT for level-lowered ttps (it's kind of a
> > > chicken-and-egg problem in this case), we attempt to use
> > > current_template_parms to obtain the outer arguments during
> > > coerce_template_args_for_ttp.  But the depth 1 of c_t_p
> > > current_template_parms is less than the depth 2 of the level-lowered TT,
> > > and we end up segfaulting from there.
> > >
> > > So for level-lowered ttps it seems we need to get the outer arguments a
> > > different way -- namely, we can look at the trailing parms of its
> > > DECL_TEMPLATE_PARMS.
> >
> > Note this is not an ideal solution because TREE_CHAIN of
> > DECL_TEMPLATE_PARMS in this case is just "2 , 1 U", so we're
> > missing tparm information for the level that the ttp belongs to :/ So
> > the only difference compared to using current_template_parms in this
> > case is the extra empty level of args corresponding to the ttp's
> > level.
>
> And on the other hand, this issue seems specific to lambdas because
> it's in tsubst_lambda_expr that we substitute the function type _before_
> substituting and installing the template parameters, which is opposite
> to the typical order that tsubst_template_decl does things in.  And
> that's ultimately the reason the current_template_parms fallback in
> coerce_template_args_for_ttp misbehaves in this testcase.
>
> So the following seems to be a better fix.  With it, current_template_parms
> is correctly 2 TT, 1 U during substitution of the lambda's function type,
> which makes coerce_template_args_for_ttp happy when level lowering
> the bound ttp within the function type.

Ping.

>
> -- >8 --
>
> Subject: [PATCH] c++: bound ttp in lambda function type [PR109651]
>
> PR c++/109651
>
> gcc/cp/ChangeLog:
>
> * pt.cc (tsubst_template_decl): Add default argument to
> lambda_fntype parameter.  Add defaulted lambda_tparms parameter.
> Prefer to use lambda_tparms instead of substituting
> DECL_TEMPLATE_PARMS.
> (tsubst_decl) : Adjust tsubst_template_decl
> call.
> (tsubst_lambda_expr): For a generic lambda, substitute
> DECL_TEMPLATE_PARMS and update current_template_parms
> before substituting the function type.  Pass the substituted
> DECL_TEMPLATE_PARMS to tsubst_template_decl.
>
> gcc/testsuite/ChangeLog:
>
> * g++.dg/cpp2a/lambda-generic-ttp1.C: New test.
> * g++.dg/cpp2a/lambda-generic-ttp2.C: New test.
> ---
>  gcc/cp/pt.cc  | 30 ++-
>  .../g++.dg/cpp2a/lambda-generic-ttp1.C| 11 +++
>  .../g++.dg/cpp2a/lambda-generic-ttp2.C| 13 
>  3 files changed, 47 insertions(+), 7 deletions(-)
>  create mode 100644 gcc/testsuite/g++.dg/cpp2a/lambda-generic-ttp1.C
>  create mode 100644 gcc/testsuite/g++.dg/cpp2a/lambda-generic-ttp2.C
>
> diff --git a/gcc/cp/pt.cc b/gcc/cp/pt.cc
> index 678cb7930e3..43713d9ab72 100644
> --- a/gcc/cp/pt.cc
> +++ b/gcc/cp/pt.cc
> @@ -14629,7 +14629,8 @@ tsubst_function_decl (tree t, tree args, 
> tsubst_flags_t complain,
>
>  static tree
>  tsubst_template_decl (tree t, tree args, tsubst_flags_t complain,
> - tree lambda_fntype)
> + tree lambda_fntype = NULL_TREE,
> + tree lambda_tparms = NULL_TREE)
>  {
>/* We can get here when processing a member function template,
>   member class template, or template template parameter.  */
> @@ -14719,8 +14720,10 @@ tsubst_template_decl (tree t, tree args, 
> tsubst_flags_t complain,
>auto tparm_guard = make_temp_override (current_template_parms);
>DECL_TEMPLATE_PARMS (r)
>  = current_template_parms
> -= tsubst_template_parms (DECL_TEMPLATE_PARMS (t), args,
> -complain);
> += (lambda_tparms
> +   ? lambda_tparms
> +   : tsubst_template_parms (DECL_TEMPLATE_PARMS (t), args,
> +   complain));
>
>bool class_p = false;
>tree inner = decl;
> @@ -14888,7 +14891,7 @@ tsubst_decl (tree t, tree args, tsubst_flags_t 
> complain)
>switch (TREE_CODE (t))
>  {
>  case TEMPLATE_DECL:
> -  r = tsubst_template_decl (t, args, complain, /*lambda*/NULL_TREE);
> +  r = tsubst_template_decl (t, args, complain);
>break;
>
>  case FUNCTION_DECL:
> @@ -20130,12 +20133,24 @@ 

Re: [wwwdocs] gcc-13: Mention new gcov feature

2023-05-04 Thread Sebastian Huber

On 26.04.23 08:10, Sebastian Huber wrote:

---
  htdocs/gcc-13/changes.html | 5 +
  1 file changed, 5 insertions(+)

diff --git a/htdocs/gcc-13/changes.html b/htdocs/gcc-13/changes.html
index 4515a6af..bae65219 100644
--- a/htdocs/gcc-13/changes.html
+++ b/htdocs/gcc-13/changes.html
@@ -154,6 +154,11 @@ a work-in-progress.
  
  where the json-prefixed variants refer to GCC's own JSON 
diagnostic format.

+  
+Support for profiling and test coverage in freestanding environments has
+been added, see also
+<a href="https://gcc.gnu.org/onlinedocs/gcc/Freestanding-Environments.html">Profiling and
Test Coverage in Freestanding Environments</a>.
+  
  


Yes, no, maybe?

--
embedded brains GmbH
Herr Sebastian HUBER
Dornierstr. 4
82178 Puchheim
Germany
email: sebastian.hu...@embedded-brains.de
phone: +49-89-18 94 741 - 16
fax:   +49-89-18 94 741 - 08

Registergericht: Amtsgericht München
Registernummer: HRB 157899
Vertretungsberechtigte Geschäftsführer: Peter Rasmussen, Thomas Dörfler
Unsere Datenschutzerklärung finden Sie hier:
https://embedded-brains.de/datenschutzerklaerung/


Re: [PATCH 1/3] Refactor to allow internal_fn's

2023-05-04 Thread Andre Vieira (lists) via Gcc-patches




On 03/05/2023 12:55, Richard Biener wrote:

On Fri, 28 Apr 2023, Andre Vieira (lists) wrote:


Hi,

I'm posting the patches separately now with ChangeLogs.

I made the suggested changes and tried to simplify the code a bit further.
Where internal to tree-vect-stmts I changed most functions to use code_helper
to avoid having to check at places we didn't need to. I was trying to simplify
things further by also modifying supportable_half_widening_operation and
supportable_convert_operation but the result of that was that I ended up
moving the code to cast to tree code inside them rather than at the call site
and it didn't look simpler, so I left those. Though if we did make those
changes we'd no longer need to keep around the tc1 variable in
vectorizable_conversion... Let me know what you think.


I see that

-  else if (CONVERT_EXPR_CODE_P (code)
+  else if (CONVERT_EXPR_CODE_P (code.safe_as_tree_code ())

is convenient (as much as I dislike safe_as_tree_code).  Isn't
the following

-  if (!CONVERT_EXPR_CODE_P (code))
+  if (!CONVERT_EXPR_CODE_P ((tree_code) code))
  return false;
For some reason I thought the code could only reach here if code was a 
tree code, but I guess if we have an ifn and the modes aren't the same 
as the wide_vectype it would fall to this, which for an ifn this would 
fail. I am wondering whether it needs to though, the multi-step widening 
should also work for ifn's no? We'd need to adapt it, to not use c1, c2 
but hi, lo in case of ifn I guess.. and then use a different optab look 
up too?


Though I'm thinking, maybe this should be a follow-up and just not have 
that 'feature' for now. The feature being, supporting multi-step 
conversion for new widening IFN's.


Re: [libstdc++] use strtold for from_chars even without locale

2023-05-04 Thread Alexandre Oliva via Gcc-patches
On May  4, 2023, Jonathan Wakely  wrote:

> And we could use strtod for a target that doesn't support locales *at all*
> (so strtod always behaves as specified for LANG=C).

Oh, sorry, I misread the *_USELOCALE macro as *_USE_LOCALE, and I
thought this was what I was doing.  Nevermind, patch withdrawn.

I guess I should look into how to xfail or skip the tests involving
full-precision long doubles on targets that are limited to doubles with
lower precision to convert chars to long doubles.  It's a pity to xfail
the whole tests over an expected issue.

Thanks,

-- 
Alexandre Oliva, happy hackerhttps://FSFLA.org/blogs/lxo/
   Free Software Activist   GNU Toolchain Engineer
Disinformation flourishes because many people care deeply about injustice
but very few check the facts.  Ask me about 


Re: [libstdc++] use strtold for from_chars even without locale

2023-05-04 Thread Jonathan Wakely via Gcc-patches
On Thu, 4 May 2023 at 13:06, Alexandre Oliva via Libstdc++ <
libstd...@gcc.gnu.org> wrote:

>
> When we're using fast_float for 32- and 64-bit floating point, use
> strtold for wider long double, even if locales are unavailable.
>
> On vxworks, test for strtof's and strtold's declarations, so that they
> can be used even when cross compiling.
>
> Include stdlib.h in the decl-checking macro, so that it can find them.
>
> Regstrapped on x86_64-linux-gnu.  Also tested on aarch64-vx7r2 with
> gcc-12, where uselocale is not available, and using strtold rather than
> fast_math's double fallback avoids a couple of from_chars-related
> testsuite fails (from_chars/4.cc and to_chars/long_double.cc).  Ok to
> install?
>

The reason we don't use strtod (or strtold) without uselocale is that it is
locale-dependent, and so doesn't have the correct semantics for from_chars.

Using fast_float's binary64 implementation for 80-bit or 128-bit long
double might give inaccurate results, but using the global locale can give
completely incorrect results. For example, if the global locale is set to
"fr_FR" then from_chars would parse "1.23" as 1.0L and would parse "1,23"
as 1.23L, both of which are wrong.

We could use strtod for a single-threaded target (i.e.
!defined(_GLIBCXX_HAS_GTHREADS) by changing the global locale using
setlocale, instead of changing the per-thread locale using uselocale.

And we could use strtod for a target that doesn't support locales *at all*
(so strtod always behaves as specified for LANG=C).

But unless I'm missing something, your change applies to multi-threaded
targets that support locales. I think it needs to be more specific, maybe
via some "really use strtod for from_chars, I know it's wrong in the
general case" target-specific macro that you could define for vxworks.

The attached (lightly-tested) patch uses RAII to set/restore the locale and
the FE rounding mode, and extends the use of strtod to single-threaded
targets. That removes some of the repetition in the preprocessor
conditions, which should make it simpler to extend with a "really use
strtod" macro if we want to do that.

Patrick, could you please review Alex's patch and my one attached here, in
case we've missed anything else w.r.t from_chars, thanks.
commit b9733838ba64a748745b9aac640a35417a36dc0e
Author: Jonathan Wakely 
Date:   Thu May 4 15:22:07 2023

libstdc++: Use RAII types in strtod-based std::from_chars implementation

This adds auto_locale and auto_ferounding types to use RAII for changing
and restoring the locale and floating-point environment when using strtod
to implement std::from_chars.

The destructors for the RAII objects run slightly later than the
previous statements that restored the locale/fenv, but the
difference is not significant.

After this change it would be safe to define USE_STRTOD_FOR_FROM_CHARS
for single-threaded targets, where it's OK to change the global locale
while we use strtod.  This would be an ABI change for affected targets,
but it's possible that targets with no thread support don't care about
that anyway.  It would also mean that AIX would use a different
std::from_chars implementation depending whether -pthread was used or
not, since it has separate multilibs for single-threaded and
multi-threaded.  That seems less desirable.

libstdc++-v3/ChangeLog:

* src/c++17/floating_from_chars.cc [USE_STRTOD_FOR_FROM_CHARS]
(auto_locale, auto_ferounding): New class types.
(from_chars_impl): Use auto_locale and auto_ferounding.

diff --git a/libstdc++-v3/src/c++17/floating_from_chars.cc 
b/libstdc++-v3/src/c++17/floating_from_chars.cc
index 78b9d92cdc0..234cacd872c 100644
--- a/libstdc++-v3/src/c++17/floating_from_chars.cc
+++ b/libstdc++-v3/src/c++17/floating_from_chars.cc
@@ -50,7 +50,7 @@
 # include 
 #endif
 
-#if _GLIBCXX_HAVE_USELOCALE
+#if _GLIBCXX_HAVE_USELOCALE // || !defined _GLIBCXX_HAS_GTHREADS
 // FIXME: This should be reimplemented so it doesn't use strtod and newlocale.
 // That will avoid the need for any memory allocation, meaning that the
 // non-conforming errc::not_enough_memory result cannot happen.
@@ -597,6 +597,87 @@ namespace
 return buf.c_str();
   }
 
+  // RAII type to change and restore the locale.
+  struct auto_locale
+  {
+#if _GLIBCXX_HAVE_USELOCALE
+// When we have uselocale we can change the current thread's locale.
+locale_t loc;
+locale_t orig;
+
+auto_locale()
+: loc(::newlocale(LC_ALL_MASK, "C", (locale_t)0))
+{
+  if (loc)
+   orig = ::uselocale(loc);
+  else
+   ec = errc{errno};
+}
+
+~auto_locale()
+{
+  if (loc)
+   {
+ ::uselocale(orig);
+ ::freelocale(loc);
+   }
+}
+#elif !defined _GLIBCXX_HAS_GTHREADS
+// For a single-threaded target it's safe to change the global locale.
+string orig;
+
+auto_locale()
+{
+  const char* curloc 

[PATCH][2/2][committed] aarch64: Reimplement (R){ADD, SUB}HN2 patterns with standard RTL codes

2023-05-04 Thread Kyrylo Tkachov via Gcc-patches
Hi all,

Similar to the previous patch, this one converts the high-half versions of the 
patterns.
With this patch we can remove the UNSPEC_* codes involved entirely.

Bootstrapped and tested on aarch64-none-linux-gnu. Also tested on 
aarch64_be-none-elf.
Pushing to trunk.
Thanks,
Kyrill

gcc/ChangeLog:

* config/aarch64/aarch64-simd.md 
(aarch64_hn2_insn_le):
Rename and reimplement with RTL codes to...
(aarch64_hn2_insn_le): .. This.
(aarch64_rhn2_insn_le): New pattern.
(aarch64_hn2_insn_be): Rename and reimplement with 
RTL
codes to...
(aarch64_hn2_insn_be): ... This.
(aarch64_rhn2_insn_be): New pattern.
(aarch64_hn2): Rename and adjust expander to...
(aarch64_hn2): ... This.
(aarch64_rhn2): New expander.
* config/aarch64/iterators.md (UNSPEC_ADDHN, UNSPEC_RADDHN,
UNSPEC_SUBHN, UNSPEC_RSUBHN): Delete unspecs.
(ADDSUBHN): Delete.
(sur): Remove handling of the above.
(addsub): Likewise.


addhn2.patch
Description: addhn2.patch


[PATCH][1/2][committed] aarch64: Reimplement (R){ADD,SUB}HN intrinsics with RTL codes

2023-05-04 Thread Kyrylo Tkachov via Gcc-patches
Hi all,

We can implement the halving-narrowing add/sub patterns with standard RTL codes 
as well rather than relying on unspecs.
This patch handles the low-part ones and the second patch does the high-part 
ones and removes the unspecs themselves.
The operation ADDHN on V4SI, for example, is represented as (truncate:V4HI 
((src1:V4SI + src2:V4SI) >> 16))
and RADDHN as (truncate:V4HI ((src1:V4SI + src2:V4SI + (1 << 15)) >> 16)).
Taking this opportunity I specified the patterns returning the narrow mode and 
annotated them with the
 define_subst rules to get the vec_concat-zero meta-patterns too. 
This allows us to simplify
the expanders somewhat too. Tests are added to check that the combinations work.

Bootstrapped and tested on aarch64-none-linux-gnu. Also tested on 
aarch64_be-none-elf.
Pushing to trunk.
Thanks,
Kyrill

gcc/ChangeLog:

* config/aarch64/aarch64-simd.md 
(aarch64_hn_insn_le):
Delete.
(aarch64_hn_insn): New define_insn.
(aarch64_hn_insn_be): Delete.
(aarch64_rhn_insn): New define_insn.
(aarch64_hn): Delete.
(aarch64_hn): New define_expand.
(aarch64_rhn): Likewise.
* config/aarch64/predicates.md (aarch64_simd_raddsubhn_imm_vec):
New predicate.

gcc/testsuite/ChangeLog:

* gcc.target/aarch64/simd/pr99195_4.c: New test.


addhn.patch
Description: addhn.patch


[PATCH][RFC] tree-optimization/104475 - bogus -Wstringop-overflow

2023-05-04 Thread Richard Biener via Gcc-patches


I've previously sent 
https://gcc.gnu.org/pipermail/gcc-patches/2022-December/608077.html
adding ADDR_EXPR_NONZERO and there were comments from Jason
where I just realized I ignored ARRAY_REF for the following.
Anyway, here's a more aggressive variant not going for an extra
flag set by the frontend but instead have the middle-end treat
all &*.component as non-NULL (all handled_component_p).

This passes bootstrap for all languages, testing there isn't
complete but it already shows for example
gcc.c-torture/execute/pr44555.c explicitly testing that
we keep &p->z NULL when p is NULL and z is at offset zero.

There's also execute FAILs for gfortran.dg/class_optional_2.f90
and some optimization dump scan fails I did not yet investigate.

Nevertheless I'd like to hear opinions on whether a middle-end
implementation without frontend help is the way to go and
what the reasonable restrictions should be there?  Is
gcc.c-torture/execute/pr44555.c sanctioned by the C standard?
If so I think we have a lost cause without some help from
the frontend?

Thanks,
Richard.


--

The following avoids a bogus -Wstringop-overflow diagnostic by
properly recognizing that &this->m_mutex cannot be nullptr
even if m_mutex is at offset zero.  The C++ frontend already diagnoses
a &this->m_mutex != nullptr comparison and the following transfers
this knowledge to the middle-end in the most general way.

To avoid the bogus diagnostic this avoids separating the nullptr
path via jump-threading by eliminating the nullptr check.

PR tree-optimization/104475
* fold-const.cc (tree_single_nonzero_warnv_p): An ADDR_EXPR
of a component reference can never be null.

* g++.dg/opt/pr104475.C: New testcase.
---
 gcc/fold-const.cc   | 11 ++-
 gcc/testsuite/g++.dg/opt/pr104475.C | 12 
 2 files changed, 22 insertions(+), 1 deletion(-)
 create mode 100644 gcc/testsuite/g++.dg/opt/pr104475.C

diff --git a/gcc/fold-const.cc b/gcc/fold-const.cc
index db54bfc5662..c5c923e059d 100644
--- a/gcc/fold-const.cc
+++ b/gcc/fold-const.cc
@@ -15368,7 +15368,16 @@ tree_single_nonzero_warnv_p (tree t, bool 
*strict_overflow_p)
tree base = TREE_OPERAND (t, 0);
 
if (!DECL_P (base))
- base = get_base_address (base);
+ {
+   gcc_checking_assert (TREE_CODE (base) != WITH_SIZE_EXPR);
+   /* Any component reference, even if at offset zero, requires
+  a non-null base.  */
+   if (handled_component_p (base)
+   && !targetm.addr_space.zero_address_valid
+ (TYPE_ADDR_SPACE (TREE_TYPE (TREE_TYPE (t)
+ return true;
+   base = get_base_address (base);
+ }
 
if (base && TREE_CODE (base) == TARGET_EXPR)
  base = TARGET_EXPR_SLOT (base);
diff --git a/gcc/testsuite/g++.dg/opt/pr104475.C 
b/gcc/testsuite/g++.dg/opt/pr104475.C
new file mode 100644
index 000..013c70302c6
--- /dev/null
+++ b/gcc/testsuite/g++.dg/opt/pr104475.C
@@ -0,0 +1,12 @@
+// { dg-do compile }
+// { dg-require-effective-target c++11 }
+// { dg-options "-O -Waddress -fdump-tree-original" }
+
+struct X { int i; };
+
+bool foo (struct X *p)
+{
+  return &p->i != nullptr; /* { dg-warning "never be NULL" } */
+}
+
+/* { dg-final { scan-tree-dump "return <retval> = 1;" "original" } } */
-- 
2.35.3


Re: [PATCH 1/2] c++: potentiality of templated memfn call [PR109480]

2023-05-04 Thread Jason Merrill via Gcc-patches

On 5/3/23 16:50, Patrick Palka wrote:

On Wed, 3 May 2023, Jason Merrill wrote:


On 5/2/23 15:53, Patrick Palka wrote:

On Tue, 2 May 2023, Patrick Palka wrote:


On Tue, 2 May 2023, Jason Merrill wrote:


On 5/1/23 15:59, Patrick Palka wrote:

Here we're incorrectly deeming the templated call a.g() inside b's
initializer as potentially constant, despite g being non-constexpr,
which leads to us wastefully instantiating the initializer ahead of
time
and triggering a bug in access checking deferral (which will get fixed
in the subsequent patch).

This patch fixes this by calling get_fns earlier during potentiality
checking so that we also handle the templated form of a member
function
call (whose overall callee is a COMPONENT_REF) when checking if the
called
function is constexpr etc.

PR c++/109480

gcc/cp/ChangeLog:

* constexpr.cc (potential_constant_expression_1) :
Reorganize to call get_fns sooner.  Remove dead store to
'fun'.

gcc/testsuite/ChangeLog:

* g++.dg/cpp0x/noexcept59.C: Make e() constexpr so that the
expected "without object" diagnostic isn't replaced by a
"call to non-constexpr function" diagnostic.
* g++.dg/template/non-dependent25.C: New test.
---
gcc/cp/constexpr.cc | 16

gcc/testsuite/g++.dg/cpp0x/noexcept59.C |  2 +-
gcc/testsuite/g++.dg/template/non-dependent25.C | 14 ++
3 files changed, 23 insertions(+), 9 deletions(-)
create mode 100644 gcc/testsuite/g++.dg/template/non-dependent25.C

diff --git a/gcc/cp/constexpr.cc b/gcc/cp/constexpr.cc
index d1097764b10..29d872d0a5e 100644
--- a/gcc/cp/constexpr.cc
+++ b/gcc/cp/constexpr.cc
@@ -9132,6 +9132,10 @@ potential_constant_expression_1 (tree t, bool
want_rval, bool strict, bool now,
if (fun && is_overloaded_fn (fun))
  {
+   if (!RECUR (fun, true))
+ return false;
+   fun = get_fns (fun);
+
if (TREE_CODE (fun) == FUNCTION_DECL)
  {
if (builtin_valid_in_constant_expr_p (fun))
@@ -9167,7 +9171,8 @@ potential_constant_expression_1 (tree t, bool
want_rval, bool strict, bool now,
   expression the address will be folded away, so look
   through it now.  */
if (DECL_NONSTATIC_MEMBER_FUNCTION_P (fun)
-   && !DECL_CONSTRUCTOR_P (fun))
+   && !DECL_CONSTRUCTOR_P (fun)
+   && !processing_template_decl)


I don't see any rationale for this hunk?


Now that we call get_fns earlier, we can reach this code path with a
templated non-static memfn call, but the code that follows assumes
non-templated form.

I tried teaching it to handle the templated form too, but there's
apparently two different templated forms for non-static memfn calls,
one with a COMPONENT_REF callee and one with an ordinary BASELINK
callee (without an implicit object argument).  In the former the implicit
object argument is inside the COMPONENT_REF (and is a reference instead
of a pointer), and in the latter we don't even have an implicit object
argument to inspect.

FWIW I think which form we use depends on whether we know if the called
function is a member of the current instantiation, e.g

struct A { void f(); };

template<class T> struct B;

template<class T>
struct C : B<T> {
  void g();

  void h() {
A::f(); // templated form has BASELINK callee, no object arg
C::g(); // templated form has COMPONENT_REF callee
  }
};

So it seemed best to punt on templated non-static memfn calls here for
now and treat that as a separate enhancement.


And I'm not even sure if the code path in question is necessary at all
anymore: disabling it outright doesn't cause any regressions in the
testsuite.
It seems effectively equivalent to the body of the loop over the args a few
lines later:


If removing that hunk doesn't regress anything, let's do it.  Probably that
should have happened in r13-55-ge9d2adc17d0dbe


Sounds good, here's the combined patch which I'm bootstrapping for good
measure.  Does it look OK for trunk if bootstrap+regtest succeeds?


OK.


-- >8 --

Subject: [PATCH] c++: potentiality of templated memfn call [PR109480]

Here we're incorrectly deeming the templated call a.g() inside b's
initializer as potentially constant, despite g being non-constexpr,
which leads to us wastefully instantiating the initializer ahead of time,
which incidentally triggers a bug in access checking deferral (to be
fixed by the subsequent patch).

This patch fixes this by calling get_fns earlier during CALL_EXPR
potentiality checking so that we're able to extract a FUNCTION_DECL out
of a templated member function call (whose overall callee is typically a
COMPONENT_REF) and do the usual checking of whether the called function is
constexpr etc.

In passing, I noticed potential_constant_expression_1's special handling
of the object argument of a non-static member function call is 

Re: Question on patch -fprofile-partial-training

2023-05-04 Thread Qing Zhao via Gcc-patches


> On May 4, 2023, at 9:05 AM, Martin Liška  wrote:
> 
> On 5/4/23 14:54, Qing Zhao wrote:
>> 
>> 
>>> On May 4, 2023, at 4:30 AM, Martin Liška  wrote:
>>> 
>>> On 5/3/23 21:10, Qing Zhao via Gcc-patches wrote:
 Hi, Jan,
 
 You added the following patch into gcc10:
 
 From 34fbe3f0946f88828765184ed6581bda62cdf49f Mon Sep 17 00:00:00 2001
 From: Jan Hubicka 
 Date: Thu, 5 Dec 2019 19:12:51 +0100
 Subject: [PATCH] cgraphclones.c (localize_profile): New function.
 
   * cgraphclones.c (localize_profile): New function.
   (cgraph_node::create_clone): Use it for partial profiles.
   * common.opt (fprofile-partial-training): New flag.
   * doc/invoke.texi (-fprofile-partial-training): Document.
   * ipa-cp.c (update_profiling_info): For partial profiles do not
   set function profile to zero.
   * profile.c (compute_branch_probabilities): With partial profile
   watch if edge count is zero and turn all probabilities to guessed.
   (compute_branch_probabilities): For partial profiles do not apply
   profile when entry count is zero.
   * tree-profile.c (tree_profiling): Only do 
 value_profile_transformations
   when profile is read.
 
 My question is:
>>> 
>>> Hello.
>>> 
>>> Why would anybody backport such change to unsupported code-stream of GCC 8?
>>> Generally speaking, I discourage from doing that.
>> 
>> Yes, I agree.
>> However, many users still use GCC8 right now, and some of them are asking 
>> for more performance
>> from PGO recently. That’s the reason I am studying this right now. 
> 
> I understand there are products that are based on GCC8, but as the branch is 
> officially unsupported, I don't
> see a reason to backport a new feature from newer release. It's just asking 
> for troubles. If your clients are
> interested in more performance, then they should use a recent supported 
> release.
We are trying to persuade them to use newer GCC, but it’s quite hard...
> 
>> 
>> From my understanding, -fprofile-partial-training is one important option 
>> for PGO performance.
> 
> I don't think so, speed benefit would be rather small I guess.
I saw some articles online to introduce this option for gcc10, 
https://documentation.suse.com/sbp/all/html/SBP-GCC-10/index.html#sec-gcc10-pgo
And also based on my previous experience in Studio compiler, I guess that this 
one might have
Some good performance impact on PGO.  Is there any old performance data on this 
option? (I cannot find online)

thanks.

Qing

> 
>> I’d like
>> to see any big technique difficult to prevent it from being back ported to 
>> GCC8. 
> 
> There might be of course some patch dependencies and I don't see a point why 
> should we waste
> time with that.
> 
> Cheers,
> Martin
> 
>> 
>> Thanks.
>> 
>> Qing
>> 
>>> 
>>> Martin
>>> 
 
 Can this patch be back ported to GCC8 easily? I am wondering any 
 significant
 Change between GCC8 and GCC10 that might make the backporting very hard?
 Thanks a lot for your help.
 
 Qing



[PATCH V4] VECT: Add decrement IV iteration loop control by variable amount support

2023-05-04 Thread juzhe . zhong
From: Ju-Zhe Zhong 

This patch is fixing V3 patch:
https://patchwork.sourceware.org/project/gcc/patch/20230407014741.139387-1-juzhe.zh...@rivai.ai/

Fix issues according to Richard Sandiford && Richard Biener.

1. Rename WHILE_LEN pattern into SELECT_VL according to Richard Sandiford.
2. Support multiple-rgroup for non-SLP auto-vectorization.

   For vec_pack_trunc pattern (multi-rgroup of non-SLP), we generate the total 
length:

 _36 = MIN_EXPR ;

 First length (MIN (X, VF/N)):
   loop_len_15 = MIN_EXPR <_36, POLY_INT_CST [2, 2]>;

 Second length (X - MIN (X, 1 * VF/N)):
   loop_len_16 = _36 - loop_len_15;

 Third length (X - MIN (X, 2 * VF/N)):
   _38 = MIN_EXPR <_36, POLY_INT_CST [4, 4]>;
   loop_len_17 = _36 - _38;

 Fourth length (X - MIN (X, 3 * VF/N)):
   _39 = MIN_EXPR <_36, POLY_INT_CST [6, 6]>;
   loop_len_18 = _36 - _39;

The reason I use MIN_EXPR instead of SELECT_VL to calculate the total length
is that using SELECT_VL to adapt the induction IV consumes more instructions
than just using MIN_EXPR.  Also, during testing, I found it hard to adjust
the length correctly according to SELECT_VL.

So, this patch we only use SELECT_VL for single-rgroup with single length 
control.

3. Fix document of select_vl for Richard Biener (remove mode N).
4. Fix comments of vect_set_loop_controls_by_select_vl according to Richard 
Biener.
5. Keep loop_vinfo as first parameter for "vect_get_loop_len".
6. make requirement of get_while_len_data_ref_ptr outside, let it to be gated 
at the caller site.

More comments from Richard Biener:
>> So it's not actually saturating.  The saturating operation is done by 
>> .WHILE_LEN?
I define the outcome of SELECT_VL (n, vf)  (WHILE_LEN) = IN_RANGE (0, min (n, 
vf)) will make 
the loop control counter never underflow zero.

>> I see.  I wonder if it makes sense to leave .WHILE_LEN aside for a start,
>> the above scheme should also work for single rgroups, no?
>> As said, it _looks_ like you can progress without .WHILE_LEN and using
>> .WHILE_LEN is a pure optimization?
Yes, SELECT_VL (WHILE_LEN) is a pure optimization for single-rgroup and allows
the target to choose any length in IN_RANGE (0, min (n, vf)) in each iteration.

Let me know if I missed something for the V3 patch.
Thanks.

---
 gcc/cfgloopmanip.cc|   2 +-
 gcc/doc/md.texi|  34 +++
 gcc/gimple-loop-interchange.cc |   2 +-
 gcc/internal-fn.def|   1 +
 gcc/optabs.def |   1 +
 gcc/tree-ssa-loop-ivcanon.cc   |   2 +-
 gcc/tree-ssa-loop-ivopts.cc|   2 +-
 gcc/tree-ssa-loop-manip.cc |  18 +-
 gcc/tree-ssa-loop-manip.h  |   4 +-
 gcc/tree-vect-data-refs.cc |   8 +-
 gcc/tree-vect-loop-manip.cc| 374 -
 gcc/tree-vect-loop.cc  |  32 ++-
 gcc/tree-vect-stmts.cc |  89 +++-
 gcc/tree-vectorizer.h  |   4 +-
 14 files changed, 535 insertions(+), 38 deletions(-)

diff --git a/gcc/cfgloopmanip.cc b/gcc/cfgloopmanip.cc
index 0e3ad8ed742..6e09dcbb0b1 100644
--- a/gcc/cfgloopmanip.cc
+++ b/gcc/cfgloopmanip.cc
@@ -826,7 +826,7 @@ create_empty_loop_on_edge (edge entry_edge,
 }
 
   gsi = gsi_last_bb (loop_header);
-  create_iv (initial_value, stride, iv, loop, &gsi, false,
+  create_iv (initial_value, PLUS_EXPR, stride, iv, loop, &gsi, false,
 iv_before, iv_after);
 
   /* Insert loop exit condition.  */
diff --git a/gcc/doc/md.texi b/gcc/doc/md.texi
index cc4a93a8763..99cf0cdbdca 100644
--- a/gcc/doc/md.texi
+++ b/gcc/doc/md.texi
@@ -4974,6 +4974,40 @@ for (i = 1; i < operand3; i++)
   operand0[i] = operand0[i - 1] && (operand1 + i < operand2);
 @end smallexample
 
+@cindex @code{select_vl@var{m}} instruction pattern
+@item @code{select_vl@var{m}}
+Set operand 0 to the number of active elements in vector will be updated value.
+operand 1 is the total elements need to be updated value.
+operand 2 is the vectorization factor.
+The value of operand 0 is target dependent and flexible in each iteration.
+The operation of this pattern can be:
+
+@smallexample
+Case 1:
+operand0 = MIN (operand1, operand2);
+operand2 can be const_poly_int or poly_int related to vector mode size.
+Some target like RISC-V has a standalone instruction to get MIN (n, MODE SIZE) 
so
+that we can reduce a use of general purpose register.
+
+In this case, only the last iteration of the loop is partial iteration.
+@end smallexample
+
+@smallexample
+Case 2:
+if (operand1 <= operand2)
+  operand0 = operand1;
+else if (operand1 < 2 * operand2)
+  operand0 = IN_RANGE (ceil (operand1 / 2), operand2);
+else
+  operand0 = operand2;
+
+This case will evenly distribute work over the last 2 iterations of a 
stripmine loop.
+@end smallexample
+
+The output of this pattern is not only used as IV of loop control counter, but 
also
+is used as the IV of address calculation with multiply/shift operation. This 
allow
+us dynamic adjust the number of elements is processed in each iteration of the 
loop.
+
 @cindex 

Re: [RFC, patch] Linker plugin - extend API for offloading corner case (aka: LDPT_REGISTER_CLAIM_FILE_HOOK_V2 linker plugin hook [GCC PR109128])

2023-05-04 Thread Tobias Burnus

On 04.05.23 13:02, Richard Biener wrote:

So since we expect the linker to use the host side table is there a way
for the plugin to exactly query that


Background - feel free to skip to the next quote / reply bit.

The following is what we have for the host side:

We have (→ libgcc/offloadstuff.c)
#define OFFLOAD_FUNC_TABLE_SECTION_NAME ".gnu.offload_funcs"

* crtoffloadbegin.a with:
const void *const __offload_func_table[0]
  __attribute__ ((__used__, visibility ("hidden"),
  section (OFFLOAD_FUNC_TABLE_SECTION_NAME))) = { };

* crtoffloadend.a with:
const void *const __offload_funcs_end[0]
  __attribute__ ((__used__, visibility ("hidden"),
  section (OFFLOAD_FUNC_TABLE_SECTION_NAME))) = { };

* crtoffloadtable.a with:
const void *const __OFFLOAD_TABLE__[]
  __attribute__ ((__visibility__ ("hidden"))) =
{
  &__offload_func_table, &__offload_funcs_end,


Each TU generates a static array with constructor in that
section – and the values for the constructor are the function
(or variable) addresses, i.e. omp_finish_file has:

  tree funcs_decl = build_decl (UNKNOWN_LOCATION, VAR_DECL,
get_identifier (".offload_func_table"),
funcs_decl_type);
  set_decl_section_name (funcs_decl, OFFLOAD_FUNC_TABLE_SECTION_NAME);


(the set of symbols the linker
uses from the object passed to the plugin)?  Because if the linker
uses something from the file but _not_ the host side offload table
(-ffunction-sections -fdata-sections) then things would still go
wrong, right?


Shouldn't this only affect where the functions/variables themselves are
placed to - and not the section in which the two offload-funs/-vars arrays
are placed to, given that it was explicitly set?

At least that's how I understand the GCC documentation and after glancing
at the varasm.c code.

That matches also what I see when using those flags. There are differences
related to, e.g. .text.s1.0._omp_fn.0 but .offload_var_table and
.offload_func_table still look fine.


(Side remark, I am wondering whether we should use "retain" for everything
that goes into the special sections, i.e. whether the following should be added:

+#ifndef ACCEL_COMPILER
+  if (SUPPORTS_SHF_GNU_RETAIN)
+   {
+ DECL_ATTRIBUTES (funcs_decl) = tree_cons (get_identifier ("retain"),
+   NULL_TREE, NULL_TREE);

to omp-offload.c (+ "retain" as __attribute__ in libgcc/offloadstuff.c)?


Is there a way to connect both in a way that the linker discards
either if the other isn't present?


I think as soon as the file is used, they are present, at least with 'retain',
even though they might be size-zero arrays.

My attempts to check with get_symbols{_v2,_v3} failed. If I recall correctly,
the way everything is set up for the hash, which also includes the file name,
makes it hard to query something which has not been added by get_symbols.
Even if we added a new interface, implementing it in a generic way and being
compatible with the ld.bfd way of storing symbols as hash might be a bit 
complex.

Tobias

-
Siemens Electronic Design Automation GmbH; Anschrift: Arnulfstraße 201, 80634 
München; Gesellschaft mit beschränkter Haftung; Geschäftsführer: Thomas 
Heurung, Frank Thürauf; Sitz der Gesellschaft: München; Registergericht 
München, HRB 106955


Re: Question on patch -fprofile-partial-training

2023-05-04 Thread Martin Liška
On 5/4/23 14:54, Qing Zhao wrote:
> 
> 
>> On May 4, 2023, at 4:30 AM, Martin Liška  wrote:
>>
>> On 5/3/23 21:10, Qing Zhao via Gcc-patches wrote:
>>> Hi, Jan,
>>>
>>> You added the following patch into gcc10:
>>>
>>> From 34fbe3f0946f88828765184ed6581bda62cdf49f Mon Sep 17 00:00:00 2001
>>> From: Jan Hubicka 
>>> Date: Thu, 5 Dec 2019 19:12:51 +0100
>>> Subject: [PATCH] cgraphclones.c (localize_profile): New function.
>>>
>>>* cgraphclones.c (localize_profile): New function.
>>>(cgraph_node::create_clone): Use it for partial profiles.
>>>* common.opt (fprofile-partial-training): New flag.
>>>* doc/invoke.texi (-fprofile-partial-training): Document.
>>>* ipa-cp.c (update_profiling_info): For partial profiles do not
>>>set function profile to zero.
>>>* profile.c (compute_branch_probabilities): With partial profile
>>>watch if edge count is zero and turn all probabilities to guessed.
>>>(compute_branch_probabilities): For partial profiles do not apply
>>>profile when entry count is zero.
>>>* tree-profile.c (tree_profiling): Only do 
>>> value_profile_transformations
>>>when profile is read.
>>>
>>> My question is:
>>
>> Hello.
>>
>> Why would anybody backport such change to unsupported code-stream of GCC 8?
>> Generally speaking, I discourage from doing that.
> 
> Yes, I agree.
> However, many users still use GCC8 right now, and some of them are asking for 
> more performance
> from PGO recently. That’s the reason I am studying this right now. 

I understand there are products that are based on GCC8, but as the branch is 
officially unsupported, I don't
see a reason to backport a new feature from newer release. It's just asking for 
troubles. If your clients are
interested in more performance, then they should use a recent supported release.

> 
> From my understanding, -fprofile-partial-training is one important option for 
> PGO performance.

I don't think so, speed benefit would be rather small I guess.

> I’d like
> to see any big technique difficult to prevent it from being back ported to 
> GCC8. 

There might be of course some patch dependencies and I don't see a point why 
should we waste
time with that.

Cheers,
Martin

> 
> Thanks.
> 
> Qing
> 
>>
>> Martin
>>
>>>
>>> Can this patch be back ported to GCC8 easily? I am wondering any significant
>>> Change between GCC8 and GCC10 that might make the backporting very hard?
>>> Thanks a lot for your help.
>>>
>>> Qing
> 



Re: Question on patch -fprofile-partial-training

2023-05-04 Thread Qing Zhao via Gcc-patches


> On May 4, 2023, at 4:30 AM, Martin Liška  wrote:
> 
> On 5/3/23 21:10, Qing Zhao via Gcc-patches wrote:
>> Hi, Jan,
>> 
>> You added the following patch into gcc10:
>> 
>> From 34fbe3f0946f88828765184ed6581bda62cdf49f Mon Sep 17 00:00:00 2001
>> From: Jan Hubicka 
>> Date: Thu, 5 Dec 2019 19:12:51 +0100
>> Subject: [PATCH] cgraphclones.c (localize_profile): New function.
>> 
>>* cgraphclones.c (localize_profile): New function.
>>(cgraph_node::create_clone): Use it for partial profiles.
>>* common.opt (fprofile-partial-training): New flag.
>>* doc/invoke.texi (-fprofile-partial-training): Document.
>>* ipa-cp.c (update_profiling_info): For partial profiles do not
>>set function profile to zero.
>>* profile.c (compute_branch_probabilities): With partial profile
>>watch if edge count is zero and turn all probabilities to guessed.
>>(compute_branch_probabilities): For partial profiles do not apply
>>profile when entry count is zero.
>>* tree-profile.c (tree_profiling): Only do 
>> value_profile_transformations
>>when profile is read.
>> 
>> My question is:
> 
> Hello.
> 
> Why would anybody backport such change to unsupported code-stream of GCC 8?
> Generally speaking, I discourage from doing that.

Yes, I agree.
However, many users still use GCC8 right now, and some of them are asking for 
more performance
from PGO recently. That’s the reason I am studying this right now. 

From my understanding, -fprofile-partial-training is an important option for
PGO performance. I’d like to know whether there is any major technical
difficulty that would prevent it from being backported to GCC8.

Thanks.

Qing

> 
> Martin
> 
>> 
>> Can this patch be backported to GCC8 easily? I am wondering whether there is
>> any significant change between GCC8 and GCC10 that might make the backporting
>> very hard.
>> Thanks a lot for your help.
>> 
>> Qing



[vxworks] [testsuite] [aarch64] use builtin in pred-not-gen-4.c

2023-05-04 Thread Alexandre Oliva via Gcc-patches


On vxworks, isunordered is defined as a macro that ultimately calls a
_Fpcomp function, that GCC doesn't recognize as a builtin, so it
can't optimize accordingly.

Use __builtin_isunordered instead to get the desired code for the
test.

Regstrapped on x86_64-linux-gnu.  Also tested on aarch64-vx7r2 with
gcc-12.  Ok to install?


for  gcc/testsuite/ChangeLog

* gcc.target/aarch64/pred-not-gen-4.c: Drop math.h include,
call builtin.
---
 .../gcc.target/aarch64/sve/pred-not-gen-4.c|4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

diff --git a/gcc/testsuite/gcc.target/aarch64/sve/pred-not-gen-4.c 
b/gcc/testsuite/gcc.target/aarch64/sve/pred-not-gen-4.c
index 0001dd3fc211f..1845bd3f0f704 100644
--- a/gcc/testsuite/gcc.target/aarch64/sve/pred-not-gen-4.c
+++ b/gcc/testsuite/gcc.target/aarch64/sve/pred-not-gen-4.c
@@ -1,12 +1,10 @@
 /* { dg-do compile } */
 /* { dg-options "-O3" } */
 
-#include <math.h>
-
 void f13(double * restrict z, double * restrict w, double * restrict x, double 
* restrict y, int n)
 {
 for (int i = 0; i < n; i++) {
-z[i] = (isunordered(w[i], 0)) ? x[i] + w[i] : y[i] - w[i];
+z[i] = (__builtin_isunordered(w[i], 0)) ? x[i] + w[i] : y[i] - w[i];
 }
 }
 

-- 
Alexandre Oliva, happy hackerhttps://FSFLA.org/blogs/lxo/
   Free Software Activist   GNU Toolchain Engineer
Disinformation flourishes because many people care deeply about injustice
but very few check the facts.  Ask me about 


[libstdc++] use strtold for from_chars even without locale

2023-05-04 Thread Alexandre Oliva via Gcc-patches


When we're using fast_float for 32- and 64-bit floating point, use
strtold for wider long double, even if locales are unavailable.

On vxworks, test for strtof's and strtold's declarations, so that they
can be used even when cross compiling.

Include stdlib.h in the decl-checking macro, so that it can find them.

Regstrapped on x86_64-linux-gnu.  Also tested on aarch64-vx7r2 with
gcc-12, where uselocale is not available, and using strtold rather than
fast_math's double fallback avoids a couple of from_chars-related
testsuite fails (from_chars/4.cc and to_chars/long_double.cc).  Ok to
install?


for  libstdc++-v3/ChangeLog

* src/c++17/floating_from_chars.cc
(USE_STRTOD_FOR_FROM_CHARS): Define when using fast_float if
long double is not as wide as double and strtold is not
broken.
* crossconfig.m4: Test for strtof and strtold declarations on
vxworks.
(GLIBCXX_CHECK_MATH_DECL): Include stdlib.h too.
* configure: Rebuilt.
---
 libstdc++-v3/configure|  131 +
 libstdc++-v3/crossconfig.m4   |3 -
 libstdc++-v3/src/c++17/floating_from_chars.cc |   10 ++
 3 files changed, 143 insertions(+), 1 deletion(-)

diff --git a/libstdc++-v3/configure b/libstdc++-v3/configure
[omitted]
diff --git a/libstdc++-v3/crossconfig.m4 b/libstdc++-v3/crossconfig.m4
index b3269cb88e077..9db32f4d422da 100644
--- a/libstdc++-v3/crossconfig.m4
+++ b/libstdc++-v3/crossconfig.m4
@@ -293,7 +293,7 @@ dnl # switch to more elaborate tests.
 GLIBCXX_CHECK_MATH_DECLS([
   acosl asinl atan2l atanl ceill cosl coshl expl fabsl floorl fmodl
   frexpl ldexpl log10l logl modfl powl sinl sinhl sqrtl tanl tanhl hypotl
-  ldexpf modff hypotf frexpf])
+  ldexpf modff hypotf frexpf strtof strtold])
 dnl # sincosl is the only one missing here, compared with the *l
 dnl # functions in the list guarded by
 dnl # long_double_math_on_this_cpu in configure.ac, right after
@@ -323,6 +323,7 @@ AC_DEFUN([GLIBCXX_CHECK_MATH_DECL], [
   AC_LANG_SAVE
   AC_LANG_C
   AC_TRY_COMPILE([
+#include <stdlib.h>
 #include <math.h>
 #ifdef HAVE_IEEEFP_H
 # include <ieeefp.h>
diff --git a/libstdc++-v3/src/c++17/floating_from_chars.cc 
b/libstdc++-v3/src/c++17/floating_from_chars.cc
index 78b9d92cdc0fa..15af811d198c4 100644
--- a/libstdc++-v3/src/c++17/floating_from_chars.cc
+++ b/libstdc++-v3/src/c++17/floating_from_chars.cc
@@ -80,6 +80,10 @@ extern "C" _Float128 __strtof128(const char*, char**)
 # if __LDBL_MANT_DIG__ == __DBL_MANT_DIG__
 // No need to use strtold.
 #  undef USE_STRTOD_FOR_FROM_CHARS
+# elif !defined USE_STRTOD_FOR_FROM_CHARS \
+   && defined _GLIBCXX_HAVE_STRTOLD && !defined 
_GLIBCXX_HAVE_BROKEN_STRTOLD
+// A working strtold will be more compliant than fast_float's double.
+#  define USE_STRTOD_FOR_FROM_CHARS 1
 # endif
 #endif
 
@@ -607,9 +611,11 @@ namespace
   ptrdiff_t
   from_chars_impl(const char* str, T& value, errc& ec) noexcept
   {
+#if _GLIBCXX_HAVE_USELOCALE
 if (locale_t loc = ::newlocale(LC_ALL_MASK, "C", (locale_t)0)) [[likely]]
   {
locale_t orig = ::uselocale(loc);
+#endif
 
 #if _GLIBCXX_USE_C99_FENV_TR1 && defined(FE_TONEAREST)
const int rounding = std::fegetround();
@@ -652,8 +658,10 @@ namespace
  std::fesetround(rounding);
 #endif
 
+#if _GLIBCXX_HAVE_USELOCALE
::uselocale(orig);
::freelocale(loc);
+#endif
 
const ptrdiff_t n = endptr - str;
if (conv_errno == ERANGE) [[unlikely]]
@@ -674,9 +682,11 @@ namespace
ec = errc();
  }
return n;
+#if _GLIBCXX_HAVE_USELOCALE
   }
 else if (errno == ENOMEM)
   ec = errc::not_enough_memory;
+#endif
 
 return 0;
   }


-- 
Alexandre Oliva, happy hackerhttps://FSFLA.org/blogs/lxo/
   Free Software Activist   GNU Toolchain Engineer
Disinformation flourishes because many people care deeply about injustice
but very few check the facts.  Ask me about 


[committed] libstdc++: Document new library version in manual

2023-05-04 Thread Jonathan Wakely via Gcc-patches
Pushed to trunk and gcc-13.

-- >8 --

libstdc++-v3/ChangeLog:

* doc/xml/manual/abi.xml (abi.versioning.history): Document
libstdc++.so.6.0.32 and GLIBCXX_3.4.32 version.
* doc/html/manual/abi.html: Regenerate.
---
 libstdc++-v3/doc/html/manual/abi.html | 6 +++---
 libstdc++-v3/doc/xml/manual/abi.xml   | 2 ++
 2 files changed, 5 insertions(+), 3 deletions(-)

diff --git a/libstdc++-v3/doc/xml/manual/abi.xml 
b/libstdc++-v3/doc/xml/manual/abi.xml
index e0e241de3bd..4b4930bef4c 100644
--- a/libstdc++-v3/doc/xml/manual/abi.xml
+++ b/libstdc++-v3/doc/xml/manual/abi.xml
@@ -280,6 +280,7 @@ compatible.
 GCC 11.1.0: libstdc++.so.6.0.29
 GCC 12.1.0: libstdc++.so.6.0.30
 GCC 13.1.0: libstdc++.so.6.0.31
+GCC 13.2.0: libstdc++.so.6.0.32
 
 
   Note 1: Error should be libstdc++.so.3.0.3.
@@ -357,6 +358,7 @@ compatible.
 GCC 11.1.0: GLIBCXX_3.4.29, CXXABI_1.3.13
 GCC 12.1.0: GLIBCXX_3.4.30, CXXABI_1.3.13
 GCC 13.1.0: GLIBCXX_3.4.31, CXXABI_1.3.14
+GCC 13.2.0: GLIBCXX_3.4.32, CXXABI_1.3.14
 
 
 
-- 
2.40.1



Re: [aarch64] Use dup and zip1 for interleaving elements in initializing vector

2023-05-04 Thread Prathamesh Kulkarni via Gcc-patches
On Mon, 24 Apr 2023 at 15:00, Richard Sandiford
 wrote:
>
> Prathamesh Kulkarni  writes:
> > [aarch64] Recursively initialize even and odd sub-parts and merge with zip1.
> >
> > gcc/ChangeLog:
> >   * config/aarch64/aarch64.cc (aarch64_expand_vector_init_fallback): 
> > Rename
> >   aarch64_expand_vector_init to this, and remove  interleaving case.
> >   Recursively call aarch64_expand_vector_init_fallback, instead of
> >   aarch64_expand_vector_init.
> >   (aarch64_unzip_vector_init): New function.
> >   (aarch64_expand_vector_init): Likewise.
> >
> > gcc/testsuite/ChangeLog:
> >   * gcc.target/aarch64/ldp_stp_16.c (cons2_8_float): Adjust for new
> >   code-gen.
> >   * gcc.target/aarch64/sve/acle/general/dupq_5.c: Likewise.
> >   * gcc.target/aarch64/sve/acle/general/dupq_6.c: Likewise.
> >   * gcc.target/aarch64/vec-init-18.c: Rename interleave-init-1.c to
> >   this.
> >   * gcc.target/aarch64/vec-init-19.c: New test.
> >   * gcc.target/aarch64/vec-init-20.c: Likewise.
> >   * gcc.target/aarch64/vec-init-21.c: Likewise.
> >   * gcc.target/aarch64/vec-init-22-size.c: Likewise.
> >   * gcc.target/aarch64/vec-init-22-speed.c: Likewise.
> >   * gcc.target/aarch64/vec-init-22.h: New header.
> >
> > diff --git a/gcc/config/aarch64/aarch64.cc b/gcc/config/aarch64/aarch64.cc
> > index d7e895f8d34..416e062829c 100644
> > --- a/gcc/config/aarch64/aarch64.cc
> > +++ b/gcc/config/aarch64/aarch64.cc
> > @@ -22026,11 +22026,12 @@ aarch64_simd_make_constant (rtx vals)
> >  return NULL_RTX;
> >  }
> >
> > -/* Expand a vector initialisation sequence, such that TARGET is
> > -   initialised to contain VALS.  */
> > +/* A subroutine of aarch64_expand_vector_init, with the same interface.
> > +   The caller has already tried a divide-and-conquer approach, so do
> > +   not consider that case here.  */
> >
> >  void
> > -aarch64_expand_vector_init (rtx target, rtx vals)
> > +aarch64_expand_vector_init_fallback (rtx target, rtx vals)
> >  {
> >machine_mode mode = GET_MODE (target);
> >scalar_mode inner_mode = GET_MODE_INNER (mode);
> > @@ -22090,38 +22091,6 @@ aarch64_expand_vector_init (rtx target, rtx vals)
> >return;
> >  }
> >
> > -  /* Check for interleaving case.
> > - For eg if initializer is (int16x8_t) {x, y, x, y, x, y, x, y}.
> > - Generate following code:
> > - dup v0.h, x
> > - dup v1.h, y
> > - zip1 v0.h, v0.h, v1.h
> > - for "large enough" initializer.  */
> > -
> > -  if (n_elts >= 8)
> > -{
> > -  int i;
> > -  for (i = 2; i < n_elts; i++)
> > - if (!rtx_equal_p (XVECEXP (vals, 0, i), XVECEXP (vals, 0, i % 2)))
> > -   break;
> > -
> > -  if (i == n_elts)
> > - {
> > -   machine_mode mode = GET_MODE (target);
> > -   rtx dest[2];
> > -
> > -   for (int i = 0; i < 2; i++)
> > - {
> > -   rtx x = expand_vector_broadcast (mode, XVECEXP (vals, 0, i));
> > -   dest[i] = force_reg (mode, x);
> > - }
> > -
> > -   rtvec v = gen_rtvec (2, dest[0], dest[1]);
> > -   emit_set_insn (target, gen_rtx_UNSPEC (mode, v, UNSPEC_ZIP1));
> > -   return;
> > - }
> > -}
> > -
> >enum insn_code icode = optab_handler (vec_set_optab, mode);
> >gcc_assert (icode != CODE_FOR_nothing);
> >
> > @@ -22243,7 +22212,7 @@ aarch64_expand_vector_init (rtx target, rtx vals)
> >   }
> > XVECEXP (copy, 0, i) = subst;
> >   }
> > -  aarch64_expand_vector_init (target, copy);
> > +  aarch64_expand_vector_init_fallback (target, copy);
> >  }
> >
> >/* Insert the variable lanes directly.  */
> > @@ -22257,6 +22226,81 @@ aarch64_expand_vector_init (rtx target, rtx vals)
> >  }
> >  }
> >
> > +/* Return even or odd half of VALS depending on EVEN_P.  */
> > +
> > +static rtx
> > +aarch64_unzip_vector_init (machine_mode mode, rtx vals, bool even_p)
> > +{
> > +  int n = XVECLEN (vals, 0);
> > +  machine_mode new_mode
> > += aarch64_simd_container_mode (GET_MODE_INNER (mode),
> > +GET_MODE_BITSIZE (mode).to_constant () / 
> > 2);
> > +  rtvec vec = rtvec_alloc (n / 2);
> > +  for (int i = 0; i < n/2; i++)
>
> Formatting nit: n / 2
>
> > +RTVEC_ELT (vec, i) = (even_p) ? XVECEXP (vals, 0, 2 * i)
> > +   : XVECEXP (vals, 0, 2 * i + 1);
> > +  return gen_rtx_PARALLEL (new_mode, vec);
> > +}
> > +
> > +/* Expand a vector initialisation sequence, such that TARGET is
>
> initialization
>
> > +   initialized to contain VALS.  */
> > +
> > +void
> > +aarch64_expand_vector_init (rtx target, rtx vals)
> > +{
> > +  /* Try decomposing the initializer into even and odd halves and
> > + then ZIP them together.  Use the resulting sequence if it is
> > + strictly cheaper than loading VALS directly.
> > +
> > + Prefer the fallback sequence in the event of a tie, since it
> > + will tend to use fewer registers.  */
> > +
> > +  

[PATCH] i386: Improve index_register_operand predicate

2023-05-04 Thread Uros Bizjak via Gcc-patches
Use the same approach as in register_no_elim_operand predicate, but also
reject stack_pointer_rtx operands.

gcc/ChangeLog:

* config/i386/predicates.md (index_register_operand): Reject
arg_pointer_rtx, frame_pointer_rtx, stack_pointer_rtx and
VIRTUAL_REGISTER_P operands.  Allow subregs of memory before reload.
(call_register_no_elim_operand): Rewrite as ...
(call_register_operand): ... this.
(call_insn_operand): Use call_register_operand predicate.

Bootstrapped and regression tested on x86_64-linux-gnu {,-m32}.

Pushed to master.

Uros.
diff --git a/gcc/config/i386/predicates.md b/gcc/config/i386/predicates.md
index 91400d7d8fb..878b144b0fb 100644
--- a/gcc/config/i386/predicates.md
+++ b/gcc/config/i386/predicates.md
@@ -686,22 +686,6 @@ (define_predicate "constant_call_address_operand"
   return true;
 })
 
-;; P6 processors will jump to the address after the decrement when %esp
-;; is used as a call operand, so they will execute return address as a code.
-;; See Pentium Pro errata 70, Pentium 2 errata A33 and Pentium 3 errata E17.
-
-(define_predicate "call_register_no_elim_operand"
-  (match_operand 0 "register_operand")
-{
-  if (SUBREG_P (op))
-op = SUBREG_REG (op);
-
-  if (!TARGET_64BIT && op == stack_pointer_rtx)
-return false;
-
-  return register_no_elim_operand (op, mode);
-})
-
 ;; True for any non-virtual and non-eliminable register.  Used in places where
 ;; instantiation of such a register may cause the pattern to not be recognized.
 (define_predicate "register_no_elim_operand"
@@ -720,21 +704,34 @@ (define_predicate "register_no_elim_operand"
   || VIRTUAL_REGISTER_P (op));
 })
 
-;; Similarly, but include the stack pointer.  This is used to prevent esp
-;; from being used as an index reg.
+;; Similarly, but include the stack pointer.  This is used
+;; to prevent esp from being used as an index reg.
 (define_predicate "index_register_operand"
   (match_operand 0 "register_operand")
 {
   if (SUBREG_P (op))
 op = SUBREG_REG (op);
 
-  unsigned int regno = REGNO (op);
-  if (reload_completed)
-return REGNO_OK_FOR_INDEX_P (regno);
-  else
-return REGNO_OK_FOR_INDEX_NONSTRICT_P (regno);
+  /* Before reload, we can allow (SUBREG (MEM...)) as a register operand
+ because it is guaranteed to be reloaded into one.  */
+  if (MEM_P (op))
+return true;
+
+  return !(op == arg_pointer_rtx
+  || op == frame_pointer_rtx
+  || op == stack_pointer_rtx
+  || VIRTUAL_REGISTER_P (op));
 })
 
+;; P6 processors will jump to the address after the decrement when %esp
+;; is used as a call operand, so they will execute return address as a code.
+;; See Pentium Pro errata 70, Pentium 2 errata A33 and Pentium 3 errata E17.
+
+(define_predicate "call_register_operand"
+  (if_then_else (match_test "TARGET_64BIT")
+(match_operand 0 "register_operand")
+(match_operand 0 "index_register_operand")))
+
 ;; Return false if this is any eliminable register.  Otherwise general_operand.
 (define_predicate "general_no_elim_operand"
   (if_then_else (match_code "reg,subreg")
@@ -790,7 +787,7 @@ (define_predicate "GOT_memory_operand"
 (define_special_predicate "call_insn_operand"
   (ior (match_test "constant_call_address_operand
 (op, mode == VOIDmode ? mode : Pmode)")
-   (match_operand 0 "call_register_no_elim_operand")
+   (match_operand 0 "call_register_operand")
(and (not (match_test "TARGET_INDIRECT_BRANCH_REGISTER"))
(ior (and (not (match_test "TARGET_X32"))
  (match_operand 0 "memory_operand"))


Re: [RFC,patch] Linker plugin - extend API for offloading corner case (aka: LDPT_REGISTER_CLAIM_FILE_HOOK_V2 linker plugin hook [GCC PR109128])

2023-05-04 Thread Richard Biener via Gcc-patches
On Thu, 4 May 2023, Tobias Burnus wrote:

> Based on the discussion with Richard B. on #gcc, here are some more details
> on why the linker calls (and might want to call) the plugin for files it
> does not need:
> 
> For LTO with no-fat binaries, a symbol in a static library might still be
> needed but the linker
> does not know as the file might only contain LTO objects. The linker then
> calls:
> 
>   bfd_link_plugin_object_p  -> ld_plugin_object_p
> which calls in case of GCC's lto-plugin:
>   claim_file_handler
> 
> * The latter checks the file for LTO symbols and, if found, sets
>   *claimed = true and registers the found symbols via add_symbols
> → The linker then ignores all other symbols and only uses the
>   plugin-provided symbols.
> 
> * When all files have been processed, the linker calls
>   all_symbols_read_handler and the lto-plugin can ask (get_symbols) whether a
>   given
>   symbol was used.
> 
> 
> For the offload usage, the problem is:
> 
> * The code currently assumes that a file is used when 'claim_file_handler' is
>   invoked by the linker
> → It cannot claim the file itself as then symbols in the file are ignored by
>   the linker.
> → Thus, if the file contains host-side LTO and is *claimed = true, it is fine.
>   Likewise, if symbols in the file cause the linker itself to use the file, it
>   is fine.
>   However, if neither the plugin claims the file nor the file is linked via
>   the linker, there is a problem (→ mismatch between host and device side).
> 
> Inside the all_symbols_read_handler, GCC's lto-plugin.cc could remove a file
> from the list of files to be used for the device-side lto-wrapper, but there
> is no way to ask the linker whether a given file will be linked/used or not.
> (Only a per-symbol enquiry exists, for LTO.)
> 
> The additional flag now permits for the offload use to ignore the file if the
> linker has
> no use for it (and passes it only for non-fat symbols host-side LTO check to
> the linker);
> thus, we can then ignore the file for offloading purpose, unless it is used
> for LTO - but
> that's something the plugin knows itself.
> 
> 
> Note: That's really only an issue about whether a file (from a static library)
> is used
> and not whether a certain symbol is used. For the host side, the
> function-pointer table
> is constructed by section merging and the device side also forces the output
> of the table.
> But all output forcing/keep-symbol handling fails if the whole file is dropped
> on one side
> (linker for the host side) but not on the other (linker plugin for the
> non-host offload side).

So since we expect the linker to use the host side table is there a way
for the plugin to exactly query that (the set of symbols the linker
uses from the object passed to the plugin)?  Because if the linker
uses something from the file but _not_ the host side offload table
(-ffunction-sections -fdata-sections) then things would still go
wrong, right?

Is there a way to connect both in a way that the linker discards
either if the other isn't present?

> 
> I hope this clarifies the problem, background and solution a bit better.
> 
> Tobias
> 
> On 02.05.23 19:19, Tobias Burnus wrote:
> > See also https://gcc.gnu.org/PR109128 (+ description in the patch log)
> >
> > The linker plugin API was designed to handle LTO - such that the
> > compiler (i.e. GCC's lto-plugin)
> > can claim an input file if it finds LTO code. In that case, the
> > symbols inside that file are ignored
> > by 'ld'.
> >
> > However, GCC also uses LTO for offloading: code designated for
> > running on a non-host device (GPUs) is saved in a special section in
> > LTO format. This code then ends up being compiled for offloading, but
> > otherwise the file is not claimed, keeping the symbols for 'ld' to
> > process, unless that file also uses real, host-side LTO.
> >
> > This mostly works okay, but a corner case exists (see PR for an
> > example) where 'ld' calls the
> > GCC's lto-plugin but does not actually use the symbols of that file.
> > That's fine, in principle,
> > but if that file contains offloading code, there is a problem: To
> > match host and device functions,
> > a table is created on both sides, but that table obviously must match.
> > However, when lto-plugin's
> > offload code processes those while ld does not link them, it fails.
> >
> > It turned out (kudos to Joseph for debugging + writing the patches)
> > that in that case ld.bfd does not actually regard that file as being
> > used but just offers it to lto-plugin in case it needs symbols from it.
> >
> > To get this working, the current API is insufficient.
> >
> > Possible solutions:
> > * Tell lto-plugin whether 'ld' actually needs symbols from a file or
> > it just offers the file
> >   in case that lto-plugin wants to claim that file
> >   => That's implemented in the attached patch.
> > * Make it possible to "claim" a file without discarding the ld-visible
> > symbols
> > * Asking the linker later 

[PATCH] tree-optimization/109721 - emulated vectors

2023-05-04 Thread Richard Biener via Gcc-patches
When fixing PR109672 I noticed we let SImode AND through when
target_support_p even though it isn't word_mode and I didn't want to
change that but had to catch the case where SImode PLUS is supported
but emulated vectors rely on it being word_mode.  The following
makes sure to preserve the word_mode check when !target_support_p
to avoid excessive lowering later even for bit operations.

Bootstrapped and tested on x86_64-unknown-linux-gnu, pushed.

PR tree-optimization/109721
* tree-vect-stmts.cc (vectorizable_operation): Make sure
to test word_mode for all !target_support_p operations.
---
 gcc/tree-vect-stmts.cc | 10 --
 1 file changed, 8 insertions(+), 2 deletions(-)

diff --git a/gcc/tree-vect-stmts.cc b/gcc/tree-vect-stmts.cc
index 3ad6a7d28d7..cf5194ea444 100644
--- a/gcc/tree-vect-stmts.cc
+++ b/gcc/tree-vect-stmts.cc
@@ -6389,9 +6389,15 @@ vectorizable_operation (vec_info *vinfo,
   if (dump_enabled_p ())
dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
 "op not supported by target.\n");
-  /* Check only during analysis.  */
-  if (((code == PLUS_EXPR || code == MINUS_EXPR || code == NEGATE_EXPR)
+  /* When vec_mode is not a vector mode and we verified ops we
+do not have to lower like AND are natively supported let
+those through even when the mode isn't word_mode.  For
+ops we have to lower the lowering code assumes we are
+dealing with word_mode.  */
+  if ((((code == PLUS_EXPR || code == MINUS_EXPR || code == NEGATE_EXPR)
+   || !target_support_p)
   && maybe_ne (GET_MODE_SIZE (vec_mode), UNITS_PER_WORD))
+ /* Check only during analysis.  */
  || (!vec_stmt && !vect_can_vectorize_without_simd_p (code)))
{
  if (dump_enabled_p ())
-- 
2.35.3


Re: [RFC, patch] Linker plugin - extend API for offloading corner case (aka: LDPT_REGISTER_CLAIM_FILE_HOOK_V2 linker plugin hook [GCC PR109128])

2023-05-04 Thread Tobias Burnus

Based on the discussion with Richard B. on #gcc, here are some more details
on why the linker calls (and might want to call) the plugin for files it
does not need:

For LTO with no-fat binaries, a symbol in a static library might still be 
needed but the linker
does not know as the file might only contain LTO objects. The linker then calls:

  bfd_link_plugin_object_p  -> ld_plugin_object_p
which calls in case of GCC's lto-plugin:
  claim_file_handler

* The latter checks the file for LTO symbols and, if found, sets
  *claimed = true and registers the found symbols via add_symbols
→ The linker then ignores all other symbols and only uses the
  plugin-provided symbols.

* When all files have been processed, the linker calls
  all_symbols_read_handler and the lto-plugin can ask (get_symbols) whether a 
given
  symbol was used.


For the offload usage, the problem is:

* The code currently assumes that a file is used when 'claim_file_handler' is 
invoked
  by the linker
→ It cannot claim the file itself as then symbols in the file are ignored by 
the linker.
→ Thus, if the file contains host-side LTO and is *claimed = true, it is fine.
  Likewise, if symbols in the file cause the linker itself to use the file, it 
is fine.
  However, if neither the plugin claims the file nor the file is linked via the 
linker,
  there is a problem (→ mismatch between host and device side).

Inside the all_symbols_read_handler, GCC's lto-plugin.cc could remove a file
from the list of files to be used for the device-side lto-wrapper, but there
is no way to ask the linker whether a given file will be linked/used or not.
(Only a per-symbol enquiry exists, for LTO.)

The additional flag now permits for the offload use to ignore the file if the 
linker has
no use for it (and passes it only for non-fat symbols host-side LTO check to 
the linker);
thus, we can then ignore the file for offloading purpose, unless it is used for 
LTO - but
that's something the plugin knows itself.


Note: That's really only an issue about whether a file (from a static library) 
is used
and not whether a certain symbol is used. For the host side, the 
function-pointer table
is constructed by section merging and the device side also forces the output of 
the table.
But all output forcing/keep-symbol handling fails if the whole file is dropped 
on one side
(linker for the host side) but not on the other (linker plugin for the non-host 
offload side).


I hope this clarifies the problem, background and solution a bit better.

Tobias

On 02.05.23 19:19, Tobias Burnus wrote:

See also https://gcc.gnu.org/PR109128 (+ description in the patch log)

The linker plugin API was designed to handle LTO - such that the
compiler (i.e. GCC's lto-plugin)
can claim an input file if it finds LTO code. In that case, the
symbols inside that file are ignored
by 'ld'.

However, GCC also uses LTO for offloading: code designated for
running on a non-host device (GPUs) is saved in a special section in
LTO format. This code then ends up being compiled for offloading, but
otherwise the file is not claimed, keeping the symbols for 'ld' to
process, unless that file also uses real, host-side LTO.

This mostly works okay, but a corner case exists (see PR for an
example) where 'ld' calls the
GCC's lto-plugin but does not actually use the symbols of that file.
That's fine, in principle,
but if that file contains offloading code, there is a problem: To
match host and device functions,
a table is created on both sides, but that table obviously must match.
However, when lto-plugin's
offload code processes those while ld does not link them, it fails.

It turned out (kudos to Joseph for debugging + writing the patches)
that in that case ld.bfd does not actually regard that file as being
used but just offers it to lto-plugin in case it needs symbols from it.

To get this working, the current API is insufficient.

Possible solutions:
* Tell lto-plugin whether 'ld' actually needs symbols from a file or
it just offers the file
  in case that lto-plugin wants to claim that file
  => That's implemented in the attached patch.
* Make it possible to "claim" a file without discarding the ld-visible
symbols
* Asking the linker later whether the file/some symbols are actually
used.
* something else ...


What this patch does:
* It adds a new API callback (LDPT_REGISTER_CLAIM_FILE_HOOK_V2) that
  takes an additional boolean argument which states whether ld.bfd
  intends to use that file/symbols from that file or whether it just
  asks the plugin in case it wants to claim it.
* On the ld.bfd side, it wires this up.
* On the GCC lto-plugin side, it uses that API if available, otherwise
  it uses the existing API.

The way the linker plugin handling is written, it works fine at
runtime if only one side
supports the new hook. (Except, of course, that for fixing the issue
both need to support it.)

Regarding those patches: Are they ok for mainline? Any comment, better
approach, suggestion?

Tobias

PS: Attached is the 

Re: Re: GCC 12.2.1 Status Report (2023-05-02), branch frozen for release

2023-05-04 Thread Richard Biener via Gcc-patches
On Thu, 4 May 2023, Martin Uecker wrote:

> Am Donnerstag, dem 04.05.2023 um 09:53 + schrieb Richard Biener:
> > On Thu, 4 May 2023, Martin Uecker wrote:
> > 
> > > 
> > > Can I please get permission for fixing this ICE?
> > > 
> > > https://gcc.gnu.org/pipermail/gcc-patches/2023-April/616221.html
> > 
> > Please wait until after the branch is unfrozen.  When patches were
> > approved for trunk and they are regressions there's no further
> > approval needed to backport them (but of course bootstrap & testing
> > is required on the branches backported to).
> 
> Ok, thanks - good to know! Next time I just push important
> fixes for regressions approved for trunk.   But does this mean
> we need to wait for about a year to get this fix into 12? 
> This would be a bit unfortunate for this problem I think.

You have to wait about a year to have a released compiler based on 12
with the fix, yes.  But most people use snapshots from the respective
branches anyway.

Richard.


Re: Re: GCC 12.2.1 Status Report (2023-05-02), branch frozen for release

2023-05-04 Thread Martin Uecker via Gcc-patches
Am Donnerstag, dem 04.05.2023 um 09:53 + schrieb Richard Biener:
> On Thu, 4 May 2023, Martin Uecker wrote:
> 
> > 
> > Can I please get permission for fixing this ICE?
> > 
> > https://gcc.gnu.org/pipermail/gcc-patches/2023-April/616221.html
> 
> Please wait until after the branch is unfrozen.  When patches were
> approved for trunk and they are regressions there's no further
> approval needed to backport them (but of course bootstrap & testing
> is required on the branches backported to).

Ok, thanks - good to know! Next time I just push important
fixes for regressions approved for trunk.   But does this mean
we need to wait for about a year to get this fix into 12? 
This would be a bit unfortunate for this problem I think.

Martin



RE: [PATCH 5/5] match.pd: Use splits in makefile and make configurable.

2023-05-04 Thread Tamar Christina via Gcc-patches
> -Original Message-
> From: Kyrylo Tkachov 
> Sent: Wednesday, May 3, 2023 4:19 PM
> To: Tamar Christina ; Jeff Law
> ; gcc-patches@gcc.gnu.org
> Cc: nd ; bonz...@gnu.org; nero...@gcc.gnu.org;
> aol...@gcc.gnu.org; ralf.wildenh...@gmx.de
> Subject: RE: [PATCH 5/5] match.pd: Use splits in makefile and make
> configurable.
> 
> 
> 
> > -Original Message-
> > From: Gcc-patches  > bounces+kyrylo.tkachov=arm@gcc.gnu.org> On Behalf Of Tamar
> > Christina via Gcc-patches
> > Sent: Tuesday, May 2, 2023 8:08 AM
> > To: Jeff Law ; gcc-patches@gcc.gnu.org
> > Cc: nd ; bonz...@gnu.org; nero...@gcc.gnu.org;
> > aol...@gcc.gnu.org; ralf.wildenh...@gmx.de
> > Subject: RE: [PATCH 5/5] match.pd: Use splits in makefile and make
> > configurable.
> >
> > > -Original Message-
> > > From: Jeff Law 
> > > Sent: Sunday, April 30, 2023 8:46 PM
> > > To: Tamar Christina ;
> > > gcc-patches@gcc.gnu.org
> > > Cc: nd ; bonz...@gnu.org; nero...@gcc.gnu.org;
> > > aol...@gcc.gnu.org; ralf.wildenh...@gmx.de
> > > Subject: Re: [PATCH 5/5] match.pd: Use splits in makefile and make
> > > configurable.
> > >
> > >
> > >
> > > On 4/28/23 04:44, Tamar Christina via Gcc-patches wrote:
> > > > Hi All,
> > > >
> > > > This updates the build system to split up match.pd files into chunks of
> 10.
> > > > This also introduces a new flag --with-matchpd-partitions which
> > > > can be used to change the number of partitions.
> > > >
> > > > For the analysis of why 10 please look at the previous patch in the 
> > > > series.
> > > >
> > > > Bootstrapped Regtested on aarch64-none-linux-gnu and no issues.
> > > >
> > > > Ok for master?
> > > >
> > > > Thanks,
> > > > Tamar
> > > >
> > > > gcc/ChangeLog:
> > > >
> > > > PR bootstrap/84402
> > > > * Makefile.in (NUM_MATCH_SPLITS, MATCH_SPLITS_SEQ,
> > > > GIMPLE_MATCH_PD_SEQ_SRC, GIMPLE_MATCH_PD_SEQ_O,
> > > > GENERIC_MATCH_PD_SEQ_SRC, GENERIC_MATCH_PD_SEQ_O): New.
> > > > (OBJS, MOSTLYCLEANFILES, .PRECIOUS): Use them.
> > > > (s-match): Split into s-generic-match and s-gimple-match.
> > > > * configure.ac (with-matchpd-partitions,
> > > > DEFAULT_MATCHPD_PARTITIONS): New.
> > > > * configure: Regenerate.
> > > This looks pretty reasonable to me.  Are there any patches left in
> > > this series that need review?  I'm very much looking forward to
> > > > build time improvements related to this patch, particularly for
> > > targets that I bootstrap with qemu emulation -- we take multiple
> > > hours to build gimple-match and the ability to parallelize those component
> builds should be a significant win.
> >
> > Hi,
> >
> > No this is the last one, Richi already approved the rest but he didn't
> > feel he had enough knowledge about the build system to say if this
> > code was portable enough.
> 
> I'm looking forward to this going as well for improved bootstrap times, thanks
> for working on this!
> 
> >
> > So just waiting on this one and can commit the series.
> 
> Can we treat Jeff's LGTM above as an ok given his global reviewer position?

Ah, I didn't treat it as such, as it wasn't in reply to the "ok for master"
part.  But perhaps I misunderstood.  In case it wasn't, this is also a PING
for the *.in files maintainers.

Regards,
Tamar

> Thanks,
> Kyrill
> 
> >
> > Cheers,
> > Tamar
> > >
> > > jeff


Re: Re: GCC 12.2.1 Status Report (2023-05-02), branch frozen for release

2023-05-04 Thread Richard Biener via Gcc-patches
On Thu, 4 May 2023, Martin Uecker wrote:

> 
> Can I please get permission for fixing this ICE?
> 
> https://gcc.gnu.org/pipermail/gcc-patches/2023-April/616221.html

Please wait until after the branch is unfrozen.  When patches were
approved for trunk and they are regressions there's no further
approval needed to backport them (but of course bootstrap & testing
is required on the branches backported to).

Richard.


Re: [PATCH V2 2/2] [x86] x86: Add a new option -mdaz-ftz to enable FTZ and DAZ flags in MXCSR.

2023-05-04 Thread Richard Biener via Gcc-patches
On Thu, May 4, 2023 at 7:37 AM Hongtao Liu via Gcc-patches
 wrote:
>
> On Thu, May 4, 2023 at 1:35 PM Hongtao Liu  wrote:
> >
> > On Thu, Dec 22, 2022 at 4:04 PM Uros Bizjak  wrote:
> > >
> > > On Thu, Dec 22, 2022 at 5:40 AM Hongtao Liu  wrote:
> > > >
> > > > On Thu, Dec 22, 2022 at 6:46 AM Jakub Jelinek  wrote:
> > > > >
> > > > > On Wed, Dec 21, 2022 at 02:43:43PM -0800, H.J. Lu wrote:
> > > > > > > > > > > >  Target RejectNegative
> > > > > > > > > > > >  Set 80387 floating-point precision to 80-bit.
> > > > > > > > > > > >
> > > > > > > > > > > > +mdaz-ftz
> > > > > > > > > > > > +Target
> > > > > > > > > > >
> > > > > > > > > > > s/Target/Driver/
> > > > > > > > > > Change to Driver and Got error like:cc1: error: 
> > > > > > > > > > command-line option
> > > > > > > > > > ‘-mdaz-ftz’ is valid for the driver but not for C.
> > > > > > > > > Hi Jakub:
> > > > > > > > >   I didn't find a good solution to handle this error after 
> > > > > > > > > changing
> > > > > > > > > *Target* to *Driver*, Could you give some hints how to solve 
> > > > > > > > > this
> > > > > > > > > problem?
> > > > > > > > > Or is it ok for you to mark this as *Target*(there won't be 
> > > > > > > > > any save
> > > > > > > > > and restore in cfun since there's no variable defined here.)
> > > > > > > >
> > > > > > > > Since all -m* options are passed to cc1, -mdaz-ftz can't be 
> > > > > > > > marked
> > > > > > > > as Driver.  We need to give it a different name to mark it as 
> > > > > > > > Driver.
> > > > > > >
> > > > > > > It is ok like that.
> > > > > > >
> > > > > > > Jakub
> > > > > > >
> > > > > >
> > > > > > The GCC driver handles -mno-XXX automatically for -mXXX.  Use
> > > > > > a different name needs to handle the negation.   Or we can do 
> > > > > > something
> > > > > > like this to check for CL_DRIVER before passing it to cc1.
> > > > >
> > > > > I meant I'm ok with -m{,no-}daz-ftz option being Target rather than 
> > > > > Driver.
> > > > >
> > > > Thanks.
> > > > Uros, Is the patch for you?
> > >
> > > The original patch is then OK.
> > Some users found the -mdaz-ftz option to be very useful, and want it
> > to be backport to GCC12 and GCC11.
> > But the patch is not a bugfix one, so i'd like to ask options from
> s/options/opinions/g
> > other maintainers, if the patch is suitable for backport?
> >
> > The backport patches include both this one and [1] which apply
> > -mdaz-ftz to all other x86 targets.
> >
> > [1] https://gcc.gnu.org/pipermail/gcc-patches/2023-January/610053.html

Please make sure to not backport the -ffast-math linker spec change though.
Also note the 12 branch is currently frozen.

I'll defer to x86 maintainers on the -mdaz-ftz flag itself.

Richard.

> > >
> > > Thanks,
> > > Uros.
> >
> >
> >
> > --
> > BR,
> > Hongtao
>
>
>
> --
> BR,
> Hongtao


Re: 2nd Ping: Re: [PATCH v3] doc: Document order of define_peephole2 scanning

2023-05-04 Thread Richard Biener via Gcc-patches
On Thu, May 4, 2023 at 2:10 AM Hans-Peter Nilsson via Gcc-patches
 wrote:
>
> Ping again.

OK.

> > From: Hans-Peter Nilsson 
> > Date: Thu, 27 Apr 2023 01:55:24 +0200
> >
> > > From: Hans-Peter Nilsson 
> > > Date: Wed, 19 Apr 2023 18:59:14 +0200
> > [...]
> >
> > > So again: Approvers: pdf output reviewed.  Ok to commit?
> > > -- >8 --
> > > I was a bit surprised when my newly-added define_peephole2 didn't
> > > match, but it was because it was expected to partially match the
> > > generated output of a previous define_peephole2, which matched and
> > > modified the last insn of a sequence to be matched.  I had assumed
> > > that the algorithm backed-up the size of the match-buffer, thereby
> > > exposing newly created opportunities *with sufficient context* to all
> > > define_peephole2's.  While things can change in that direction, let's
> > > start with documenting the current state.
> > >
> > > * doc/md.texi (define_peephole2): Document order of scanning.
> > > ---
> > >  gcc/doc/md.texi | 9 +
> > >  1 file changed, 9 insertions(+)
> > >
> > > diff --git a/gcc/doc/md.texi b/gcc/doc/md.texi
> > > index 07bf8bdebffb..300d104d58ab 100644
> > > --- a/gcc/doc/md.texi
> > > +++ b/gcc/doc/md.texi
> > > @@ -9362,6 +9362,15 @@ If the preparation falls through (invokes neither 
> > > @code{DONE} nor
> > >  @code{FAIL}), then the @code{define_peephole2} uses the replacement
> > >  template.
> > >
> > > +Insns are scanned in forward order from beginning to end for each basic
> > > +block.  Matches are attempted in order of @code{define_peephole2}
> > > +appearance in the @file{md} file.  After a successful replacement,
> > > +scanning for further opportunities for @code{define_peephole2}, resumes
> > > +with the first generated replacement insn as the first insn to be
> > > +matched against all @code{define_peephole2}.  For the example above,
> > > +after its successful replacement, the first insn that can be matched by
> > > +a @code{define_peephole2} is @code{(set (match_dup 4) (match_dup 1))}.
> > > +
> > >  @end ifset
> > >  @ifset INTERNALS
> > >  @node Insn Attributes
> > > --
> > > 2.30.2
> > >
> >


Re: [PATCHv2 2/2] PHIOPT: Improve replace_phi_edge_with_variable for diamond shapped bb

2023-05-04 Thread Richard Biener via Gcc-patches
On Thu, May 4, 2023 at 1:13 AM Andrew Pinski via Gcc-patches
 wrote:
>
> While looking at differences between what minmax_replacement
> and match_simplify_replacement do, I noticed that they sometimes
> chose different edges to remove. I decided we should be able to do
> better and be able to remove both empty basic blocks in the
> case of match_simplify_replacement as that moves the statements.
>
> This also updates the testcases as now match_simplify_replacement
> will remove the unused MIN/MAX_EXPR and they were checking for
> those.
>
> OK? Bootstrapped and tested on x86_64-linux-gnu with no regressions.

OK.

> gcc/ChangeLog:
>
> * tree-ssa-phiopt.cc (replace_phi_edge_with_variable): Handle
> diamond form bb with forwarder only empty blocks better.
>
> gcc/testsuite/ChangeLog:
>
> * gcc.dg/tree-ssa/minmax-15.c: Update test.
> * gcc.dg/tree-ssa/minmax-16.c: Update test.
> * gcc.dg/tree-ssa/minmax-3.c: Update test.
> * gcc.dg/tree-ssa/minmax-4.c: Update test.
> * gcc.dg/tree-ssa/minmax-5.c: Update test.
> * gcc.dg/tree-ssa/minmax-8.c: Update test.
> ---
>  gcc/testsuite/gcc.dg/tree-ssa/minmax-15.c |  3 +-
>  gcc/testsuite/gcc.dg/tree-ssa/minmax-16.c |  9 ++
>  gcc/testsuite/gcc.dg/tree-ssa/minmax-3.c  |  2 +-
>  gcc/testsuite/gcc.dg/tree-ssa/minmax-4.c  |  2 +-
>  gcc/testsuite/gcc.dg/tree-ssa/minmax-5.c  |  2 +-
>  gcc/testsuite/gcc.dg/tree-ssa/minmax-8.c  |  2 +-
>  gcc/tree-ssa-phiopt.cc| 35 ++-
>  7 files changed, 43 insertions(+), 12 deletions(-)
>
> diff --git a/gcc/testsuite/gcc.dg/tree-ssa/minmax-15.c 
> b/gcc/testsuite/gcc.dg/tree-ssa/minmax-15.c
> index 8a39871c938..6731f91e6c3 100644
> --- a/gcc/testsuite/gcc.dg/tree-ssa/minmax-15.c
> +++ b/gcc/testsuite/gcc.dg/tree-ssa/minmax-15.c
> @@ -30,5 +30,6 @@ main (void)
>return 0;
>  }
>
> -/* { dg-final { scan-tree-dump-times "MIN_EXPR" 3 "phiopt1" } } */
> +/* There should only be two MIN_EXPR left, the 3rd one was removed. */
> +/* { dg-final { scan-tree-dump-times "MIN_EXPR" 2 "phiopt1" } } */
>  /* { dg-final { scan-tree-dump-times "MAX_EXPR" 0 "phiopt1" } } */
> diff --git a/gcc/testsuite/gcc.dg/tree-ssa/minmax-16.c 
> b/gcc/testsuite/gcc.dg/tree-ssa/minmax-16.c
> index 623b12b3f74..094364e6424 100644
> --- a/gcc/testsuite/gcc.dg/tree-ssa/minmax-16.c
> +++ b/gcc/testsuite/gcc.dg/tree-ssa/minmax-16.c
> @@ -25,11 +25,8 @@ main (void)
>return 0;
>  }
>
> -/* After phiopt1, there really should be only 3 MIN_EXPR in the IR 
> (including debug statements).
> -   But the way phiopt does not cleanup the CFG all the time, the PHI might 
> still reference the
> -   alternative bb's moved statement.
> -   Note in the end, we do dce the statement and other debug statements to 
> end up with only 2 MIN_EXPR.
> -   So check that too. */
> -/* { dg-final { scan-tree-dump-times "MIN_EXPR" 4 "phiopt1" } } */
> +/* After phiopt1, will be only 2 MIN_EXPR in the IR (including debug 
> statements). */
> +/* xk will only have the final result so the extra debug info does not 
> change anything. */
> +/* { dg-final { scan-tree-dump-times "MIN_EXPR" 2 "phiopt1" } } */
>  /* { dg-final { scan-tree-dump-times "MIN_EXPR" 2 "optimized" } } */
>  /* { dg-final { scan-tree-dump-times "MAX_EXPR" 0 "phiopt1" } } */
> diff --git a/gcc/testsuite/gcc.dg/tree-ssa/minmax-3.c 
> b/gcc/testsuite/gcc.dg/tree-ssa/minmax-3.c
> index 2af10776346..521afe3e4d9 100644
> --- a/gcc/testsuite/gcc.dg/tree-ssa/minmax-3.c
> +++ b/gcc/testsuite/gcc.dg/tree-ssa/minmax-3.c
> @@ -25,5 +25,5 @@ main (void)
>return 0;
>  }
>
> -/* { dg-final { scan-tree-dump-times "MIN_EXPR" 3 "phiopt1" } } */
> +/* { dg-final { scan-tree-dump-times "MIN_EXPR" 2 "phiopt1" } } */
>  /* { dg-final { scan-tree-dump-times "MAX_EXPR" 0 "phiopt1" } } */
> diff --git a/gcc/testsuite/gcc.dg/tree-ssa/minmax-4.c 
> b/gcc/testsuite/gcc.dg/tree-ssa/minmax-4.c
> index 973f39bfed3..49e27185b5e 100644
> --- a/gcc/testsuite/gcc.dg/tree-ssa/minmax-4.c
> +++ b/gcc/testsuite/gcc.dg/tree-ssa/minmax-4.c
> @@ -26,4 +26,4 @@ main (void)
>  }
>
>  /* { dg-final { scan-tree-dump-times "MIN_EXPR" 0 "phiopt1" } } */
> -/* { dg-final { scan-tree-dump-times "MAX_EXPR" 3 "phiopt1" } } */
> +/* { dg-final { scan-tree-dump-times "MAX_EXPR" 2 "phiopt1" } } */
> diff --git a/gcc/testsuite/gcc.dg/tree-ssa/minmax-5.c 
> b/gcc/testsuite/gcc.dg/tree-ssa/minmax-5.c
> index 34e4e720511..194c881cc98 100644
> --- a/gcc/testsuite/gcc.dg/tree-ssa/minmax-5.c
> +++ b/gcc/testsuite/gcc.dg/tree-ssa/minmax-5.c
> @@ -25,5 +25,5 @@ main (void)
>return 0;
>  }
>
> -/* { dg-final { scan-tree-dump-times "MIN_EXPR" 2 "phiopt1" } } */
> +/* { dg-final { scan-tree-dump-times "MIN_EXPR" 1 "phiopt1" } } */
>  /* { dg-final { scan-tree-dump-times "MAX_EXPR" 1 "phiopt1" } } */
> diff --git a/gcc/testsuite/gcc.dg/tree-ssa/minmax-8.c 
> b/gcc/testsuite/gcc.dg/tree-ssa/minmax-8.c
> index 0160e573fef..d5cb53145ea 100644
> --- 

Re: [PATCH 1/2] Move copy_phi_arg_into_existing_phi to common location and use it

2023-05-04 Thread Richard Biener via Gcc-patches
On Thu, May 4, 2023 at 1:13 AM Andrew Pinski via Gcc-patches
 wrote:
>
> While improving replace_phi_edge_with_variable for the diamond formed bb
> case, I need a way to copy phi entries from one edge to another as I am
> removing a forwarding bb inbetween. I was pointed out that jump threading
> code had copy_phi_arg_into_existing_phi which I can use.
> I also noticed that both gimple_duplicate_sese_tail and
> remove_forwarder_block have similar code so it makes sense to use that 
> function
> in those two locations too.
>
> OK? Bootstrapped and tested on x86_64-linux-gnu with no regressions.

OK.

> gcc/ChangeLog:
>
> * tree-cfg.cc (copy_phi_args): New function
> (gimple_duplicate_sese_tail): Use it instead of
> doing it inline.
> * tree-cfg.h (copy_phi_args): New declaration.
> * tree-cfgcleanup.cc (remove_forwarder_block): Use
> copy_phi_args instead of implementing it inline.
>
> gcc/ChangeLog:
>
> * tree-ssa-threadupdate.cc (copy_phi_arg_into_existing_phi): Move to 
> ...
> * tree-cfg.cc (copy_phi_arg_into_existing_phi): Here and remove 
> static.
> (gimple_duplicate_sese_tail): Use copy_phi_arg_into_existing_phi 
> instead
> of an inline version of it.
> * tree-cfgcleanup.cc (remove_forwarder_block): Likewise.
> * tree-cfg.h (copy_phi_arg_into_existing_phi): New declaration.
> ---
>  gcc/tree-cfg.cc  | 38 +---
>  gcc/tree-cfg.h   |  1 +
>  gcc/tree-cfgcleanup.cc   | 12 ++--
>  gcc/tree-ssa-threadupdate.cc | 25 
>  4 files changed, 30 insertions(+), 46 deletions(-)
>
> diff --git a/gcc/tree-cfg.cc b/gcc/tree-cfg.cc
> index 4927fc0a8d9..a94dc3c2eea 100644
> --- a/gcc/tree-cfg.cc
> +++ b/gcc/tree-cfg.cc
> @@ -6802,6 +6802,32 @@ bb_part_of_region_p (basic_block bb, basic_block* bbs, 
> unsigned n_region)
>return false;
>  }
>
> +
> +/* For each PHI in BB, copy the argument associated with SRC_E to TGT_E.
> +   Assuming the argument exists, just does not have a value.  */
> +
> +void
> +copy_phi_arg_into_existing_phi (edge src_e, edge tgt_e)
> +{
> +  int src_idx = src_e->dest_idx;
> +  int tgt_idx = tgt_e->dest_idx;
> +
> +  /* Iterate over each PHI in e->dest.  */
> +  for (gphi_iterator gsi = gsi_start_phis (src_e->dest),
> +  gsi2 = gsi_start_phis (tgt_e->dest);
> +   !gsi_end_p (gsi);
> +   gsi_next (), gsi_next ())
> +{
> +  gphi *src_phi = gsi.phi ();
> +  gphi *dest_phi = gsi2.phi ();
> +  tree val = gimple_phi_arg_def (src_phi, src_idx);
> +  location_t locus = gimple_phi_arg_location (src_phi, src_idx);
> +
> +  SET_PHI_ARG_DEF (dest_phi, tgt_idx, val);
> +  gimple_phi_arg_set_location (dest_phi, tgt_idx, locus);
> +}
> +}
> +
>  /* Duplicates REGION consisting of N_REGION blocks.  The new blocks
> are stored to REGION_COPY in the same order in that they appear
> in REGION, if REGION_COPY is not NULL.  ENTRY is the entry to
> @@ -6847,9 +6873,6 @@ gimple_duplicate_sese_tail (edge entry, edge exit,
>gimple_stmt_iterator gsi;
>edge sorig, snew;
>basic_block exit_bb;
> -  gphi_iterator psi;
> -  gphi *phi;
> -  tree def;
>class loop *target, *aloop, *cloop;
>
>gcc_assert (EDGE_COUNT (exit->src->succs) == 2);
> @@ -6947,14 +6970,7 @@ gimple_duplicate_sese_tail (edge entry, edge exit,
> gcc_assert (single_succ_edge (region_copy[i]));
> e = redirect_edge_and_branch (single_succ_edge (region_copy[i]), 
> exit_bb);
> PENDING_STMT (e) = NULL;
> -   for (psi = gsi_start_phis (exit_bb);
> -!gsi_end_p (psi);
> -gsi_next ())
> - {
> -   phi = psi.phi ();
> -   def = PHI_ARG_DEF (phi, nexits[0]->dest_idx);
> -   add_phi_arg (phi, def, e, gimple_phi_arg_location_from_edge (phi, 
> e));
> - }
> +   copy_phi_arg_into_existing_phi (nexits[0], e);
>}
>e = redirect_edge_and_branch (nexits[1], nexits[0]->dest);
>PENDING_STMT (e) = NULL;
> diff --git a/gcc/tree-cfg.h b/gcc/tree-cfg.h
> index 9b56a68fe9d..9a1605be305 100644
> --- a/gcc/tree-cfg.h
> +++ b/gcc/tree-cfg.h
> @@ -113,6 +113,7 @@ extern basic_block gimple_switch_default_bb (function *, 
> gswitch *);
>  extern edge gimple_switch_edge (function *, gswitch *, unsigned);
>  extern edge gimple_switch_default_edge (function *, gswitch *);
>  extern bool cond_only_block_p (basic_block);
> +extern void copy_phi_arg_into_existing_phi (edge, edge);
>
>  /* Return true if the LHS of a call should be removed.  */
>
> diff --git a/gcc/tree-cfgcleanup.cc b/gcc/tree-cfgcleanup.cc
> index 42b25312122..193d87e3278 100644
> --- a/gcc/tree-cfgcleanup.cc
> +++ b/gcc/tree-cfgcleanup.cc
> @@ -610,17 +610,9 @@ remove_forwarder_block (basic_block bb)
>
>if (s == e)
> {
> - /* Create arguments for the phi nodes, since the edge was not
> + /* Copy arguments 

Re: [PATCH] PHIOPT: Improve replace_phi_edge_with_variable's dce_ssa_names slightly

2023-05-04 Thread Richard Biener via Gcc-patches
On Thu, May 4, 2023 at 1:11 AM Andrew Pinski via Gcc-patches
 wrote:
>
> When I added the dce_ssa_names argument, I didn't realize bitmap was a
> pointer so I used the default argument value as auto_bitmap(). But
> instead we could just use nullptr and check if it was a nullptr
> before calling simple_dce_from_worklist.
>
> OK? Bootstrapped and tested on x86_64-linux-gnu with no regressions.

OK.

> gcc/ChangeLog:
>
> * tree-ssa-phiopt.cc (replace_phi_edge_with_variable): Change
> the default argument value for dce_ssa_names to nullptr.
> Check to make sure dce_ssa_names is a non-nullptr before
> calling simple_dce_from_worklist.
> ---
>  gcc/tree-ssa-phiopt.cc | 5 +++--
>  1 file changed, 3 insertions(+), 2 deletions(-)
>
> diff --git a/gcc/tree-ssa-phiopt.cc b/gcc/tree-ssa-phiopt.cc
> index 37b98ef3c52..fbc1fd66607 100644
> --- a/gcc/tree-ssa-phiopt.cc
> +++ b/gcc/tree-ssa-phiopt.cc
> @@ -89,7 +89,7 @@ single_non_singleton_phi_for_edges (gimple_seq seq, edge 
> e0, edge e1)
>  static void
>  replace_phi_edge_with_variable (basic_block cond_block,
> edge e, gphi *phi, tree new_tree,
> -   bitmap dce_ssa_names = auto_bitmap())
> +   bitmap dce_ssa_names = nullptr)
>  {
>basic_block bb = gimple_bb (phi);
>gimple_stmt_iterator gsi;
> @@ -164,7 +164,8 @@ replace_phi_edge_with_variable (basic_block cond_block,
> gimple_cond_make_true (cond);
>  }
>
> -  simple_dce_from_worklist (dce_ssa_names);
> +  if (dce_ssa_names)
> +simple_dce_from_worklist (dce_ssa_names);
>
>statistics_counter_event (cfun, "Replace PHI with variable", 1);
>
> --
> 2.31.1
>


[PATCH v2] RISC-V: Legitimise the const0_rtx for RVV indexed load/store

2023-05-04 Thread Pan Li via Gcc-patches
From: Pan Li 

This patch tries to legitimise the const0_rtx (aka zero register)
as the base register for the RVV indexed load/store instructions
by allowing the const as the operand of the indexed RTL pattern.
Then the underlying combine pass will try to perform the const
propagation.

For example:
vint32m1_t
test_vluxei32_v_i32m1_shortcut (vuint32m1_t bindex, size_t vl)
{
  return __riscv_vluxei32_v_i32m1 ((int32_t *)0, bindex, vl);
}

Before this patch:
li a5,0 <- can be eliminated.
vl1re32.v  v1,0(a1)
vsetvlizero,a2,e32,m1,ta,ma
vluxei32.v v1,(a5),v1   <- can propagate the const 0 to a5 here.
vs1r.v v1,0(a0)
ret

After this patch:
test_vluxei32_v_i32m1_shortcut:
vl1re32.v   v1,0(a1)
vsetvli zero,a2,e32,m1,ta,ma
vluxei32.v  v1,(0),v1
vs1r.v  v1,0(a0)
ret

As above, this patch allows you to propagate the const 0 (aka zero
register) to the base register of the RVV indexed load in the combine
pass. This may benefit the underlying RVV auto-vectorization.

gcc/ChangeLog:

* config/riscv/vector.md: Allow const as the operand of RVV
  indexed load/store.

gcc/testsuite/ChangeLog:

* gcc.target/riscv/rvv/base/zero_base_load_store_optimization.c:
  Adjust indexed load/store check condition.

Signed-off-by: Pan Li 
Co-authored-by: Ju-Zhe Zhong 
---
 gcc/config/riscv/vector.md| 62 +--
 .../base/zero_base_load_store_optimization.c  |  3 +-
 2 files changed, 33 insertions(+), 32 deletions(-)

diff --git a/gcc/config/riscv/vector.md b/gcc/config/riscv/vector.md
index 92115e3935f..dc05e9fc713 100644
--- a/gcc/config/riscv/vector.md
+++ b/gcc/config/riscv/vector.md
@@ -1511,12 +1511,12 @@ (define_insn "@pred_indexed_load_same_eew"
 (reg:SI VL_REGNUM)
 (reg:SI VTYPE_REGNUM)] UNSPEC_VPREDICATE)
  (unspec:V
-   [(match_operand 3 "pmode_register_operand""  r,  r, r,  r")
+   [(match_operand 3 "pmode_reg_or_0_operand"" rJ, rJ,rJ, rJ")
 (mem:BLK (scratch))
 (match_operand: 4 "register_operand" " vr, vr,vr, vr")] 
ORDER)
  (match_operand:V 2 "vector_merge_operand"   " vu, vu, 0,  0")))]
   "TARGET_VECTOR"
-  "vlxei.v\t%0,(%3),%4%p1"
+  "vlxei.v\t%0,(%z3),%4%p1"
   [(set_attr "type" "vldx")
(set_attr "mode" "")])
 
@@ -1533,12 +1533,12 @@ (define_insn 
"@pred_indexed_load_x2_greater_eew"
 (reg:SI VL_REGNUM)
 (reg:SI VTYPE_REGNUM)] UNSPEC_VPREDICATE)
  (unspec:VEEWEXT2
-   [(match_operand 3 "pmode_register_operand" "r,  
  r")
+   [(match_operand 3 "pmode_reg_or_0_operand" "   rJ,  
 rJ")
 (mem:BLK (scratch))
 (match_operand: 4 "register_operand" "   vr,  
 vr")] ORDER)
  (match_operand:VEEWEXT2 2 "vector_merge_operand" "   vu,  
  0")))]
   "TARGET_VECTOR"
-  "vlxei.v\t%0,(%3),%4%p1"
+  "vlxei.v\t%0,(%z3),%4%p1"
   [(set_attr "type" "vldx")
(set_attr "mode" "")])
 
@@ -1554,12 +1554,12 @@ (define_insn 
"@pred_indexed_load_x4_greater_eew"
 (reg:SI VL_REGNUM)
 (reg:SI VTYPE_REGNUM)] UNSPEC_VPREDICATE)
  (unspec:VEEWEXT4
-   [(match_operand 3 "pmode_register_operand" "r,  
  r")
+   [(match_operand 3 "pmode_reg_or_0_operand" "   rJ,  
 rJ")
 (mem:BLK (scratch))
 (match_operand: 4 "register_operand"   "   vr,  
 vr")] ORDER)
  (match_operand:VEEWEXT4 2 "vector_merge_operand" "   vu,  
  0")))]
   "TARGET_VECTOR"
-  "vlxei.v\t%0,(%3),%4%p1"
+  "vlxei.v\t%0,(%z3),%4%p1"
   [(set_attr "type" "vldx")
(set_attr "mode" "")])
 
@@ -1575,12 +1575,12 @@ (define_insn 
"@pred_indexed_load_x8_greater_eew"
 (reg:SI VL_REGNUM)
 (reg:SI VTYPE_REGNUM)] UNSPEC_VPREDICATE)
  (unspec:VEEWEXT8
-   [(match_operand 3 "pmode_register_operand" "r,  
  r")
+   [(match_operand 3 "pmode_reg_or_0_operand" "   rJ,  
 rJ")
 (mem:BLK (scratch))
 (match_operand: 4 "register_operand""   vr,  
 vr")] ORDER)
  (match_operand:VEEWEXT8 2 "vector_merge_operand" "   vu,  
  0")))]
   "TARGET_VECTOR"
-  "vlxei.v\t%0,(%3),%4%p1"
+  "vlxei.v\t%0,(%z3),%4%p1"
   [(set_attr "type" "vldx")
(set_attr "mode" "")])
 
@@ -1597,12 +1597,12 @@ (define_insn 
"@pred_indexed_load_x2_smaller_eew"
 (reg:SI VL_REGNUM)
 (reg:SI VTYPE_REGNUM)] UNSPEC_VPREDICATE)
  (unspec:VEEWTRUNC2
-   [(match_operand 3 "pmode_register_operand"   "  r,  r,  
r,  r,r,r")
+   [(match_operand 3 "pmode_reg_or_0_operand"   " rJ, rJ, 
rJ, rJ,   rJ,   rJ")
 (mem:BLK (scratch))
 (match_operand: 4 "register_operand" "  0,  0,  
0,  0,   vr,   vr")] ORDER)
  (match_operand:VEEWTRUNC2 2 

Re: [PATCH 4/5] RISC-V: Add Zcmp extension supports.

2023-05-04 Thread Kito Cheng via Gcc-patches
Could you rebase this patch, we have some changes on

> All "zcmpe" means Zcmp with RVE extension.

Use zcmp_rve instead, zcmpe seems like a new ext. name

> diff --git a/gcc/config/riscv/riscv-zcmp-popret.cc 
> b/gcc/config/riscv/riscv-zcmp-popret.cc
> new file mode 100644
> index 000..d7b40f6a3e2
> --- /dev/null
> +++ b/gcc/config/riscv/riscv-zcmp-popret.cc
> @@ -0,0 +1,260 @@

Need a header here like "^#$% for RISC-V Copyright (C) 2023 Free
Software Foundation, Inc."

> +#include "config.h"
...
> +#include "cfgrtl.h"
> +
> +#define IN_TARGET_CODE 1

This should appear before including anything.

> +
> +namespace {
> +
> +/*
> +  1. preprocessing:
> +1.1. if there is no push rtx, then just return. e.g.
> +(note 5 1 22 2 [bb 2] NOTE_INSN_BASIC_BLOCK)
> +(insn/f 22 5 23 2 (set (reg/f:SI 2 sp)
> +  (plus:SI (reg/f:SI 2 sp)
> +   (const_int -32 [0xffe0])))
> +(nil))
> +(note 23 22 2 2 NOTE_INSN_PROLOGUE_END)
> +1.2. if push rtx exists, then we compute the number of
> +pushed s-registers, n_sreg.
> +
> +  push rtx should be find before NOTE_INSN_PROLOGUE_END tag
> +
> +  [2 and 3 happend simultaneously]
> +  2. find valid move pattern, mv sN, aN, where N < n_sreg,
> +and aN is not used the move pattern, and sN is not
> +defined before the move pattern (from prologue to the
> +position of move pattern).
> +  3. analysis use and reach of every instruction from prologue
> +to the position of move pattern.
> +if any sN is used, then we mark the corresponding argument list
> +candidate as invalid.
> +e.g.
> +   push  {ra,s0-s3}, {}, -32
> +   sw  s0,44(sp) # s0 is used, then argument list is invalid
> +   mv  a0,a5 # a0 is defined, then argument list is invalid
> +   ...
> +   mv  s0,a0
> +   mv  s1,a1
> +   mv  s2,a2
> +
> +  4. if there is a valid argument list, then replace the pop
> +push parallel insn, and delete mv pattern.
> + if not, skip.
> +*/

I am not sure I understand this optimization pass correctly,
could you give more example or indicate which testcase can demonstrate
this pass?

And I would prefer this pass split from this patch, let it become a separated
patch including testcase.


> diff --git a/gcc/config/riscv/riscv.cc b/gcc/config/riscv/riscv.cc
> index 5f8cbfc15ed..17df2f3f8cf 100644
> +/* Order for the CLOBBERs/USEs of push/pop.  */
> +static const unsigned push_save_reg_order[] = {

push_save_reg_order -> zcmp_push_save_reg_order

> +  INVALID_REGNUM, RETURN_ADDR_REGNUM, S0_REGNUM,
> +  S1_REGNUM, S2_REGNUM, S3_REGNUM, S4_REGNUM,
> +  S5_REGNUM, S6_REGNUM, S7_REGNUM, S8_REGNUM,
> +  S9_REGNUM, S10_REGNUM, S11_REGNUM
> +};
> +
> +/* Order for the CLOBBERs/USEs of push/pop in rve.  */
> +static const unsigned push_save_reg_order_zcmpe[] = {

push_save_reg_order_zcmpe -> zcmp_rve_push_save_reg_order

> @@ -4777,6 +4881,66 @@ riscv_use_save_libcall (const struct riscv_frame_info 
> *frame)
>return frame->save_libcall_adjustment != 0;
>  }
>
> +/* Determine how many instructions related to push/pop instructions.  */
> +
> +static unsigned
> +riscv_save_push_pop_count (unsigned mask)
> +{
> +  if (!BITSET_P (mask, GP_REG_FIRST + RETURN_ADDR_REGNUM))
> +return 0;
> +  for (unsigned n = GP_REG_LAST; n > GP_REG_FIRST; n--)
> +if (BITSET_P (mask, n)
> +   && !call_used_regs [n])
> +  /* add ra saving and sp adjust. */
> +  return CALLEE_SAVED_REG_NUMBER (n) + 1 + 2;

What is the magic number `+ 1 + 2`?

> +  abort ();
> +}
> +
> +/* Calculate the maximum sp adjustment of push/pop instruction. */
> +
> +static unsigned
> +riscv_push_pop_base_sp_adjust (unsigned mask)
> +{
> +  unsigned n_regs = riscv_save_push_pop_count (mask) - 1;
> +  return (n_regs * UNITS_PER_WORD + 15) & (~0xf);

Use ROUND_UP

> @@ -5171,6 +5337,86 @@ riscv_for_each_saved_reg (poly_int64 sp_offset, 
> riscv_save_restore_fn fn,
>}
>  }
>
> +static void
> +riscv_emit_pop_insn (struct riscv_frame_info *frame, HOST_WIDE_INT offset, 
> HOST_WIDE_INT size)
> +{
> +  unsigned int veclen = riscv_save_push_pop_count (frame->mask);
> +  unsigned int n_reg = veclen - 1;
> +  rtvec vec = rtvec_alloc (veclen);
> +  HOST_WIDE_INT sp_adjust;
> +  rtx dwarf = NULL_RTX;
> +
> +  const unsigned *reg_order = (TARGET_ZCMP && TARGET_RVE)
> +   ? push_save_reg_order_zcmpe
> +   : push_save_reg_order;
> +
> +  gcc_assert (n_reg >= 1
> +   && TARGET_ZCMP
> +   && ((TARGET_RVE && (n_reg <= ARRAY_SIZE (push_save_reg_order_zcmpe)))
> +   || (TARGET_ZCMP && (n_reg <= ARRAY_SIZE (push_save_reg_order);
> +
> +  /* sp adjust pattern */
> +  int max_allow_sp_adjust = riscv_push_pop_base_sp_adjust (frame->mask) + 48;
> +  int aligned_size = size;
> +
> +  /* if sp adjustment is too large, we should split it first. */
> +  if (aligned_size > max_allow_sp_adjust)
> +{
> +  rtx dwarf_pre_sp_adjust = NULL_RTX;
> +  rtx pre_adjust_rtx = gen_add3_insn 

[PATCHv2, rs6000] Splat vector small V2DI constants with ISA 2.07 instructions [PR104124]

2023-05-04 Thread HAO CHEN GUI via Gcc-patches
Hi,
  This patch adds a new insn for vector splat with small V2DI constants on P8.
If the value of constant is in RANGE (-16, 15) and not 0 or -1, it can be loaded
with vspltisw and vupkhsw on P8. It should be more efficient than loading vector from
TOC.

  Compared to the last version, the main change is to move the constant check
from easy_altivec_constant to easy_vector_constant and remove some unnecessary
mode checks.

  Bootstrapped and tested on powerpc64-linux BE and LE with no regressions.

Thanks
Gui Haochen

ChangeLog
2023-05-04  Haochen Gui 

gcc/
PR target/104124
* config/rs6000/altivec.md (*altivec_vupkhs_direct): Rename
to...
(altivec_vupkhs_direct): ...this.
* config/rs6000/constraints.md (wT constraint): New constant for a
vector constraint that can be loaded with vspltisw and vupkhsw.
* config/rs6000/predicates.md (vspltisw_vupkhsw_constant_split): New
predicate for wT constraint.
(easy_vector_constant): Call vspltisw_vupkhsw_constant_p to Check if
a vector constant can be synthesized with a vspltisw and a vupkhsw.
* config/rs6000/rs6000-protos.h (vspltisw_vupkhsw_constant_p): Declare.
* config/rs6000/rs6000.cc (vspltisw_vupkhsw_constant_p): Call
* (vspltisw_vupkhsw_constant_p): New function to return true if OP
mode is V2DI and can be synthesized with vupkhsw and vspltisw.
* config/rs6000/vsx.md (*vspltisw_v2di_split): New insn to load up
constants with vspltisw and vupkhsw.

gcc/testsuite/
PR target/104124
* gcc.target/powerpc/pr104124.c: New.

patch.diff
diff --git a/gcc/config/rs6000/altivec.md b/gcc/config/rs6000/altivec.md
index 49b0c964f4d..2c932854c33 100644
--- a/gcc/config/rs6000/altivec.md
+++ b/gcc/config/rs6000/altivec.md
@@ -2542,7 +2542,7 @@ (define_insn "altivec_vupkhs"
 }
   [(set_attr "type" "vecperm")])

-(define_insn "*altivec_vupkhs_direct"
+(define_insn "altivec_vupkhs_direct"
   [(set (match_operand:VP 0 "register_operand" "=v")
(unspec:VP [(match_operand: 1 "register_operand" "v")]
 UNSPEC_VUNPACK_HI_SIGN_DIRECT))]
diff --git a/gcc/config/rs6000/constraints.md b/gcc/config/rs6000/constraints.md
index c4a6ccf4efb..e7f185660c0 100644
--- a/gcc/config/rs6000/constraints.md
+++ b/gcc/config/rs6000/constraints.md
@@ -144,6 +144,10 @@ (define_constraint "wS"
   "@internal Vector constant that can be loaded with XXSPLTIB & sign 
extension."
   (match_test "xxspltib_constant_split (op, mode)"))

+(define_constraint "wT"
+  "@internal Vector constant that can be loaded with vspltisw & vupkhsw."
+  (match_test "vspltisw_vupkhsw_constant_split (op, mode)"))
+
 ;; ISA 3.0 DS-form instruction that has the bottom 2 bits 0 and no update form.
 ;; Used by LXSD/STXSD/LXSSP/STXSSP.  In contrast to "Y", the multiple-of-four
 ;; offset is enforced for 32-bit too.
diff --git a/gcc/config/rs6000/predicates.md b/gcc/config/rs6000/predicates.md
index 52c65534e51..ff0f625d508 100644
--- a/gcc/config/rs6000/predicates.md
+++ b/gcc/config/rs6000/predicates.md
@@ -694,6 +694,16 @@ (define_predicate "xxspltib_constant_split"
   return num_insns > 1;
 })

+;; Return true if the operand is a constant that can be loaded with a vspltisw
+;; instruction and then a vupkhsw instruction.
+
+(define_predicate "vspltisw_vupkhsw_constant_split"
+  (match_code "const_vector")
+{
+  int value;
+
+  return vspltisw_vupkhsw_constant_p (op, mode, );
+})

 ;; Return 1 if the operand is constant that can loaded directly with a XXSPLTIB
 ;; instruction.
@@ -742,6 +752,11 @@ (define_predicate "easy_vector_constant"
   && xxspltib_constant_p (op, mode, _insns, ))
return true;

+  /* V2DI constant within RANGE (-16, 15) can be synthesized with a
+vspltisw and a vupkhsw.  */
+  if (vspltisw_vupkhsw_constant_p (op, mode, ))
+   return true;
+
   return easy_altivec_constant (op, mode);
 }

diff --git a/gcc/config/rs6000/rs6000-protos.h 
b/gcc/config/rs6000/rs6000-protos.h
index 1a4fc1df668..ba39a73abf8 100644
--- a/gcc/config/rs6000/rs6000-protos.h
+++ b/gcc/config/rs6000/rs6000-protos.h
@@ -32,6 +32,7 @@ extern void init_cumulative_args (CUMULATIVE_ARGS *, tree, 
rtx, int, int, int,

 extern int easy_altivec_constant (rtx, machine_mode);
 extern bool xxspltib_constant_p (rtx, machine_mode, int *, int *);
+extern bool vspltisw_vupkhsw_constant_p (rtx, machine_mode, int *);
 extern int vspltis_shifted (rtx);
 extern HOST_WIDE_INT const_vector_elt_as_int (rtx, unsigned int);
 extern bool macho_lo_sum_memory_operand (rtx, machine_mode);
diff --git a/gcc/config/rs6000/rs6000.cc b/gcc/config/rs6000/rs6000.cc
index 3be5860dd9b..697b18e14f1 100644
--- a/gcc/config/rs6000/rs6000.cc
+++ b/gcc/config/rs6000/rs6000.cc
@@ -6638,6 +6638,36 @@ xxspltib_constant_p (rtx op,
   return true;
 }

+/* Return true if OP mode is V2DI and can be synthesized with ISA 2.07
+   instructions vupkhsw and vspltisw.
+
+   Return the constant 

[PATCH][committed] aarch64: PR target/99195 annotate more simple binary ops for vec-concat with zero

2023-05-04 Thread Kyrylo Tkachov via Gcc-patches
Hi all,

More pattern annotations and tests to eliminate redundant vec-concat with zero 
instructions.
These are for the abd family of instructions and the pairwise floating-point 
max/min and fadd
operations too.

Bootstrapped and tested on aarch64-none-linux-gnu.
Pushing to trunk.
Thanks,
Kyrill

gcc/ChangeLog:

PR target/99195
* config/aarch64/aarch64-simd.md (aarch64_abd): Rename to...
(aarch64_abd): ... This.
(fabd3): Rename to...
(fabd3): ... This.
(aarch64_p): Rename to...
(aarch64_p): ... This.
(aarch64_faddp): Rename to...
(aarch64_faddp): ... This.

gcc/testsuite/ChangeLog:

PR target/99195
* gcc.target/aarch64/simd/pr99195_1.c: Add testing for more binary ops.
* gcc.target/aarch64/simd/pr99195_2.c: Add testing for more binary ops.


vbin.patch
Description: vbin.patch


[PATCH][committed] aarch64: PR target/99195 annotate simple ternary ops for vec-concat with zero

2023-05-04 Thread Kyrylo Tkachov via Gcc-patches
Hi all,

We're now moving onto various simple ternary instructions, including some lane 
forms.
These include intrinsics that map down to mla, mls, fma, aba, bsl instructions.
Tests are added for lane 0 and lane 1 as for some of these instructions the 
lane 0 variants
use separate simpler patterns that need a separate annotation.

Bootstrapped and tested on aarch64-none-linux-gnu.
Pushing to trunk.
Thanks,
Kyrill

gcc/ChangeLog:

PR target/99195
* config/aarch64/aarch64-simd.md (aarch64_aba): Rename to...
(aarch64_aba): ... This.
(aarch64_mla): Rename to...
(aarch64_mla): ... This.
(*aarch64_mla_elt): Rename to...
(*aarch64_mla_elt): ... This.
(*aarch64_mla_elt_): Rename to...
(*aarch64_mla_elt_): ... This.
(aarch64_mla_n): Rename to...
(aarch64_mla_n): ... This.
(aarch64_mls): Rename to...
(aarch64_mls): ... This.
(*aarch64_mls_elt): Rename to...
(*aarch64_mls_elt): ... This.
(*aarch64_mls_elt_): Rename to...
(*aarch64_mls_elt_): ... This.
(aarch64_mls_n): Rename to...
(aarch64_mls_n): ... This.
(fma4): Rename to...
(fma4): ... This.
(*aarch64_fma4_elt): Rename to...
(*aarch64_fma4_elt): ... This.
(*aarch64_fma4_elt_): Rename to...
(*aarch64_fma4_elt_): ... This.
(*aarch64_fma4_elt_from_dup): Rename to...
(*aarch64_fma4_elt_from_dup): ... This.
(fnma4): Rename to...
(fnma4): ... This.
(*aarch64_fnma4_elt): Rename to...
(*aarch64_fnma4_elt): ... This.
(*aarch64_fnma4_elt_): Rename to...
(*aarch64_fnma4_elt_): ... This.
(*aarch64_fnma4_elt_from_dup): Rename to...
(*aarch64_fnma4_elt_from_dup): ... This.
(aarch64_simd_bsl_internal): Rename to...
(aarch64_simd_bsl_internal): ... This.
(*aarch64_simd_bsl_alt): Rename to...
(*aarch64_simd_bsl_alt): ... This.

gcc/testsuite/ChangeLog:

PR target/99195
* gcc.target/aarch64/simd/pr99195_3.c: New test.


vtern.patch
Description: vtern.patch


RE: [PATCH] RISC-V: Legitimise the const0_rtx for RVV indexed load/store

2023-05-04 Thread Li, Pan2 via Gcc-patches
Thanks Juzhe, make sense, let me update it soon.

Pan

From: juzhe.zh...@rivai.ai 
Sent: Thursday, May 4, 2023 4:40 PM
To: Li, Pan2 ; gcc-patches 
Cc: Kito.cheng ; Li, Pan2 ; Wang, 
Yanzhang 
Subject: Re: [PATCH] RISC-V: Legitimise the const0_rtx for RVV indexed 
load/store

vluxei32.v  v1,(0),v1 is not correct assembly.
Instead,  it should be vluxei32.v  v1,(zero),v1

You should change the assembly print: (%1) --> (%z1)


juzhe.zh...@rivai.ai

From: pan2.li
Date: 2023-05-04 16:35
To: gcc-patches
CC: juzhe.zhong; 
kito.cheng; pan2.li; 
yanzhang.wang
Subject: [PATCH] RISC-V: Legitimise the const0_rtx for RVV indexed load/store
From: Pan Li mailto:pan2...@intel.com>>

This patch tries to legitimise the const0_rtx (aka zero register)
as the base register for the RVV indexed load/store instructions
by allowing the const as the operand of the indexed RTL pattern.
Then the underlying combine pass will try to perform the const
propagation.

For example:
vint32m1_t
test_vluxei32_v_i32m1_shortcut (vuint32m1_t bindex, size_t vl)
{
  return __riscv_vluxei32_v_i32m1 ((int32_t *)0, bindex, vl);
}

Before this patch:
li a5,0 <- can be eliminated.
vl1re32.v  v1,0(a1)
vsetvlizero,a2,e32,m1,ta,ma
vluxei32.v v1,(a5),v1   <- can propagate the const 0 to a5 here.
vs1r.v v1,0(a0)
ret

After this patch:
test_vluxei32_v_i32m1_shortcut:
vl1re32.v   v1,0(a1)
vsetvli zero,a2,e32,m1,ta,ma
vluxei32.v  v1,(0),v1
vs1r.v  v1,0(a0)
ret

As above, this patch allows you to propagate the const 0 (aka zero
register) to the base register of the RVV indexed load in the combine
pass. This may benefit the underlying RVV auto-vectorization.

Signed-off-by: Pan Li mailto:pan2...@intel.com>>
Co-authored-by: Ju-Zhe Zhong mailto:juzhe.zh...@rivai.ai>>

gcc/ChangeLog:

* config/riscv/vector.md: Allow const as the operand of RVV
  indexed load/store.

gcc/testsuite/ChangeLog:

* gcc.target/riscv/rvv/base/zero_base_load_store_optimization.c:
  Adjust indexed load/store check condition.
---
gcc/config/riscv/vector.md| 32 +--
.../base/zero_base_load_store_optimization.c  |  3 +-
2 files changed, 18 insertions(+), 17 deletions(-)

diff --git a/gcc/config/riscv/vector.md b/gcc/config/riscv/vector.md
index 92115e3935f..c3210eacd47 100644
--- a/gcc/config/riscv/vector.md
+++ b/gcc/config/riscv/vector.md
@@ -1511,7 +1511,7 @@ (define_insn "@pred_indexed_load_same_eew"
 (reg:SI VL_REGNUM)
 (reg:SI VTYPE_REGNUM)] UNSPEC_VPREDICATE)
  (unspec:V
- [(match_operand 3 "pmode_register_operand""  r,  r, r,  r")
+ [(match_operand 3 "pmode_reg_or_0_operand"" rJ, rJ,rJ, rJ")
 (mem:BLK (scratch))
 (match_operand: 4 "register_operand" " vr, vr,vr, vr")] ORDER)
  (match_operand:V 2 "vector_merge_operand"   " vu, vu, 0,  0")))]
@@ -1533,7 +1533,7 @@ (define_insn 
"@pred_indexed_load_x2_greater_eew"
 (reg:SI VL_REGNUM)
 (reg:SI VTYPE_REGNUM)] UNSPEC_VPREDICATE)
  (unspec:VEEWEXT2
- [(match_operand 3 "pmode_register_operand" "r,r")
+ [(match_operand 3 "pmode_reg_or_0_operand" "   rJ,   rJ")
 (mem:BLK (scratch))
 (match_operand: 4 "register_operand" "   vr,   vr")] 
ORDER)
  (match_operand:VEEWEXT2 2 "vector_merge_operand" "   vu,0")))]
@@ -1554,7 +1554,7 @@ (define_insn 
"@pred_indexed_load_x4_greater_eew"
 (reg:SI VL_REGNUM)
 (reg:SI VTYPE_REGNUM)] UNSPEC_VPREDICATE)
  (unspec:VEEWEXT4
- [(match_operand 3 "pmode_register_operand" "r,r")
+ [(match_operand 3 "pmode_reg_or_0_operand" "   rJ,   rJ")
 (mem:BLK (scratch))
 (match_operand: 4 "register_operand"   "   vr,   vr")] 
ORDER)
  (match_operand:VEEWEXT4 2 "vector_merge_operand" "   vu,0")))]
@@ -1575,7 +1575,7 @@ (define_insn 
"@pred_indexed_load_x8_greater_eew"
 (reg:SI VL_REGNUM)
 (reg:SI VTYPE_REGNUM)] UNSPEC_VPREDICATE)
  (unspec:VEEWEXT8
- [(match_operand 3 "pmode_register_operand" "r,r")
+ [(match_operand 3 "pmode_reg_or_0_operand" "   rJ,   rJ")
 (mem:BLK (scratch))
 (match_operand: 4 "register_operand""   vr,   vr")] 
ORDER)
  (match_operand:VEEWEXT8 2 "vector_merge_operand" "   vu,0")))]
@@ -1597,7 +1597,7 @@ (define_insn 
"@pred_indexed_load_x2_smaller_eew"
 (reg:SI VL_REGNUM)
 (reg:SI VTYPE_REGNUM)] UNSPEC_VPREDICATE)
  (unspec:VEEWTRUNC2
- [(match_operand 3 "pmode_register_operand"   "  r,  r,  r,  
r,r,r")
+ [(match_operand 3 "pmode_reg_or_0_operand"   " rJ, rJ, rJ, 
rJ,   rJ,   rJ")
 (mem:BLK (scratch))
 (match_operand: 4 "register_operand" "  0,  0,  0,  0,  
 

Re: [PATCH] RISC-V: Legitimise the const0_rtx for RVV indexed load/store

2023-05-04 Thread juzhe.zh...@rivai.ai
vluxei32.v  v1,(0),v1 is not correct assembly.
Instead,  it should be vluxei32.v  v1,(zero),v1

You should change the assembly print: (%1) --> (%z1)



juzhe.zh...@rivai.ai
 
From: pan2.li
Date: 2023-05-04 16:35
To: gcc-patches
CC: juzhe.zhong; kito.cheng; pan2.li; yanzhang.wang
Subject: [PATCH] RISC-V: Legitimise the const0_rtx for RVV indexed load/store
From: Pan Li 
 
This patch tries to legitimise the const0_rtx (aka zero register)
as the base register for the RVV indexed load/store instructions
by allowing the const as the operand of the indexed RTL pattern.
Then the underlying combine pass will try to perform the const
propagation.
 
For example:
vint32m1_t
test_vluxei32_v_i32m1_shortcut (vuint32m1_t bindex, size_t vl)
{
  return __riscv_vluxei32_v_i32m1 ((int32_t *)0, bindex, vl);
}
 
Before this patch:
li a5,0 <- can be eliminated.
vl1re32.v  v1,0(a1)
vsetvlizero,a2,e32,m1,ta,ma
vluxei32.v v1,(a5),v1   <- can propagate the const 0 to a5 here.
vs1r.v v1,0(a0)
ret
 
After this patch:
test_vluxei32_v_i32m1_shortcut:
vl1re32.v   v1,0(a1)
vsetvli zero,a2,e32,m1,ta,ma
vluxei32.v  v1,(0),v1
vs1r.v  v1,0(a0)
ret
 
As above, this patch allows you to propagate the const 0 (aka zero
register) to the base register of the RVV indexed load in the combine
pass. This may benefit the underlying RVV auto-vectorization.
 
Signed-off-by: Pan Li 
Co-authored-by: Ju-Zhe Zhong 
 
gcc/ChangeLog:
 
* config/riscv/vector.md: Allow const as the operand of RVV
  indexed load/store.
 
gcc/testsuite/ChangeLog:
 
* gcc.target/riscv/rvv/base/zero_base_load_store_optimization.c:
  Adjust indexed load/store check condition.
---
gcc/config/riscv/vector.md| 32 +--
.../base/zero_base_load_store_optimization.c  |  3 +-
2 files changed, 18 insertions(+), 17 deletions(-)
 
diff --git a/gcc/config/riscv/vector.md b/gcc/config/riscv/vector.md
index 92115e3935f..c3210eacd47 100644
--- a/gcc/config/riscv/vector.md
+++ b/gcc/config/riscv/vector.md
@@ -1511,7 +1511,7 @@ (define_insn "@pred_indexed_load_same_eew"
 (reg:SI VL_REGNUM)
 (reg:SI VTYPE_REGNUM)] UNSPEC_VPREDICATE)
  (unspec:V
- [(match_operand 3 "pmode_register_operand""  r,  r, r,  r")
+ [(match_operand 3 "pmode_reg_or_0_operand"" rJ, rJ,rJ, rJ")
 (mem:BLK (scratch))
 (match_operand: 4 "register_operand" " vr, vr,vr, vr")] ORDER)
  (match_operand:V 2 "vector_merge_operand"   " vu, vu, 0,  0")))]
@@ -1533,7 +1533,7 @@ (define_insn 
"@pred_indexed_load_x2_greater_eew"
 (reg:SI VL_REGNUM)
 (reg:SI VTYPE_REGNUM)] UNSPEC_VPREDICATE)
  (unspec:VEEWEXT2
- [(match_operand 3 "pmode_register_operand" "r,r")
+ [(match_operand 3 "pmode_reg_or_0_operand" "   rJ,   rJ")
 (mem:BLK (scratch))
 (match_operand: 4 "register_operand" "   vr,   vr")] 
ORDER)
  (match_operand:VEEWEXT2 2 "vector_merge_operand" "   vu,0")))]
@@ -1554,7 +1554,7 @@ (define_insn 
"@pred_indexed_load_x4_greater_eew"
 (reg:SI VL_REGNUM)
 (reg:SI VTYPE_REGNUM)] UNSPEC_VPREDICATE)
  (unspec:VEEWEXT4
- [(match_operand 3 "pmode_register_operand" "r,r")
+ [(match_operand 3 "pmode_reg_or_0_operand" "   rJ,   rJ")
 (mem:BLK (scratch))
 (match_operand: 4 "register_operand"   "   vr,   vr")] 
ORDER)
  (match_operand:VEEWEXT4 2 "vector_merge_operand" "   vu,0")))]
@@ -1575,7 +1575,7 @@ (define_insn 
"@pred_indexed_load_x8_greater_eew"
 (reg:SI VL_REGNUM)
 (reg:SI VTYPE_REGNUM)] UNSPEC_VPREDICATE)
  (unspec:VEEWEXT8
- [(match_operand 3 "pmode_register_operand" "r,r")
+ [(match_operand 3 "pmode_reg_or_0_operand" "   rJ,   rJ")
 (mem:BLK (scratch))
 (match_operand: 4 "register_operand""   vr,   vr")] 
ORDER)
  (match_operand:VEEWEXT8 2 "vector_merge_operand" "   vu,0")))]
@@ -1597,7 +1597,7 @@ (define_insn 
"@pred_indexed_load_x2_smaller_eew"
 (reg:SI VL_REGNUM)
 (reg:SI VTYPE_REGNUM)] UNSPEC_VPREDICATE)
  (unspec:VEEWTRUNC2
- [(match_operand 3 "pmode_register_operand"   "  r,  r,  r,  
r,r,r")
+ [(match_operand 3 "pmode_reg_or_0_operand"   " rJ, rJ, rJ, 
rJ,   rJ,   rJ")
 (mem:BLK (scratch))
 (match_operand: 4 "register_operand" "  0,  0,  0,  0,  
 vr,   vr")] ORDER)
  (match_operand:VEEWTRUNC2 2 "vector_merge_operand" " vu,  0, vu,  0,  
 vu,0")))]
@@ -1618,7 +1618,7 @@ (define_insn 
"@pred_indexed_load_x4_smaller_eew"
 (reg:SI VL_REGNUM)
 (reg:SI VTYPE_REGNUM)] UNSPEC_VPREDICATE)
  (unspec:VEEWTRUNC4
- [(match_operand 3 "pmode_register_operand" "  r,  r,  r,  r,  
  r,r")
+ [(match_operand 3 "pmode_reg_or_0_operand" " rJ, rJ, rJ, rJ,  
 rJ,   rJ")
 (mem:BLK (scratch))
 (match_operand: 4 "register_operand" "  0,  0,  0,  0,   
vr,   vr")] ORDER)
  

Re: [PATCH 3/5] RISC-V: Add ZC* test for march args being passed.

2023-05-04 Thread Kito Cheng via Gcc-patches
Please drop those testcases which just contain inline asm in foo; they
should belong to binutils's testsuite IMO.

On Thu, Apr 6, 2023 at 2:23 PM Jiawei  wrote:
>
> From: Charlie Keaney 
>
> Add all ZC* extensions march args tests.
>
> Co-Authored by: Nandni Jamnadas 
> Co-Authored by: Jiawei 
> Co-Authored by: Mary Bennett 
> Co-Authored by: Simon Cook 
>
> gcc/testsuite/ChangeLog:
>
> * gcc.target/riscv/arch-20.c: New test.
> * gcc.target/riscv/arch-21.c: New test.
> * gcc.target/riscv/zc-zca-arch.c: New test.
> * gcc.target/riscv/zc-zcb-arch.c: New test.
> * gcc.target/riscv/zc-zcb-m-arch.c: New test.
> * gcc.target/riscv/zc-zcb-zba-arch.c: New test.
> * gcc.target/riscv/zc-zcb-zbb-arch.c: New test.
> * gcc.target/riscv/zc-zcf-arch.c: New test.
> * gcc.target/riscv/zc-zcmp-arch.c: New test.
> * gcc.target/riscv/zc-zcmpe-arch.c: New test.
> * gcc.target/riscv/zc-zcmt-arch.c: New test.
> ---
>  gcc/testsuite/gcc.target/riscv/arch-20.c | 5 +
>  gcc/testsuite/gcc.target/riscv/arch-21.c | 5 +
>  gcc/testsuite/gcc.target/riscv/zc-zca-arch.c | 9 +
>  gcc/testsuite/gcc.target/riscv/zc-zcb-arch.c | 9 +
>  gcc/testsuite/gcc.target/riscv/zc-zcb-m-arch.c   | 9 +
>  gcc/testsuite/gcc.target/riscv/zc-zcb-zba-arch.c | 9 +
>  gcc/testsuite/gcc.target/riscv/zc-zcb-zbb-arch.c | 9 +
>  gcc/testsuite/gcc.target/riscv/zc-zcf-arch.c | 9 +
>  gcc/testsuite/gcc.target/riscv/zc-zcmp-arch.c| 9 +
>  gcc/testsuite/gcc.target/riscv/zc-zcmpe-arch.c   | 9 +
>  gcc/testsuite/gcc.target/riscv/zc-zcmt-arch.c| 9 +
>  11 files changed, 91 insertions(+)
>  create mode 100644 gcc/testsuite/gcc.target/riscv/arch-20.c
>  create mode 100644 gcc/testsuite/gcc.target/riscv/arch-21.c
>  create mode 100644 gcc/testsuite/gcc.target/riscv/zc-zca-arch.c
>  create mode 100644 gcc/testsuite/gcc.target/riscv/zc-zcb-arch.c
>  create mode 100644 gcc/testsuite/gcc.target/riscv/zc-zcb-m-arch.c
>  create mode 100644 gcc/testsuite/gcc.target/riscv/zc-zcb-zba-arch.c
>  create mode 100644 gcc/testsuite/gcc.target/riscv/zc-zcb-zbb-arch.c
>  create mode 100644 gcc/testsuite/gcc.target/riscv/zc-zcf-arch.c
>  create mode 100644 gcc/testsuite/gcc.target/riscv/zc-zcmp-arch.c
>  create mode 100644 gcc/testsuite/gcc.target/riscv/zc-zcmpe-arch.c
>  create mode 100644 gcc/testsuite/gcc.target/riscv/zc-zcmt-arch.c
>
> diff --git a/gcc/testsuite/gcc.target/riscv/arch-20.c 
> b/gcc/testsuite/gcc.target/riscv/arch-20.c
> new file mode 100644
> index 000..3be4ade65a7
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/riscv/arch-20.c
> @@ -0,0 +1,5 @@
> +/* { dg-do compile } */
> +/* { dg-options "-march=rv64i_zcf -mabi=lp64" } */
> +int foo() {}
> +/* { dg-error "'-march=rv64i_zcf': zcf extension supports in rv32 only" "" { 
> target *-*-* } 0 } */
> +/* { dg-error "'-march=rv64i_zca_zcf': zcf extension supports in rv32 only" 
> "" { target *-*-* } 0 } */
> diff --git a/gcc/testsuite/gcc.target/riscv/arch-21.c 
> b/gcc/testsuite/gcc.target/riscv/arch-21.c
> new file mode 100644
> index 000..cecce06e474
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/riscv/arch-21.c
> @@ -0,0 +1,5 @@
> +/* { dg-do compile } */
> +/* { dg-options "-march=rv64if_zce -mabi=lp64" } */
> +int foo() {}
> +/* { dg-error "'-march=rv64if_zce': zcf extension supports in rv32 only" "" 
> { target *-*-* } 0 } */
> +/* { dg-error "'-march=rv64if_zca_zcb_zce_zcf_zcmp_zcmt': zcf extension 
> supports in rv32 only" "" { target *-*-* } 0 } */
> diff --git a/gcc/testsuite/gcc.target/riscv/zc-zca-arch.c 
> b/gcc/testsuite/gcc.target/riscv/zc-zca-arch.c
> new file mode 100644
> index 000..bcb8321e709
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/riscv/zc-zca-arch.c
> @@ -0,0 +1,9 @@
> +/* { dg-do compile } */
> +/* { dg-options "-march=rv32i_zca -mabi=ilp32" } */
> +
> +int foo()
> +{
> +asm("c.sw x9, 32(x10)");
> +}
> +
> +/* { dg-final { scan-assembler "c.sw x9, 32\\(x10\\)" } } */
> diff --git a/gcc/testsuite/gcc.target/riscv/zc-zcb-arch.c 
> b/gcc/testsuite/gcc.target/riscv/zc-zcb-arch.c
> new file mode 100644
> index 000..54d4dff63ea
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/riscv/zc-zcb-arch.c
> @@ -0,0 +1,9 @@
> +/* { dg-do compile } */
> +/* { dg-options "-march=rv32i_zcb -mabi=ilp32" } */
> +
> +int foo()
> +{
> +asm("c.lbu x9,1(x8)");
> +}
> +
> +/* { dg-final { scan-assembler "c.lbu x9,1\\(x8\\)" } } */
> diff --git a/gcc/testsuite/gcc.target/riscv/zc-zcb-m-arch.c 
> b/gcc/testsuite/gcc.target/riscv/zc-zcb-m-arch.c
> new file mode 100644
> index 000..f23fe304607
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/riscv/zc-zcb-m-arch.c
> @@ -0,0 +1,9 @@
> +/* { dg-do compile } */
> +/* { dg-options "-march=rv32im_zcb -mabi=ilp32" } */
> +
> +int foo()
> +{
> +asm("c.lbu x9,1(x8)");
> +}
> +
> +/* { dg-final { scan-assembler "c.lbu x9,1\\(x8\\)" } } 

[PATCH] RISC-V: Legitimise the const0_rtx for RVV indexed load/store

2023-05-04 Thread Pan Li via Gcc-patches
From: Pan Li 

This patch tries to legitimise the const0_rtx (aka zero register)
as the base register for the RVV indexed load/store instructions
by allowing the const as the operand of the indexed RTL pattern.
Then the underlying combine pass will try to perform the const
propagation.

For example:
vint32m1_t
test_vluxei32_v_i32m1_shortcut (vuint32m1_t bindex, size_t vl)
{
  return __riscv_vluxei32_v_i32m1 ((int32_t *)0, bindex, vl);
}

Before this patch:
li a5,0 <- can be eliminated.
vl1re32.v  v1,0(a1)
vsetvlizero,a2,e32,m1,ta,ma
vluxei32.v v1,(a5),v1   <- can propagate the const 0 to a5 here.
vs1r.v v1,0(a0)
ret

After this patch:
test_vluxei32_v_i32m1_shortcut:
vl1re32.v   v1,0(a1)
vsetvli zero,a2,e32,m1,ta,ma
vluxei32.v  v1,(0),v1
vs1r.v  v1,0(a0)
ret

As above, this patch allows you to propagate the const 0 (aka zero
register) to the base register of the RVV indexed load in the combine
pass. This may benefit the underlying RVV auto-vectorization.

Signed-off-by: Pan Li 
Co-authored-by: Ju-Zhe Zhong 

gcc/ChangeLog:

* config/riscv/vector.md: Allow const as the operand of RVV
  indexed load/store.

gcc/testsuite/ChangeLog:

* gcc.target/riscv/rvv/base/zero_base_load_store_optimization.c:
  Adjust indexed load/store check condition.
---
 gcc/config/riscv/vector.md| 32 +--
 .../base/zero_base_load_store_optimization.c  |  3 +-
 2 files changed, 18 insertions(+), 17 deletions(-)

diff --git a/gcc/config/riscv/vector.md b/gcc/config/riscv/vector.md
index 92115e3935f..c3210eacd47 100644
--- a/gcc/config/riscv/vector.md
+++ b/gcc/config/riscv/vector.md
@@ -1511,7 +1511,7 @@ (define_insn "@pred_indexed_load_same_eew"
 (reg:SI VL_REGNUM)
 (reg:SI VTYPE_REGNUM)] UNSPEC_VPREDICATE)
  (unspec:V
-   [(match_operand 3 "pmode_register_operand""  r,  r, r,  r")
+   [(match_operand 3 "pmode_reg_or_0_operand"" rJ, rJ,rJ, rJ")
 (mem:BLK (scratch))
 (match_operand: 4 "register_operand" " vr, vr,vr, vr")] 
ORDER)
  (match_operand:V 2 "vector_merge_operand"   " vu, vu, 0,  0")))]
@@ -1533,7 +1533,7 @@ (define_insn 
"@pred_indexed_load_x2_greater_eew"
 (reg:SI VL_REGNUM)
 (reg:SI VTYPE_REGNUM)] UNSPEC_VPREDICATE)
  (unspec:VEEWEXT2
-   [(match_operand 3 "pmode_register_operand" "r,  
  r")
+   [(match_operand 3 "pmode_reg_or_0_operand" "   rJ,  
 rJ")
 (mem:BLK (scratch))
 (match_operand: 4 "register_operand" "   vr,  
 vr")] ORDER)
  (match_operand:VEEWEXT2 2 "vector_merge_operand" "   vu,  
  0")))]
@@ -1554,7 +1554,7 @@ (define_insn 
"@pred_indexed_load_x4_greater_eew"
 (reg:SI VL_REGNUM)
 (reg:SI VTYPE_REGNUM)] UNSPEC_VPREDICATE)
  (unspec:VEEWEXT4
-   [(match_operand 3 "pmode_register_operand" "r,  
  r")
+   [(match_operand 3 "pmode_reg_or_0_operand" "   rJ,  
 rJ")
 (mem:BLK (scratch))
 (match_operand: 4 "register_operand"   "   vr,  
 vr")] ORDER)
  (match_operand:VEEWEXT4 2 "vector_merge_operand" "   vu,  
  0")))]
@@ -1575,7 +1575,7 @@ (define_insn 
"@pred_indexed_load_x8_greater_eew"
 (reg:SI VL_REGNUM)
 (reg:SI VTYPE_REGNUM)] UNSPEC_VPREDICATE)
  (unspec:VEEWEXT8
-   [(match_operand 3 "pmode_register_operand" "r,  
  r")
+   [(match_operand 3 "pmode_reg_or_0_operand" "   rJ,  
 rJ")
 (mem:BLK (scratch))
 (match_operand: 4 "register_operand""   vr,  
 vr")] ORDER)
  (match_operand:VEEWEXT8 2 "vector_merge_operand" "   vu,  
  0")))]
@@ -1597,7 +1597,7 @@ (define_insn 
"@pred_indexed_load_x2_smaller_eew"
 (reg:SI VL_REGNUM)
 (reg:SI VTYPE_REGNUM)] UNSPEC_VPREDICATE)
  (unspec:VEEWTRUNC2
-   [(match_operand 3 "pmode_register_operand"   "  r,  r,  
r,  r,r,r")
+   [(match_operand 3 "pmode_reg_or_0_operand"   " rJ, rJ, 
rJ, rJ,   rJ,   rJ")
 (mem:BLK (scratch))
 (match_operand: 4 "register_operand" "  0,  0,  
0,  0,   vr,   vr")] ORDER)
  (match_operand:VEEWTRUNC2 2 "vector_merge_operand" " vu,  0, 
vu,  0,   vu,0")))]
@@ -1618,7 +1618,7 @@ (define_insn 
"@pred_indexed_load_x4_smaller_eew"
 (reg:SI VL_REGNUM)
 (reg:SI VTYPE_REGNUM)] UNSPEC_VPREDICATE)
  (unspec:VEEWTRUNC4
-   [(match_operand 3 "pmode_register_operand" "  r,  r,  
r,  r,r,r")
+   [(match_operand 3 "pmode_reg_or_0_operand" " rJ, rJ, 
rJ, rJ,   rJ,   rJ")
 (mem:BLK (scratch))
 (match_operand: 4 "register_operand" "  0,  0,  
0,  0,   vr,   

Re: [PATCH 1/5] RISC-V: Minimal support for ZC extensions.

2023-05-04 Thread Kito Cheng via Gcc-patches
> @@ -1151,14 +1169,26 @@ riscv_subset_list::parse (const char *arch, 
> location_t loc)
>subset_list->handle_implied_ext (itr);
>  }
>
> +  /* Zce only imply zcf when 'f' extension exist.  */

Grammarly and gmail told me it should be "Zce only implies zcf when
'f' extension exist." :P

> +  if (subset_list->lookup ("zce") != NULL
> +   && subset_list->lookup ("f") != NULL
> +   && subset_list->lookup ("zcf") == NULL)

Wait, add zce if f not implied here, but how about rv64if_zce?
it will become rv64if_zce_zcf and then error later.

> +subset_list->add ("zcf", false);
> +
>subset_list->handle_combine_ext ();
>
> +  if (subset_list->lookup ("zcf") && subset_list->m_xlen == 64)
> +error_at (loc, "%<-march=%s%>: zcf extension supports in rv32 only"
> + , arch);
> +
>if (subset_list->lookup ("zfinx") && subset_list->lookup ("f"))
>  error_at (loc, "%<-march=%s%>: z*inx conflicts with floating-point "
>"extensions", arch);
>
>return subset_list;
>
> +
> +

Plz drop these two extra blank lines.


Re: [PATCH] gcov: add info about "calls" to JSON output format

2023-05-04 Thread Martin Liška
Hello.

After discussion with the consumers of the format:
https://github.com/gcovr/gcovr/issues/282#issuecomment-1534239536

I'm going to push a patch that prints JSON version in gcov -v.

Thanks,
Martin

From d879d68eb309561d266ddf734ab8c69f4fef3874 Mon Sep 17 00:00:00 2001
From: Martin Liska 
Date: Thu, 4 May 2023 10:27:55 +0200
Subject: [PATCH] gcov: add GCOV format version to gcov -v

gcc/ChangeLog:

	* gcov.cc (GCOV_JSON_FORMAT_VERSION): New definition.
	(print_version): Use it.
	(generate_results): Likewise.
---
 gcc/gcov.cc | 5 -
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/gcc/gcov.cc b/gcc/gcov.cc
index d96b4f77e3b..2fad6aa7ede 100644
--- a/gcc/gcov.cc
+++ b/gcc/gcov.cc
@@ -58,6 +58,8 @@ using namespace std;
 #include "gcov-io.h"
 #include "gcov-io.cc"
 
+#define GCOV_JSON_FORMAT_VERSION "2"
+
 /* The gcno file is generated by -ftest-coverage option. The gcda file is
generated by a program compiled with -fprofile-arcs. Their formats
are documented in gcov-io.h.  */
@@ -964,6 +966,7 @@ static void
 print_version (void)
 {
   fnotice (stdout, "gcov %s%s\n", pkgversion_string, version_string);
+  fnotice (stdout, "JSON format version: %s\n", GCOV_JSON_FORMAT_VERSION);
   fprintf (stdout, "Copyright %s 2023 Free Software Foundation, Inc.\n",
 	   _("(C)"));
   fnotice (stdout,
@@ -1546,7 +1549,7 @@ generate_results (const char *file_name)
   gcov_intermediate_filename = get_gcov_intermediate_filename (file_name);
 
   json::object *root = new json::object ();
-  root->set ("format_version", new json::string ("2"));
+  root->set ("format_version", new json::string (GCOV_JSON_FORMAT_VERSION));
   root->set ("gcc_version", new json::string (version_string));
 
   if (bbg_cwd != NULL)
-- 
2.40.1



Re: Question on patch -fprofile-partial-training

2023-05-04 Thread Martin Liška
On 5/3/23 21:10, Qing Zhao via Gcc-patches wrote:
> Hi, Jan,
> 
> You added the following patch into gcc10:
> 
> From 34fbe3f0946f88828765184ed6581bda62cdf49f Mon Sep 17 00:00:00 2001
> From: Jan Hubicka 
> Date: Thu, 5 Dec 2019 19:12:51 +0100
> Subject: [PATCH] cgraphclones.c (localize_profile): New function.
> 
> * cgraphclones.c (localize_profile): New function.
> (cgraph_node::create_clone): Use it for partial profiles.
> * common.opt (fprofile-partial-training): New flag.
> * doc/invoke.texi (-fprofile-partial-training): Document.
> * ipa-cp.c (update_profiling_info): For partial profiles do not
> set function profile to zero.
> * profile.c (compute_branch_probabilities): With partial profile
> watch if edge count is zero and turn all probabilities to guessed.
> (compute_branch_probabilities): For partial profiles do not apply
> profile when entry count is zero.
> * tree-profile.c (tree_profiling): Only do 
> value_profile_transformations
> when profile is read.
> 
> My question is:

Hello.

Why would anybody backport such change to unsupported code-stream of GCC 8?
Generally speaking, I discourage doing that.

Martin

> 
> Can this patch be back ported to GCC8 easily? I am wondering any significant
> Change between GCC8 and GCC10 that might make the backporting very hard?
> 
> Thanks a lot for your help.
> 
> Qing



[PATCH v2] RISC-V: Handle multi-lib path correctly for linux

2023-05-04 Thread Kito Cheng via Gcc-patches
RISC-V Linux encodes the ABI into the path, so in theory we can only use that
to select multi-lib paths; there is no way to use different multi-lib paths for
`rv32i/ilp32` and `rv32ima/ilp32`, so we map both to `/lib/ilp32`.

It's hard to do that with GCC's builtin multi-lib selection mechanism: the
builtin mechanism compares option strings and then enumerates all possible reuse
rules at build time. That is impractical for RISC-V, since we have a huge
number of combinations of `-march`, so implementing a customized multi-lib
selection becomes the only solution.

After this patch, the multi-lib configuration is only used to determine which
ISA should be used when compiling the corresponding ABI variant.

During the multi-lib selection stage, only consider -mabi as the only key to
select the multi-lib path.

gcc/ChangeLog:

* common/config/riscv/riscv-common.cc (riscv_select_multilib_by_abi): 
New.
(riscv_select_multilib): New.
(riscv_compute_multilib): Extract logic to riscv_select_multilib and
also handle select_by_abi.
* config/riscv/elf.h (RISCV_USE_CUSTOMISED_MULTI_LIB): Change it
to select_by_abi_arch_cmodel from 1.
* config/riscv/linux.h (RISCV_USE_CUSTOMISED_MULTI_LIB): Define.
* config/riscv/riscv-opts.h (enum riscv_multilib_select_kind): New.

---
V2 Changes:
- Fix some trivial issue cause I forgot to squash patches...

This patch is also planned for backport to GCC 13 after landing on trunk.

---
 gcc/common/config/riscv/riscv-common.cc | 128 
 gcc/config/riscv/elf.h  |   2 +-
 gcc/config/riscv/linux.h|   2 +
 gcc/config/riscv/riscv-opts.h   |   9 ++
 4 files changed, 100 insertions(+), 41 deletions(-)

diff --git a/gcc/common/config/riscv/riscv-common.cc 
b/gcc/common/config/riscv/riscv-common.cc
index 309a52def75f..57a2a279ef53 100644
--- a/gcc/common/config/riscv/riscv-common.cc
+++ b/gcc/common/config/riscv/riscv-common.cc
@@ -1441,9 +1441,6 @@ riscv_multi_lib_check (int argc ATTRIBUTE_UNUSED,
   return "";
 }
 
-/* We only override this in bare-metal toolchain.  */
-#ifdef RISCV_USE_CUSTOMISED_MULTI_LIB
-
 /* Find last switch with the prefix, options are take last one in general,
return NULL if not found, and return the option value if found, it could
return empty string if the option has no value.  */
@@ -1597,6 +1594,68 @@ riscv_check_conds (
   return match_score + ok_count * 100;
 }
 
+static const char *
+riscv_select_multilib_by_abi (
+  const std::string _current_arch_str,
+  const std::string _current_abi_str,
+  const riscv_subset_list *subset_list, const struct switchstr *switches,
+  int n_switches, const std::vector _infos)
+{
+  for (size_t i = 0; i < multilib_infos.size (); ++i)
+if (riscv_current_abi_str == multilib_infos[i].abi_str)
+  return xstrdup (multilib_infos[i].path.c_str ());
+
+  return NULL;
+}
+
+static const char *
+riscv_select_multilib (
+  const std::string _current_arch_str,
+  const std::string _current_abi_str,
+  const riscv_subset_list *subset_list, const struct switchstr *switches,
+  int n_switches, const std::vector _infos)
+{
+  int match_score = 0;
+  int max_match_score = 0;
+  int best_match_multi_lib = -1;
+  /* Try to decision which set we should used.  */
+  /* We have 3 level decision tree here, ABI, check input arch/ABI must
+ be superset of multi-lib arch, and other rest option checking.  */
+  for (size_t i = 0; i < multilib_infos.size (); ++i)
+{
+  /* Check ABI is same first.  */
+  if (riscv_current_abi_str != multilib_infos[i].abi_str)
+   continue;
+
+  /* Found a potential compatible multi-lib setting!
+Calculate the match score.  */
+  match_score = subset_list->match_score (multilib_infos[i].subset_list);
+
+  /* Checking other cond in the multi-lib setting.  */
+  match_score = riscv_check_conds (switches, n_switches, match_score,
+  multilib_infos[i].conds);
+
+  /* Record highest match score multi-lib setting.  */
+  if (match_score > max_match_score)
+   {
+ best_match_multi_lib = i;
+ max_match_score = match_score;
+   }
+}
+
+  if (best_match_multi_lib == -1)
+{
+  riscv_no_matched_multi_lib = true;
+  return NULL;
+}
+  else
+return xstrdup (multilib_infos[best_match_multi_lib].path.c_str ());
+}
+
+#ifndef RISCV_USE_CUSTOMISED_MULTI_LIB
+#define RISCV_USE_CUSTOMISED_MULTI_LIB select_by_builtin
+#endif
+
 /* Implement TARGET_COMPUTE_MULTILIB.  */
 static const char *
 riscv_compute_multilib (
@@ -1609,6 +1668,11 @@ riscv_compute_multilib (
   const char *multilib_exclusions ATTRIBUTE_UNUSED,
   const char *multilib_reuse ATTRIBUTE_UNUSED)
 {
+  enum riscv_multilib_select_kind select_kind = RISCV_USE_CUSTOMISED_MULTI_LIB;
+
+  if (select_kind == select_by_builtin)
+return multilib_dir;
+
   const char *p;
   const char *this_path;
   size_t 

Re: libgomp C++ testsuite: Use 'lang_include_flags' instead of 'libstdcxx_includes' (was: [PATCH] libgomp: Add openacc_{cuda,cublas,cudart} effective targets and use them in openacc testsuite)

2023-05-04 Thread Jakub Jelinek via Gcc-patches
On Thu, May 04, 2023 at 09:54:03AM +0200, Thomas Schwinge wrote:
>   libgomp/
>   * testsuite/libgomp.c++/c++.exp: Use 'lang_include_flags' instead
>   of 'libstdcxx_includes'.
>   * testsuite/libgomp.oacc-c++/c++.exp: Likewise.

Ok, thanks.

Jakub



libgomp C++ testsuite: Use 'lang_include_flags' instead of 'libstdcxx_includes' (was: [PATCH] libgomp: Add openacc_{cuda,cublas,cudart} effective targets and use them in openacc testsuite)

2023-05-04 Thread Thomas Schwinge
Hi!

On 2021-05-26T14:06:53+0200, Jakub Jelinek via Gcc-patches 
 wrote:
> When gcc is configured for nvptx offloading [...]
> and full CUDA isn't installed, many libgomp.oacc-*/* tests fail,
> some of them because cuda.h header can't be found, others because
> the tests can't be linked against -lcuda, -lcudart or -lcublas.
> I usually only have akmod-nvidia and xorg-x11-drv-nvidia-cuda rpms
> installed, so libcuda.so.1 can be dlopened and the offloading works,
> but linking against those libraries isn't possible nor are the
> headers around [...]

> The following patch adds 3 new effective targets and uses them in tests that
> needs those.

> --- libgomp/testsuite/lib/libgomp.exp.jj  2021-05-25 13:43:02.800121273 
> +0200
> +++ libgomp/testsuite/lib/libgomp.exp 2021-05-26 12:18:56.562556244 +0200

> +# Return 1 if cuda.h and -lcuda are available.
> +
> +proc check_effective_target_openacc_cuda { } {
> +return [check_no_compiler_messages openacc_cuda executable {
> +#include <cuda.h>
> +[...]
> +} } "-lcuda" ]
> +}
> +
> +# Return 1 if cublas_v2.h and -lcublas are available.
> +
> +proc check_effective_target_openacc_cublas { } {
> +return [check_no_compiler_messages openacc_cublas executable {
> +#include <cuda.h>
> +#include <cublas_v2.h>
> +[...]
> +} } "-lcuda -lcublas" ]
> +}
> +
> +# Return 1 if cuda_runtime_api.h and -lcudart are available.
> +
> +proc check_effective_target_openacc_cudart { } {
> +return [check_no_compiler_messages openacc_cudart executable {
> +#include <cuda.h>
> +#include <cuda_runtime_api.h>
> +[...]
> +} } "-lcuda -lcudart" ]
> +}

OK to push the attached
"libgomp C++ testsuite: Use 'lang_include_flags' instead of 
'libstdcxx_includes'"?


This does not adjust the 'libstdcxx_includes' usage in libitm and libvtv
testsuites -- maybe those should also get 'lang_include_flags' ported
(separately)?


Grüße
 Thomas


-
Siemens Electronic Design Automation GmbH; Anschrift: Arnulfstraße 201, 80634 
München; Gesellschaft mit beschränkter Haftung; Geschäftsführer: Thomas 
Heurung, Frank Thürauf; Sitz der Gesellschaft: München; Registergericht 
München, HRB 106955
>From 9bb4ffee6932ee5f917344535026f75c3eadc093 Mon Sep 17 00:00:00 2001
From: Thomas Schwinge 
Date: Thu, 4 May 2023 09:07:35 +0200
Subject: [PATCH] libgomp C++ testsuite: Use 'lang_include_flags' instead of
 'libstdcxx_includes'

With nvptx offloading configured, and supported, and CUDA available:

$ make check-target-libgomp RUNTESTFLAGS="--all c.exp=context-1.c c++.exp=context-1.c"
[...]
Running [...]/libgomp.oacc-c/c.exp ...
PASS: libgomp.oacc-c/../libgomp.oacc-c-c++-common/context-1.c -DACC_DEVICE_TYPE_nvidia=1 -DACC_MEM_SHARED=0 -foffload=nvptx-none  -O0  (test for excess errors)
PASS: libgomp.oacc-c/../libgomp.oacc-c-c++-common/context-1.c -DACC_DEVICE_TYPE_nvidia=1 -DACC_MEM_SHARED=0 -foffload=nvptx-none  -O0  execution test
PASS: libgomp.oacc-c/../libgomp.oacc-c-c++-common/context-1.c -DACC_DEVICE_TYPE_nvidia=1 -DACC_MEM_SHARED=0 -foffload=nvptx-none  -O2  (test for excess errors)
PASS: libgomp.oacc-c/../libgomp.oacc-c-c++-common/context-1.c -DACC_DEVICE_TYPE_nvidia=1 -DACC_MEM_SHARED=0 -foffload=nvptx-none  -O2  execution test
UNSUPPORTED: libgomp.oacc-c/../libgomp.oacc-c-c++-common/context-1.c -DACC_DEVICE_TYPE_host=1 -DACC_MEM_SHARED=1 -foffload=disable  -O2
Running [...]/libgomp.oacc-c++/c++.exp ...
PASS: libgomp.oacc-c++/../libgomp.oacc-c-c++-common/context-1.c -DACC_DEVICE_TYPE_nvidia=1 -DACC_MEM_SHARED=0 -foffload=nvptx-none  -O0  (test for excess errors)
PASS: libgomp.oacc-c++/../libgomp.oacc-c-c++-common/context-1.c -DACC_DEVICE_TYPE_nvidia=1 -DACC_MEM_SHARED=0 -foffload=nvptx-none  -O0  execution test
PASS: libgomp.oacc-c++/../libgomp.oacc-c-c++-common/context-1.c -DACC_DEVICE_TYPE_nvidia=1 -DACC_MEM_SHARED=0 -foffload=nvptx-none  -O2  (test for excess errors)
PASS: libgomp.oacc-c++/../libgomp.oacc-c-c++-common/context-1.c -DACC_DEVICE_TYPE_nvidia=1 -DACC_MEM_SHARED=0 -foffload=nvptx-none  -O2  execution test
UNSUPPORTED: libgomp.oacc-c++/../libgomp.oacc-c-c++-common/context-1.c -DACC_DEVICE_TYPE_host=1 -DACC_MEM_SHARED=1 -foffload=disable  -O2
[...]

..., but for 'c++.exp=context-1.c' alone, we currently get all-UNSUPPORTED:

$ make check-target-libgomp RUNTESTFLAGS="--all c++.exp=context-1.c"
[...]
Running [...]/libgomp.oacc-c++/c++.exp ...
UNSUPPORTED: libgomp.oacc-c++/../libgomp.oacc-c-c++-common/context-1.c -DACC_DEVICE_TYPE_nvidia=1 -DACC_MEM_SHARED=0 -foffload=nvptx-none  -O0
UNSUPPORTED: libgomp.oacc-c++/../libgomp.oacc-c-c++-common/context-1.c -DACC_DEVICE_TYPE_nvidia=1 -DACC_MEM_SHARED=0 -foffload=nvptx-none  -O2
UNSUPPORTED: libgomp.oacc-c++/../libgomp.oacc-c-c++-common/context-1.c -DACC_DEVICE_TYPE_host=1 -DACC_MEM_SHARED=1 -foffload=disable  -O2
[...]

That is, if 'c.exp' executes first, it does successfully evaluate
'dg-require-effective-target openacc_cublas' -- and does cache this result (so
it isn't reevaluated for 'c++.exp').  

[PATCH] tree-optimization/109724 - new testcase

2023-05-04 Thread Richard Biener via Gcc-patches
The following adds a testcase for PR109724 which was caused by
backporting r13-2375-gbe1b42de9c151d and fixed by r11-199-g2b42509f8b7bdf.

Tested on x86_64-unknown-linux-gnu, pushed.

PR tree-optimization/109724
* g++.dg/torture/pr109724.C: New testcase.
---
 gcc/testsuite/g++.dg/torture/pr109724.C | 32 +
 1 file changed, 32 insertions(+)
 create mode 100644 gcc/testsuite/g++.dg/torture/pr109724.C

diff --git a/gcc/testsuite/g++.dg/torture/pr109724.C 
b/gcc/testsuite/g++.dg/torture/pr109724.C
new file mode 100644
index 000..4a88e1519d0
--- /dev/null
+++ b/gcc/testsuite/g++.dg/torture/pr109724.C
@@ -0,0 +1,32 @@
+// { dg-do compile }
+
+double &SkTMax(double &a, double &b) { return a ?: b; }
+struct SkDPoint {
+  double fX;
+};
+struct SkDCubic {
+  SkDPoint &operator[](int n) { return fPts[n]; }
+  SkDPoint fPts[4];
+};
+struct SkDCurve {
+  SkDCubic fCubic;
+  SkDPoint &operator[](int n) { return fCubic[n]; }
+};
+struct SkDCurveSweep {
+  SkDCurve fCurve;
+};
+int endsIntersect_rPts;
+double endsIntersect_maxX;
+struct SkOpAngle {
+  void endsIntersect();
+  SkDCurveSweep fPart;
+} endsIntersect_rh;
+void SkOpAngle::endsIntersect() {
+  for (int index = 0;; ++index) {
+SkDCurve &curve = index ? endsIntersect_rh.fPart.fCurve : fPart.fCurve;
+for (int idx2 = 0; endsIntersect_rPts; ++idx2) {
+  SkDPoint &__trans_tmp_1 = curve[idx2];
+  endsIntersect_maxX = SkTMax(endsIntersect_maxX, __trans_tmp_1.fX);
+}
+  }
+}
-- 
2.35.3


[PATCH] RISC-V: Handle multi-lib path correctly for linux

2023-05-04 Thread Kito Cheng via Gcc-patches
RISC-V Linux encodes the ABI into the library path, so in theory the ABI is
the only thing we can use to select a multi-lib path; there is no way to use
different multi-lib paths for `rv32i/ilp32` and `rv32ima/ilp32`, so we map
both to `/lib/ilp32`.

It's hard to do that with GCC's builtin multi-lib selection mechanism: the
builtin mechanism compares option strings and enumerates all possible reuse
rules at build time. That is infeasible for RISC-V, which has a huge number
of `-march` combinations, so implementing a customized multi-lib selection
is the only solution.

After this patch, the multi-lib configuration is only used to determine which
ISA should be used when compiling the corresponding ABI variant.

During the multi-lib selection stage, -mabi is the only key considered when
selecting the multi-lib path.

gcc/ChangeLog:

* common/config/riscv/riscv-common.cc (riscv_select_multilib_by_abi): 
New.
(riscv_select_multilib): New.
(riscv_compute_multilib): Extract logic to riscv_select_multilib and
also handle select_by_abi.
* config/riscv/elf.h (RISCV_USE_CUSTOMISED_MULTI_LIB): Change it
to select_by_abi_arch_cmodel from 1.
* config/riscv/linux.h (RISCV_USE_CUSTOMISED_MULTI_LIB): Define.
* config/riscv/riscv-opts.h (enum riscv_multilib_select_kind): New.

---

I also plan to backport this patch to GCC 13 after it lands on trunk.


---
 gcc/common/config/riscv/riscv-common.cc | 128 
 gcc/config/riscv/elf.h  |   2 +-
 gcc/config/riscv/linux.h|   2 +
 gcc/config/riscv/riscv-opts.h   |   9 ++
 4 files changed, 100 insertions(+), 41 deletions(-)

diff --git a/gcc/common/config/riscv/riscv-common.cc 
b/gcc/common/config/riscv/riscv-common.cc
index 309a52def75f..d7c4e7d97133 100644
--- a/gcc/common/config/riscv/riscv-common.cc
+++ b/gcc/common/config/riscv/riscv-common.cc
@@ -1441,9 +1441,6 @@ riscv_multi_lib_check (int argc ATTRIBUTE_UNUSED,
   return "";
 }
 
-/* We only override this in bare-metal toolchain.  */
-#ifdef RISCV_USE_CUSTOMISED_MULTI_LIB
-
 /* Find last switch with the prefix, options are take last one in general,
return NULL if not found, and return the option value if found, it could
return empty string if the option has no value.  */
@@ -1597,6 +1594,68 @@ riscv_check_conds (
   return match_score + ok_count * 100;
 }
 
+static const char *
+riscv_select_multilib_by_abi (
+  const std::string &riscv_current_arch_str,
+  const std::string &riscv_current_abi_str,
+  const riscv_subset_list *subset_list, const struct switchstr *switches,
+  int n_switches, const std::vector<riscv_multi_lib_info_t> &multilib_infos)
+{
+  for (size_t i = 0; i < multilib_infos.size (); ++i)
+if (riscv_current_abi_str == multilib_infos[i].abi_str)
+  return xstrdup (multilib_infos[i].path.c_str ());
+
+  return NULL;
+}
+
+static const char *
+riscv_select_multilib (
+  const std::string &riscv_current_arch_str,
+  const std::string &riscv_current_abi_str,
+  const riscv_subset_list *subset_list, const struct switchstr *switches,
+  int n_switches, const std::vector<riscv_multi_lib_info_t> &multilib_infos)
+{
+  int match_score = 0;
+  int max_match_score = 0;
+  int best_match_multi_lib = -1;
+  /* Try to decision which set we should used.  */
+  /* We have 3 level decision tree here, ABI, check input arch/ABI must
+ be superset of multi-lib arch, and other rest option checking.  */
+  for (size_t i = 0; i < multilib_infos.size (); ++i)
+{
+  /* Check ABI is same first.  */
+  if (riscv_current_abi_str != multilib_infos[i].abi_str)
+   continue;
+
+  /* Found a potential compatible multi-lib setting!
+Calculate the match score.  */
+  match_score = subset_list->match_score (multilib_infos[i].subset_list);
+
+  /* Checking other cond in the multi-lib setting.  */
+  match_score = riscv_check_conds (switches, n_switches, match_score,
+  multilib_infos[i].conds);
+
+  /* Record highest match score multi-lib setting.  */
+  if (match_score > max_match_score)
+   {
+ best_match_multi_lib = i;
+ max_match_score = match_score;
+   }
+}
+
+  if (best_match_multi_lib == -1)
+{
+  riscv_no_matched_multi_lib = true;
+  return NULL;
+}
+  else
+return xstrdup (multilib_infos[best_match_multi_lib].path.c_str ());
+}
+
+#ifndef RISCV_USE_CUSTOMISED_MULTI_LIB
+#define RISCV_USE_CUSTOMISED_MULTI_LIB select_by_builtin
+#endif
+
 /* Implement TARGET_COMPUTE_MULTILIB.  */
 static const char *
 riscv_compute_multilib (
@@ -1609,6 +1668,11 @@ riscv_compute_multilib (
   const char *multilib_exclusions ATTRIBUTE_UNUSED,
   const char *multilib_reuse ATTRIBUTE_UNUSED)
 {
+  enum riscv_multilib_select_kind select_kind = RISCV_USE_CUSTOMISED_MULTI_LIB;
+
+  if (riscv_multilib_select_kind == select_by_builtin)
+return multilib_dir;
+
   const char *p;
   const char *this_path;
   size_t this_path_len;
@@ -1672,7 +1736,13 @@ riscv_compute_multilib (
 

Re: Re: GCC 12.2.1 Status Report (2023-05-02), branch frozen for release

2023-05-04 Thread Martin Uecker via Gcc-patches


Can I please get permission for fixing this ICE?

https://gcc.gnu.org/pipermail/gcc-patches/2023-April/616221.html



Martin