Re: [committed] RISC-V: Remove redundant ABI test

2023-09-12 Thread Kito Cheng via Gcc-patches
lgtm

On Wed, Sep 13, 2023 at 11:23 AM Juzhe-Zhong  wrote:
>
> We only support and report the warning for RVV types.
>
> We don't report the warning for GNU vectors.
> So this testcase check is incorrect and the FAIL is bogus.
>
> Remove it and commit it.
>
> gcc/testsuite/ChangeLog:
>
> * gcc.target/riscv/rvv/base/vector-abi-9.c: Removed.
>
> ---
>  .../gcc.target/riscv/rvv/base/vector-abi-9.c | 16 
>  1 file changed, 16 deletions(-)
>  delete mode 100644 gcc/testsuite/gcc.target/riscv/rvv/base/vector-abi-9.c
>
> diff --git a/gcc/testsuite/gcc.target/riscv/rvv/base/vector-abi-9.c 
> b/gcc/testsuite/gcc.target/riscv/rvv/base/vector-abi-9.c
> deleted file mode 100644
> index b5f130f0caf..000
> --- a/gcc/testsuite/gcc.target/riscv/rvv/base/vector-abi-9.c
> +++ /dev/null
> @@ -1,16 +0,0 @@
> -/* { dg-do compile } */
> -/* { dg-options "-march=rv64gcv -mabi=lp64d 
> --param=riscv-autovec-preference=fixed-vlmax" } */
> -
> -#include "riscv_vector.h"
> -
> -typedef int v4si __attribute__ ((vector_size (16)));
> -
> -v4si
> -fun (v4si a) {  return a; }  /* { dg-warning "the vector type" } */
> -
> -void
> -bar ()
> -{
> -  v4si a;
> -  fun (a);
> -}
> --
> 2.36.3
>
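For reference, the distinction described above can be sketched in a few lines.
Per the patch description, only functions that take or return RVV types from
riscv_vector.h are subject to the vector-ABI diagnostic, while plain GNU vector
types are not; the vint32m1_t case below is an illustration of that split only
(it is not taken from the removed test, and the exact diagnostic wording is not
asserted here):

  /* Sketch only; same options as the removed test:
     -march=rv64gcv -mabi=lp64d --param=riscv-autovec-preference=fixed-vlmax  */
  #include "riscv_vector.h"

  typedef int v4si __attribute__ ((vector_size (16)));

  v4si
  gnu_vec (v4si a) { return a; }        /* GNU vector type: no warning reported.  */

  vint32m1_t
  rvv_vec (vint32m1_t a) { return a; }  /* RVV type: the case the ABI warning covers.  */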


[PATCH v4 12/22] LoongArch: Add tests for ASX builtin functions.

2023-09-12 Thread Xiaolong Chen
gcc/testsuite/ChangeLog:

* gcc.target/loongarch/vector/lasx/lasx-builtin.c: New test.
---
 .../loongarch/vector/lasx/lasx-builtin.c  | 1509 +
 1 file changed, 1509 insertions(+)
 create mode 100644 
gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-builtin.c

diff --git a/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-builtin.c 
b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-builtin.c
new file mode 100644
index 000..ecb8d639bdd
--- /dev/null
+++ b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-builtin.c
@@ -0,0 +1,1509 @@
+/* Test builtins for LOONGARCH LASX ASE instructions */
+/* { dg-do compile } */
+/* { dg-options "-mlasx" } */
+/* { dg-final { scan-assembler-times "lasx_xvsll_b:.*xvsll\\.b.*lasx_xvsll_b" 
1 } } */
+/* { dg-final { scan-assembler-times "lasx_xvsll_h:.*xvsll\\.h.*lasx_xvsll_h" 
1 } } */
+/* { dg-final { scan-assembler-times "lasx_xvsll_w:.*xvsll\\.w.*lasx_xvsll_w" 
1 } } */
+/* { dg-final { scan-assembler-times "lasx_xvsll_d:.*xvsll\\.d.*lasx_xvsll_d" 
1 } } */
+/* { dg-final { scan-assembler-times 
"lasx_xvslli_b:.*xvslli\\.b.*lasx_xvslli_b" 1 } } */
+/* { dg-final { scan-assembler-times 
"lasx_xvslli_h:.*xvslli\\.h.*lasx_xvslli_h" 1 } } */
+/* { dg-final { scan-assembler-times 
"lasx_xvslli_w:.*xvslli\\.w.*lasx_xvslli_w" 1 } } */
+/* { dg-final { scan-assembler-times 
"lasx_xvslli_d:.*xvslli\\.d.*lasx_xvslli_d" 1 } } */
+/* { dg-final { scan-assembler-times "lasx_xvsra_b:.*xvsra\\.b.*lasx_xvsra_b" 
1 } } */
+/* { dg-final { scan-assembler-times "lasx_xvsra_h:.*xvsra\\.h.*lasx_xvsra_h" 
1 } } */
+/* { dg-final { scan-assembler-times "lasx_xvsra_w:.*xvsra\\.w.*lasx_xvsra_w" 
1 } } */
+/* { dg-final { scan-assembler-times "lasx_xvsra_d:.*xvsra\\.d.*lasx_xvsra_d" 
1 } } */
+/* { dg-final { scan-assembler-times 
"lasx_xvsrai_b:.*xvsrai\\.b.*lasx_xvsrai_b" 1 } } */
+/* { dg-final { scan-assembler-times 
"lasx_xvsrai_h:.*xvsrai\\.h.*lasx_xvsrai_h" 1 } } */
+/* { dg-final { scan-assembler-times 
"lasx_xvsrai_w:.*xvsrai\\.w.*lasx_xvsrai_w" 1 } } */
+/* { dg-final { scan-assembler-times 
"lasx_xvsrai_d:.*xvsrai\\.d.*lasx_xvsrai_d" 1 } } */
+/* { dg-final { scan-assembler-times 
"lasx_xvsrar_b:.*xvsrar\\.b.*lasx_xvsrar_b" 1 } } */
+/* { dg-final { scan-assembler-times 
"lasx_xvsrar_h:.*xvsrar\\.h.*lasx_xvsrar_h" 1 } } */
+/* { dg-final { scan-assembler-times 
"lasx_xvsrar_w:.*xvsrar\\.w.*lasx_xvsrar_w" 1 } } */
+/* { dg-final { scan-assembler-times 
"lasx_xvsrar_d:.*xvsrar\\.d.*lasx_xvsrar_d" 1 } } */
+/* { dg-final { scan-assembler-times 
"lasx_xvsrari_b:.*xvsrari\\.b.*lasx_xvsrari_b" 1 } } */
+/* { dg-final { scan-assembler-times 
"lasx_xvsrari_h:.*xvsrari\\.h.*lasx_xvsrari_h" 1 } } */
+/* { dg-final { scan-assembler-times 
"lasx_xvsrari_w:.*xvsrari\\.w.*lasx_xvsrari_w" 1 } } */
+/* { dg-final { scan-assembler-times 
"lasx_xvsrari_d:.*xvsrari\\.d.*lasx_xvsrari_d" 1 } } */
+/* { dg-final { scan-assembler-times "lasx_xvsrl_b:.*xvsrl\\.b.*lasx_xvsrl_b" 
1 } } */
+/* { dg-final { scan-assembler-times "lasx_xvsrl_h:.*xvsrl\\.h.*lasx_xvsrl_h" 
1 } } */
+/* { dg-final { scan-assembler-times "lasx_xvsrl_w:.*xvsrl\\.w.*lasx_xvsrl_w" 
1 } } */
+/* { dg-final { scan-assembler-times "lasx_xvsrl_d:.*xvsrl\\.d.*lasx_xvsrl_d" 
1 } } */
+/* { dg-final { scan-assembler-times 
"lasx_xvsrli_b:.*xvsrli\\.b.*lasx_xvsrli_b" 1 } } */
+/* { dg-final { scan-assembler-times 
"lasx_xvsrli_h:.*xvsrli\\.h.*lasx_xvsrli_h" 1 } } */
+/* { dg-final { scan-assembler-times 
"lasx_xvsrli_w:.*xvsrli\\.w.*lasx_xvsrli_w" 1 } } */
+/* { dg-final { scan-assembler-times 
"lasx_xvsrli_d:.*xvsrli\\.d.*lasx_xvsrli_d" 1 } } */
+/* { dg-final { scan-assembler-times 
"lasx_xvsrlr_b:.*xvsrlr\\.b.*lasx_xvsrlr_b" 1 } } */
+/* { dg-final { scan-assembler-times 
"lasx_xvsrlr_h:.*xvsrlr\\.h.*lasx_xvsrlr_h" 1 } } */
+/* { dg-final { scan-assembler-times 
"lasx_xvsrlr_w:.*xvsrlr\\.w.*lasx_xvsrlr_w" 1 } } */
+/* { dg-final { scan-assembler-times 
"lasx_xvsrlr_d:.*xvsrlr\\.d.*lasx_xvsrlr_d" 1 } } */
+/* { dg-final { scan-assembler-times 
"lasx_xvsrlri_b:.*xvsrlri\\.b.*lasx_xvsrlri_b" 1 } } */
+/* { dg-final { scan-assembler-times 
"lasx_xvsrlri_h:.*xvsrlri\\.h.*lasx_xvsrlri_h" 1 } } */
+/* { dg-final { scan-assembler-times 
"lasx_xvsrlri_w:.*xvsrlri\\.w.*lasx_xvsrlri_w" 1 } } */
+/* { dg-final { scan-assembler-times 
"lasx_xvsrlri_d:.*xvsrlri\\.d.*lasx_xvsrlri_d" 1 } } */
+/* { dg-final { scan-assembler-times 
"lasx_xvbitclr_b:.*xvbitclr\\.b.*lasx_xvbitclr_b" 1 } } */
+/* { dg-final { scan-assembler-times 
"lasx_xvbitclr_h:.*xvbitclr\\.h.*lasx_xvbitclr_h" 1 } } */
+/* { dg-final { scan-assembler-times 
"lasx_xvbitclr_w:.*xvbitclr\\.w.*lasx_xvbitclr_w" 1 } } */
+/* { dg-final { scan-assembler-times 
"lasx_xvbitclr_d:.*xvbitclr\\.d.*lasx_xvbitclr_d" 1 } } */
+/* { dg-final { scan-assembler-times 
"lasx_xvbitclri_b:.*xvbitclri\\.b.*lasx_xvbitclri_b" 1 } } */
+/* { dg-final { scan-assembler-times 
"lasx_xvbitclri_h:.*xvbitclri\\.h.*lasx_xvbitclri_h" 1 } } */
+/* { dg-final { scan-assembler-times 

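The scan patterns above anchor on a function label, then the expected mnemonic,
then the function name again (as re-emitted by the closing .size directive),
which suggests each builtin is exercised through a small wrapper of the same
name. A hedged sketch of what one such wrapper presumably looks like follows;
the <lasxintrin.h> header name and the wrapper shape are assumptions, not taken
from the (truncated) posted file:

  /* { dg-do compile } */
  /* { dg-options "-mlasx" } */
  #include <lasxintrin.h>

  /* Assembly for this function contains the "lasx_xvsll_b:" label, the
     xvsll.b instruction, and a trailing ".size lasx_xvsll_b", which is
     what a pattern such as "lasx_xvsll_b:.*xvsll\.b.*lasx_xvsll_b"
     would match.  */
  __m256i
  lasx_xvsll_b (__m256i a, __m256i b)
  {
    return __lasx_xvsll_b (a, b);
  }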
[PATCH v4 13/22] LoongArch: Add tests for ASX xvldrepl/xvstelm instruction generation.

2023-09-12 Thread Xiaolong Chen
gcc/testsuite/ChangeLog:

* gcc.target/loongarch/vector/lasx/lasx-xvldrepl.c: New test.
* gcc.target/loongarch/vector/lasx/lasx-xvstelm.c: New test.
---
 .../loongarch/vector/lasx/lasx-xvldrepl.c| 16 
 .../loongarch/vector/lasx/lasx-xvstelm.c | 14 ++
 2 files changed, 30 insertions(+)
 create mode 100644 
gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvldrepl.c
 create mode 100644 
gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvstelm.c

diff --git a/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvldrepl.c 
b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvldrepl.c
new file mode 100644
index 000..10556795119
--- /dev/null
+++ b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvldrepl.c
@@ -0,0 +1,16 @@
+/* { dg-do compile } */
+/* { dg-options "-O3 -mlasx" } */
+/* { dg-final { scan-assembler-times "xvldrepl.w" 2} } */
+
+#define N 258
+
+float a[N], b[N], c[N];
+
+void
+test ()
+{
+  for (int i = 0; i < 256; i++)
+{
+  a[i] = c[0] * b[i] + c[1];
+}
+}
diff --git a/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvstelm.c 
b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvstelm.c
new file mode 100644
index 000..1a7b0e86f8b
--- /dev/null
+++ b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvstelm.c
@@ -0,0 +1,14 @@
+/* { dg-do compile } */
+/* { dg-options "-O3 -mlasx" } */
+/* { dg-final { scan-assembler-times "xvstelm.w" 8} } */
+
+#define LEN 256
+
+float a[LEN], b[LEN], c[LEN];
+
+void
+test ()
+{
+  for (int i = 0; i < LEN; i += 2)
+a[i] = b[i] + c[i];
+}
-- 
2.20.1



[PATCH v4 07/22] LoongArch: Add tests for ASX vector xvand/xvandi/xvandn/xvor/xvori/ xvnor/xvnori/xvxor/xvxori instructions.

2023-09-12 Thread Xiaolong Chen
gcc/testsuite/ChangeLog:

* gcc.target/loongarch/vector/lasx/lasx-xvand.c: New test.
* gcc.target/loongarch/vector/lasx/lasx-xvandi.c: New test.
* gcc.target/loongarch/vector/lasx/lasx-xvandn.c: New test.
* gcc.target/loongarch/vector/lasx/lasx-xvnor.c: New test.
* gcc.target/loongarch/vector/lasx/lasx-xvnori.c: New test.
* gcc.target/loongarch/vector/lasx/lasx-xvor.c: New test.
* gcc.target/loongarch/vector/lasx/lasx-xvori.c: New test.
* gcc.target/loongarch/vector/lasx/lasx-xvorn.c: New test.
* gcc.target/loongarch/vector/lasx/lasx-xvxor.c: New test.
* gcc.target/loongarch/vector/lasx/lasx-xvxori.c: New test.
---
 .../loongarch/vector/lasx/lasx-xvand.c| 155 +++
 .../loongarch/vector/lasx/lasx-xvandi.c   | 196 ++
 .../loongarch/vector/lasx/lasx-xvandn.c   | 125 +
 .../loongarch/vector/lasx/lasx-xvnor.c| 170 
 .../loongarch/vector/lasx/lasx-xvnori.c   | 152 +++
 .../loongarch/vector/lasx/lasx-xvor.c | 215 +++
 .../loongarch/vector/lasx/lasx-xvori.c| 141 ++
 .../loongarch/vector/lasx/lasx-xvorn.c| 245 ++
 .../loongarch/vector/lasx/lasx-xvxor.c| 185 +
 .../loongarch/vector/lasx/lasx-xvxori.c   | 163 
 10 files changed, 1747 insertions(+)
 create mode 100644 gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvand.c
 create mode 100644 gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvandi.c
 create mode 100644 gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvandn.c
 create mode 100644 gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvnor.c
 create mode 100644 gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvnori.c
 create mode 100644 gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvor.c
 create mode 100644 gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvori.c
 create mode 100644 gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvorn.c
 create mode 100644 gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvxor.c
 create mode 100644 gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvxori.c

diff --git a/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvand.c 
b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvand.c
new file mode 100644
index 000..e485786dd3b
--- /dev/null
+++ b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvand.c
@@ -0,0 +1,155 @@
+/* { dg-do run } */
+/* { dg-options "-mlasx -w -fno-strict-aliasing" } */
+#include "../simd_correctness_check.h"
+#include 
+
+int
+main ()
+{
+  __m256i __m256i_op0, __m256i_op1, __m256i_op2, __m256i_out, __m256i_result;
+  __m256 __m256_op0, __m256_op1, __m256_op2, __m256_out, __m256_result;
+  __m256d __m256d_op0, __m256d_op1, __m256d_op2, __m256d_out, __m256d_result;
+
+  int int_op0, int_op1, int_op2, int_out, int_result, i = 1, fail;
+  long int long_op0, long_op1, long_op2, lont_out, lont_result;
+  long int long_int_out, long_int_result;
+  unsigned int unsigned_int_out, unsigned_int_result;
+  unsigned long int unsigned_long_int_out, unsigned_long_int_result;
+
+  *((unsigned long *)&__m256i_op0[3]) = 0x;
+  *((unsigned long *)&__m256i_op0[2]) = 0x;
+  *((unsigned long *)&__m256i_op0[1]) = 0x;
+  *((unsigned long *)&__m256i_op0[0]) = 0x;
+  *((unsigned long *)&__m256i_op1[3]) = 0x0010001000100010;
+  *((unsigned long *)&__m256i_op1[2]) = 0x0010001000100010;
+  *((unsigned long *)&__m256i_op1[1]) = 0x0010001000100010;
+  *((unsigned long *)&__m256i_op1[0]) = 0x0010001000100010;
+  *((unsigned long *)&__m256i_result[3]) = 0x;
+  *((unsigned long *)&__m256i_result[2]) = 0x;
+  *((unsigned long *)&__m256i_result[1]) = 0x;
+  *((unsigned long *)&__m256i_result[0]) = 0x;
+  __m256i_out = __lasx_xvand_v (__m256i_op0, __m256i_op1);
+  ASSERTEQ_64 (__LINE__, __m256i_result, __m256i_out);
+
+  *((unsigned long *)&__m256i_op0[3]) = 0x;
+  *((unsigned long *)&__m256i_op0[2]) = 0x;
+  *((unsigned long *)&__m256i_op0[1]) = 0x;
+  *((unsigned long *)&__m256i_op0[0]) = 0x;
+  *((unsigned long *)&__m256i_op1[3]) = 0x;
+  *((unsigned long *)&__m256i_op1[2]) = 0x;
+  *((unsigned long *)&__m256i_op1[1]) = 0x;
+  *((unsigned long *)&__m256i_op1[0]) = 0x;
+  *((unsigned long *)&__m256i_result[3]) = 0x;
+  *((unsigned long *)&__m256i_result[2]) = 0x;
+  *((unsigned long *)&__m256i_result[1]) = 0x;
+  *((unsigned long *)&__m256i_result[0]) = 0x;
+  __m256i_out = __lasx_xvand_v (__m256i_op0, __m256i_op1);
+  ASSERTEQ_64 (__LINE__, __m256i_result, __m256i_out);
+
+  *((unsigned long *)&__m256i_op0[3]) = 0x;
+  *((unsigned long 

[PATCH v4 00/22] Added support for ASX vector instructions.

2023-09-12 Thread Xiaolong Chen
  In order to better test the functionality of the vector instructions, the
256-bit test cases are further split according to the function of each
instruction.
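Each of the split tests follows the pattern visible in the diffs later in this
series (for example lasx-xvand.c): the 64-bit lanes of the operands and of the
expected result are written through pointer casts, the builtin under test is
invoked, and the output is checked lane by lane with ASSERTEQ_64 from
simd_correctness_check.h. A minimal sketch of one such check follows; the
operand values are illustrative only and the <lasxintrin.h> header name is
assumed (the include line is truncated in this archive):

  /* { dg-do run } */
  /* { dg-options "-mlasx -w -fno-strict-aliasing" } */
  #include "../simd_correctness_check.h"
  #include <lasxintrin.h>

  int
  main ()
  {
    __m256i op0, op1, out, expected;

    /* Fill the four 64-bit lanes of each operand.  */
    *((unsigned long *)&op0[3]) = 0xffffffffffffffff;
    *((unsigned long *)&op0[2]) = 0xffffffffffffffff;
    *((unsigned long *)&op0[1]) = 0xffffffffffffffff;
    *((unsigned long *)&op0[0]) = 0xffffffffffffffff;
    *((unsigned long *)&op1[3]) = 0x0010001000100010;
    *((unsigned long *)&op1[2]) = 0x0010001000100010;
    *((unsigned long *)&op1[1]) = 0x0010001000100010;
    *((unsigned long *)&op1[0]) = 0x0010001000100010;

    /* AND with an all-ones operand reproduces the other operand.  */
    *((unsigned long *)&expected[3]) = 0x0010001000100010;
    *((unsigned long *)&expected[2]) = 0x0010001000100010;
    *((unsigned long *)&expected[1]) = 0x0010001000100010;
    *((unsigned long *)&expected[0]) = 0x0010001000100010;

    out = __lasx_xvand_v (op0, op1);
    ASSERTEQ_64 (__LINE__, expected, out);

    return 0;
  }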


Xiaolong Chen (22):
  LoongArch: Add tests for ASX vector xvadd/xvadda/xvaddi/xvaddwev/
xvaddwod/xvsadd instructions.
  LoongArch: Add tests for ASX vector xvhadd/xvhaddw/xvmaddwev/xvmaddwod
instructions.
  LoongArch: Add tests for ASX vector subtraction instructions.
  LoongArch: Add tests for ASX vector xvmul/xvmod/xvdiv instructions.
  LoongArch: Add tests for ASX vector xvmax/xvmaxi/xvmin/xvmini
instructions.
  LoongArch: Add tests for ASX vector
xvldi/xvmskgez/xvmskltz/xvmsknz/xvmuh/xvsigncov instructions.
  LoongArch: Add tests for ASX vector xvand/xvandi/xvandn/xvor/xvori/
xvnor/xvnori/xvxor/xvxori instructions.
  LoongArch: Add tests for ASX vector xvsll/xvsrl instructions.
  LoongArch: Add tests for ASX vector xvextl/xvsra/xvsran/xvsrarn
instructions.
  LoongArch: Add tests for ASX vector
xvssran/xvssrani/xvssrarn/xvssrarni/xvssrln/
xvssrlni/xvssrlrn/xvssrlrni instructions.
  LoongArch: Add tests for ASX vector
xvbitclr/xvbitclri/xvbitrev/xvbitrevi/
xvbitsel/xvbitseli/xvbitset/xvbitseti/xvclo/xvclz/xvpcnt
instructions.
  LoongArch: Add tests for ASX builtin functions.
  LoongArch: Add tests for ASX xvldrepl/xvstelm instruction generation.
  LoongArch: Add tests for ASX vector floating-point operation
instruction.
  LoongArch: Add tests for ASX vector floating-point conversion
instruction.
  LoongArch: Add tests for ASX vector comparison and selection
instruction.
  LoongArch: Add tests for ASX vector xvfnmadd/xvfrstp/xvfrstpi/xvhsubw/
xvmsub/xvrotr/xvrotri/xvld/xvst instructions.
  LoongArch: Add tests for ASX vector
xvabsd/xvavg/xvavgr/xvbsll/xvbsrl/xvneg/xvsat instructions.
  LoongArch: Add tests for ASX vector
xvfcmp{caf/ceq/cle/clt/cne/cor/cun} instructions.
  LoongArch: Add tests for ASX vector
xvfcmp{saf/seq/sle/slt/sne/sor/sun} instructions.
  LoongArch: Add tests for ASX vector
xvext2xv/xvexth/xvextins/xvilvh/xvilvl/xvinsgr2vr/
xvinsve0/xvprem/xvpremi instructions.
  LoongArch: Add tests for ASX vector
xvpackev/xvpackod/xvpickev/xvpickod/
xvpickve2gr/xvreplgr2vr/xvreplve/xvreplve0/xvreplvei/xvshuf4i/xvshuf
instructions.

 .../loongarch/vector/lasx/lasx-builtin.c  | 1509 
 .../loongarch/vector/lasx/lasx-xvabsd-1.c |  485 +
 .../loongarch/vector/lasx/lasx-xvabsd-2.c |  650 +++
 .../loongarch/vector/lasx/lasx-xvadd.c|  725 
 .../loongarch/vector/lasx/lasx-xvadda.c   |  785 
 .../loongarch/vector/lasx/lasx-xvaddi.c   |  427 +
 .../loongarch/vector/lasx/lasx-xvaddwev-1.c   |  740 
 .../loongarch/vector/lasx/lasx-xvaddwev-2.c   |  485 +
 .../loongarch/vector/lasx/lasx-xvaddwev-3.c   |  515 ++
 .../loongarch/vector/lasx/lasx-xvaddwod-1.c   |  530 ++
 .../loongarch/vector/lasx/lasx-xvaddwod-2.c   |  560 ++
 .../loongarch/vector/lasx/lasx-xvaddwod-3.c   |  485 +
 .../loongarch/vector/lasx/lasx-xvand.c|  155 ++
 .../loongarch/vector/lasx/lasx-xvandi.c   |  196 ++
 .../loongarch/vector/lasx/lasx-xvandn.c   |  125 ++
 .../loongarch/vector/lasx/lasx-xvavg-1.c  |  680 +++
 .../loongarch/vector/lasx/lasx-xvavg-2.c  |  560 ++
 .../loongarch/vector/lasx/lasx-xvavgr-1.c |  770 
 .../loongarch/vector/lasx/lasx-xvavgr-2.c |  650 +++
 .../loongarch/vector/lasx/lasx-xvbitclr.c |  635 +++
 .../loongarch/vector/lasx/lasx-xvbitclri.c|  515 ++
 .../loongarch/vector/lasx/lasx-xvbitrev.c |  650 +++
 .../loongarch/vector/lasx/lasx-xvbitrevi.c|  317 
 .../loongarch/vector/lasx/lasx-xvbitsel.c |  134 ++
 .../loongarch/vector/lasx/lasx-xvbitseli.c|  185 ++
 .../loongarch/vector/lasx/lasx-xvbitset.c |  620 +++
 .../loongarch/vector/lasx/lasx-xvbitseti.c|  405 +
 .../loongarch/vector/lasx/lasx-xvbsll_v.c |  130 ++
 .../loongarch/vector/lasx/lasx-xvbsrl_v.c |   64 +
 .../loongarch/vector/lasx/lasx-xvclo.c|  449 +
 .../loongarch/vector/lasx/lasx-xvclz.c|  504 ++
 .../loongarch/vector/lasx/lasx-xvdiv-1.c  |  485 +
 .../loongarch/vector/lasx/lasx-xvdiv-2.c  |  500 ++
 .../loongarch/vector/lasx/lasx-xvext2xv-1.c   |  515 ++
 .../loongarch/vector/lasx/lasx-xvext2xv-2.c   |  669 +++
 .../loongarch/vector/lasx/lasx-xvexth-1.c |  350 
 .../loongarch/vector/lasx/lasx-xvexth-2.c |  592 ++
 .../loongarch/vector/lasx/lasx-xvextl-1.c |   86 +
 .../loongarch/vector/lasx/lasx-xvextl-2.c |  163 ++
 .../loongarch/vector/lasx/lasx-xvextrins.c|  515 ++
 .../loongarch/vector/lasx/lasx-xvfadd_d.c |  545 ++
 .../loongarch/vector/lasx/lasx-xvfadd_s.c |  911 ++
 .../loongarch/vector/lasx/lasx-xvfclass_d.c   |  152 ++
 .../loongarch/vector/lasx/lasx-xvfclass_s.c   |   95 +
 

[PATCH v4 23/23] LoongArch: Add tests for SX vector vfmadd/vfnmadd/vld/vst instructions.

2023-09-12 Thread Xiaolong Chen
gcc/testsuite/ChangeLog:

* gcc.target/loongarch/vector/lsx/lsx-vfmadd_d.c: New test.
* gcc.target/loongarch/vector/lsx/lsx-vfmadd_s.c: New test.
* gcc.target/loongarch/vector/lsx/lsx-vfnmadd_d.c: New test.
* gcc.target/loongarch/vector/lsx/lsx-vfnmadd_s.c: New test.
* gcc.target/loongarch/vector/lsx/lsx-vld.c: New test.
* gcc.target/loongarch/vector/lsx/lsx-vst.c: New test.
---
 .../loongarch/vector/lsx/lsx-vfmadd_d.c   | 251 
 .../loongarch/vector/lsx/lsx-vfmadd_s.c   | 381 ++
 .../loongarch/vector/lsx/lsx-vfnmadd_d.c  | 196 +
 .../loongarch/vector/lsx/lsx-vfnmadd_s.c  | 381 ++
 .../gcc.target/loongarch/vector/lsx/lsx-vld.c |  62 +++
 .../gcc.target/loongarch/vector/lsx/lsx-vst.c |  70 
 6 files changed, 1341 insertions(+)
 create mode 100644 gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vfmadd_d.c
 create mode 100644 gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vfmadd_s.c
 create mode 100644 
gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vfnmadd_d.c
 create mode 100644 
gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vfnmadd_s.c
 create mode 100644 gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vld.c
 create mode 100644 gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vst.c

diff --git a/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vfmadd_d.c 
b/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vfmadd_d.c
new file mode 100644
index 000..c5de1ac7ae9
--- /dev/null
+++ b/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vfmadd_d.c
@@ -0,0 +1,251 @@
+/* { dg-do run } */
+/* { dg-options "-mlsx -w -fno-strict-aliasing" } */
+#include "../simd_correctness_check.h"
+#include 
+
+int
+main ()
+{
+  __m128i __m128i_op0, __m128i_op1, __m128i_op2, __m128i_out, __m128i_result;
+  __m128 __m128_op0, __m128_op1, __m128_op2, __m128_out, __m128_result;
+  __m128d __m128d_op0, __m128d_op1, __m128d_op2, __m128d_out, __m128d_result;
+
+  int int_op0, int_op1, int_op2, int_out, int_result, i = 1, fail;
+  long int long_op0, long_op1, long_op2, lont_out, lont_result;
+  long int long_int_out, long_int_result;
+  unsigned int unsigned_int_out, unsigned_int_result;
+  unsigned long int unsigned_long_int_out, unsigned_long_int_result;
+
+  *((unsigned long *)&__m128d_op0[1]) = 0x;
+  *((unsigned long *)&__m128d_op0[0]) = 0x;
+  *((unsigned long *)&__m128d_op1[1]) = 0x8a228acac14e440a;
+  *((unsigned long *)&__m128d_op1[0]) = 0xc77c47cdc0f16549;
+  *((unsigned long *)&__m128d_op2[1]) = 0xd24271c4;
+  *((unsigned long *)&__m128d_op2[0]) = 0x2711bad1e8e309ed;
+  *((unsigned long *)&__m128d_result[1]) = 0xd24271c4;
+  *((unsigned long *)&__m128d_result[0]) = 0x2711bad1e8e309ed;
+  __m128d_out = __lsx_vfmadd_d (__m128d_op0, __m128d_op1, __m128d_op2);
+  ASSERTEQ_64 (__LINE__, __m128d_result, __m128d_out);
+
+  *((unsigned long *)&__m128d_op0[1]) = 0x;
+  *((unsigned long *)&__m128d_op0[0]) = 0x;
+  *((unsigned long *)&__m128d_op1[1]) = 0x;
+  *((unsigned long *)&__m128d_op1[0]) = 0x;
+  *((unsigned long *)&__m128d_op2[1]) = 0x;
+  *((unsigned long *)&__m128d_op2[0]) = 0x;
+  *((unsigned long *)&__m128d_result[1]) = 0x;
+  *((unsigned long *)&__m128d_result[0]) = 0x;
+  __m128d_out = __lsx_vfmadd_d (__m128d_op0, __m128d_op1, __m128d_op2);
+  ASSERTEQ_64 (__LINE__, __m128d_result, __m128d_out);
+
+  *((unsigned long *)&__m128d_op0[1]) = 0x04040383;
+  *((unsigned long *)&__m128d_op0[0]) = 0xe0001fff;
+  *((unsigned long *)&__m128d_op1[1]) = 0x04040383;
+  *((unsigned long *)&__m128d_op1[0]) = 0xe0001fff;
+  *((unsigned long *)&__m128d_op2[1]) = 0x0101;
+  *((unsigned long *)&__m128d_op2[0]) = 0x00010001;
+  *((unsigned long *)&__m128d_result[1]) = 0x0101;
+  *((unsigned long *)&__m128d_result[0]) = 0xe0001fff;
+  __m128d_out = __lsx_vfmadd_d (__m128d_op0, __m128d_op1, __m128d_op2);
+  ASSERTEQ_64 (__LINE__, __m128d_result, __m128d_out);
+
+  *((unsigned long *)&__m128d_op0[1]) = 0x;
+  *((unsigned long *)&__m128d_op0[0]) = 0x;
+  *((unsigned long *)&__m128d_op1[1]) = 0x003f80b0;
+  *((unsigned long *)&__m128d_op1[0]) = 0xff80;
+  *((unsigned long *)&__m128d_op2[1]) = 0x;
+  *((unsigned long *)&__m128d_op2[0]) = 0x;
+  *((unsigned long *)&__m128d_result[1]) = 0x;
+  *((unsigned long *)&__m128d_result[0]) = 0x;
+  __m128d_out = __lsx_vfmadd_d (__m128d_op0, __m128d_op1, __m128d_op2);
+  ASSERTEQ_64 (__LINE__, __m128d_result, __m128d_out);
+
+  *((unsigned long *)&__m128d_op0[1]) = 0x;
+  *((unsigned long *)&__m128d_op0[0]) = 0x00802000;
+  *((unsigned long *)&__m128d_op1[1]) = 0x00401000;
+  

[PATCH v4 22/23] LoongArch: Add tests for SX vector vand/vandi/vandn/vor/vori/vnor/ vnori/vxor/vxori instructions.

2023-09-12 Thread Xiaolong Chen
gcc/testsuite/ChangeLog:

* gcc.target/loongarch/vector/lsx/lsx-vand.c: New test.
* gcc.target/loongarch/vector/lsx/lsx-vandi.c: New test.
* gcc.target/loongarch/vector/lsx/lsx-vandn.c: New test.
* gcc.target/loongarch/vector/lsx/lsx-vnor.c: New test.
* gcc.target/loongarch/vector/lsx/lsx-vnori.c: New test.
* gcc.target/loongarch/vector/lsx/lsx-vor.c: New test.
* gcc.target/loongarch/vector/lsx/lsx-vori.c: New test.
* gcc.target/loongarch/vector/lsx/lsx-vorn.c: New test.
* gcc.target/loongarch/vector/lsx/lsx-vxor.c: New test.
* gcc.target/loongarch/vector/lsx/lsx-vxori.c: New test.
---
 .../loongarch/vector/lsx/lsx-vand.c   | 159 
 .../loongarch/vector/lsx/lsx-vandi.c  |  67 +++
 .../loongarch/vector/lsx/lsx-vandn.c  | 129 +
 .../loongarch/vector/lsx/lsx-vnor.c   | 109 +++
 .../loongarch/vector/lsx/lsx-vnori.c  |  91 ++
 .../gcc.target/loongarch/vector/lsx/lsx-vor.c | 169 ++
 .../loongarch/vector/lsx/lsx-vori.c   | 123 +
 .../loongarch/vector/lsx/lsx-vorn.c   | 109 +++
 .../loongarch/vector/lsx/lsx-vxor.c   |  79 
 .../loongarch/vector/lsx/lsx-vxori.c  |  67 +++
 10 files changed, 1102 insertions(+)
 create mode 100644 gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vand.c
 create mode 100644 gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vandi.c
 create mode 100644 gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vandn.c
 create mode 100644 gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vnor.c
 create mode 100644 gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vnori.c
 create mode 100644 gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vor.c
 create mode 100644 gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vori.c
 create mode 100644 gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vorn.c
 create mode 100644 gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vxor.c
 create mode 100644 gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vxori.c

diff --git a/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vand.c 
b/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vand.c
new file mode 100644
index 000..1597749b546
--- /dev/null
+++ b/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vand.c
@@ -0,0 +1,159 @@
+/* { dg-do run } */
+/* { dg-options "-mlsx -w -fno-strict-aliasing" } */
+#include "../simd_correctness_check.h"
+#include 
+
+int main ()
+{
+  __m128i __m128i_op0, __m128i_op1, __m128i_op2, __m128i_out, __m128i_result;
+  __m128 __m128_op0, __m128_op1, __m128_op2, __m128_out, __m128_result;
+  __m128d __m128d_op0, __m128d_op1, __m128d_op2, __m128d_out, __m128d_result;
+
+  int int_op0, int_op1, int_op2, int_out, int_result, i=1, fail;
+  long int long_op0, long_op1, long_op2, lont_out, lont_result;
+  long int long_int_out, long_int_result;
+  unsigned int unsigned_int_out, unsigned_int_result;
+  unsigned long int unsigned_long_int_out, unsigned_long_int_result;
+
+  *((unsigned long*)& __m128i_op0[1]) = 0x;
+  *((unsigned long*)& __m128i_op0[0]) = 0x;
+  *((unsigned long*)& __m128i_op1[1]) = 0x;
+  *((unsigned long*)& __m128i_op1[0]) = 0x;
+  *((unsigned long*)& __m128i_result[1]) = 0x;
+  *((unsigned long*)& __m128i_result[0]) = 0x;
+  __m128i_out = __lsx_vand_v(__m128i_op0,__m128i_op1);
+  ASSERTEQ_64(__LINE__, __m128i_result, __m128i_out);
+
+
+  *((unsigned long*)& __m128i_op0[1]) = 0x;
+  *((unsigned long*)& __m128i_op0[0]) = 0x;
+  *((unsigned long*)& __m128i_op1[1]) = 0x03574e3a62407e03;
+  *((unsigned long*)& __m128i_op1[0]) = 0x0101;
+  *((unsigned long*)& __m128i_result[1]) = 0x03574e3a62407e03;
+  *((unsigned long*)& __m128i_result[0]) = 0x;
+  __m128i_out = __lsx_vand_v(__m128i_op0,__m128i_op1);
+  ASSERTEQ_64(__LINE__, __m128i_result, __m128i_out);
+
+
+  *((unsigned long*)& __m128i_op0[1]) = 0x;
+  *((unsigned long*)& __m128i_op0[0]) = 0x;
+  *((unsigned long*)& __m128i_op1[1]) = 0x001f001f;
+  *((unsigned long*)& __m128i_op1[0]) = 0x001f001f;
+  *((unsigned long*)& __m128i_result[1]) = 0x001f001f;
+  *((unsigned long*)& __m128i_result[0]) = 0x001f001f;
+  __m128i_out = __lsx_vand_v(__m128i_op0,__m128i_op1);
+  ASSERTEQ_64(__LINE__, __m128i_result, __m128i_out);
+
+
+  *((unsigned long*)& __m128i_op0[1]) = 0x003dffc2;
+  *((unsigned long*)& __m128i_op0[0]) = 0x003dffc2;
+  *((unsigned long*)& __m128i_op1[1]) = 0x0008;
+  *((unsigned long*)& __m128i_op1[0]) = 0x;
+  *((unsigned long*)& __m128i_result[1]) = 0x;
+  *((unsigned long*)& __m128i_result[0]) = 0x;
+  __m128i_out = __lsx_vand_v(__m128i_op0,__m128i_op1);
+  

[PATCH v4 11/23] LoongArch: Add tests for SX vector vexth/vextl/vldi/vneg/vsat instructions.

2023-09-12 Thread Xiaolong Chen
gcc/testsuite/ChangeLog:

* gcc.target/loongarch/vector/lsx/lsx-vexth-1.c: New test.
* gcc.target/loongarch/vector/lsx/lsx-vexth-2.c: New test.
* gcc.target/loongarch/vector/lsx/lsx-vextl-1.c: New test.
* gcc.target/loongarch/vector/lsx/lsx-vextl-2.c: New test.
* gcc.target/loongarch/vector/lsx/lsx-vldi.c: New test.
* gcc.target/loongarch/vector/lsx/lsx-vneg.c: New test.
* gcc.target/loongarch/vector/lsx/lsx-vsat-1.c: New test.
* gcc.target/loongarch/vector/lsx/lsx-vsat-2.c: New test.
---
 .../loongarch/vector/lsx/lsx-vexth-1.c| 342 ++
 .../loongarch/vector/lsx/lsx-vexth-2.c| 182 ++
 .../loongarch/vector/lsx/lsx-vextl-1.c|  83 +
 .../loongarch/vector/lsx/lsx-vextl-2.c|  83 +
 .../loongarch/vector/lsx/lsx-vldi.c   |  61 
 .../loongarch/vector/lsx/lsx-vneg.c   | 321 
 .../loongarch/vector/lsx/lsx-vsat-1.c | 231 
 .../loongarch/vector/lsx/lsx-vsat-2.c | 272 ++
 8 files changed, 1575 insertions(+)
 create mode 100644 gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vexth-1.c
 create mode 100644 gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vexth-2.c
 create mode 100644 gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vextl-1.c
 create mode 100644 gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vextl-2.c
 create mode 100644 gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vldi.c
 create mode 100644 gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vneg.c
 create mode 100644 gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vsat-1.c
 create mode 100644 gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vsat-2.c

diff --git a/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vexth-1.c 
b/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vexth-1.c
new file mode 100644
index 000..f6390800d82
--- /dev/null
+++ b/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vexth-1.c
@@ -0,0 +1,342 @@
+/* { dg-do run } */
+/* { dg-options "-mlsx -w -fno-strict-aliasing" } */
+#include "../simd_correctness_check.h"
+#include 
+
+int
+main ()
+{
+  __m128i __m128i_op0, __m128i_op1, __m128i_op2, __m128i_out, __m128i_result;
+  __m128 __m128_op0, __m128_op1, __m128_op2, __m128_out, __m128_result;
+  __m128d __m128d_op0, __m128d_op1, __m128d_op2, __m128d_out, __m128d_result;
+
+  int int_op0, int_op1, int_op2, int_out, int_result, i = 1, fail;
+  long int long_op0, long_op1, long_op2, lont_out, lont_result;
+  long int long_int_out, long_int_result;
+  unsigned int unsigned_int_out, unsigned_int_result;
+  unsigned long int unsigned_long_int_out, unsigned_long_int_result;
+
+  *((unsigned long *)&__m128i_op0[1]) = 0x;
+  *((unsigned long *)&__m128i_op0[0]) = 0x;
+  *((unsigned long *)&__m128i_result[1]) = 0x;
+  *((unsigned long *)&__m128i_result[0]) = 0x;
+  __m128i_out = __lsx_vexth_h_b (__m128i_op0);
+  ASSERTEQ_64 (__LINE__, __m128i_result, __m128i_out);
+
+  *((unsigned long *)&__m128i_op0[1]) = 0x7fff;
+  *((unsigned long *)&__m128i_op0[0]) = 0x;
+  *((unsigned long *)&__m128i_result[1]) = 0x007f;
+  *((unsigned long *)&__m128i_result[0]) = 0x;
+  __m128i_out = __lsx_vexth_h_b (__m128i_op0);
+  ASSERTEQ_64 (__LINE__, __m128i_result, __m128i_out);
+
+  *((unsigned long *)&__m128i_op0[1]) = 0x;
+  *((unsigned long *)&__m128i_op0[0]) = 0x;
+  *((unsigned long *)&__m128i_result[1]) = 0x;
+  *((unsigned long *)&__m128i_result[0]) = 0x;
+  __m128i_out = __lsx_vexth_h_b (__m128i_op0);
+  ASSERTEQ_64 (__LINE__, __m128i_result, __m128i_out);
+
+  *((unsigned long *)&__m128i_op0[1]) = 0x;
+  *((unsigned long *)&__m128i_op0[0]) = 0x;
+  *((unsigned long *)&__m128i_result[1]) = 0x;
+  *((unsigned long *)&__m128i_result[0]) = 0x;
+  __m128i_out = __lsx_vexth_h_b (__m128i_op0);
+  ASSERTEQ_64 (__LINE__, __m128i_result, __m128i_out);
+
+  *((unsigned long *)&__m128i_op0[1]) = 0x;
+  *((unsigned long *)&__m128i_op0[0]) = 0xf909;
+  *((unsigned long *)&__m128i_result[1]) = 0x;
+  *((unsigned long *)&__m128i_result[0]) = 0x;
+  __m128i_out = __lsx_vexth_h_b (__m128i_op0);
+  ASSERTEQ_64 (__LINE__, __m128i_result, __m128i_out);
+
+  *((unsigned long *)&__m128i_op0[1]) = 0x;
+  *((unsigned long *)&__m128i_op0[0]) = 0x;
+  *((unsigned long *)&__m128i_result[1]) = 0x;
+  *((unsigned long *)&__m128i_result[0]) = 0x;
+  __m128i_out = __lsx_vexth_h_b (__m128i_op0);
+  ASSERTEQ_64 (__LINE__, __m128i_result, __m128i_out);
+
+  *((unsigned long *)&__m128i_op0[1]) = 0x;
+  *((unsigned long *)&__m128i_op0[0]) = 0xff01ff01;
+  *((unsigned long 

[PATCH v4 18/23] LoongArch: Add tests for SX vector floating point arithmetic instructions.

2023-09-12 Thread Xiaolong Chen
gcc/testsuite/ChangeLog:

* gcc.target/loongarch/vector/lsx/lsx-vfadd_d.c: New test.
* gcc.target/loongarch/vector/lsx/lsx-vfadd_s.c: New test.
* gcc.target/loongarch/vector/lsx/lsx-vfclass_d.c: New test.
* gcc.target/loongarch/vector/lsx/lsx-vfclass_s.c: New test.
* gcc.target/loongarch/vector/lsx/lsx-vflogb_d.c: New test.
* gcc.target/loongarch/vector/lsx/lsx-vflogb_s.c: New test.
* gcc.target/loongarch/vector/lsx/lsx-vfmax_d.c: New test.
* gcc.target/loongarch/vector/lsx/lsx-vfmax_s.c: New test.
* gcc.target/loongarch/vector/lsx/lsx-vfmaxa_d.c: New test.
* gcc.target/loongarch/vector/lsx/lsx-vfmaxa_s.c: New test.
* gcc.target/loongarch/vector/lsx/lsx-vfsqrt_d.c: New test.
* gcc.target/loongarch/vector/lsx/lsx-vfsqrt_s.c: New test.
---
 .../loongarch/vector/lsx/lsx-vfadd_d.c| 407 +++
 .../loongarch/vector/lsx/lsx-vfadd_s.c| 470 ++
 .../loongarch/vector/lsx/lsx-vfclass_d.c  |  83 
 .../loongarch/vector/lsx/lsx-vfclass_s.c  |  74 +++
 .../loongarch/vector/lsx/lsx-vflogb_d.c   |  76 +++
 .../loongarch/vector/lsx/lsx-vflogb_s.c   | 185 +++
 .../loongarch/vector/lsx/lsx-vfmax_d.c| 200 
 .../loongarch/vector/lsx/lsx-vfmax_s.c| 335 +
 .../loongarch/vector/lsx/lsx-vfmaxa_d.c   | 155 ++
 .../loongarch/vector/lsx/lsx-vfmaxa_s.c   | 230 +
 .../loongarch/vector/lsx/lsx-vfsqrt_d.c   | 216 
 .../loongarch/vector/lsx/lsx-vfsqrt_s.c   | 372 ++
 12 files changed, 2803 insertions(+)
 create mode 100644 gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vfadd_d.c
 create mode 100644 gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vfadd_s.c
 create mode 100644 
gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vfclass_d.c
 create mode 100644 
gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vfclass_s.c
 create mode 100644 gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vflogb_d.c
 create mode 100644 gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vflogb_s.c
 create mode 100644 gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vfmax_d.c
 create mode 100644 gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vfmax_s.c
 create mode 100644 gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vfmaxa_d.c
 create mode 100644 gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vfmaxa_s.c
 create mode 100644 gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vfsqrt_d.c
 create mode 100644 gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vfsqrt_s.c

diff --git a/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vfadd_d.c 
b/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vfadd_d.c
new file mode 100644
index 000..7ffbd385ee0
--- /dev/null
+++ b/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vfadd_d.c
@@ -0,0 +1,407 @@
+/* { dg-do run } */
+/* { dg-options "-mlsx -w -fno-strict-aliasing" } */
+#include "../simd_correctness_check.h"
+#include 
+
+int
+main ()
+{
+  __m128i __m128i_op0, __m128i_op1, __m128i_op2, __m128i_out, __m128i_result;
+  __m128 __m128_op0, __m128_op1, __m128_op2, __m128_out, __m128_result;
+  __m128d __m128d_op0, __m128d_op1, __m128d_op2, __m128d_out, __m128d_result;
+
+  int int_op0, int_op1, int_op2, int_out, int_result, i = 1, fail;
+  long int long_op0, long_op1, long_op2, lont_out, lont_result;
+  long int long_int_out, long_int_result;
+  unsigned int unsigned_int_out, unsigned_int_result;
+  unsigned long int unsigned_long_int_out, unsigned_long_int_result;
+
+  *((unsigned long *)&__m128d_op0[1]) = 0x;
+  *((unsigned long *)&__m128d_op0[0]) = 0x;
+  *((unsigned long *)&__m128d_op1[1]) = 0x;
+  *((unsigned long *)&__m128d_op1[0]) = 0x;
+  *((unsigned long *)&__m128d_result[1]) = 0x;
+  *((unsigned long *)&__m128d_result[0]) = 0x;
+  __m128d_out = __lsx_vfadd_d (__m128d_op0, __m128d_op1);
+  ASSERTEQ_64 (__LINE__, __m128d_result, __m128d_out);
+
+  *((unsigned long *)&__m128d_op0[1]) = 0x;
+  *((unsigned long *)&__m128d_op0[0]) = 0xfea8ff44;
+  *((unsigned long *)&__m128d_op1[1]) = 0x2020202020202020;
+  *((unsigned long *)&__m128d_op1[0]) = 0x2020202020202020;
+  *((unsigned long *)&__m128d_result[1]) = 0x2020202020202020;
+  *((unsigned long *)&__m128d_result[0]) = 0x2020202020202020;
+  __m128d_out = __lsx_vfadd_d (__m128d_op0, __m128d_op1);
+  ASSERTEQ_64 (__LINE__, __m128d_result, __m128d_out);
+
+  *((unsigned long *)&__m128d_op0[1]) = 0x1000100010001000;
+  *((unsigned long *)&__m128d_op0[0]) = 0x1000100010001000;
+  *((unsigned long *)&__m128d_op1[1]) = 0x;
+  *((unsigned long *)&__m128d_op1[0]) = 0x;
+  *((unsigned long *)&__m128d_result[1]) = 0x1000100010001000;
+  *((unsigned long *)&__m128d_result[0]) = 0x1000100010001000;
+  __m128d_out = __lsx_vfadd_d (__m128d_op0, __m128d_op1);
+  

[PATCH v4 10/23] LoongArch: Add tests for SX vector vmax/vmaxi/vmin/vmini instructions.

2023-09-12 Thread Xiaolong Chen
gcc/testsuite/ChangeLog:

* gcc.target/loongarch/vector/lsx/lsx-vmax-1.c: New test.
* gcc.target/loongarch/vector/lsx/lsx-vmax-2.c: New test.
* gcc.target/loongarch/vector/lsx/lsx-vmaxi-1.c: New test.
* gcc.target/loongarch/vector/lsx/lsx-vmaxi-2.c: New test.
* gcc.target/loongarch/vector/lsx/lsx-vmin-1.c: New test.
* gcc.target/loongarch/vector/lsx/lsx-vmin-2.c: New test.
* gcc.target/loongarch/vector/lsx/lsx-vmini-1.c: New test.
* gcc.target/loongarch/vector/lsx/lsx-vmini-2.c: New test.
---
 .../loongarch/vector/lsx/lsx-vmax-1.c | 317 +
 .../loongarch/vector/lsx/lsx-vmax-2.c | 362 +++
 .../loongarch/vector/lsx/lsx-vmaxi-1.c| 279 +++
 .../loongarch/vector/lsx/lsx-vmaxi-2.c| 223 +
 .../loongarch/vector/lsx/lsx-vmin-1.c | 434 ++
 .../loongarch/vector/lsx/lsx-vmin-2.c | 344 ++
 .../loongarch/vector/lsx/lsx-vmini-1.c| 314 +
 .../loongarch/vector/lsx/lsx-vmini-2.c| 216 +
 8 files changed, 2489 insertions(+)
 create mode 100644 gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vmax-1.c
 create mode 100644 gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vmax-2.c
 create mode 100644 gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vmaxi-1.c
 create mode 100644 gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vmaxi-2.c
 create mode 100644 gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vmin-1.c
 create mode 100644 gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vmin-2.c
 create mode 100644 gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vmini-1.c
 create mode 100644 gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vmini-2.c

diff --git a/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vmax-1.c 
b/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vmax-1.c
new file mode 100644
index 000..b0e22f955b0
--- /dev/null
+++ b/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vmax-1.c
@@ -0,0 +1,317 @@
+/* { dg-do run } */
+/* { dg-options "-mlsx -w -fno-strict-aliasing" } */
+#include "../simd_correctness_check.h"
+#include 
+
+int
+main ()
+{
+  __m128i __m128i_op0, __m128i_op1, __m128i_op2, __m128i_out, __m128i_result;
+  __m128 __m128_op0, __m128_op1, __m128_op2, __m128_out, __m128_result;
+  __m128d __m128d_op0, __m128d_op1, __m128d_op2, __m128d_out, __m128d_result;
+
+  int int_op0, int_op1, int_op2, int_out, int_result, i = 1, fail;
+  long int long_op0, long_op1, long_op2, lont_out, lont_result;
+  long int long_int_out, long_int_result;
+  unsigned int unsigned_int_out, unsigned_int_result;
+  unsigned long int unsigned_long_int_out, unsigned_long_int_result;
+
+  *((unsigned long *)&__m128i_op0[1]) = 0x;
+  *((unsigned long *)&__m128i_op0[0]) = 0x;
+  *((unsigned long *)&__m128i_op1[1]) = 0x;
+  *((unsigned long *)&__m128i_op1[0]) = 0x;
+  *((unsigned long *)&__m128i_result[1]) = 0x;
+  *((unsigned long *)&__m128i_result[0]) = 0x;
+  __m128i_out = __lsx_vmax_b (__m128i_op0, __m128i_op1);
+  ASSERTEQ_64 (__LINE__, __m128i_result, __m128i_out);
+
+  *((unsigned long *)&__m128i_op0[1]) = 0x;
+  *((unsigned long *)&__m128i_op0[0]) = 0x;
+  *((unsigned long *)&__m128i_op1[1]) = 0x;
+  *((unsigned long *)&__m128i_op1[0]) = 0x;
+  *((unsigned long *)&__m128i_result[1]) = 0x;
+  *((unsigned long *)&__m128i_result[0]) = 0x;
+  __m128i_out = __lsx_vmax_b (__m128i_op0, __m128i_op1);
+  ASSERTEQ_64 (__LINE__, __m128i_result, __m128i_out);
+
+  *((unsigned long *)&__m128i_op0[1]) = 0x;
+  *((unsigned long *)&__m128i_op0[0]) = 0x;
+  *((unsigned long *)&__m128i_op1[1]) = 0x;
+  *((unsigned long *)&__m128i_op1[0]) = 0x;
+  *((unsigned long *)&__m128i_result[1]) = 0x;
+  *((unsigned long *)&__m128i_result[0]) = 0x;
+  __m128i_out = __lsx_vmax_b (__m128i_op0, __m128i_op1);
+  ASSERTEQ_64 (__LINE__, __m128i_result, __m128i_out);
+
+  *((unsigned long *)&__m128i_op0[1]) = 0x7fff7fff7fff7fff;
+  *((unsigned long *)&__m128i_op0[0]) = 0x0001003f;
+  *((unsigned long *)&__m128i_op1[1]) = 0x;
+  *((unsigned long *)&__m128i_op1[0]) = 0x;
+  *((unsigned long *)&__m128i_result[1]) = 0x7f007f007f007f00;
+  *((unsigned long *)&__m128i_result[0]) = 0x0001003f;
+  __m128i_out = __lsx_vmax_b (__m128i_op0, __m128i_op1);
+  ASSERTEQ_64 (__LINE__, __m128i_result, __m128i_out);
+
+  *((unsigned long *)&__m128i_op0[1]) = 0x;
+  *((unsigned long *)&__m128i_op0[0]) = 0x0001;
+  *((unsigned long *)&__m128i_op1[1]) = 0xf0001fff;
+  *((unsigned long *)&__m128i_op1[0]) = 0xf0001fff;
+  *((unsigned long *)&__m128i_result[1]) = 0x1f00;
+  

[PATCH v4 12/23] LoongArch: Add tests for SX vector vabsd/vmskgez/vmskltz/vmsknz/vsigncov instructions.

2023-09-12 Thread Xiaolong Chen
gcc/testsuite/ChangeLog:

* gcc.target/loongarch/vector/lsx/lsx-vabsd-1.c: New test.
* gcc.target/loongarch/vector/lsx/lsx-vabsd-2.c: New test.
* gcc.target/loongarch/vector/lsx/lsx-vmskgez.c: New test.
* gcc.target/loongarch/vector/lsx/lsx-vmskltz.c: New test.
* gcc.target/loongarch/vector/lsx/lsx-vmsknz.c: New test.
* gcc.target/loongarch/vector/lsx/lsx-vsigncov.c: New test.
---
 .../loongarch/vector/lsx/lsx-vabsd-1.c| 272 +++
 .../loongarch/vector/lsx/lsx-vabsd-2.c| 398 
 .../loongarch/vector/lsx/lsx-vmskgez.c| 119 +
 .../loongarch/vector/lsx/lsx-vmskltz.c| 321 +
 .../loongarch/vector/lsx/lsx-vmsknz.c | 104 +
 .../loongarch/vector/lsx/lsx-vsigncov.c   | 425 ++
 6 files changed, 1639 insertions(+)
 create mode 100644 gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vabsd-1.c
 create mode 100644 gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vabsd-2.c
 create mode 100644 gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vmskgez.c
 create mode 100644 gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vmskltz.c
 create mode 100644 gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vmsknz.c
 create mode 100644 gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vsigncov.c

diff --git a/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vabsd-1.c 
b/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vabsd-1.c
new file mode 100644
index 000..e336581f3b4
--- /dev/null
+++ b/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vabsd-1.c
@@ -0,0 +1,272 @@
+/* { dg-do run } */
+/* { dg-options "-mlsx -w -fno-strict-aliasing" } */
+#include "../simd_correctness_check.h"
+#include 
+
+int
+main ()
+{
+  __m128i __m128i_op0, __m128i_op1, __m128i_op2, __m128i_out, __m128i_result;
+  __m128 __m128_op0, __m128_op1, __m128_op2, __m128_out, __m128_result;
+  __m128d __m128d_op0, __m128d_op1, __m128d_op2, __m128d_out, __m128d_result;
+
+  int int_op0, int_op1, int_op2, int_out, int_result, i = 1, fail;
+  long int long_op0, long_op1, long_op2, lont_out, lont_result;
+  long int long_int_out, long_int_result;
+  unsigned int unsigned_int_out, unsigned_int_result;
+  unsigned long int unsigned_long_int_out, unsigned_long_int_result;
+
+  *((unsigned long *)&__m128i_op0[1]) = 0x;
+  *((unsigned long *)&__m128i_op0[0]) = 0x;
+  *((unsigned long *)&__m128i_op1[1]) = 0x;
+  *((unsigned long *)&__m128i_op1[0]) = 0x;
+  *((unsigned long *)&__m128i_result[1]) = 0x;
+  *((unsigned long *)&__m128i_result[0]) = 0x;
+  __m128i_out = __lsx_vabsd_b (__m128i_op0, __m128i_op1);
+  ASSERTEQ_64 (__LINE__, __m128i_result, __m128i_out);
+
+  *((unsigned long *)&__m128i_op0[1]) = 0x;
+  *((unsigned long *)&__m128i_op0[0]) = 0x;
+  *((unsigned long *)&__m128i_op1[1]) = 0xfda9b23a624082fd;
+  *((unsigned long *)&__m128i_op1[0]) = 0x;
+  *((unsigned long *)&__m128i_result[1]) = 0x03574e3a62407e03;
+  *((unsigned long *)&__m128i_result[0]) = 0x0101;
+  __m128i_out = __lsx_vabsd_b (__m128i_op0, __m128i_op1);
+  ASSERTEQ_64 (__LINE__, __m128i_result, __m128i_out);
+
+  *((unsigned long *)&__m128i_op0[1]) = 0x80008000;
+  *((unsigned long *)&__m128i_op0[0]) = 0x7fff7fff;
+  *((unsigned long *)&__m128i_op1[1]) = 0xfffd0007;
+  *((unsigned long *)&__m128i_op1[0]) = 0x0014fff5;
+  *((unsigned long *)&__m128i_result[1]) = 0x7f0300078000;
+  *((unsigned long *)&__m128i_result[0]) = 0x7f15000a7f010101;
+  __m128i_out = __lsx_vabsd_b (__m128i_op0, __m128i_op1);
+  ASSERTEQ_64 (__LINE__, __m128i_result, __m128i_out);
+
+  *((unsigned long *)&__m128i_op0[1]) = 0x;
+  *((unsigned long *)&__m128i_op0[0]) = 0x;
+  *((unsigned long *)&__m128i_op1[1]) = 0x;
+  *((unsigned long *)&__m128i_op1[0]) = 0x;
+  *((unsigned long *)&__m128i_result[1]) = 0x;
+  *((unsigned long *)&__m128i_result[0]) = 0x;
+  __m128i_out = __lsx_vabsd_b (__m128i_op0, __m128i_op1);
+  ASSERTEQ_64 (__LINE__, __m128i_result, __m128i_out);
+
+  *((unsigned long *)&__m128i_op0[1]) = 0x7fff;
+  *((unsigned long *)&__m128i_op0[0]) = 0x;
+  *((unsigned long *)&__m128i_op1[1]) = 0x0006000e;
+  *((unsigned long *)&__m128i_op1[0]) = 0x00127fea;
+  *((unsigned long *)&__m128i_result[1]) = 0x7f0101070101010f;
+  *((unsigned long *)&__m128i_result[0]) = 0x00127f010116;
+  __m128i_out = __lsx_vabsd_b (__m128i_op0, __m128i_op1);
+  ASSERTEQ_64 (__LINE__, __m128i_result, __m128i_out);
+
+  *((unsigned long *)&__m128i_op0[1]) = 0x000b;
+  *((unsigned long *)&__m128i_op0[0]) = 0x000b;
+  *((unsigned long *)&__m128i_op1[1]) = 0x;
+  *((unsigned long *)&__m128i_op1[0]) = 0x;

[PATCH v4 13/23] LoongArch: Add tests for SX vector vdiv/vmod instructions.

2023-09-12 Thread Xiaolong Chen
gcc/testsuite/ChangeLog:

* gcc.target/loongarch/vector/lsx/lsx-vdiv-1.c: New test.
* gcc.target/loongarch/vector/lsx/lsx-vdiv-2.c: New test.
* gcc.target/loongarch/vector/lsx/lsx-vmod-1.c: New test.
* gcc.target/loongarch/vector/lsx/lsx-vmod-2.c: New test.
---
 .../loongarch/vector/lsx/lsx-vdiv-1.c | 299 ++
 .../loongarch/vector/lsx/lsx-vdiv-2.c | 254 +++
 .../loongarch/vector/lsx/lsx-vmod-1.c | 254 +++
 .../loongarch/vector/lsx/lsx-vmod-2.c | 254 +++
 4 files changed, 1061 insertions(+)
 create mode 100644 gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vdiv-1.c
 create mode 100644 gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vdiv-2.c
 create mode 100644 gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vmod-1.c
 create mode 100644 gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vmod-2.c

diff --git a/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vdiv-1.c 
b/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vdiv-1.c
new file mode 100644
index 000..cb4be04757c
--- /dev/null
+++ b/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vdiv-1.c
@@ -0,0 +1,299 @@
+/* { dg-do run } */
+/* { dg-options "-mlsx -w -fno-strict-aliasing" } */
+#include "../simd_correctness_check.h"
+#include 
+
+int
+main ()
+{
+  __m128i __m128i_op0, __m128i_op1, __m128i_op2, __m128i_out, __m128i_result;
+  __m128 __m128_op0, __m128_op1, __m128_op2, __m128_out, __m128_result;
+  __m128d __m128d_op0, __m128d_op1, __m128d_op2, __m128d_out, __m128d_result;
+
+  int int_op0, int_op1, int_op2, int_out, int_result, i = 1, fail;
+  long int long_op0, long_op1, long_op2, lont_out, lont_result;
+  long int long_int_out, long_int_result;
+  unsigned int unsigned_int_out, unsigned_int_result;
+  unsigned long int unsigned_long_int_out, unsigned_long_int_result;
+
+  *((unsigned long *)&__m128i_op0[1]) = 0x;
+  *((unsigned long *)&__m128i_op0[0]) = 0x;
+  *((unsigned long *)&__m128i_op1[1]) = 0x;
+  *((unsigned long *)&__m128i_op1[0]) = 0x;
+  *((unsigned long *)&__m128i_result[1]) = 0x;
+  *((unsigned long *)&__m128i_result[0]) = 0x;
+  __m128i_out = __lsx_vdiv_b (__m128i_op0, __m128i_op1);
+  ASSERTEQ_64 (__LINE__, __m128i_result, __m128i_out);
+
+  *((unsigned long *)&__m128i_op0[1]) = 0x3ff0;
+  *((unsigned long *)&__m128i_op0[0]) = 0x40f3fa00;
+  *((unsigned long *)&__m128i_op1[1]) = 0xb4ff;
+  *((unsigned long *)&__m128i_op1[0]) = 0xb4ff;
+  *((unsigned long *)&__m128i_result[1]) = 0xc110;
+  *((unsigned long *)&__m128i_result[0]) = 0xc00d0600;
+  __m128i_out = __lsx_vdiv_b (__m128i_op0, __m128i_op1);
+  ASSERTEQ_64 (__LINE__, __m128i_result, __m128i_out);
+
+  *((unsigned long *)&__m128i_op0[1]) = 0x;
+  *((unsigned long *)&__m128i_op0[0]) = 0x;
+  *((unsigned long *)&__m128i_op1[1]) = 0x;
+  *((unsigned long *)&__m128i_op1[0]) = 0x;
+  *((unsigned long *)&__m128i_result[1]) = 0x;
+  *((unsigned long *)&__m128i_result[0]) = 0x0101010101010101;
+  __m128i_out = __lsx_vdiv_b (__m128i_op0, __m128i_op1);
+  ASSERTEQ_64 (__LINE__, __m128i_result, __m128i_out);
+
+  *((unsigned long *)&__m128i_op0[1]) = 0x0002;
+  *((unsigned long *)&__m128i_op0[0]) = 0x0101000101010001;
+  *((unsigned long *)&__m128i_op1[1]) = 0x;
+  *((unsigned long *)&__m128i_op1[0]) = 0x;
+  *((unsigned long *)&__m128i_result[1]) = 0x00fe;
+  *((unsigned long *)&__m128i_result[0]) = 0x00ff00ff;
+  __m128i_out = __lsx_vdiv_b (__m128i_op0, __m128i_op1);
+  ASSERTEQ_64 (__LINE__, __m128i_result, __m128i_out);
+
+  *((unsigned long *)&__m128i_op0[1]) = 0x;
+  *((unsigned long *)&__m128i_op0[0]) = 0x;
+  *((unsigned long *)&__m128i_op1[1]) = 0x;
+  *((unsigned long *)&__m128i_op1[0]) = 0x;
+  *((unsigned long *)&__m128i_result[1]) = 0x01010101;
+  *((unsigned long *)&__m128i_result[0]) = 0x01010101;
+  __m128i_out = __lsx_vdiv_b (__m128i_op0, __m128i_op1);
+  ASSERTEQ_64 (__LINE__, __m128i_result, __m128i_out);
+
+  *((unsigned long *)&__m128i_op0[1]) = 0x;
+  *((unsigned long *)&__m128i_op0[0]) = 0x;
+  *((unsigned long *)&__m128i_op1[1]) = 0xd3259a2984048c23;
+  *((unsigned long *)&__m128i_op1[0]) = 0xf9796558e39953fd;
+  *((unsigned long *)&__m128i_result[1]) = 0x;
+  *((unsigned long *)&__m128i_result[0]) = 0x;
+  __m128i_out = __lsx_vdiv_b (__m128i_op0, __m128i_op1);
+  ASSERTEQ_64 (__LINE__, __m128i_result, __m128i_out);
+
+  *((unsigned long *)&__m128i_op0[1]) = 0x97279727;
+  *((unsigned long *)&__m128i_op0[0]) = 0xfe79ba5f;
+  *((unsigned long *)&__m128i_op1[1]) = 

[PATCH v4 09/23] LoongArch: Add tests for SX vector vavg/vavgr instructions.

2023-09-12 Thread Xiaolong Chen
gcc/testsuite/ChangeLog:

* gcc.target/loongarch/vector/lsx/lsx-vavg-1.c: New test.
* gcc.target/loongarch/vector/lsx/lsx-vavg-2.c: New test.
* gcc.target/loongarch/vector/lsx/lsx-vavgr-1.c: New test.
* gcc.target/loongarch/vector/lsx/lsx-vavgr-2.c: New test.
---
 .../loongarch/vector/lsx/lsx-vavg-1.c | 398 ++
 .../loongarch/vector/lsx/lsx-vavg-2.c | 308 ++
 .../loongarch/vector/lsx/lsx-vavgr-1.c| 299 +
 .../loongarch/vector/lsx/lsx-vavgr-2.c| 317 ++
 4 files changed, 1322 insertions(+)
 create mode 100644 gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vavg-1.c
 create mode 100644 gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vavg-2.c
 create mode 100644 gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vavgr-1.c
 create mode 100644 gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vavgr-2.c

diff --git a/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vavg-1.c 
b/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vavg-1.c
new file mode 100644
index 000..2177ca3f6f7
--- /dev/null
+++ b/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vavg-1.c
@@ -0,0 +1,398 @@
+/* { dg-do run } */
+/* { dg-options "-mlsx -w -fno-strict-aliasing" } */
+#include "../simd_correctness_check.h"
+#include 
+
+int
+main ()
+{
+  __m128i __m128i_op0, __m128i_op1, __m128i_op2, __m128i_out, __m128i_result;
+  __m128 __m128_op0, __m128_op1, __m128_op2, __m128_out, __m128_result;
+  __m128d __m128d_op0, __m128d_op1, __m128d_op2, __m128d_out, __m128d_result;
+
+  int int_op0, int_op1, int_op2, int_out, int_result, i = 1, fail;
+  long int long_op0, long_op1, long_op2, lont_out, lont_result;
+  long int long_int_out, long_int_result;
+  unsigned int unsigned_int_out, unsigned_int_result;
+  unsigned long int unsigned_long_int_out, unsigned_long_int_result;
+
+  *((unsigned long *)&__m128i_op0[1]) = 0x;
+  *((unsigned long *)&__m128i_op0[0]) = 0x;
+  *((unsigned long *)&__m128i_op1[1]) = 0x;
+  *((unsigned long *)&__m128i_op1[0]) = 0x;
+  *((unsigned long *)&__m128i_result[1]) = 0x;
+  *((unsigned long *)&__m128i_result[0]) = 0x;
+  __m128i_out = __lsx_vavg_b (__m128i_op0, __m128i_op1);
+  ASSERTEQ_64 (__LINE__, __m128i_result, __m128i_out);
+
+  *((unsigned long *)&__m128i_op0[1]) = 0xfff8fff8fff8fff8;
+  *((unsigned long *)&__m128i_op0[0]) = 0xfff8fff8fff8fff8;
+  *((unsigned long *)&__m128i_op1[1]) = 0x;
+  *((unsigned long *)&__m128i_op1[0]) = 0x;
+  *((unsigned long *)&__m128i_result[1]) = 0xfffcfffcfffcfffc;
+  *((unsigned long *)&__m128i_result[0]) = 0xfffcfffcfffcfffc;
+  __m128i_out = __lsx_vavg_b (__m128i_op0, __m128i_op1);
+  ASSERTEQ_64 (__LINE__, __m128i_result, __m128i_out);
+
+  *((unsigned long *)&__m128i_op0[1]) = 0x;
+  *((unsigned long *)&__m128i_op0[0]) = 0x;
+  *((unsigned long *)&__m128i_op1[1]) = 0x;
+  *((unsigned long *)&__m128i_op1[0]) = 0x;
+  *((unsigned long *)&__m128i_result[1]) = 0x;
+  *((unsigned long *)&__m128i_result[0]) = 0x;
+  __m128i_out = __lsx_vavg_b (__m128i_op0, __m128i_op1);
+  ASSERTEQ_64 (__LINE__, __m128i_result, __m128i_out);
+
+  *((unsigned long *)&__m128i_op0[1]) = 0x;
+  *((unsigned long *)&__m128i_op0[0]) = 0x;
+  *((unsigned long *)&__m128i_op1[1]) = 0x;
+  *((unsigned long *)&__m128i_op1[0]) = 0x;
+  *((unsigned long *)&__m128i_result[1]) = 0x;
+  *((unsigned long *)&__m128i_result[0]) = 0x;
+  __m128i_out = __lsx_vavg_b (__m128i_op0, __m128i_op1);
+  ASSERTEQ_64 (__LINE__, __m128i_result, __m128i_out);
+
+  *((unsigned long *)&__m128i_op0[1]) = 0x;
+  *((unsigned long *)&__m128i_op0[0]) = 0x;
+  *((unsigned long *)&__m128i_op1[1]) = 0x;
+  *((unsigned long *)&__m128i_op1[0]) = 0x4050;
+  *((unsigned long *)&__m128i_result[1]) = 0x;
+  *((unsigned long *)&__m128i_result[0]) = 0x2028;
+  __m128i_out = __lsx_vavg_b (__m128i_op0, __m128i_op1);
+  ASSERTEQ_64 (__LINE__, __m128i_result, __m128i_out);
+
+  *((unsigned long *)&__m128i_op0[1]) = 0x;
+  *((unsigned long *)&__m128i_op0[0]) = 0x;
+  *((unsigned long *)&__m128i_op1[1]) = 0x;
+  *((unsigned long *)&__m128i_op1[0]) = 0x;
+  *((unsigned long *)&__m128i_result[1]) = 0x;
+  *((unsigned long *)&__m128i_result[0]) = 0x;
+  __m128i_out = __lsx_vavg_b (__m128i_op0, __m128i_op1);
+  ASSERTEQ_64 (__LINE__, __m128i_result, __m128i_out);
+
+  *((unsigned long *)&__m128i_op0[1]) = 0x;
+  *((unsigned long *)&__m128i_op0[0]) = 0x;
+  *((unsigned long *)&__m128i_op1[1]) = 

[PATCH v4 07/23] LoongArch: Add tests for SX vector addition vsadd instructions.

2023-09-12 Thread Xiaolong Chen
gcc/testsuite/ChangeLog:

* gcc.target/loongarch/vector/lsx/lsx-vsadd-1.c: New test.
* gcc.target/loongarch/vector/lsx/lsx-vsadd-2.c: New test.
---
 .../loongarch/vector/lsx/lsx-vsadd-1.c| 335 +
 .../loongarch/vector/lsx/lsx-vsadd-2.c| 345 ++
 2 files changed, 680 insertions(+)
 create mode 100644 gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vsadd-1.c
 create mode 100644 gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vsadd-2.c

diff --git a/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vsadd-1.c 
b/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vsadd-1.c
new file mode 100644
index 000..1bc27c983bb
--- /dev/null
+++ b/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vsadd-1.c
@@ -0,0 +1,335 @@
+/* { dg-do run } */
+/* { dg-options "-mlsx -w -fno-strict-aliasing" } */
+#include "../simd_correctness_check.h"
+#include 
+
+int
+main ()
+{
+  __m128i __m128i_op0, __m128i_op1, __m128i_op2, __m128i_out, __m128i_result;
+  __m128 __m128_op0, __m128_op1, __m128_op2, __m128_out, __m128_result;
+  __m128d __m128d_op0, __m128d_op1, __m128d_op2, __m128d_out, __m128d_result;
+
+  int int_op0, int_op1, int_op2, int_out, int_result, i = 1, fail;
+  long int long_op0, long_op1, long_op2, lont_out, lont_result;
+  long int long_int_out, long_int_result;
+  unsigned int unsigned_int_out, unsigned_int_result;
+  unsigned long int unsigned_long_int_out, unsigned_long_int_result;
+
+  *((unsigned long *)&__m128i_op0[1]) = 0x;
+  *((unsigned long *)&__m128i_op0[0]) = 0x;
+  *((unsigned long *)&__m128i_op1[1]) = 0x;
+  *((unsigned long *)&__m128i_op1[0]) = 0x;
+  *((unsigned long *)&__m128i_result[1]) = 0x;
+  *((unsigned long *)&__m128i_result[0]) = 0x;
+  __m128i_out = __lsx_vsadd_b (__m128i_op0, __m128i_op1);
+  ASSERTEQ_64 (__LINE__, __m128i_result, __m128i_out);
+
+  *((unsigned long *)&__m128i_op0[1]) = 0x;
+  *((unsigned long *)&__m128i_op0[0]) = 0x;
+  *((unsigned long *)&__m128i_op1[1]) = 0x;
+  *((unsigned long *)&__m128i_op1[0]) = 0x;
+  *((unsigned long *)&__m128i_result[1]) = 0x;
+  *((unsigned long *)&__m128i_result[0]) = 0xfefefefefefefefe;
+  __m128i_out = __lsx_vsadd_b (__m128i_op0, __m128i_op1);
+  ASSERTEQ_64 (__LINE__, __m128i_result, __m128i_out);
+
+  *((unsigned long *)&__m128i_op0[1]) = 0x;
+  *((unsigned long *)&__m128i_op0[0]) = 0x;
+  *((unsigned long *)&__m128i_op1[1]) = 0x3c992b2e;
+  *((unsigned long *)&__m128i_op1[0]) = 0x730f;
+  *((unsigned long *)&__m128i_result[1]) = 0x3c992b2e;
+  *((unsigned long *)&__m128i_result[0]) = 0x730f;
+  __m128i_out = __lsx_vsadd_b (__m128i_op0, __m128i_op1);
+  ASSERTEQ_64 (__LINE__, __m128i_result, __m128i_out);
+
+  *((unsigned long *)&__m128i_op0[1]) = 0x;
+  *((unsigned long *)&__m128i_op0[0]) = 0x;
+  *((unsigned long *)&__m128i_op1[1]) = 0x;
+  *((unsigned long *)&__m128i_op1[0]) = 0x;
+  *((unsigned long *)&__m128i_result[1]) = 0x;
+  *((unsigned long *)&__m128i_result[0]) = 0x;
+  __m128i_out = __lsx_vsadd_b (__m128i_op0, __m128i_op1);
+  ASSERTEQ_64 (__LINE__, __m128i_result, __m128i_out);
+
+  *((unsigned long *)&__m128i_op0[1]) = 0x;
+  *((unsigned long *)&__m128i_op0[0]) = 0x;
+  *((unsigned long *)&__m128i_op1[1]) = 0x;
+  *((unsigned long *)&__m128i_op1[0]) = 0x;
+  *((unsigned long *)&__m128i_result[1]) = 0x;
+  *((unsigned long *)&__m128i_result[0]) = 0x;
+  __m128i_out = __lsx_vsadd_b (__m128i_op0, __m128i_op1);
+  ASSERTEQ_64 (__LINE__, __m128i_result, __m128i_out);
+
+  *((unsigned long *)&__m128i_op0[1]) = 0x7fff7fff;
+  *((unsigned long *)&__m128i_op0[0]) = 0x;
+  *((unsigned long *)&__m128i_op1[1]) = 0x;
+  *((unsigned long *)&__m128i_op1[0]) = 0x2bfd9461;
+  *((unsigned long *)&__m128i_result[1]) = 0x7fff7fff;
+  *((unsigned long *)&__m128i_result[0]) = 0x2bfd9461;
+  __m128i_out = __lsx_vsadd_b (__m128i_op0, __m128i_op1);
+  ASSERTEQ_64 (__LINE__, __m128i_result, __m128i_out);
+
+  *((unsigned long *)&__m128i_op0[1]) = 0x00d3012acc56f9bb;
+  *((unsigned long *)&__m128i_op0[0]) = 0x1021;
+  *((unsigned long *)&__m128i_op1[1]) = 0x;
+  *((unsigned long *)&__m128i_op1[0]) = 0x;
+  *((unsigned long *)&__m128i_result[1]) = 0x00d3012acc56f9bb;
+  *((unsigned long *)&__m128i_result[0]) = 0x1021;
+  __m128i_out = __lsx_vsadd_b (__m128i_op0, __m128i_op1);
+  ASSERTEQ_64 (__LINE__, __m128i_result, __m128i_out);
+
+  *((unsigned long *)&__m128i_op0[1]) = 0x1000;
+  *((unsigned long *)&__m128i_op0[0]) = 

[PATCH v4 00/23] Add tests for SX vector instructions.

2023-09-12 Thread Xiaolong Chen
v3 -> v4:
  Modify the name of the patch file.

  In order to better test the functionality of the vector instructions, the 128-bit
test cases are further split according to the function of each instruction.


Xiaolong Chen (23):
  LoongArch: Add tests of -mstrict-align option.
  LoongArch: Add testsuite framework for Loongson SX/ASX.
  LoongArch: Add tests for Loongson SX builtin functions.
  LoongArch: Add tests for SX vector floating-point instructions.
  LoongArch: Add tests for SX vector addition instructions.
  LoongArch: Add tests for SX vector subtraction instructions.
  LoongArch: Add tests for SX vector addition vsadd instructions.
  LoongArch: Add tests for the SX vector multiplication instruction.
  LoongArch: Add tests for SX vector vavg/vavgr instructions.
  LoongArch: Add tests for SX vector vmax/vmaxi/vmin/vmini instructions.
  LoongArch: Add tests for SX vector vexth/vextl/vldi/vneg/vsat
instructions.
  LoongArch: Add tests for SX vector
vabsd/vmskgez/vmskltz/vmsknz/vsigncov instructions.
  LoongArch: Add tests for SX vector vdiv/vmod instructions.
  LoongArch: Add tests for SX vector
vsll/vslli/vsrl/vsrli/vsrln/vsrlni/vsrlr /vsrlri/vslrlrn/vsrlrni 
instructions.
  LoongArch: Add tests for SX vector
vrotr/vrotri/vsra/vsrai/vsran/vsrani /vsrarn/vsrarni instructions.
  LoongArch: Add tests for SX vector
vssran/vssrani/vssrarn/vssrarni/vssrln /vssrlni/vssrlrn/vssrlrni
instructions.
  LoongArch: Add tests for SX vector vbitclr/vbitclri/vbitrev/vbitrevi/
vbitsel/vbitseli/vbitset/vbitseti/vclo/vclz/vpcnt instructions.
  LoongArch: Add tests for SX vector floating point arithmetic
instructions.
  LoongArch: Add tests for SX vector vfrstp/vfrstpi/vseq/vseqi/vsle
/vslei/vslt/vslti instructions.
  LoongArch: Add tests for SX vector vfcmp instructions.
  LoongArch: Add tests for SX vector handling and shuffle instructions.
  LoongArch: Add tests for SX vector vand/vandi/vandn/vor/vori/vnor/
vnori/vxor/vxori instructions.
  LoongArch: Add tests for SX vector vfmadd/vfnmadd/vld/vst
instructions.

 .../gcc.target/loongarch/strict-align.c   |   12 +
 .../loongarch/vector/loongarch-vector.exp |   42 +
 .../loongarch/vector/lsx/lsx-builtin.c| 1461 +
 .../loongarch/vector/lsx/lsx-vabsd-1.c|  272 +++
 .../loongarch/vector/lsx/lsx-vabsd-2.c|  398 +
 .../loongarch/vector/lsx/lsx-vadd.c   |  416 +
 .../loongarch/vector/lsx/lsx-vadda.c  |  344 
 .../loongarch/vector/lsx/lsx-vaddi.c  |  251 +++
 .../loongarch/vector/lsx/lsx-vaddwev-1.c  |  335 
 .../loongarch/vector/lsx/lsx-vaddwev-2.c  |  344 
 .../loongarch/vector/lsx/lsx-vaddwev-3.c  |  425 +
 .../loongarch/vector/lsx/lsx-vaddwod-1.c  |  408 +
 .../loongarch/vector/lsx/lsx-vaddwod-2.c  |  344 
 .../loongarch/vector/lsx/lsx-vaddwod-3.c  |  237 +++
 .../loongarch/vector/lsx/lsx-vand.c   |  159 ++
 .../loongarch/vector/lsx/lsx-vandi.c  |   67 +
 .../loongarch/vector/lsx/lsx-vandn.c  |  129 ++
 .../loongarch/vector/lsx/lsx-vavg-1.c |  398 +
 .../loongarch/vector/lsx/lsx-vavg-2.c |  308 
 .../loongarch/vector/lsx/lsx-vavgr-1.c|  299 
 .../loongarch/vector/lsx/lsx-vavgr-2.c|  317 
 .../loongarch/vector/lsx/lsx-vbitclr.c|  461 ++
 .../loongarch/vector/lsx/lsx-vbitclri.c   |  279 
 .../loongarch/vector/lsx/lsx-vbitrev.c|  407 +
 .../loongarch/vector/lsx/lsx-vbitrevi.c   |  336 
 .../loongarch/vector/lsx/lsx-vbitsel.c|  109 ++
 .../loongarch/vector/lsx/lsx-vbitseli.c   |   84 +
 .../loongarch/vector/lsx/lsx-vbitset.c|  371 +
 .../loongarch/vector/lsx/lsx-vbitseti.c   |  279 
 .../loongarch/vector/lsx/lsx-vbsll.c  |   83 +
 .../loongarch/vector/lsx/lsx-vbsrl.c  |   55 +
 .../loongarch/vector/lsx/lsx-vclo.c   |  266 +++
 .../loongarch/vector/lsx/lsx-vclz.c   |  265 +++
 .../loongarch/vector/lsx/lsx-vdiv-1.c |  299 
 .../loongarch/vector/lsx/lsx-vdiv-2.c |  254 +++
 .../loongarch/vector/lsx/lsx-vexth-1.c|  342 
 .../loongarch/vector/lsx/lsx-vexth-2.c|  182 ++
 .../loongarch/vector/lsx/lsx-vextl-1.c|   83 +
 .../loongarch/vector/lsx/lsx-vextl-2.c|   83 +
 .../loongarch/vector/lsx/lsx-vextrins.c   |  479 ++
 .../loongarch/vector/lsx/lsx-vfadd_d.c|  407 +
 .../loongarch/vector/lsx/lsx-vfadd_s.c|  470 ++
 .../loongarch/vector/lsx/lsx-vfclass_d.c  |   83 +
 .../loongarch/vector/lsx/lsx-vfclass_s.c  |   74 +
 .../loongarch/vector/lsx/lsx-vfcmp_caf.c  |  244 +++
 .../loongarch/vector/lsx/lsx-vfcmp_ceq.c  |  516 ++
 .../loongarch/vector/lsx/lsx-vfcmp_cle.c  |  530 ++
 .../loongarch/vector/lsx/lsx-vfcmp_clt.c  |  476 ++
 .../loongarch/vector/lsx/lsx-vfcmp_cne.c  |  378 +
 .../loongarch/vector/lsx/lsx-vfcmp_cor.c 

[PATCH v4 03/23] LoongArch: Add tests for Loongson SX builtin functions.

2023-09-12 Thread Xiaolong Chen
gcc/testsuite/ChangeLog:

* gcc.target/loongarch/vector/lsx/lsx-builtin.c: New test.
---
 .../loongarch/vector/lsx/lsx-builtin.c| 1461 +
 1 file changed, 1461 insertions(+)
 create mode 100644 gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-builtin.c

diff --git a/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-builtin.c 
b/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-builtin.c
new file mode 100644
index 000..70f5000b29f
--- /dev/null
+++ b/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-builtin.c
@@ -0,0 +1,1461 @@
+/* Test builtins for LOONGARCH LSX ASE instructions */
+/* { dg-do compile } */
+/* { dg-options "-mlsx" } */
+/* { dg-final { scan-assembler-times "lsx_vsll_b:.*vsll\\.b.*lsx_vsll_b" 1 } } 
*/
+/* { dg-final { scan-assembler-times "lsx_vsll_h:.*vsll\\.h.*lsx_vsll_h" 1 } } 
*/
+/* { dg-final { scan-assembler-times "lsx_vsll_w:.*vsll\\.w.*lsx_vsll_w" 1 } } 
*/
+/* { dg-final { scan-assembler-times "lsx_vsll_d:.*vsll\\.d.*lsx_vsll_d" 1 } } 
*/
+/* { dg-final { scan-assembler-times "lsx_vslli_b:.*vslli\\.b.*lsx_vslli_b" 1 
} } */
+/* { dg-final { scan-assembler-times "lsx_vslli_h:.*vslli\\.h.*lsx_vslli_h" 1 
} } */
+/* { dg-final { scan-assembler-times "lsx_vslli_w:.*vslli\\.w.*lsx_vslli_w" 1 
} } */
+/* { dg-final { scan-assembler-times "lsx_vslli_d:.*vslli\\.d.*lsx_vslli_d" 1 
} } */
+/* { dg-final { scan-assembler-times "lsx_vsra_b:.*vsra\\.b.*lsx_vsra_b" 1 } } 
*/
+/* { dg-final { scan-assembler-times "lsx_vsra_h:.*vsra\\.h.*lsx_vsra_h" 1 } } 
*/
+/* { dg-final { scan-assembler-times "lsx_vsra_w:.*vsra\\.w.*lsx_vsra_w" 1 } } 
*/
+/* { dg-final { scan-assembler-times "lsx_vsra_d:.*vsra\\.d.*lsx_vsra_d" 1 } } 
*/
+/* { dg-final { scan-assembler-times "lsx_vsrai_b:.*vsrai\\.b.*lsx_vsrai_b" 1 
} } */
+/* { dg-final { scan-assembler-times "lsx_vsrai_h:.*vsrai\\.h.*lsx_vsrai_h" 1 
} } */
+/* { dg-final { scan-assembler-times "lsx_vsrai_w:.*vsrai\\.w.*lsx_vsrai_w" 1 
} } */
+/* { dg-final { scan-assembler-times "lsx_vsrai_d:.*vsrai\\.d.*lsx_vsrai_d" 1 
} } */
+/* { dg-final { scan-assembler-times "lsx_vsrar_b:.*vsrar\\.b.*lsx_vsrar_b" 1 
} } */
+/* { dg-final { scan-assembler-times "lsx_vsrar_h:.*vsrar\\.h.*lsx_vsrar_h" 1 
} } */
+/* { dg-final { scan-assembler-times "lsx_vsrar_w:.*vsrar\\.w.*lsx_vsrar_w" 1 
} } */
+/* { dg-final { scan-assembler-times "lsx_vsrar_d:.*vsrar\\.d.*lsx_vsrar_d" 1 
} } */
+/* { dg-final { scan-assembler-times "lsx_vsrari_b:.*vsrari\\.b.*lsx_vsrari_b" 
1 } } */
+/* { dg-final { scan-assembler-times "lsx_vsrari_h:.*vsrari\\.h.*lsx_vsrari_h" 
1 } } */
+/* { dg-final { scan-assembler-times "lsx_vsrari_w:.*vsrari\\.w.*lsx_vsrari_w" 
1 } } */
+/* { dg-final { scan-assembler-times "lsx_vsrari_d:.*vsrari\\.d.*lsx_vsrari_d" 
1 } } */
+/* { dg-final { scan-assembler-times "lsx_vsrl_b:.*vsrl\\.b.*lsx_vsrl_b" 1 } } 
*/
+/* { dg-final { scan-assembler-times "lsx_vsrl_h:.*vsrl\\.h.*lsx_vsrl_h" 1 } } 
*/
+/* { dg-final { scan-assembler-times "lsx_vsrl_w:.*vsrl\\.w.*lsx_vsrl_w" 1 } } 
*/
+/* { dg-final { scan-assembler-times "lsx_vsrl_d:.*vsrl\\.d.*lsx_vsrl_d" 1 } } 
*/
+/* { dg-final { scan-assembler-times "lsx_vsrli_b:.*vsrli\\.b.*lsx_vsrli_b" 1 
} } */
+/* { dg-final { scan-assembler-times "lsx_vsrli_h:.*vsrli\\.h.*lsx_vsrli_h" 1 
} } */
+/* { dg-final { scan-assembler-times "lsx_vsrli_w:.*vsrli\\.w.*lsx_vsrli_w" 1 
} } */
+/* { dg-final { scan-assembler-times "lsx_vsrli_d:.*vsrli\\.d.*lsx_vsrli_d" 1 
} } */
+/* { dg-final { scan-assembler-times "lsx_vsrlr_b:.*vsrlr\\.b.*lsx_vsrlr_b" 1 
} } */
+/* { dg-final { scan-assembler-times "lsx_vsrlr_h:.*vsrlr\\.h.*lsx_vsrlr_h" 1 
} } */
+/* { dg-final { scan-assembler-times "lsx_vsrlr_w:.*vsrlr\\.w.*lsx_vsrlr_w" 1 
} } */
+/* { dg-final { scan-assembler-times "lsx_vsrlr_d:.*vsrlr\\.d.*lsx_vsrlr_d" 1 
} } */
+/* { dg-final { scan-assembler-times "lsx_vsrlri_b:.*vsrlri\\.b.*lsx_vsrlri_b" 
1 } } */
+/* { dg-final { scan-assembler-times "lsx_vsrlri_h:.*vsrlri\\.h.*lsx_vsrlri_h" 
1 } } */
+/* { dg-final { scan-assembler-times "lsx_vsrlri_w:.*vsrlri\\.w.*lsx_vsrlri_w" 
1 } } */
+/* { dg-final { scan-assembler-times "lsx_vsrlri_d:.*vsrlri\\.d.*lsx_vsrlri_d" 
1 } } */
+/* { dg-final { scan-assembler-times 
"lsx_vbitclr_b:.*vbitclr\\.b.*lsx_vbitclr_b" 1 } } */
+/* { dg-final { scan-assembler-times 
"lsx_vbitclr_h:.*vbitclr\\.h.*lsx_vbitclr_h" 1 } } */
+/* { dg-final { scan-assembler-times 
"lsx_vbitclr_w:.*vbitclr\\.w.*lsx_vbitclr_w" 1 } } */
+/* { dg-final { scan-assembler-times 
"lsx_vbitclr_d:.*vbitclr\\.d.*lsx_vbitclr_d" 1 } } */
+/* { dg-final { scan-assembler-times 
"lsx_vbitclri_b:.*vbitclri\\.b.*lsx_vbitclri_b" 1 } } */
+/* { dg-final { scan-assembler-times 
"lsx_vbitclri_h:.*vbitclri\\.h.*lsx_vbitclri_h" 1 } } */
+/* { dg-final { scan-assembler-times 
"lsx_vbitclri_w:.*vbitclri\\.w.*lsx_vbitclri_w" 1 } } */
+/* { dg-final { scan-assembler-times 
"lsx_vbitclri_d:.*vbitclri\\.d.*lsx_vbitclri_d" 1 } } */
+/* { dg-final { scan-assembler-times 
"lsx_vbitset_b:.*vbitset\\.b.*lsx_vbitset_b" 1 } } 

[PATCH v4 02/23] LoongArch: Add testsuite framework for Loongson SX/ASX.

2023-09-12 Thread Xiaolong Chen
gcc/testsuite/ChangeLog:

* gcc.target/loongarch/vector/loongarch-vector.exp: New test.
* gcc.target/loongarch/vector/simd_correctness_check.h: New test.
---
 .../loongarch/vector/loongarch-vector.exp | 42 +++
 .../loongarch/vector/simd_correctness_check.h | 54 +++
 2 files changed, 96 insertions(+)
 create mode 100644 
gcc/testsuite/gcc.target/loongarch/vector/loongarch-vector.exp
 create mode 100644 
gcc/testsuite/gcc.target/loongarch/vector/simd_correctness_check.h

diff --git a/gcc/testsuite/gcc.target/loongarch/vector/loongarch-vector.exp 
b/gcc/testsuite/gcc.target/loongarch/vector/loongarch-vector.exp
new file mode 100644
index 000..2cbf9ac6ac1
--- /dev/null
+++ b/gcc/testsuite/gcc.target/loongarch/vector/loongarch-vector.exp
@@ -0,0 +1,42 @@
+#Copyright(C) 2021 - 2023 Free Software Foundation, Inc.
+
+#This program is free software; you can redistribute it and / or modify
+#it under the terms of the GNU General Public License as published by
+#the Free Software Foundation; either version 3 of the License, or
+#(at your option) any later version.
+#
+#This program is distributed in the hope that it will be useful,
+#but WITHOUT ANY WARRANTY; without even the implied warranty of
+#MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.See the
+#GNU General Public License for more details.
+#
+#You should have received a copy of the GNU General Public License
+#along with GCC; see the file COPYING3.If not see
+# <http://www.gnu.org/licenses/>.
+
+#GCC testsuite that uses the `dg.exp' driver.
+
+#Exit immediately if this isn't a LoongArch target.
+if ![istarget loongarch*-*-*] then {
+return
+}
+
+#Load support procs.
+load_lib gcc-dg.exp
+
+#If a testcase doesn't have special options, use these.
+global DEFAULT_CFLAGS
+if ![info exists DEFAULT_CFLAGS] then {
+set DEFAULT_CFLAGS ""
+}
+
+#Initialize `dg'.
+dg-init
+
+#Main loop.
+dg-runtest [lsort [glob -nocomplain $srcdir/$subdir/lsx/*.\[cS\]]] \
+   "-mlsx" $DEFAULT_CFLAGS
+dg-runtest [lsort [glob -nocomplain $srcdir/$subdir/lasx/*.\[cS\]]] \
+   "-mlasx" $DEFAULT_CFLAGS
+# All done.
+dg-finish
diff --git a/gcc/testsuite/gcc.target/loongarch/vector/simd_correctness_check.h 
b/gcc/testsuite/gcc.target/loongarch/vector/simd_correctness_check.h
new file mode 100644
index 000..eb7fbd59cc7
--- /dev/null
+++ b/gcc/testsuite/gcc.target/loongarch/vector/simd_correctness_check.h
@@ -0,0 +1,54 @@
+#include 
+#include 
+#include 
+
+#define ASSERTEQ_64(line, ref, res)   \
+  do  \
+{ \
+  int fail = 0;   \
+  for (size_t i = 0; i < sizeof (res) / sizeof (res[0]); ++i) \
+{ \
+  long *temp_ref = &ref[i], *temp_res = &res[i];  \
+  if (abs (*temp_ref - *temp_res) > 0)\
+{ \
+  printf (" error: %s at line %ld , expected " #ref   \
+  "[%ld]:0x%lx, got: 0x%lx\n",\
+  __FILE__, line, i, *temp_ref, *temp_res);   \
+  fail = 1;   \
+} \
+} \
+  if (fail == 1)  \
+abort (); \
+} \
+  while (0)
+
+#define ASSERTEQ_32(line, ref, res)   \
+  do  \
+{ \
+  int fail = 0;   \
+  for (size_t i = 0; i < sizeof (res) / sizeof (res[0]); ++i) \
+{ \
+  int *temp_ref = &ref[i], *temp_res = &res[i];   \
+  if (abs (*temp_ref - *temp_res) > 0)\
+{ \
+  printf (" error: %s at line %ld , expected " #ref   \
+  "[%ld]:0x%x, got: 0x%x\n",  \
+  __FILE__, line, i, *temp_ref, *temp_res);   \
+  fail = 1;   \
+}

[PATCH v4 01/23] LoongArch: Add tests of -mstrict-align option.

2023-09-12 Thread Xiaolong Chen
gcc/testsuite/ChangeLog:

* gcc.target/loongarch/strict-align.c: New test.
---
 gcc/testsuite/gcc.target/loongarch/strict-align.c | 12 
 1 file changed, 12 insertions(+)
 create mode 100644 gcc/testsuite/gcc.target/loongarch/strict-align.c

diff --git a/gcc/testsuite/gcc.target/loongarch/strict-align.c 
b/gcc/testsuite/gcc.target/loongarch/strict-align.c
new file mode 100644
index 000..040d849584b
--- /dev/null
+++ b/gcc/testsuite/gcc.target/loongarch/strict-align.c
@@ -0,0 +1,12 @@
+/* { dg-do compile } */
+/* { dg-options "-Ofast -mstrict-align -mlasx" } */
+/* { dg-final { scan-assembler-not "vfadd.s" } } */
+
+void
+foo (float *restrict x, float *restrict y)
+{
+  x[0] = x[0] + y[0];
+  x[1] = x[1] + y[1];
+  x[2] = x[2] + y[2];
+  x[3] = x[3] + y[3];
+}
-- 
2.20.1



[committed] RISC-V: Remove redundant ABI test

2023-09-12 Thread Juzhe-Zhong
We only support and report warning for RVV types.

We don't report warning for GNU vectors.
So this testcase checking is incorrect and the FAIL is bogus.

Remove it and commit it.

gcc/testsuite/ChangeLog:

* gcc.target/riscv/rvv/base/vector-abi-9.c: Removed.

---
 .../gcc.target/riscv/rvv/base/vector-abi-9.c | 16 
 1 file changed, 16 deletions(-)
 delete mode 100644 gcc/testsuite/gcc.target/riscv/rvv/base/vector-abi-9.c

diff --git a/gcc/testsuite/gcc.target/riscv/rvv/base/vector-abi-9.c 
b/gcc/testsuite/gcc.target/riscv/rvv/base/vector-abi-9.c
deleted file mode 100644
index b5f130f0caf..000
--- a/gcc/testsuite/gcc.target/riscv/rvv/base/vector-abi-9.c
+++ /dev/null
@@ -1,16 +0,0 @@
-/* { dg-do compile } */
-/* { dg-options "-march=rv64gcv -mabi=lp64d 
--param=riscv-autovec-preference=fixed-vlmax" } */
-
-#include "riscv_vector.h"
-
-typedef int v4si __attribute__ ((vector_size (16)));
-
-v4si
-fun (v4si a) {  return a; }  /* { dg-warning "the vector type" } */
-
-void
-bar ()
-{
-  v4si a;
-  fun (a);
-}
-- 
2.36.3



[PATCH] LoongArch: Change the value of branch_cost from 2 to 6.

2023-09-12 Thread Lulu Cheng
gcc/ChangeLog:

* config/loongarch/loongarch-def.c: Modify the default value of
branch_cost.

gcc/testsuite/ChangeLog:

* gcc.target/loongarch/cmov_ii.c: New test.
---
 gcc/config/loongarch/loongarch-def.c |  4 ++--
 gcc/testsuite/gcc.target/loongarch/cmov_ii.c | 16 
 2 files changed, 18 insertions(+), 2 deletions(-)
 create mode 100644 gcc/testsuite/gcc.target/loongarch/cmov_ii.c

diff --git a/gcc/config/loongarch/loongarch-def.c 
b/gcc/config/loongarch/loongarch-def.c
index e744ee01d6d..430ef8b2d95 100644
--- a/gcc/config/loongarch/loongarch-def.c
+++ b/gcc/config/loongarch/loongarch-def.c
@@ -85,7 +85,7 @@ loongarch_cpu_align[N_TUNE_TYPES] = {
 .int_mult_di   = COSTS_N_INSNS (1),\
 .int_div_si= COSTS_N_INSNS (4),\
 .int_div_di= COSTS_N_INSNS (6),\
-.branch_cost   = 2,\
+.branch_cost   = 6,\
 .memory_latency= 4
 
 /* The following properties cannot be looked up directly using "cpucfg".
@@ -118,7 +118,7 @@ loongarch_rtx_cost_optimize_size = {
 .int_mult_di  = 4,
 .int_div_si  = 4,
 .int_div_di  = 4,
-.branch_cost  = 2,
+.branch_cost  = 6,
 .memory_latency   = 4,
 };
 
diff --git a/gcc/testsuite/gcc.target/loongarch/cmov_ii.c 
b/gcc/testsuite/gcc.target/loongarch/cmov_ii.c
new file mode 100644
index 000..466a4c1c9af
--- /dev/null
+++ b/gcc/testsuite/gcc.target/loongarch/cmov_ii.c
@@ -0,0 +1,16 @@
+/* { dg-do compile } */
+/* { dg-options "-O2" } */
+/* { dg-final { scan-assembler "main:.*xor.*masknez.*maskeqz.*or.*" } }
+ */
+void printf (char *, ...);
+extern void foo_ii (int *, int *, int *, int *);
+
+int
+test (void)
+{
+  int a, b;
+  int c, d, out;
+  foo_ii (&a, &b, &c, &d);
+  out = a == b ? c : d;
+  printf ("%d\n", out);
+}
-- 
2.31.1



[PING][PATCH v2] Add clang's invalid-noreturn warning flag

2023-09-12 Thread Julian Waters via Gcc-patches
Second desperate ping for patch
https://gcc.gnu.org/pipermail/gcc-patches/2023-August/627913.html


Re: [PATCH] Checking undefined_p before using the vr

2023-09-12 Thread Jiufu Guo via Gcc-patches


Hi,

Richard Biener  writes:

> On Thu, 7 Sep 2023, Jiufu Guo wrote:
>
>> Hi,
>> 
>> As discussed in PR111303:
>> 
>> For pattern "(X + C) / N": "div (plus@3 @0 INTEGER_CST@1) INTEGER_CST@2)",
>> Even if "X" has value-range and "X + C" does not overflow, "@3" may still
>> be undefined. Like below example:
>> 
>> _3 = _2 + -5;
>> if (0 != 0)
>>   goto ; [34.00%]
>> else
>>   goto ; [66.00%]
>> ;;  succ:   3
>> ;;  4
>> 
>> ;; basic block 3, loop depth 0
>> ;;  pred:   2
>> _5 = _3 / 5; 
>> ;;  succ:   4
>> 
>> The whole pattern "(_2 + -5 ) / 5" is in "bb 3", but "bb 3" would be
>> unreachable (because "if (0 != 0)" is always false).
>> And "get_range_query (cfun)->range_of_expr (vr3, @3)" is checked in
>> "bb 3", "range_of_expr" gets an "undefined vr3". Where "@3" is "_5".
>> 
>> So, before using "vr3", it would be safe to check "!vr3.undefined_p ()".
>> 
>> Bootstrap & regtest pass on ppc64{,le} and x86_64.
>> Is this ok for trunk?
>
> OK, but I wonder why ->range_of_expr () doesn't return false for
> undefined_p ()?  While "undefined" technically means we can treat
> it as nonnegative_p (or not, maybe but maybe not both), we seem to
> not want to do that.  So why expose it at all to ranger users
> (yes, internally we in some places want to handle undefined).

I guess, currently, it returns true and then lets the user check
undefined_p, maybe because it tries to only return false if the
type of EXPR is unsupported.

Let "range_of_expr" return false for undefined_p would save checking
undefined_p again when using the APIs.
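
For reference, here is a minimal sketch of the caller-side pattern being
discussed (illustrative only, mirroring the match.pd hunks above; the
operand and statement names are made up):

  value_range vr;
  if (get_range_query (cfun)->range_of_expr (vr, op, stmt)
      && !vr.undefined_p ()
      && vr.nonnegative_p ())
    {
      /* Only rely on VR here.  With the current API, range_of_expr can
         return true for a use in unreachable code, so the explicit
         undefined_p check is what keeps "no values at all" from being
         treated as a nonnegative range.  */
    }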

Committed via r14-3913.

BR,
Jeff (Jiufu Guo)

>
> Richard.
>
>> BR,
>> Jeff (Jiufu Guo)
>> 
>>  PR middle-end/111303
>> 
>> gcc/ChangeLog:
>> 
>>  * match.pd ((X - N * M) / N): Add undefined_p checking.
>>  (X + N * M) / N): Likewise.
>>  ((X + C) div_rshift N): Likewise.
>> 
>> gcc/testsuite/ChangeLog:
>> 
>>  * gcc.dg/pr111303.c: New test.
>> 
>> ---
>>  gcc/match.pd|  3 +++
>>  gcc/testsuite/gcc.dg/pr111303.c | 11 +++
>>  2 files changed, 14 insertions(+)
>>  create mode 100644 gcc/testsuite/gcc.dg/pr111303.c
>> 
>> diff --git a/gcc/match.pd b/gcc/match.pd
>> index 801edb128f9..e2583ca7960 100644
>> --- a/gcc/match.pd
>> +++ b/gcc/match.pd
>> @@ -975,6 +975,7 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
>> /* "X+(N*M)" doesn't overflow.  */
>> && range_op_handler (PLUS_EXPR).overflow_free_p (vr0, vr3)
>> && get_range_query (cfun)->range_of_expr (vr4, @4)
>> +   && !vr4.undefined_p ()
>> /* "X+N*M" is not with opposite sign as "X".  */
>> && (TYPE_UNSIGNED (type)
>> || (vr0.nonnegative_p () && vr4.nonnegative_p ())
>> @@ -995,6 +996,7 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
>> /* "X - (N*M)" doesn't overflow.  */
>> && range_op_handler (MINUS_EXPR).overflow_free_p (vr0, vr3)
>> && get_range_query (cfun)->range_of_expr (vr4, @4)
>> +   && !vr4.undefined_p ()
>> /* "X-N*M" is not with opposite sign as "X".  */
>> && (TYPE_UNSIGNED (type)
>> || (vr0.nonnegative_p () && vr4.nonnegative_p ())
>> @@ -1025,6 +1027,7 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
>>/* "X+C" doesn't overflow.  */
>>&& range_op_handler (PLUS_EXPR).overflow_free_p (vr0, vr1)
>>&& get_range_query (cfun)->range_of_expr (vr3, @3)
>> +  && !vr3.undefined_p ()
>>/* "X+C" and "X" are not of opposite sign.  */
>>&& (TYPE_UNSIGNED (type)
>>|| (vr0.nonnegative_p () && vr3.nonnegative_p ())
>> diff --git a/gcc/testsuite/gcc.dg/pr111303.c 
>> b/gcc/testsuite/gcc.dg/pr111303.c
>> new file mode 100644
>> index 000..eaabe55c105
>> --- /dev/null
>> +++ b/gcc/testsuite/gcc.dg/pr111303.c
>> @@ -0,0 +1,11 @@
>> +/* { dg-do compile } */
>> +/* { dg-options "-O2" } */
>> +
>> +/* Make sure no ICE. */
>> +unsigned char a;
>> +int b(int c) {
>> +  if (c >= 5000)
>> +return c / 5;
>> +}
>> +void d() { b(a - 5); }
>> +int main() {}
>> 


[PATCH v2] LoongArch: Fix bug of '<optab>di3_fake'.

2023-09-12 Thread Lulu Cheng
PR 111334

gcc/ChangeLog:

* config/loongarch/loongarch.md: Fix bug of '<optab>di3_fake'.

gcc/testsuite/ChangeLog:

* gcc.target/loongarch/pr111334.c: New test.
---
v1 -> v2:

Modify the template "*<optab><mode>3", since the SI type division operation
is not supported under the LA64 architecture.
---
 gcc/config/loongarch/loongarch.md | 20 ++
 gcc/testsuite/gcc.target/loongarch/pr111334.c | 39 +++
 2 files changed, 52 insertions(+), 7 deletions(-)
 create mode 100644 gcc/testsuite/gcc.target/loongarch/pr111334.c

diff --git a/gcc/config/loongarch/loongarch.md 
b/gcc/config/loongarch/loongarch.md
index 1dc6b524416..4fcb6d781d5 100644
--- a/gcc/config/loongarch/loongarch.md
+++ b/gcc/config/loongarch/loongarch.md
@@ -72,6 +72,9 @@ (define_c_enum "unspec" [
   UNSPEC_LUI_H_HI12
   UNSPEC_TLS_LOW
 
+  ;; Fake div.w[u] mod.w[u]
+  UNSPEC_FAKE_ANY_DIV
+
   UNSPEC_SIBCALL_VALUE_MULTIPLE_INTERNAL_1
   UNSPEC_CALL_VALUE_MULTIPLE_INTERNAL_1
 ])
@@ -900,7 +903,7 @@ (define_expand "<optab><mode>3"
 (match_operand:GPR 2 "register_operand")))]
   ""
 {
- if (GET_MODE (operands[0]) == SImode)
+ if (GET_MODE (operands[0]) == SImode && TARGET_64BIT)
   {
 rtx reg1 = gen_reg_rtx (DImode);
 rtx reg2 = gen_reg_rtx (DImode);
@@ -920,9 +923,9 @@ (define_expand "3"
 })
 
 (define_insn "*3"
-  [(set (match_operand:GPR 0 "register_operand" "=r,,")
-   (any_div:GPR (match_operand:GPR 1 "register_operand" "r,r,0")
-(match_operand:GPR 2 "register_operand" "r,r,r")))]
+  [(set (match_operand:X 0 "register_operand" "=r,,")
+   (any_div:X (match_operand:X 1 "register_operand" "r,r,0")
+  (match_operand:X 2 "register_operand" "r,r,r")))]
   ""
 {
   return loongarch_output_division (".\t%0,%1,%2", operands);
@@ -938,9 +941,12 @@ (define_insn "*<optab><mode>3"
 (define_insn "<optab>di3_fake"
   [(set (match_operand:DI 0 "register_operand" "=r,&r,&r")
 	(sign_extend:DI
-	  (any_div:SI (match_operand:DI 1 "register_operand" "r,r,0")
-		      (match_operand:DI 2 "register_operand" "r,r,r"))))]
-  ""
+	  (unspec:SI
+	   [(subreg:SI
+	     (any_div:DI (match_operand:DI 1 "register_operand" "r,r,0")
+			 (match_operand:DI 2 "register_operand" "r,r,r")) 0)]
+	  UNSPEC_FAKE_ANY_DIV)))]
+  "TARGET_64BIT"
 {
   return loongarch_output_division ("<insn>.w\t%0,%1,%2", operands);
 }
diff --git a/gcc/testsuite/gcc.target/loongarch/pr111334.c 
b/gcc/testsuite/gcc.target/loongarch/pr111334.c
new file mode 100644
index 000..47366afcb74
--- /dev/null
+++ b/gcc/testsuite/gcc.target/loongarch/pr111334.c
@@ -0,0 +1,39 @@
+/* { dg-do compile } */
+/* { dg-options "-O2" } */
+
+unsigned
+util_next_power_of_two (unsigned x)
+{
+  return (1 << __builtin_clz (x - 1));
+}
+
+extern int create_vec_from_array (void);
+
+struct ac_shader_args {
+struct {
+   unsigned char offset;
+   unsigned char size;
+} args[384];
+};
+
+struct isel_context {
+const struct ac_shader_args* args;
+int arg_temps[384];
+};
+
+
+void
+add_startpgm (struct isel_context* ctx, unsigned short arg_count)
+{
+
+  for (unsigned i = 0, arg = 0; i < arg_count; i++)
+{
+  unsigned size = ctx->args->args[i].size;
+  unsigned reg = ctx->args->args[i].offset;
+
+  if (reg % ( 4 < util_next_power_of_two (size)
+? 4 : util_next_power_of_two (size)))
+ ctx->arg_temps[i] = create_vec_from_array ();
+}
+}
+
-- 
2.31.1



Re: [PATCH] aarch64: Add SVE instruction types

2023-09-12 Thread Evandro Menezes via Gcc-patches
Hi, Kyrill.

I wonder if the regression that you noticed was the same one that I saw.  Overall, 
thus far, there’s no significant regression that I can say is due to 
scheduling.  However, there is one benchmark, 507.cactuBSSN_r/607.cactuBSSN_s 
in SPEC2017, that regressed by more than 10%.  Upon closer examination, it 
seems that the change in the live ranges led to heavy spilling and to doubling 
of the stack size.  The spilling looks rather capricious though, as there seem 
to be enough free registers available.  

Is this similar to what you observed as well?  I tried to adjust the priority 
of memory ops through TARGET_SCHED_ADJUST_PRIORITY, but it was ineffective.  
I’m a bit at a loss as to what’s going on with the RA at this point.  Any 
pointers?

Thank you,

-- 
Evandro Menezes



> On 16 May 2023, at 03:36, Kyrylo Tkachov wrote:
> 
> Hi Evandro,
>  
> I created a new attribute so I didn’t have to extend the “type” attribute 
> that lives in config/arm/types.md. As that attribute and file lives in the 
> arm backend but SVE is AArch64-only I didn’t want to add logic to the arm 
> backend as it’s not truly shared.
> The granularity has been somewhat subjective. I had looked at the Software 
> Optimisation guides for various SVE and SVE2-capable cores from Arm on 
> developer.arm.com and tried to glean commonalities between different 
> instruction groups.
> I did try writing a model for Neoverse V1 using that classification but I 
> couldn’t spend much time on it and the resulting model didn’t give me much 
> improvements and gave some regressions instead.
> I think that was more down to my rushed model rather than anything else 
> though.
>  
> Thanks,
> Kyrill
>  
> From: Evandro Menezes  
> Sent: Monday, May 15, 2023 9:13 PM
> To: Kyrylo Tkachov 
> Cc: Richard Sandiford ; Evandro Menezes via 
> Gcc-patches ; evandro+...@gcc.gnu.org; Tamar 
> Christina 
> Subject: Re: [PATCH] aarch64: Add SVE instruction types
>  
> Hi, Kyrill.
>  
> I wasn’t aware of your previous patch.  Could you clarify why you considered 
> creating an SVE specific type attribute instead of reusing the common one?  I 
> really liked the iterators that you created; I’d like to use them.
>  
> Do you have specific examples which you might want to mention with regards to 
> granularity?
>  
> Yes, my intent for this patch is to enable modeling the SVE instructions on 
> N1.  The patch that implements it brings up some performance improvements, 
> but it’s mostly flat, as expected.
>  
> Thank you,
> 
> -- 
> Evandro Menezes
>  
>  
> 
> 
> Em 15 de mai. de 2023, à(s) 04:49, Kyrylo Tkachov  > escreveu:
>  
> 
> 
> 
> -Original Message-
> From: Richard Sandiford  >
> Sent: Monday, May 15, 2023 10:01 AM
> To: Evandro Menezes via Gcc-patches  >
> Cc: evandro+...@gcc.gnu.org ; Evandro Menezes 
> mailto:ebah...@icloud.com>>;
> Kyrylo Tkachov mailto:kyrylo.tkac...@arm.com>>; 
> Tamar Christina
> mailto:tamar.christ...@arm.com>>
> Subject: Re: [PATCH] aarch64: Add SVE instruction types
> 
> Evandro Menezes via Gcc-patches  > writes:
> 
> This patch adds the attribute `type` to most SVE1 instructions, as in the
> other
> 
> instructions.
> 
> Thanks for doing this.
> 
> Could you say what criteria you used for picking the granularity?  Other
> maintainers might disagree, but personally I'd prefer to distinguish two
> instructions only if:
> 
> (a) a scheduling description really needs to distinguish them or
> (b) grouping them together would be very artificial (because they're
>logically unrelated)
> 
> It's always possible to split types later if new scheduling descriptions
> require it.  Because of that, I don't think we should try to predict ahead
> of time what future scheduling descriptions will need.
> 
> Of course, this depends on having results that show that scheduling
> makes a significant difference on an SVE core.  I think one of the
> problems here is that, when a different scheduling model changes the
> performance of a particular test, it's difficult to tell whether
> the gain/loss is caused by the model being more/less accurate than
> the previous one, or if it's due to important "secondary" effects
> on register live ranges.  Instinctively, I'd have expected these
> secondary effects to dominate on OoO cores.
> 
> I agree with Richard on these points. The key here is getting the granularity 
> right without having to maintain too many types that aren't useful in the 
> models.
> FWIW I had posted 
> https://gcc.gnu.org/pipermail/gcc-patches/2022-November/607101.html in 
> November. It adds annotations to SVE2 patterns as well as for base SVE.
> Feel free to reuse it if you'd like.
> I see you had posted a Neoverse V1 scheduling model. Does that give an 
> improvement on SVE code when combined with the scheduling attributes somehow?
> 

[PATCH] c++: always check arity before deduction

2023-09-12 Thread Patrick Palka via Gcc-patches
Bootstrpaped and regtested on x86_64-pc-linux-gnu, does this look OK for
trunk?

-- >8 --

This simple patch extends the r12-3271-gf1e73199569287 optimization
to apply to deduction without explicit template arguments as well.
The motivation for this is to accept testcases such as conv20.C and
ttp40.C below, which don't use explicit template arguments but for which
unnecessary template instantiation during deduction could be avoided if
we pruned overloads according to arity early in this case as well.  This
incidentally causes us to accept one reduced testcase from PR c++/84075,
but the underlying issue there still remains unfixed.

As an added bonus, this change ends up causing the "candidate expects
N argument(s)" note during overload resolution failure to point to the
template candidate instead of the call site, which seems like an
improvement similar to r14-309-g14e881eb030509.

gcc/cp/ChangeLog:

* call.cc (add_template_candidate_real): Check arity even
when there are no explicit template arguments.  Combine the
two adjacent '!obj' tests into one.

gcc/testsuite/ChangeLog:

* g++.dg/cpp0x/vt-57397-1.C: Expect "candidate expects ... N
argument(s)" at the declaration site instead of the call site.
* g++.dg/cpp0x/vt-57397-2.C: Likewise.
* g++.dg/overload/template5.C: Likewise.
* g++.dg/template/local6.C: Likewise.
* g++.dg/template/conv20.C: New test.
* g++.dg/template/ttp40.C: New test.
---
 gcc/cp/call.cc| 14 ++---
 gcc/testsuite/g++.dg/cpp0x/vt-57397-1.C   |  6 +++---
 gcc/testsuite/g++.dg/cpp0x/vt-57397-2.C   |  6 +++---
 gcc/testsuite/g++.dg/overload/template5.C |  4 ++--
 gcc/testsuite/g++.dg/template/conv20.C| 17 +++
 gcc/testsuite/g++.dg/template/local6.C|  4 ++--
 gcc/testsuite/g++.dg/template/ttp40.C | 25 +++
 7 files changed, 58 insertions(+), 18 deletions(-)
 create mode 100644 gcc/testsuite/g++.dg/template/conv20.C
 create mode 100644 gcc/testsuite/g++.dg/template/ttp40.C

diff --git a/gcc/cp/call.cc b/gcc/cp/call.cc
index 399345307ea..2bbaeee039d 100644
--- a/gcc/cp/call.cc
+++ b/gcc/cp/call.cc
@@ -3535,13 +3535,13 @@ add_template_candidate_real (struct z_candidate 
**candidates, tree tmpl,
 }
   gcc_assert (ia == nargs_without_in_chrg);
 
-  if (!obj && explicit_targs)
+  if (!obj)
 {
   /* Check that there's no obvious arity mismatch before proceeding with
 deduction.  This avoids substituting explicit template arguments
-into the template (which could result in an error outside the
-immediate context) when the resulting candidate would be unviable
-anyway.  */
+into the template or e.g. derived-to-base parm/arg unification
+(which could result in an error outside the immediate context) when
+the resulting candidate would be unviable anyway.  */
   int min_arity = 0, max_arity = 0;
   tree parms = TYPE_ARG_TYPES (TREE_TYPE (tmpl));
   parms = skip_artificial_parms_for (tmpl, parms);
@@ -3571,11 +3571,7 @@ add_template_candidate_real (struct z_candidate 
**candidates, tree tmpl,
  reason = arity_rejection (NULL_TREE, max_arity, ia);
  goto fail;
}
-}
 
-  errs = errorcount+sorrycount;
-  if (!obj)
-{
   convs = alloc_conversions (nargs);
 
   if (shortcut_bad_convs
@@ -3602,6 +3598,8 @@ add_template_candidate_real (struct z_candidate 
**candidates, tree tmpl,
}
}
 }
+
+  errs = errorcount+sorrycount;
   fn = fn_type_unification (tmpl, explicit_targs, targs,
args_without_in_chrg,
nargs_without_in_chrg,
diff --git a/gcc/testsuite/g++.dg/cpp0x/vt-57397-1.C 
b/gcc/testsuite/g++.dg/cpp0x/vt-57397-1.C
index 440bea5b2f7..bac3b64ad7e 100644
--- a/gcc/testsuite/g++.dg/cpp0x/vt-57397-1.C
+++ b/gcc/testsuite/g++.dg/cpp0x/vt-57397-1.C
@@ -3,20 +3,20 @@
 
 template
 void foo(T1, Tn...);
+// { dg-message "candidate expects at least 1 argument, 0 provided" "" { 
target *-*-* } .-1 }
 
 template
 void bar(T1, T2, Tn...);
+// { dg-message "candidate expects at least 2 arguments, 0 provided" "" { 
target *-*-* } .-1 }
+// { dg-message "candidate expects at least 2 arguments, 1 provided" "" { 
target *-*-* } .-2 }
 
 int main()
 {
   foo();   // { dg-error "no matching" }
-  // { dg-message "candidate expects at least 1 argument, 0 provided" "" { 
target *-*-* } .-1 }
   foo(1);
   foo(1, 2);
   bar();   // { dg-error "no matching" }
-  // { dg-message "candidate expects at least 2 arguments, 0 provided" "" { 
target *-*-* } .-1 }
   bar(1);  // { dg-error "no matching" }
-  // { dg-message "candidate expects at least 2 arguments, 1 provided" "" { 
target *-*-* } .-1 }
   bar(1, 2);
   bar(1, 2, 3);
 }
diff --git a/gcc/testsuite/g++.dg/cpp0x/vt-57397-2.C 
b/gcc/testsuite/g++.dg/cpp0x/vt-57397-2.C
index 1a99e22c5cb..22b19ef6c1a 100644
--- 

[PATCH] MATCH: Simplify `(X % Y) < Y` pattern.

2023-09-12 Thread Andrew Pinski via Gcc-patches
This merges the two patterns to catch
`(X % Y) < Y` and `Y > (X % Y)` into one by
using :c on the comparison operator.
It does not change any code generation nor
anything else. It is more to allow for better
maintainability of this pattern.

OK? Bootstrapped and tested on x86_64-linux-gnu.

gcc/ChangeLog:

* match.pd (`Y > (X % Y)`): Merge
into ...
(`(X % Y) < Y`): Pattern by adding `:c`
on the comparison.
---
 gcc/match.pd | 7 +--
 1 file changed, 1 insertion(+), 6 deletions(-)

diff --git a/gcc/match.pd b/gcc/match.pd
index 39c7ea1088f..24fd29863fb 100644
--- a/gcc/match.pd
+++ b/gcc/match.pd
@@ -1483,14 +1483,9 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
 /* X % Y is smaller than Y.  */
 (for cmp (lt ge)
  (simplify
-  (cmp (trunc_mod @0 @1) @1)
+  (cmp:c (trunc_mod @0 @1) @1)
   (if (TYPE_UNSIGNED (TREE_TYPE (@0)))
{ constant_boolean_node (cmp == LT_EXPR, type); })))
-(for cmp (gt le)
- (simplify
-  (cmp @1 (trunc_mod @0 @1))
-  (if (TYPE_UNSIGNED (TREE_TYPE (@0)))
-   { constant_boolean_node (cmp == GT_EXPR, type); })))
 
 /* x | ~0 -> ~0  */
 (simplify
-- 
2.31.1



[PING][PATCH 2/2 v2] Ada: Finalization of constrained subtypes of unconstrained synchronized private extensions

2023-09-12 Thread Richard Wai



> On Aug 23, 2023, at 10:24, Richard Wai  wrote:
> 
> Somehow an error worked its way into the original diff (the diff itself), 
> making the previous patch fail to apply.
>  
> Fixed version attached.
>  
> Richard Wai
> ANNEXI-STRAYLINE
>  
> From: Richard Wai  > 
> Sent: Thursday, August 10, 2023 1:27 AM
> To: 'gcc-patches@gcc.gnu.org ' 
> mailto:gcc-patches@gcc.gnu.org>>
> Cc: 'Eric Botcazou' mailto:ebotca...@adacore.com>>; 
> 'Arnaud Charlet' mailto:char...@adacore.com>>; 'Stephen 
> Baird' mailto:ba...@adacore.com>>
> Subject: [PATCH 2/2] Ada: Finalization of constrained subtypes of 
> unconstrained synchronized private extensions
>  
> When generating TSS address finalization bodies for a tagged class-wide 
> subtype, GNAT climbs the parent chain looking for the first “non-constrained” 
> type. That type’s underlying type’s class-wide type is used as a “designated” 
> type for a dispatching TSS deep finalize call to the designated class-wide 
> type. In the case of a constrained subtype of an unconstrained synchronized 
> private extension, this ends up designating the underlying type of that 
> private extension. This means it targets the class-wide type of the actual 
> underlying concurrent type rather than the corresponding record. Ultimately 
> it ends up generating a call to the corresponding record’s deep finalizer, 
> but with incompatible types (concurrent_type’Class -> 
> concurrent_typeV’Class). This causes compilation to fail.
>  
> This patch adds extra logic to exp_ch7(Make_Finalize_Address_Stmts) to 
> identify such cases and ensure that the designated type is the corresponding 
> record type’s class-wide type in that situation.
>  
> Patch file is attached.
>  
> --  Begin change log entry –
>  
> ada: TSS finalize address subprogram generation for constrained subtypes of 
> unconstrained synchronized private extensions should take care to designate 
> the corresponding record of the underlying concurrent type.
>  
> When generating TSS finalize address subprograms for class-wide types of 
> constrained root types, it follows the parent chain looking for the first 
> “non-constrained” type. It is possible that such a type is a private 
> extension with the “synchronized” keyword, in which case the underlying type 
> is a concurrent type. When that happens, the designated type of the finalize 
> address subprogram should be the corresponding record’s class-wide-type.
>  
> Gcc/ada/
> * exp_ch3(Expand_Freeze_Class_Wide_Type): Expanded comments 
> explaining why TSS Finalize_Address is not generated for concurrent 
> class-wide types.
> * exp_ch7(Make_Finalize_Address_Stmts): Handle cases where 
> the underlying non-constrained parent type is a concurrent type, and adjust 
> the designated type to be the corresponding record’s class-wide type.
>  
> --  End change log entry –
>  
> This patch was bootstrapped on x86_64-*-freebsd13.2. One new test case was 
> added. Note that 4 gnat test cases fail currently on master and are unrelated 
> to this patch.
>  
> Check-ada output of this patch:
>  
> === acats tests ===
> Running chapter a ...
> Running chapter c2 ...
> Running chapter c3 ...
> Running chapter c4 ...
> Running chapter c5 ...
> Running chapter c6 ...
> Running chapter c7 ...
> Running chapter c8 ...
> Running chapter c9 ...
> Running chapter ca ...
> Running chapter cb ...
> Running chapter cc ...
> Running chapter cd ...
> Running chapter ce ...
> Running chapter cxa ...
> Running chapter cxb ...
> Running chapter cxf ...
> Running chapter cxg ...
> Running chapter cxh ...
> Running chapter cz ...
> Running chapter d ...
> Running chapter e ...
> Running chapter l ...
> === acats Summary ===
> # of expected passes   2328
> # of unexpected failures 0
>  
> Native configuration is x86_64-unknown-freebsd13.2
>  
> === gnat tests ===
>  
> Schedule of variations:
> unix
>  
> Running target unix
> FAIL: gnat.dg/specs/alignment2.ads  (test for warnings, line 14)
> FAIL: gnat.dg/specs/alignment2.ads  (test for warnings, line 20)
> FAIL: gnat.dg/specs/alignment2.ads  (test for warnings, line 38)
> FAIL: gnat.dg/specs/alignment2.ads  (test for warnings, line 42)
>  
> === gnat Summary ===
>  
> # of expected passes   3401
> # of unexpected failures 4
> # of expected failures  23
> # of unsupported tests   10
> gnatmake version 14.0.0 20230809 (experimental)
>  
>  
> Richard Wai
> ANNEXI-STRAYLINE



[PATCH 1/2 v2] Ada: Synchronized private extensions are always limited

2023-09-12 Thread Richard Wai
Hi Arno,

No worries, and sorry for the trouble. I’m going to try using a different 
client for the gcc mailing list, it doesn’t seem to like Outlook. Thanks for 
catching that mistake!

Please advise how I can get this patch actually applied, given my lack of 
commit privilege.

Revised patch attached!

Thanks!



ada-synchronized-private-types-are-limited-v2.patch
Description: Binary data



> On Sep 1, 2023, at 08:08, Arnaud Charlet  wrote:
> 
>> For some reason, your email is endeing up in a strange format, I almost
>> missed the .patch file attached, making the review harder.
> 
> Never mind, I was on vacation earlier this month and then busy with a seminar 
> last week, so I started looking at your ping email before the original email 
> which did contain the patch easily found, sorry for the noise!
> 
> Arno



Re: [PATCH v3] c++: Move consteval folding to cp_fold_r

2023-09-12 Thread Jason Merrill via Gcc-patches

On 9/8/23 14:24, Marek Polacek wrote:

On Thu, Sep 07, 2023 at 02:32:51PM -0400, Jason Merrill wrote:

On 9/7/23 11:23, Marek Polacek wrote:

On Tue, Sep 05, 2023 at 04:36:34PM -0400, Jason Merrill wrote:

On 9/5/23 15:59, Marek Polacek wrote:

On Tue, Sep 05, 2023 at 10:52:04AM -0400, Jason Merrill wrote:

On 9/1/23 13:23, Marek Polacek wrote:

Bootstrapped/regtested on x86_64-pc-linux-gnu, ok for trunk?

-- >8 --

In the review of P2564:

it turned out that in order to correctly handle an example in the paper,
we should stop doing immediate evaluation in build_over_call and
bot_replace, and instead do it in cp_fold_r.  This patch does that.

Another benefit is that this is a pretty significant simplification, at
least in my opinion.  Also, this fixes the c++/110997 ICE (but the test
doesn't compile yet).

The main drawback seems to be that cp_fold_r doesn't process as much
code as we did before: uninstantiated templates


That's acceptable, it's an optional diagnostic.


and things like "false ? foo () : 1".


This is a problem.  Maybe we want cp_fold_r to recurse into the arms of a
COND_EXPR before folding them away?  Maybe only if we know we've seen an
immediate function?


Unfortunately we had already thrown the dead branch away when we got to
cp_fold_r.  I wonder if we have to adjust cxx_eval_conditional_expression
to call cp_fold_r on the dead branch too,


Hmm, I guess so.


perhaps with a new ff_ flag to skip the whole second switch in cp_fold_r?


Or factor out the immediate function handling to a separate walk function
that cp_fold_r also calls?


I did that.

But then it's possible that the in_immediate_context checks have to stay.


We can just not do the walk in immediate (or mce_true) context, like we
currently avoid calling cp_fold_function.


Right.  Unfortunately I have to check even when mce_true, consider

consteval int bar (int i) { if (i != 1) throw 1; return 0; }
constexpr int a = 0 ? bar(3) : 3;


I disagree; the call is in a manifestly constant-evaluated expression, and
so is now considered an immediate function context, and we should accept
that example.


Ack.  I was still living in pre-P2564 world.
  

For mce_unknown I guess we'd want
to set *non_constant_p instead of giving an error.


I did not do this because I haven't found a case where it would make
a difference.


I think it will given the above comment.


Correct.  For instance, in:

   consteval int bar (int i) { if (i != 1) throw 1; return 0; }

   constexpr int
   foo (bool b)
   {
 return b ? bar (3) : 2;
   }

   static_assert (foo (false) == 2);

we should complain only once.  I've implemented your suggestion to set
*non_constant_p instead of giving an error for mce_unknown.


diff --git a/gcc/cp/constexpr.cc b/gcc/cp/constexpr.cc
index 0ca4370deab..397d5c7ec3f 100644
--- a/gcc/cp/constexpr.cc
+++ b/gcc/cp/constexpr.cc
@@ -2311,6 +2311,29 @@ cxx_dynamic_cast_fn_p (tree fndecl)
  && CP_DECL_CONTEXT (fndecl) == abi_node);
   }
+/* Return true if we are in the body of a consteval function. > +   This is in 
addition to in_immediate_context because that
+   uses current_function_decl which may not be available.  CTX is
+   the current constexpr context.  */
+
+static bool
+in_immediate_context (const constexpr_ctx *ctx)
+{
+  if (in_immediate_context ())
+return true;


Can't we check for mce_true here instead of looking at the call chain?


Yes.
  

+/* A wrapper around cp_fold_immediate_r.  */
+
+void
+cp_fold_immediate (tree *tp)
+{


Maybe return early if consteval isn't supported in the active standard?


Absolutely.

Bootstrapped/regtested on x86_64-pc-linux-gnu, ok for trunk?

-- >8 --
In the review of P2564:

it turned out that in order to correctly handle an example in the paper,
we should stop doing immediate evaluation in build_over_call and
bot_replace, and instead do it in cp_fold_r.  This patch does that.

Another benefit is that this is a pretty significant simplification, at
least in my opinion.  Also, this fixes the c++/110997 ICE (but the test
doesn't compile yet).

The main drawback seems to be that cp_fold_r doesn't process
uninstantiated templates.  We still have to handle things like
"false ? foo () : 1".  To that end, I've added cp_fold_immediate, called
on dead branches in cxx_eval_conditional_expression.

You'll see that I've reintroduced ADDR_EXPR_DENOTES_CALL_P here.  This
is to detect

   (*(&f)) ()
   (s.*&S::foo) ()

which were deemed ill-formed.

gcc/cp/ChangeLog:

* call.cc (build_over_call): Set ADDR_EXPR_DENOTES_CALL_P.  Don't handle
immediate_invocation_p here.
* constexpr.cc (in_immediate_context): New overload.
(cxx_eval_call_expression): Use mce_true for DECL_IMMEDIATE_FUNCTION_P.
(cxx_eval_conditional_expression): Call cp_fold_immediate.
* cp-gimplify.cc (maybe_replace_decl): Make 

Re: libgo: Consider '--with-build-sysroot=[...]' for target libraries' build-tree testing (instead of build-time 'CC' etc.) [PR109951] (was: [PATCH 3/4] libgo/test: Fix compilation for build sysroot)

2023-09-12 Thread Ian Lance Taylor via Gcc-patches
On Tue, Sep 12, 2023 at 4:16 AM Thomas Schwinge  wrote:
>
> As we've found, this is conceptually problematic, as discussed in
> 
> "Consider '--with-build-sysroot=[...]' for target libraries' build-tree 
> testing (instead of build-time 'CC' etc.)
> [PR109951]".
> I therefore suggest to apply to libgo the conceptually same changes
> as I've just pushed for libgomp:
> 
> "libgomp: Consider '--with-build-sysroot=[...]' for target libraries' 
> build-tree testing (instead of build-time 'CC'
> etc.) [PR91884, PR109951]".
> OK to push (via Ian/Go upstream) the attached
> "libgo: Consider '--with-build-sysroot=[...]' for target libraries' 
> build-tree testing (instead of build-time 'CC' etc.) [PR109951]"?
>
> By the way, I've tested this one via hard-coding
> 'libgo/configure.ac:USE_DEJAGNU' to 'yes', and observing that my
> "quick hack to replicate the original requirement"
> ('internal_error ("MISSING SYSROOT");') no longer triggers.

Thanks.  Committed.

Ian


Re: [PATCH] preprocessor: c++: Support `#pragma GCC target' macros [PR87299]

2023-09-12 Thread Lewis Hyatt via Gcc-patches
On Tue, Aug 8, 2023 at 5:53 PM Jason Merrill  wrote:
>
> On 7/31/23 22:22, Lewis Hyatt via Gcc-patches wrote:
> > `#pragma GCC target' is not currently handled in preprocess-only mode (e.g.,
> > when running gcc -E or gcc -save-temps). As noted in the PR, this means that
> > if the target pragma defines any macros, those macros are not effective in
> > preprocess-only mode. Similarly, such macros are not effective when
> > compiling with C++ (even when compiling without -save-temps), because C++
> > does not process the pragma until after all tokens have been obtained from
> > libcpp, at which point it is too late for macro expansion to take place.
> >
> > Since r13-1544 and r14-2893, there is a general mechanism to handle pragmas
> > under these conditions as well, so resolve the PR by using the new "early
> > pragma" support.
> >
> > toplev.cc required some changes because the target-specific handlers for
> > `#pragma GCC target' may call target_reinit(), and toplev.cc was not 
> > expecting
> > that function to be called in preprocess-only mode.
> >
> > I added some additional testcases from the PR for x86. The other targets
> > that support `#pragma GCC target' (aarch64, arm, nios2, powerpc, s390)
> > already had tests verifying that the pragma sets macros as expected; here I
> > have added -save-temps to some of them, to test that it now works in
> > preprocess-only mode as well.
> >
> > gcc/c-family/ChangeLog:
> >
> >   PR preprocessor/87299
> >   * c-pragma.cc (init_pragma): Register `#pragma GCC target' and
> >   related pragmas in preprocess-only mode, and enable early handling.
> >   (c_reset_target_pragmas): New function refactoring code from...
> >   (handle_pragma_reset_options): ...here.
> >   * c-pragma.h (c_reset_target_pragmas): Declare.
> >
> > gcc/cp/ChangeLog:
> >
> >   PR preprocessor/87299
> >   * parser.cc (cp_lexer_new_main): Call c_reset_target_pragmas ()
> >   after preprocessing is complete, before starting compilation.
> >
> > gcc/ChangeLog:
> >
> >   PR preprocessor/87299
> >   * toplev.cc (no_backend): New static global.
> >   (finalize): Remove argument no_backend, which is now a
> >   static global.
> >   (process_options): Likewise.
> >   (do_compile): Likewise.
> >   (target_reinit): Don't do anything in preprocess-only mode.
> >   (toplev::main): Adapt to no_backend change.
> >   (toplev::finalize): Likewise.
> >
> > gcc/testsuite/ChangeLog:
> >
> >   PR preprocessor/87299
> >   * c-c++-common/pragma-target-1.c: New test.
> >   * c-c++-common/pragma-target-2.c: New test.
> >   * g++.target/i386/pr87299-1.C: New test.
> >   * g++.target/i386/pr87299-2.C: New test.
> >   * gcc.target/i386/pr87299-1.c: New test.
> >   * gcc.target/i386/pr87299-2.c: New test.
> >   * gcc.target/s390/target-attribute/tattr-2.c: Add -save-temps to the
> >   options, to test preprocess-only mode as well.
> >   * gcc.target/aarch64/pragma_cpp_predefs_1.c: Likewise.
> >   * gcc.target/arm/pragma_arch_attribute.c: Likewise.
> >   * gcc.target/nios2/custom-fp-2.c: Likewise.
> >   * gcc.target/powerpc/float128-3.c: Likewise.
> > ---
> >
> > Notes:
> >  Hello-
> >
> >  This patch fixes the PR by enabling early pragma handling for `#pragma 
> > GCC
> >  target' and related pragmas such as `#pragma GCC push_options'. I did 
> > not
> >  need to touch any target-specific code, however I did need to make a 
> > change
> >  to toplev.cc, affecting all targets, to make it safe to call 
> > target_reinit()
> >  in preprocess-only mode. (Otherwise, it would be necessary to modify 
> > the
> >  implementation of target pragmas in every target, to avoid this code 
> > path.)
> >  That was the only complication I ran into.
> >
> >  Regarding testing, I did: (thanks to GCC compile farm for the non-x86
> >  targets)
> >
> >  bootstrap + regtest all languages - x86_64-pc-linux-gnu
> >  bootstrap + regtest c/c++ - powerpc64le-unknown-linux-gnu,
> >  aarch64-unknown-linux-gnu
> >
> >  The following backends also implement this pragma so ought to be 
> > tested:
> >  arm
> >  nios2
> >  s390
> >
> >  I am not able to test those directly. I did add coverage to their 
> > testsuites
> >  (basically, adding -save-temps to any existing test, causes it to test 
> > the
> >  pragma in preprocess-only mode.) Then, I verified on x86_64 with a 
> > cross
> >  compiler, that the modified testcases fail before the patch and pass
> >  afterwards. nios2 is an exception, it does not set any libcpp macros 
> > when
> >  handling the pragma, so there is nothing to test, but I did verify that
> >  processing the pragma in preprocess-only mode does not cause any 
> > problems.
> >  The cross compilers tested were targets arm-unknown-linux-gnueabi,
> >  

Re: [PATCH V6] RISC-V: Enable vec_int testsuite for RVV VLA vectorization

2023-09-12 Thread Robin Dapp via Gcc-patches
> Most (all?) of those are due to:
> f951: Warning: command-line option '-Wno-psabi' is valid for 
> C/C++/D/LTO/ObjC/ObjC++ but not for Fortran
> so no real bug.

When pushing this, I'd take the liberty of enabling the recently merged vector
ABI so we don't require -Wno-psabi anymore.  All Fortran FAILs disappear and
nothing else changes.

--- a/gcc/testsuite/lib/target-supports.exp
+++ b/gcc/testsuite/lib/target-supports.exp
@@ -11166,12 +11166,12 @@ proc check_vect_support_and_set_flags { } {
 } elseif [istarget riscv64-*-*] {
if [check_effective_target_riscv_vector_hw] {
lappend DEFAULT_VECTCFLAGS "--param" 
"riscv-autovec-preference=scalable"
-   lappend DEFAULT_VECTCFLAGS "-Wno-psabi"
+   lappend DEFAULT_VECTCFLAGS "--param" "riscv-vector-abi"
set dg-do-what-default run
} else {
lappend DEFAULT_VECTCFLAGS "-march=rv64gcv_zvfh" "-mabi=lp64d"
lappend DEFAULT_VECTCFLAGS "--param" 
"riscv-autovec-preference=scalable"
-   lappend DEFAULT_VECTCFLAGS "-Wno-psabi"
+   lappend DEFAULT_VECTCFLAGS "--param" "riscv-vector-abi"
set dg-do-what-default compile
}
 } else {

Regards
 Robin



Re: [PATCH] ggc, jit: forcibly clear GTY roots in jit

2023-09-12 Thread Antoni Boucher via Gcc-patches
I added it to bugzilla here:
https://gcc.gnu.org/bugzilla/show_bug.cgi?id=111396

Since this only reproduces part of the issue, please let me test again
with rustc_codegen_gcc after adding the missing fix.

I confirmed that the fix is in
https://github.com/antoyo/gcc/commit/9d5b6b20efa20825926196759d50706a604c64a8
so you might as well include all of this (except the linetable
condition in toplev.cc).
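
In case it is useful, here is a rough hand-written C approximation of the
reproducer (this is not the output of gcc_jit_context_dump_reproducer_to_file,
just a sketch of the Rust program quoted below translated to the libgccjit C
API):

#include <libgccjit.h>

int
main (void)
{
  /* Repeat the create/compile/release cycle, as the Rust loop does.  */
  for (int i = 0; i < 5; i++)
    {
      gcc_jit_context *ctxt = gcc_jit_context_acquire ();
      gcc_jit_context_add_command_line_option (ctxt, "-flto");
      gcc_jit_context_set_int_option (ctxt,
                                      GCC_JIT_INT_OPTION_OPTIMIZATION_LEVEL,
                                      3);
      gcc_jit_context_add_driver_option (ctxt, "-nostdlib");

      gcc_jit_type *int_type
        = gcc_jit_context_get_type (ctxt, GCC_JIT_TYPE_INT);
      gcc_jit_function *func
        = gcc_jit_context_new_function (ctxt, NULL,
                                        GCC_JIT_FUNCTION_EXPORTED,
                                        int_type, "main", 0, NULL, 0);
      gcc_jit_block *block = gcc_jit_function_new_block (func, "start");
      gcc_jit_rvalue *value
        = gcc_jit_context_new_rvalue_from_int (ctxt, int_type, 42);
      gcc_jit_block_end_with_return (block, NULL, value);

      gcc_jit_context_compile_to_file (ctxt, GCC_JIT_OUTPUT_KIND_EXECUTABLE,
                                       "my_exe");
      gcc_jit_context_release (ctxt);
    }
  return 0;
}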

On Tue, 2023-09-12 at 14:38 -0400, David Malcolm wrote:
> On Tue, 2023-09-12 at 13:36 -0400, Antoni Boucher wrote:
> > In the mean time, here's a (Rust) reproducer for the issue:
> > 
> > fn main() {
> >     for _ in 0..5 {
> >     let context = Context::default();
> >     context.add_command_line_option("-flto");
> >    
> > context.set_optimization_level(OptimizationLevel::Aggressive);
> >     context.add_driver_option("-nostdlib");
> > 
> >     let int_type = context.new_type::<i32>();
> > 
> >     let function = context.new_function(None,
> > FunctionType::Exported, int_type, &[], "main", false);
> >     let block = function.new_block("start");
> >     let value = context.new_rvalue_from_int(int_type, 42);
> >     block.end_with_return(None, value);
> > 
> >     context.compile_to_file(OutputKind::Executable, "my_exe");
> >     }
> > }
> 
> Can we get this in bugzilla please?  If you generate a .c version of
> the context (via gcc_jit_context_dump_reproducer_to_file) I can try
> to
> debug it.
> 
> Thanks
> Dave
> 



[V2] RISC-V: Replace not + bitwise_imm with li + bitwise_not

2023-09-12 Thread Jivan Hakobyan via Gcc-patches
In the case when we have C code like this

int foo (int a) {
   return 100 & ~a;
}

GCC generates the following instruction sequence

foo:
 not a0,a0
 andi a0,a0,100
 ret

This patch replaces that with this sequence
foo:
 li a5,100
 andn a0,a5,a0
 ret

The profitability comes from an out-of-order processor being able to
issue the "li a5, 100" at any time after it's fetched while "not a0, a0" has
to wait until any prior setter of a0 has reached completion.


gcc/ChangeLog:
* config/riscv/bitmanip.md (*<optab>_not_const): New split
pattern.

gcc/testsuite/ChangeLog:
* gcc.target/riscv/zbb-andn-orn-01.c: New test.
* gcc.target/riscv/zbb-andn-orn-02.c: Likewise.


-- 
With the best regards
Jivan Hakobyan
diff --git a/gcc/config/riscv/bitmanip.md b/gcc/config/riscv/bitmanip.md
index 0d126a8ece54aefba66a07690d87bb54c04d1f93..0f45bad14d04b6e891a764cf115e1fadbbb2200b 100644
--- a/gcc/config/riscv/bitmanip.md
+++ b/gcc/config/riscv/bitmanip.md
@@ -215,6 +215,18 @@
   [(set_attr "type" "bitmanip")
(set_attr "mode" "")])
 
+(define_insn_and_split "*<optab>_not_const"
+  [(set (match_operand:X 0 "register_operand" "=r")
+   (bitmanip_bitwise:X (not:X (match_operand:X 1 "register_operand" "r"))
+  (match_operand:X 2 "const_arith_operand" "I")))
+  (clobber (match_scratch:X 3 "=&r"))]
+  "(TARGET_ZBB || TARGET_ZBKB) && !TARGET_ZCB
+   && !optimize_function_for_size_p (cfun)"
+  "#"
+  "&& reload_completed"
+  [(set (match_dup 3) (match_dup 2))
+   (set (match_dup 0) (bitmanip_bitwise:X (not:X (match_dup 1)) (match_dup 3)))])
+
 ;; '(a >= 0) ? b : 0' is emitted branchless (from if-conversion).  Without a
 ;; bit of extra help for combine (i.e., the below split), we end up emitting
 ;; not/srai/and instead of combining the not into an andn.
diff --git a/gcc/testsuite/gcc.target/riscv/zbb-andn-orn-01.c b/gcc/testsuite/gcc.target/riscv/zbb-andn-orn-01.c
new file mode 100644
index ..f9f32227bd58336dd6e0049ad324208b74940420
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/zbb-andn-orn-01.c
@@ -0,0 +1,17 @@
+/* { dg-do compile } */
+/* { dg-options "-march=rv64gc_zbb -mabi=lp64" } */
+/* { dg-skip-if "" { *-*-* } { "-O0" "-g" "-Oz" "-Os" } } */
+
+int foo1(int rs1)
+{
+  return 100 & ~rs1;
+}
+
+int foo2(int rs1)
+{
+  return 100 | ~rs1;
+}
+
+/* { dg-final { scan-assembler-times "andn\t" 1 } } */
+/* { dg-final { scan-assembler-times "orn\t" 1 } } */
+/* { dg-final { scan-assembler-times "li\t" 2 } } */
diff --git a/gcc/testsuite/gcc.target/riscv/zbb-andn-orn-02.c b/gcc/testsuite/gcc.target/riscv/zbb-andn-orn-02.c
new file mode 100644
index ..112c0fa968eb6047bad9b196e6afd6aab66f527f
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/zbb-andn-orn-02.c
@@ -0,0 +1,17 @@
+/* { dg-do compile } */
+/* { dg-options "-march=rv32gc_zbb -mabi=ilp32" } */
+/* { dg-skip-if "" { *-*-* } { "-O0" "-g" "-Oz" "-Os" } } */
+
+int foo1(int rs1)
+{
+  return 100 & ~rs1;
+}
+
+int foo2(int rs1)
+{
+  return 100 | ~rs1;
+}
+
+/* { dg-final { scan-assembler-times "andn\t" 1 } } */
+/* { dg-final { scan-assembler-times "orn\t" 1 } } */
+/* { dg-final { scan-assembler-times "li\t" 2 } } */


[PATCH] check_GNU_style.py: Skip .md square bracket linting

2023-09-12 Thread Patrick O'Neill
This check causes lots of false positives for machine description files.

contrib/ChangeLog:

* check_GNU_style_lib.py: Skip machine description file bracket linting.

Signed-off-by: Patrick O'Neill 
---
 contrib/check_GNU_style_lib.py | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/contrib/check_GNU_style_lib.py b/contrib/check_GNU_style_lib.py
index 94a742941cf..5096b1333f3 100755
--- a/contrib/check_GNU_style_lib.py
+++ b/contrib/check_GNU_style_lib.py
@@ -182,6 +182,9 @@ class SquareBracketCheck:
 self.re = re.compile('\w\s+(\[)')

 def check(self, filename, lineno, line):
+if filename.endswith('.md'):
+return None
+
 m = self.re.search(line)
 if m != None:
 return CheckError(filename, lineno,
--
2.34.1



Re: [committed] libstdc++: Format Python code according to PEP8

2023-09-12 Thread Eric Gallager via Gcc-patches
On Tue, Sep 12, 2023 at 7:46 AM Jonathan Wakely via Gcc-patches
 wrote:
>
> Tested x86_64-linux. Pushed to trunk.
>
> -- >8 --
>
> These files were filtered through autopep8 to reformat them more
> conventionally.
>

Thanks for this; I'm wondering if it might be worthwhile to do
likewise for other python scripts elsewhere in the repository? e.g. in
contrib/

> libstdc++-v3/ChangeLog:
>
> * python/libstdcxx/v6/printers.py: Reformat.
> * python/libstdcxx/v6/xmethods.py: Likewise.
> ---
>  libstdc++-v3/python/libstdcxx/v6/printers.py | 651 +++
>  libstdc++-v3/python/libstdcxx/v6/xmethods.py |  58 +-
>  2 files changed, 446 insertions(+), 263 deletions(-)
>
> diff --git a/libstdc++-v3/python/libstdcxx/v6/printers.py 
> b/libstdc++-v3/python/libstdcxx/v6/printers.py
> index 37a447b514b..c0056de2565 100644
> --- a/libstdc++-v3/python/libstdcxx/v6/printers.py
> +++ b/libstdc++-v3/python/libstdcxx/v6/printers.py
> @@ -18,10 +18,12 @@
>  import gdb
>  import itertools
>  import re
> -import sys, os, errno
> +import sys
> +import os
> +import errno
>  import datetime
>
> -### Python 2 + Python 3 compatibility code
> +# Python 2 + Python 3 compatibility code
>
>  # Resources about compatibility:
>  #
> @@ -38,7 +40,7 @@ import datetime
>  # 
>
>  if sys.version_info[0] > 2:
> -### Python 3 stuff
> +# Python 3 stuff
>  Iterator = object
>  # Python 3 folds these into the normal functions.
>  imap = map
> @@ -47,7 +49,7 @@ if sys.version_info[0] > 2:
>  long = int
>  _utc_timezone = datetime.timezone.utc
>  else:
> -### Python 2 stuff
> +# Python 2 stuff
>  class Iterator:
>  """Compatibility mixin for iterators
>
> @@ -98,6 +100,8 @@ except ImportError:
>  # Starting with the type ORIG, search for the member type NAME.  This
>  # handles searching upward through superclasses.  This is needed to
>  # work around http://sourceware.org/bugzilla/show_bug.cgi?id=13615.
> +
> +
>  def find_type(orig, name):
>  typ = orig.strip_typedefs()
>  while True:
> @@ -116,8 +120,10 @@ def find_type(orig, name):
>  else:
>  raise ValueError("Cannot find type %s::%s" % (str(orig), name))
>
> +
>  _versioned_namespace = '__8::'
>
> +
>  def lookup_templ_spec(templ, *args):
>  """
>  Lookup template specialization templ
> @@ -139,6 +145,8 @@ def lookup_templ_spec(templ, *args):
>
>  # Use this to find container node types instead of find_type,
>  # see https://gcc.gnu.org/bugzilla/show_bug.cgi?id=91997 for details.
> +
> +
>  def lookup_node_type(nodename, containertype):
>  """
>  Lookup specialization of template NODENAME corresponding to 
> CONTAINERTYPE.
> @@ -168,6 +176,7 @@ def lookup_node_type(nodename, containertype):
>  pass
>  return None
>
> +
>  def is_member_of_namespace(typ, *namespaces):
>  """
>  Test whether a type is a member of one of the specified namespaces.
> @@ -181,6 +190,7 @@ def is_member_of_namespace(typ, *namespaces):
>  return True
>  return False
>
> +
>  def is_specialization_of(x, template_name):
>  """
>  Test whether a type is a specialization of the named class template.
> @@ -195,12 +205,14 @@ def is_specialization_of(x, template_name):
>  return re.match('^std::(%s)?%s<.*>$' % (_versioned_namespace, 
> template_name), x) is not None
>  return re.match('^std::%s<.*>$' % template_name, x) is not None
>
> +
>  def strip_versioned_namespace(typename):
>  global _versioned_namespace
>  if _versioned_namespace:
>  return typename.replace(_versioned_namespace, '')
>  return typename
>
> +
>  def strip_inline_namespaces(type_str):
>  "Remove known inline namespaces from the canonical name of a type."
>  type_str = strip_versioned_namespace(type_str)
> @@ -212,6 +224,7 @@ def strip_inline_namespaces(type_str):
>  type_str = type_str.replace(fs_ns+'v1::', fs_ns)
>  return type_str
>
> +
>  def get_template_arg_list(type_obj):
>  "Return a type's template arguments as a list"
>  n = 0
> @@ -223,6 +236,7 @@ def get_template_arg_list(type_obj):
>  return template_args
>  n += 1
>
> +
>  class SmartPtrIterator(Iterator):
>  "An iterator for smart pointer types with a single 'child' value"
>
> @@ -238,28 +252,29 @@ class SmartPtrIterator(Iterator):
>  self.val, val = None, self.val
>  return ('get()', val)
>
> +
>  class SharedPointerPrinter:
>  "Print a shared_ptr, weak_ptr, atomic, or atomic"
>
> -def __init__ (self, typename, val):
> +def __init__(self, typename, val):
>  self.typename = strip_versioned_namespace(typename)
>  self.val = val
>  self.pointer = val['_M_ptr']
>
> -def children (self):
> +def children(self):
>  return SmartPtrIterator(self.pointer)
>
>  # Return the _Sp_counted_base<>* that holds the refcounts.
> -   

Re: [PATCH] ggc, jit: forcibly clear GTY roots in jit

2023-09-12 Thread David Malcolm via Gcc-patches
On Tue, 2023-09-12 at 13:36 -0400, Antoni Boucher wrote:
> In the mean time, here's a (Rust) reproducer for the issue:
> 
> fn main() {
>     for _ in 0..5 {
>     let context = Context::default();
>     context.add_command_line_option("-flto");
>    
> context.set_optimization_level(OptimizationLevel::Aggressive);
>     context.add_driver_option("-nostdlib");
> 
>     let int_type = context.new_type::();
> 
>     let function = context.new_function(None,
> FunctionType::Exported, int_type, &[], "main", false);
>     let block = function.new_block("start");
>     let value = context.new_rvalue_from_int(int_type, 42);
>     block.end_with_return(None, value);
> 
>     context.compile_to_file(OutputKind::Executable, "my_exe");
>     }
> }

Can we get this in bugzilla please?  If you generate a .c version of
the context (via gcc_jit_context_dump_reproducer_to_file) I can try to
debug it.

Thanks
Dave



Re: [pushed] c++: __integer_pack with class argument [PR111357]

2023-09-12 Thread Jakub Jelinek via Gcc-patches
On Tue, Sep 12, 2023 at 01:34:43PM -0400, Marek Polacek via Gcc-patches wrote:
> On Tue, Sep 12, 2023 at 01:27:44PM -0400, Jason Merrill via Gcc-patches wrote:
> > Tested x86_64-pc-linux-gnu, applying to trunk.
> > 
> > -- 8< --
> > 
> > The argument might not already be an integer.
> > 
> > PR c++/111357
> > 
> > gcc/cp/ChangeLog:
> > 
> > * pt.cc (expand_integer_pack): Convert argument to int.
> > 
> > gcc/testsuite/ChangeLog:
> > 
> > * g++.dg/ext/integer-pack7.C: New test.
> > ---
> >  gcc/cp/pt.cc |  2 ++
> >  gcc/testsuite/g++.dg/ext/integer-pack7.C | 38 
> >  2 files changed, 40 insertions(+)
> >  create mode 100644 gcc/testsuite/g++.dg/ext/integer-pack7.C
> > 
> > diff --git a/gcc/cp/pt.cc b/gcc/cp/pt.cc
> > index 838179d5fe3..b583c11eb99 100644
> > --- a/gcc/cp/pt.cc
> > +++ b/gcc/cp/pt.cc
> > @@ -3793,6 +3793,8 @@ expand_integer_pack (tree call, tree args, 
> > tsubst_flags_t complain,
> >  }
> >else
> >  {
> > +  hi = perform_implicit_conversion_flags (integer_type_node, hi, 
> > complain,
> > + LOOKUP_IMPLICIT);
> 
> FWIW, we have perform_implicit_conversion for this.

Is it correct to convert exactly to integer_type_node though?
Consider
#include 

using std::integer_sequence;
using std::make_integer_sequence;

template
void g(integer_sequence)
{}

template
struct c1
{
  static constexpr int value = 1;
  constexpr operator int() { return value; } 
  constexpr operator long() { return value + 1; }
};
template
struct R
{
using S = make_integer_sequence{}>;

R() noexcept(noexcept(g(S(
{}
};
int main()
{
R();
}
Shouldn't that invoke c1{}.operator long() rather than operator int()?
I thought the conversion was supposed to be done a few lines earlier,
instead of doing
  CALL_EXPR_ARG (call, 0) = hi;
do
  CALL_EXPR_ARG (call, 0)
= perform_implicit_conversion_flags (TREE_TYPE (ohi), hi,
 complain, LOOKUP_IMPLICIT);
or tsubst that TREE_TYPE (ohi) as well?  I.e. convert to type of the
template parameter.

Jakub



Re: [PATCH] ggc, jit: forcibly clear GTY roots in jit

2023-09-12 Thread Antoni Boucher via Gcc-patches
In the mean time, here's a (Rust) reproducer for the issue:

fn main() {
for _ in 0..5 {
let context = Context::default();
context.add_command_line_option("-flto");
context.set_optimization_level(OptimizationLevel::Aggressive);
context.add_driver_option("-nostdlib");

let int_type = context.new_type::();

let function = context.new_function(None,
FunctionType::Exported, int_type, &[], "main", false);
let block = function.new_block("start");
let value = context.new_rvalue_from_int(int_type, 42);
block.end_with_return(None, value);

context.compile_to_file(OutputKind::Executable, "my_exe");
}
}

On Tue, 2023-09-12 at 12:00 -0400, Antoni Boucher via Jit wrote:
> It seems to not be enough to fix the issue.
> Let me find out what's missing from my patch.
> 
> On Tue, 2023-09-12 at 11:35 +0200, Richard Biener via Jit wrote:
> > On Wed, Sep 6, 2023 at 3:41 PM David Malcolm via Gcc-patches
> >  wrote:
> > > 
> > > As part of Antoyo's work on supporting LTO in rustc_codegen_gcc,
> > > he
> > > noticed an ICE inside libgccjit when compiling certain rust
> > > files.
> > > 
> > > Debugging libgccjit showed that outdated information from a
> > > previous
> > > in-memory compile was referring to ad-hoc locations in the
> > > previous
> > > compile's line_table.
> > > 
> > > The issue turned out to be the function decls in
> > > internal_fn_fnspec_array
> > > from the previous compile keeping alive the symtab nodes for
> > > these
> > > functions, and from this finding other functions in the previous
> > > compile, walking their CFGs, and finding ad-hoc data pointers in
> > > an
> > > edge
> > > with a location_t using ad-hoc data from the previous line_table
> > > instance, and thus a use-after-free ICE attempting to use this
> > > ad-
> > > hoc
> > > data.
> > > 
> > > Previously in toplev::finalize we've fixed global state
> > > "piecemeal"
> > > by
> > > calling out to individual source_name_cc_finalize functions. 
> > > However,
> > > it occurred to me that we have run-time information on where the
> > > GTY-marked pointers are.
> > > 
> > > Hence this patch takes something of a "big hammer" approach by
> > > adding a
> > > new ggc_common_finalize that walks the GC roots, zeroing all of
> > > the
> > > pointers.  I stepped through this in the debugger and observed
> > > that, in
> > > particular, this correctly zeroes the internal_fn_fnspec_array at
> > > the end
> > > of a libgccjit compile.  Antoyo reports that this fixes the ICE
> > > for
> > > him.
> > > Doing so uncovered an ICE with libgccjit in dwarf2cfi.cc due to
> > > reuse of
> > > global variables from the previous compile, which this patch also
> > > fixes.
> > > 
> > > I noticed that in ggc_mark_roots when clearing deletable roots we
> > > only
> > > clear the initial element in each gcc_root_tab_t.  This looks
> > > like
> > > a
> > > latent bug to me, which the patch fixes.  That said, there don't
> > > seem to
> > > be any deletable roots where the number of elements != 1.
> > > 
> > > Successfully bootstrapped & regrtested on x86_64-pc-linux-gnu.
> > > 
> > > OK for trunk?
> > 
> > OK.
> > 
> > Thanks,
> > Richard.
> > 
> > > Thanks
> > > Dave
> > > 
> > > gcc/ChangeLog:
> > >     * dwarf2cfi.cc (dwarf2cfi_cc_finalize): New.
> > >     * dwarf2out.h (dwarf2cfi_cc_finalize): New decl.
> > >     * ggc-common.cc (ggc_mark_roots): Multiply by rti->nelt
> > > when
> > >     clearing the deletable gcc_root_tab_t.
> > >     (ggc_common_finalize): New.
> > >     * ggc.h (ggc_common_finalize): New decl.
> > >     * toplev.cc (toplev::finalize): Call
> > > dwarf2cfi_cc_finalize
> > > and
> > >     ggc_common_finalize.
> > > ---
> > >  gcc/dwarf2cfi.cc  |  9 +
> > >  gcc/dwarf2out.h   |  1 +
> > >  gcc/ggc-common.cc | 23 ++-
> > >  gcc/ggc.h |  2 ++
> > >  gcc/toplev.cc |  3 +++
> > >  5 files changed, 37 insertions(+), 1 deletion(-)
> > > 
> > > diff --git a/gcc/dwarf2cfi.cc b/gcc/dwarf2cfi.cc
> > > index ddc728f4ad00..f1777c0a4cf1 100644
> > > --- a/gcc/dwarf2cfi.cc
> > > +++ b/gcc/dwarf2cfi.cc
> > > @@ -3822,4 +3822,13 @@ make_pass_dwarf2_frame (gcc::context
> > > *ctxt)
> > >    return new pass_dwarf2_frame (ctxt);
> > >  }
> > > 
> > > +void dwarf2cfi_cc_finalize ()
> > > +{
> > > +  add_cfi_insn = NULL;
> > > +  add_cfi_vec = NULL;
> > > +  cur_trace = NULL;
> > > +  cur_row = NULL;
> > > +  cur_cfa = NULL;
> > > +}
> > > +
> > >  #include "gt-dwarf2cfi.h"
> > > diff --git a/gcc/dwarf2out.h b/gcc/dwarf2out.h
> > > index 870b56a6a372..61a996050ff9 100644
> > > --- a/gcc/dwarf2out.h
> > > +++ b/gcc/dwarf2out.h
> > > @@ -419,6 +419,7 @@ struct fixed_point_type_info
> > >  } scale_factor;
> > >  };
> > > 
> > > +void dwarf2cfi_cc_finalize (void);
> > >  void dwarf2out_cc_finalize (void);
> > > 
> > >  /* Some DWARF internals are exposed for the needs of DWARF-based
> > > debug
> > > diff --git 

Re: [pushed] c++: __integer_pack with class argument [PR111357]

2023-09-12 Thread Marek Polacek via Gcc-patches
On Tue, Sep 12, 2023 at 01:27:44PM -0400, Jason Merrill via Gcc-patches wrote:
> Tested x86_64-pc-linux-gnu, applying to trunk.
> 
> -- 8< --
> 
> The argument might not already be an integer.
> 
>   PR c++/111357
> 
> gcc/cp/ChangeLog:
> 
>   * pt.cc (expand_integer_pack): Convert argument to int.
> 
> gcc/testsuite/ChangeLog:
> 
>   * g++.dg/ext/integer-pack7.C: New test.
> ---
>  gcc/cp/pt.cc |  2 ++
>  gcc/testsuite/g++.dg/ext/integer-pack7.C | 38 
>  2 files changed, 40 insertions(+)
>  create mode 100644 gcc/testsuite/g++.dg/ext/integer-pack7.C
> 
> diff --git a/gcc/cp/pt.cc b/gcc/cp/pt.cc
> index 838179d5fe3..b583c11eb99 100644
> --- a/gcc/cp/pt.cc
> +++ b/gcc/cp/pt.cc
> @@ -3793,6 +3793,8 @@ expand_integer_pack (tree call, tree args, 
> tsubst_flags_t complain,
>  }
>else
>  {
> +  hi = perform_implicit_conversion_flags (integer_type_node, hi, 
> complain,
> +   LOOKUP_IMPLICIT);

FWIW, we have perform_implicit_conversion for this.

Marek



[PATCH RFC] diagnostic: add permerror variants with opt

2023-09-12 Thread Jason Merrill via Gcc-patches
Tested x86_64-pc-linux-gnu.  Does this approach make sense to you?  Or do you
have another idea?

Perhaps the warn_system_headers adjustment should also be part of this?

-- 8< --

In the discussion of promoting some pedwarns to be errors by default, rather
than moving them all into -fpermissive it seems to me to make sense to follow
the -Wnarrowing pattern and turn pedantic_errors on by default for them, as I
previously did for -Wnarrowing.  This approach still works with -fpermissive,
but users can also use -Wno-error=narrowing to downgrade that specific
diagnostic rather than everything affected by -fpermissive.
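
For illustration (this example is mine, not part of the patch): with
check_narrowing using the new permerror overload, code like

  void g (int i)
  {
    char c { i };   // narrowing conversion of 'i' from 'int' to 'char'
  }

is rejected by default, accepted with just a warning under -fpermissive, and
can be downgraded on its own with -Wno-error=narrowing.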

gcc/ChangeLog:

* diagnostic.cc (permerror): Add new overloads.
* diagnostic-core.h (permerror): Declare them.

gcc/cp/ChangeLog:

* typeck2.cc (check_narrowing): Use permerror.
---
 gcc/diagnostic-core.h |  3 +++
 gcc/cp/typeck2.cc |  9 +++--
 gcc/diagnostic.cc | 39 +++
 3 files changed, 45 insertions(+), 6 deletions(-)

diff --git a/gcc/diagnostic-core.h b/gcc/diagnostic-core.h
index c9e27fd2e6e..2d9909f18bd 100644
--- a/gcc/diagnostic-core.h
+++ b/gcc/diagnostic-core.h
@@ -105,6 +105,9 @@ extern bool pedwarn (rich_location *, int, const char *, 
...)
 extern bool permerror (location_t, const char *, ...) ATTRIBUTE_GCC_DIAG(2,3);
 extern bool permerror (rich_location *, const char *,
   ...) ATTRIBUTE_GCC_DIAG(2,3);
+extern bool permerror (location_t, int, const char *, ...) 
ATTRIBUTE_GCC_DIAG(3,4);
+extern bool permerror (rich_location *, int, const char *,
+  ...) ATTRIBUTE_GCC_DIAG(3,4);
 extern void sorry (const char *, ...) ATTRIBUTE_GCC_DIAG(1,2);
 extern void sorry_at (location_t, const char *, ...) ATTRIBUTE_GCC_DIAG(2,3);
 extern void inform (location_t, const char *, ...) ATTRIBUTE_GCC_DIAG(2,3);
diff --git a/gcc/cp/typeck2.cc b/gcc/cp/typeck2.cc
index cd1ea045720..1cbab70f513 100644
--- a/gcc/cp/typeck2.cc
+++ b/gcc/cp/typeck2.cc
@@ -1109,15 +1109,12 @@ check_narrowing (tree type, tree init, tsubst_flags_t 
complain,
   else if (complain & tf_error)
{
  int savederrorcount = errorcount;
- if (!flag_permissive)
-   global_dc->pedantic_errors = 1;
  auto s = make_temp_override (global_dc->dc_warn_system_headers, true);
- pedwarn (loc, OPT_Wnarrowing,
-  "narrowing conversion of %qE from %qH to %qI",
-  init, ftype, type);
+ permerror (loc, OPT_Wnarrowing,
+"narrowing conversion of %qE from %qH to %qI",
+init, ftype, type);
  if (errorcount == savederrorcount)
ok = true;
- global_dc->pedantic_errors = flag_pedantic_errors;
}
 }
 
diff --git a/gcc/diagnostic.cc b/gcc/diagnostic.cc
index 65c0cfbf11a..4195a01aa09 100644
--- a/gcc/diagnostic.cc
+++ b/gcc/diagnostic.cc
@@ -2054,6 +2054,45 @@ permerror (rich_location *richloc, const char *gmsgid, 
...)
   return ret;
 }
 
+/* Similar to the above, but controlled by a flag other than -fpermissive.
+   As above, an error by default or a warning with -fpermissive, but this
+   diagnostic can also be downgraded by -Wno-error=opt.  */
+
+bool
+permerror (location_t location, int opt, const char *gmsgid, ...)
+{
+  auto_diagnostic_group d;
+  va_list ap;
+  va_start (ap, gmsgid);
+  rich_location richloc (line_table, location);
+  bool pe = global_dc->pedantic_errors;
+  if (!global_dc->permissive)
+global_dc->pedantic_errors = true;
+  bool ret = diagnostic_impl (, NULL, opt, gmsgid, , DK_PEDWARN);
+  global_dc->pedantic_errors = pe;
+  va_end (ap);
+  return ret;
+}
+
+/* Same as "permerror" above, but at RICHLOC.  */
+
+bool
+permerror (rich_location *richloc, int opt, const char *gmsgid, ...)
+{
+  gcc_assert (richloc);
+
+  auto_diagnostic_group d;
+  va_list ap;
+  va_start (ap, gmsgid);
+  bool pe = global_dc->pedantic_errors;
+  if (!global_dc->permissive)
+global_dc->pedantic_errors = true;
+  bool ret = diagnostic_impl (richloc, NULL, opt, gmsgid, , DK_PEDWARN);
+  global_dc->pedantic_errors = pe;
+  va_end (ap);
+  return ret;
+}
+
 /* A hard error: the code is definitely ill-formed, and an object file
will not be produced.  */
 void

base-commit: f73d2d61a5926f42e9e5d771d23868787ef9d800
-- 
2.39.3



[pushed] c++: __integer_pack with class argument [PR111357]

2023-09-12 Thread Jason Merrill via Gcc-patches
Tested x86_64-pc-linux-gnu, applying to trunk.

-- 8< --

The argument might not already be an integer.

PR c++/111357

gcc/cp/ChangeLog:

* pt.cc (expand_integer_pack): Convert argument to int.

gcc/testsuite/ChangeLog:

* g++.dg/ext/integer-pack7.C: New test.
---
 gcc/cp/pt.cc |  2 ++
 gcc/testsuite/g++.dg/ext/integer-pack7.C | 38 
 2 files changed, 40 insertions(+)
 create mode 100644 gcc/testsuite/g++.dg/ext/integer-pack7.C

diff --git a/gcc/cp/pt.cc b/gcc/cp/pt.cc
index 838179d5fe3..b583c11eb99 100644
--- a/gcc/cp/pt.cc
+++ b/gcc/cp/pt.cc
@@ -3793,6 +3793,8 @@ expand_integer_pack (tree call, tree args, tsubst_flags_t 
complain,
 }
   else
 {
+  hi = perform_implicit_conversion_flags (integer_type_node, hi, complain,
+ LOOKUP_IMPLICIT);
   hi = instantiate_non_dependent_expr (hi, complain);
   hi = cxx_constant_value (hi, complain);
   int len = valid_constant_size_p (hi) ? tree_to_shwi (hi) : -1;
diff --git a/gcc/testsuite/g++.dg/ext/integer-pack7.C 
b/gcc/testsuite/g++.dg/ext/integer-pack7.C
new file mode 100644
index 000..95b1195bef4
--- /dev/null
+++ b/gcc/testsuite/g++.dg/ext/integer-pack7.C
@@ -0,0 +1,38 @@
+// PR c++/111357
+// { dg-do compile { target c++11 } }
+
+namespace std {
+  template
+struct integer_sequence
+{ };
+
+  template
+using make_integer_sequence
+  = integer_sequence<_Tp, __integer_pack(_Num)...>;
+}
+
+using std::integer_sequence;
+using std::make_integer_sequence;
+
+template
+void g(integer_sequence)
+{}
+
+template
+struct c1
+{
+  static constexpr int value = 1;
+  constexpr operator int() { return value; }
+};
+template
+struct R
+{
+   using S = make_integer_sequence{}>;
+
+   R() noexcept(noexcept(g(S(
+   {}
+};
+int main()
+{
+R();
+}

base-commit: ea5abbb263315e558c876b50c9371b90ddd5e028
-- 
2.39.3



Re: [PATCH] small _BitInt tweaks

2023-09-12 Thread Joseph Myers
On Tue, 12 Sep 2023, Jakub Jelinek via Gcc-patches wrote:

> And by ensuring we never create 1-bit signed BITINT_TYPE e.g. the backends
> don't need to worry about them.
> 
> But I admit I don't feel strongly about that.
> 
> Joseph, what do you think about this?

I think it's appropriate to avoid 1-bit signed BITINT_TYPE consistently.

-- 
Joseph S. Myers
jos...@codesourcery.com


[pushed] c++: ICE with -fno-exceptions and array init [PR107198]

2023-09-12 Thread Jason Merrill via Gcc-patches
Tested x86_64-pc-linux-gnu, applying to trunk.

-- 8< --

The removed line no longer has an effect on anew5.C error recovery, and
removing it improves error recovery for this testcase.

PR c++/107198

gcc/cp/ChangeLog:

* typeck2.cc (process_init_constructor_array): Use VEC_INIT_EXPR
regardless of seen_error.

gcc/testsuite/ChangeLog:

* g++.dg/eh/no-exceptions1.C: New test.
---
 gcc/cp/typeck2.cc|  1 -
 gcc/testsuite/g++.dg/eh/no-exceptions1.C | 19 +++
 2 files changed, 19 insertions(+), 1 deletion(-)
 create mode 100644 gcc/testsuite/g++.dg/eh/no-exceptions1.C

diff --git a/gcc/cp/typeck2.cc b/gcc/cp/typeck2.cc
index 582a73bb053..cd1ea045720 100644
--- a/gcc/cp/typeck2.cc
+++ b/gcc/cp/typeck2.cc
@@ -1683,7 +1683,6 @@ process_init_constructor_array (tree type, tree init, int 
nested, int flags,
if (next)
  {
if (next != error_mark_node
-   && ! seen_error () // Improves error-recovery on anew5.C.
&& (initializer_constant_valid_p (next, TREE_TYPE (next))
!= null_pointer_node))
  {
diff --git a/gcc/testsuite/g++.dg/eh/no-exceptions1.C 
b/gcc/testsuite/g++.dg/eh/no-exceptions1.C
new file mode 100644
index 000..4b77064c646
--- /dev/null
+++ b/gcc/testsuite/g++.dg/eh/no-exceptions1.C
@@ -0,0 +1,19 @@
+// PR c++/107198
+// { dg-additional-options -fno-exceptions }
+
+struct A {
+  A() { throw 0; } // { dg-error disabled }
+  A(int i) { throw i; }
+  A(const A&) { throw 10; }
+};
+
+void try_idx (int i)
+{
+  int t = 10;
+  try {
+struct X {
+  A e1[2], e2;
+}
+x2[3] = { { 1, 2, 3 }, { 4, 5, 6 } };
+  } catch (int x) { t = x; }   // { dg-prune-output "not declared" }
+}

base-commit: 27e2e7c93e48bcbb63877cc5964fae8dba47d706
-- 
2.39.3



Re: [PATCH v2 08/11] Native complex ops: Add explicit vector of complex

2023-09-12 Thread Joseph Myers
On Tue, 12 Sep 2023, Sylvain Noiry via Gcc-patches wrote:

> Summary:
> Allow the creation and usage of builtins vectors of complex
> in C, using __attribute__ ((vector_size ()))

If you're adding a new language feature like this, you need to update 
extend.texi to explain the valid uses of the attribute for complex types, 
and (under "Vector Extensions") the valid uses of the resulting vectors.  
You also need to add testcases to the testsuite for such vectors - both 
execution tests covering valid uses of the vectors, and tests that invalid 
declarations or uses of such vectors (uses with any operator, or other 
operand to such operator, that aren't valid) are properly rejected - go 
through all cases of operators, with one or two complex vector operands, 
of the same or different types, and with different choices for what type 
the other operand might be when one has complex vector type, and make sure 
they are all properly tested and do have the desired and documented 
semantics.
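
For concreteness, the sort of declaration and use such tests would exercise
(an illustrative sketch only, not taken from the patch; the exact accepted
forms are whatever the documentation ends up specifying):

  typedef _Complex float cf4
    __attribute__ ((vector_size (4 * sizeof (_Complex float))));

  cf4 add (cf4 a, cf4 b)
  {
    return a + b;   /* presumably element-wise complex addition */
  }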

If the intended semantics are the same for C and C++, the tests should be 
c-c++-common tests.  Any cases where the intended semantics are different 
will need separate tests for each language or appropriately conditional 
test assertions in c-c++-common.

-- 
Joseph S. Myers
jos...@codesourcery.com


Re: RFC: RISC-V sign extension dead code elimination

2023-09-12 Thread Vineet Gupta

On 8/29/23 08:40, Joern Rennecke wrote:

In the patch call we talked about sign extension elimination, so I dug
up this patch set that I did a while ago.  It is still lacking some
documentation and testing in a more recent base version;
I only adjusted the common.opt part context for the patch to apply.


Attached is the updated patch - the previous one fails to apply cleanly on
trunk and also needs some adjustment for the inverted_post_order_compute API
change.


Thx,
-Vineet

From 246ca9a572f49c3e2f2076d5a82dbaa5bb9def52 Mon Sep 17 00:00:00 2001
From: Joern Rennecke 
Date: Mon, 11 Sep 2023 14:13:58 -0700
Subject: [PATCH] DCE extraneous extension pass

---
 gcc/Makefile.in  |   1 +
 gcc/common.opt   |   8 +
 gcc/config/riscv/bitmanip.md |  14 +
 gcc/df-scan.cc   |   3 +-
 gcc/df.h |   1 +
 gcc/ext-dce.cc   | 546 +++
 gcc/passes.def   |   1 +
 gcc/tree-pass.h  |   1 +
 8 files changed, 573 insertions(+), 2 deletions(-)
 create mode 100644 gcc/ext-dce.cc

diff --git a/gcc/Makefile.in b/gcc/Makefile.in
index 6d608db4dd24..cc13d1904ed7 100644
--- a/gcc/Makefile.in
+++ b/gcc/Makefile.in
@@ -1429,6 +1429,7 @@ OBJS = \
 	explow.o \
 	expmed.o \
 	expr.o \
+	ext-dce.o \
 	fibonacci_heap.o \
 	file-prefix-map.o \
 	final.o \
diff --git a/gcc/common.opt b/gcc/common.opt
index f137a1f81ac8..8a5d3fece581 100644
--- a/gcc/common.opt
+++ b/gcc/common.opt
@@ -3716,4 +3716,12 @@ fipa-ra
 Common Var(flag_ipa_ra) Optimization
 Use caller save register across calls if possible.
 
+fext-dce
+Common Var(flag_ext_dce, 1) Optimization Init(0)
+Perform dead code elimination on zero and sign extensions with special dataflow analysis.
+
+fext-dce-pre
+Common Var(flag_ext_dce, 2)
+Perform dead code elimination on zero and sign extensions with special dataflow analysis.  Insert extensions on edges for partial redundancy elimination.
+
 ; This comment is to ensure we retain the blank line above.
diff --git a/gcc/config/riscv/bitmanip.md b/gcc/config/riscv/bitmanip.md
index 431b32922135..c03f20f59546 100644
--- a/gcc/config/riscv/bitmanip.md
+++ b/gcc/config/riscv/bitmanip.md
@@ -273,6 +273,20 @@
   [(set_attr "type" "")
(set_attr "mode" "DI")])
 
+;; Combine has a different idea about canonical rtl.
+;; Example: int f (int i) { return (short)i; }
+(define_insn_and_split "*extendhidi_combine"
+  [(set (match_operand:DI 0 "register_operand")
+	(sign_extend:DI
+	  (ashiftrt:SI
+	(subreg:SI (ashift:DI (match_operand:DI 1 "register_operand")
+  (const_int 16)) 0)
+	(const_int 16]
+  "TARGET_ZBB"
+  "#"
+  "&& 1"
+  [(set (match_dup 0) (sign_extend:DI (subreg:HI (match_dup 1) 0)))])
+
 (define_insn "*zero_extendhi2_bitmanip"
   [(set (match_operand:GPR 0 "register_operand" "=r,r")
 (zero_extend:GPR (match_operand:HI 1 "nonimmediate_operand" "r,m")))]
diff --git a/gcc/df-scan.cc b/gcc/df-scan.cc
index 9515740728c3..87729ab0f44d 100644
--- a/gcc/df-scan.cc
+++ b/gcc/df-scan.cc
@@ -78,7 +78,6 @@ static void df_get_eh_block_artificial_uses (bitmap);
 
 static void df_record_entry_block_defs (bitmap);
 static void df_record_exit_block_uses (bitmap);
-static void df_get_exit_block_use_set (bitmap);
 static void df_get_entry_block_def_set (bitmap);
 static void df_grow_ref_info (struct df_ref_info *, unsigned int);
 static void df_ref_chain_delete_du_chain (df_ref);
@@ -3642,7 +3641,7 @@ df_epilogue_uses_p (unsigned int regno)
 
 /* Set the bit for regs that are considered being used at the exit. */
 
-static void
+void
 df_get_exit_block_use_set (bitmap exit_block_uses)
 {
   unsigned int i;
diff --git a/gcc/df.h b/gcc/df.h
index 402657a7076f..abcbb0977349 100644
--- a/gcc/df.h
+++ b/gcc/df.h
@@ -1091,6 +1091,7 @@ extern bool df_epilogue_uses_p (unsigned int);
 extern void df_set_regs_ever_live (unsigned int, bool);
 extern void df_compute_regs_ever_live (bool);
 extern void df_scan_verify (void);
+extern void df_get_exit_block_use_set (bitmap);
 
 
 /*
diff --git a/gcc/ext-dce.cc b/gcc/ext-dce.cc
new file mode 100644
index ..c285e67c509d
--- /dev/null
+++ b/gcc/ext-dce.cc
@@ -0,0 +1,546 @@
+/* RTL dead zero/sign extension (code) elimination.
+   Copyright (C) 2000-2022 Free Software Foundation, Inc.
+
+This file is part of GCC.
+
+GCC is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free
+Software Foundation; either version 3, or (at your option) any later
+version.
+
+GCC is distributed in the hope that it will be useful, but WITHOUT ANY
+WARRANTY; without even the implied warranty of MERCHANTABILITY or
+FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+for more details.
+
+You should have received a copy of the GNU General Public License
+along with GCC; see the file COPYING3.  If not see
+<http://www.gnu.org/licenses/>.

Re: [PATCH][_GLIBCXX_INLINE_VERSION] Fix friend declarations

2023-09-12 Thread Jonathan Wakely via Gcc-patches
On Tue, 12 Sept 2023 at 17:47, Jonathan Wakely  wrote:
>
> On Wed, 23 Aug 2023 at 18:35, François Dumont via Libstdc++
>  wrote:
> >
> > Hi
> >
> > The few tests that are failing in versioned namespace mode are due to
> > those friend declarations.
> >
> > This is a fix proposal even if I considered 2 other options:
> >
> > 1. Make __format::_Arg_store a struct and so do not bother with friend
> > declarations.
> >
> > 2. Consider it as a compiler bug and do nothing. In this case I think we
> > might still need this patch to avoid a non-working format library in
> > versioned namespace mode in gcc 14 if compiler bug is not fixed.
>
> It definitely is a compiler bug, this is PR c++/59256.
>
> Please add a comment to the new macro definition, so we remember to
> remove it when it's not needed:
>
>
> #if _GLIBCXX_INLINE_VERSION
> // Needed because of PR c++/59526
> # define _GLIBCXX_STD_V std::__8
> #else
> # define _GLIBCXX_STD_V std
> #endif
>
>
> OK with that change, thanks.

Actually, are you sure the friend std::basic_format_args declaration
needs to change?

I only see errors for the friend function, not the friend class. So
this seems to fix it:

--- a/libstdc++-v3/include/std/format
+++ b/libstdc++-v3/include/std/format
@@ -3437,7 +3437,13 @@ namespace __format

  template
   friend auto
-   std::make_format_args(_Argz&&...) noexcept;
+#if _GLIBCXX_INLINE_VERSION
+   // Needed for PR c++/59526
+   std::__8::
+#else
+   std::
+#endif
+   make_format_args(_Argz&&...) noexcept;

  // For a sufficiently small number of arguments we only store values.
  // basic_format_args can get the types from the _Args pack.




>
>
> >
> > I can also define _GLIBCXX_STD_V at  level to limit impact.
> >
> >  libstdc++: [_GLIBCXX_INLINE_VERSION] Fix  friend declarations
> >
> >  GCC do not consider the inline namespace in friend declarations. We
> > need
> >  to explicit this namespace.
> >
> >  libstdc++-v3/ChangeLog:
> >
> >  * include/bits/c++config (_GLIBCXX_STD_V): New macro giving
> > current
> >  std namespace with optionally the version namespace.
> >  * include/std/format (std::__format::_Arg_store): Use
> > latter on friend
> >  declarations.
> >
> > Tested under versioned mode.
> >
> > Ok to commit ?
> >
> > François



Re: [PATCH][_GLIBCXX_INLINE_VERSION] Fix friend declarations

2023-09-12 Thread Jonathan Wakely via Gcc-patches
On Wed, 23 Aug 2023 at 18:35, François Dumont via Libstdc++
 wrote:
>
> Hi
>
> The few tests that are failing in versioned namespace mode are due to
> those friend declarations.
>
> This is a fix proposal even if I considered 2 other options:
>
> 1. Make __format::_Arg_store a struct and so do not bother with friend
> declarations.
>
> 2. Consider it as a compiler bug and do nothing. In this case I think we
> might still need this patch to avoid a non-working format library in
> versioned namespace mode in gcc 14 if compiler bug is not fixed.

It definitely is a compiler bug, this is PR c++/59256.

Please add a comment to the new macro definition, so we remember to
remove it when it's not needed:


#if _GLIBCXX_INLINE_VERSION
// Needed because of PR c++/59526
# define _GLIBCXX_STD_V std::__8
#else
# define _GLIBCXX_STD_V std
#endif


OK with that change, thanks.




>
> I can also define _GLIBCXX_STD_V at  level to limit impact.
>
>  libstdc++: [_GLIBCXX_INLINE_VERSION] Fix  friend declarations
>
>  GCC do not consider the inline namespace in friend declarations. We
> need
>  to explicit this namespace.
>
>  libstdc++-v3/ChangeLog:
>
>  * include/bits/c++config (_GLIBCXX_STD_V): New macro giving
> current
>  std namespace with optionally the version namespace.
>  * include/std/format (std::__format::_Arg_store): Use
> latter on friend
>  declarations.
>
> Tested under versioned mode.
>
> Ok to commit ?
>
> François



Re: [PATCH 00/19] aarch64: Fix -fstack-protector issue

2023-09-12 Thread Siddhesh Poyarekar

On 2023-09-12 11:25, Richard Sandiford via Gcc-patches wrote:

This series of patches fixes deficiencies in GCC's -fstack-protector
implementation for AArch64 when using dynamically allocated stack space.
This is CVE-2023-4039.  See:



While this is a legitimate missed hardening, I'm not sure if this 
qualifies as a CVE-worthy vulnerability since correct programs won't 
actually be exploitable due to this.  This is essentially the kind of 
thing that the "Security features implemented in GCC" section in the 
proposed security policy[1] describes.


Thanks,
Sid

[1] 
https://inbox.sourceware.org/gcc-patches/ba133293-a7e8-8fe4-e1ba-7129b9e10...@gotplt.org/


[PATCH v1] rs6000: unnecessary clear after vctzlsbb in vec_first_match_or_eos_index

2023-09-12 Thread Ajit Agarwal via Gcc-patches
This patch removes zero extension from vctzlsbb as it already zero extends.
Bootstrapped and regtested on powerpc64-linux-gnu.

Thanks & Regards
Ajit

rs6000: unnecessary clear after vctzlsbb in vec_first_match_or_eos_index

For the rs6000 target we don't need a zero_extend after vctzlsbb, as vctzlsbb
already zero-extends.

2023-09-12  Ajit Kumar Agarwal  

gcc/ChangeLog:

* config/rs6000/vsx.md (vctzlsbb_zext_): New define_insn.

gcc/testsuite/ChangeLog:

* g++.target/powerpc/altivec-19.C: New testcase.
---
 gcc/config/rs6000/vsx.md  | 17 ++---
 gcc/testsuite/g++.target/powerpc/altivec-19.C | 10 ++
 2 files changed, 24 insertions(+), 3 deletions(-)
 create mode 100644 gcc/testsuite/g++.target/powerpc/altivec-19.C

diff --git a/gcc/config/rs6000/vsx.md b/gcc/config/rs6000/vsx.md
index 19abfeb565a..42379409e5f 100644
--- a/gcc/config/rs6000/vsx.md
+++ b/gcc/config/rs6000/vsx.md
@@ -5846,11 +5846,22 @@
   [(set_attr "type" "vecsimple")])
 
 ;; Vector Count Trailing Zero Least-Significant Bits Byte
-(define_insn "vctzlsbb_"
-  [(set (match_operand:SI 0 "register_operand" "=r")
+(define_insn "vctzlsbb_zext_"
+  [(set (match_operand:DI 0 "register_operand" "=r")
+   (zero_extend:DI
(unspec:SI
 [(match_operand:VSX_EXTRACT_I 1 "altivec_register_operand" "v")]
-UNSPEC_VCTZLSBB))]
+UNSPEC_VCTZLSBB)))]
+  "TARGET_P9_VECTOR"
+  "vctzlsbb %0,%1"
+  [(set_attr "type" "vecsimple")])
+
+;; Vector Count Trailing Zero Least-Significant Bits Byte
+(define_insn "vctzlsbb_"
+  [(set (match_operand:SI 0 "register_operand" "=r")
+(unspec:SI
+ [(match_operand:VSX_EXTRACT_I 1 "altivec_register_operand" "v")]
+ UNSPEC_VCTZLSBB))]
   "TARGET_P9_VECTOR"
   "vctzlsbb %0,%1"
   [(set_attr "type" "vecsimple")])
diff --git a/gcc/testsuite/g++.target/powerpc/altivec-19.C 
b/gcc/testsuite/g++.target/powerpc/altivec-19.C
new file mode 100644
index 000..e49e5076af8
--- /dev/null
+++ b/gcc/testsuite/g++.target/powerpc/altivec-19.C
@@ -0,0 +1,10 @@
+/* { dg-do compile { target { powerpc*-*-* } } } */
+/* { dg-require-effective-target powerpc_p9vector_ok } */
+/* { dg-options "-mdejagnu-cpu=power9 -O2 " } */ 
+
+#include 
+
+unsigned int foo (vector unsigned char a, vector unsigned char b) {
+  return vec_first_match_or_eos_index (a, b);
+}
+/* { dg-final { scan-assembler-not {\mrldicl\M} } } */
-- 
2.39.3



[OG13][committed] libgomp, nvptx, amdgcn: parallel reverse offload

2023-09-12 Thread Andrew Stubbs

Here's the same patch, but backported to the OG13 branch.

There was one "difficult" conflict, but after reading around the problem 
I don't think that any actual code changes are required and I've updated 
the comment to explain (see second patch).


Both patches committed to devel/omp/gcc-13.

Andrew

On 12/09/2023 15:27, Andrew Stubbs wrote:

Hi all,

This patch implements parallel execution of OpenMP reverse offload kernels.

The first problem was that GPU device kernels may request reverse 
offload (via the "ancestor" clause) once for each running offload thread 
-- of which there may be thousands -- and the existing implementation 
ran each request serially, whilst blocking all other I/O from that 
device kernel.


The second problem was that the NVPTX plugin runs the reverse offload 
kernel in the context of whichever host thread sees the request first, 
regardless of which kernel originated the request. This is probably 
logically harmless, but may lead to surprising timing when it blocks the 
wrong kernel from exiting until the reverse offload is done. It was also 
only capable of receiving and processing a single request at a time, 
across all running kernels. (GCN did not have these problems.)


Both problems are now solved by making the reverse offload requests 
asynchronous. The host threads still receive the requests in the same
way, but instead of running them inline the request is queued for 
execution later in another thread. The requests are then consumed from 
the message passing buffer immediately (allowing I/O to continue, in the
case of GCN). The device threads that sent requests are still blocked 
waiting for the completion signal, but any other threads may continue as 
usual.


The queued requests are processed by a thread pool created on demand and 
limited by a new environment variable GOMP_REVERSE_OFFLOAD_THREADS. By 
this means reverse offload should become much less of a bottleneck.


In the process of this work I have found and fixed a couple of 
target-specific issues. NVPTX asynchronous streams were independent of 
each other, but still synchronous w.r.t. the default NULL stream. Some 
GCN devices (at least gfx908) seem to have a race condition in the
message passing system whereby the cache write-back triggered by
__ATOMIC_RELEASE completes only after the atomically written value has
already become visible.


OK for mainline?

Andrew


nvptx: update comment re delayed free

Polling the delayed frees is roughly the same as freeing them between
reverse offload kernels.

libgomp/ChangeLog:

* plugin/plugin-nvptx.c (GOMP_OFFLOAD_run): Update comment.

diff --git a/libgomp/plugin/plugin-nvptx.c b/libgomp/plugin/plugin-nvptx.c
index 176bb983bdc..0cf49719515 100644
--- a/libgomp/plugin/plugin-nvptx.c
+++ b/libgomp/plugin/plugin-nvptx.c
@@ -2703,12 +2703,10 @@ GOMP_OFFLOAD_run (int ord, void *tgt_fn, void 
*tgt_vars, void **args)
   a following reverse offload does
   'GOMP_OFFLOAD_page_locked_host_alloc', and that then runs the
   deferred 'cuMemFreeHost's -- which may dead-lock?!
-  TODO: This may need more considerations for the case that
-  different host threads do reverse offload?  We could move
-  'free_host_blocks' into 'aq' (which is separate per reverse
-  offload) instead of global, like
-  'page_locked_host_unregister_blocks', but that doesn't seem the
-  right thing for OpenACC 'async' generally?  */
+  Note: even though the reverse offload kernels are now run in
+  multiple background threads, *this* thread (or one of these
+  threads, anyway) will live the whole time, so polling
+  free_host_blocks should be effective.  */
if (!nvptx_run_deferred_page_locked_host_free ())
  exit (EXIT_FAILURE);
  }
libgomp: parallel reverse offload

Extend OpenMP reverse offload support to allow running the host kernels
on multiple threads.  The device plugin API for reverse offload is now made
non-blocking, meaning that running the host kernel in the wrong device
context is no longer a problem.  The NVPTX message passing interface now
uses a ring buffer approximately matching GCN.

include/ChangeLog:

* gomp-constants.h (GOMP_VERSION): Bump.

libgomp/ChangeLog:

* config/gcn/target.c (GOMP_target_ext): Add "signal" field.
Fix atomics race condition.
* config/nvptx/libgomp-nvptx.h (REV_OFFLOAD_QUEUE_SIZE): New define.
(struct rev_offload): Implement ring buffer.
* config/nvptx/target.c (GOMP_target_ext): Likewise.
* env.c (initialize_env): Read GOMP_REVERSE_OFFLOAD_THREADS.
* libgomp-plugin.c (GOMP_PLUGIN_target_rev): Replace "aq" parameter
with "signal" and "use_aq".
* libgomp-plugin.h (GOMP_PLUGIN_target_rev): Likewise.
* libgomp.h (gomp_target_rev): Likewise.
* plugin/plugin-gcn.c (process_reverse_offload): Add "signal".

[PATCH] RISC-V: Support cond vmulh.vv and vmulu.vv

2023-09-12 Thread Lehua Ding
This patch adds combine patterns to combine vmulh[u].vv + vcond_mask
into a masked vmulh[u].vv. vmulhsu.vv cannot currently be produced in the
midend; we will send another patch to address this issue.
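
A loop of roughly this shape (my illustration; the new cond_mulh tests below
are the authoritative examples) is expected to use a masked vmulh.vv after
this change:

  #include <stdint.h>

  void
  f (int32_t *r, int32_t *a, int32_t *b, int32_t *pred, int n)
  {
    for (int i = 0; i < n; i++)
      r[i] = pred[i] ? (int32_t) (((int64_t) a[i] * b[i]) >> 32) : r[i];
  }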

gcc/ChangeLog:

* config/riscv/autovec-opt.md (*cond_3_highpart):
New combine pattern.
* config/riscv/autovec.md (smul3_highpart): Merge smul and umul.
(3_highpart): Merged pattern.
(umul3_highpart): Merge smul and umul.
* config/riscv/vector-iterators.md (umul): New iterators.
(UNSPEC_VMULHU): New iterators.

gcc/testsuite/ChangeLog:

* gcc.target/riscv/rvv/autovec/cond/cond_mulh-1.c: New test.
* gcc.target/riscv/rvv/autovec/cond/cond_mulh-2.c: New test.
* gcc.target/riscv/rvv/autovec/cond/cond_mulh_run-1.c: New test.
* gcc.target/riscv/rvv/autovec/cond/cond_mulh_run-2.c: New test.

---
 gcc/config/riscv/autovec-opt.md   | 23 -
 gcc/config/riscv/autovec.md   | 22 ++--
 gcc/config/riscv/vector-iterators.md  |  4 +++
 .../riscv/rvv/autovec/cond/cond_mulh-1.c  | 29 
 .../riscv/rvv/autovec/cond/cond_mulh-2.c  | 30 
 .../riscv/rvv/autovec/cond/cond_mulh_run-1.c  | 32 +
 .../riscv/rvv/autovec/cond/cond_mulh_run-2.c  | 34 +++
 7 files changed, 154 insertions(+), 20 deletions(-)
 create mode 100644 
gcc/testsuite/gcc.target/riscv/rvv/autovec/cond/cond_mulh-1.c
 create mode 100644 
gcc/testsuite/gcc.target/riscv/rvv/autovec/cond/cond_mulh-2.c
 create mode 100644 
gcc/testsuite/gcc.target/riscv/rvv/autovec/cond/cond_mulh_run-1.c
 create mode 100644 
gcc/testsuite/gcc.target/riscv/rvv/autovec/cond/cond_mulh_run-2.c

diff --git a/gcc/config/riscv/autovec-opt.md b/gcc/config/riscv/autovec-opt.md
index 0d2721f0b29..552be48bf73 100644
--- a/gcc/config/riscv/autovec-opt.md
+++ b/gcc/config/riscv/autovec-opt.md
@@ -970,6 +970,28 @@
 }
  [(set_attr "type" "vnshift")])
 
+;; Combine vmulh.vv/vmulhu.vv + vcond_mask
+(define_insn_and_split "*cond_3_highpart"
+   [(set (match_operand:VFULLI 0 "register_operand")
+(if_then_else:VFULLI
+  (match_operand: 1 "register_operand")
+  (mulh:VFULLI
+(match_operand:VFULLI 2 "register_operand")
+(match_operand:VFULLI 3 "register_operand"))
+  (match_operand:VFULLI 4 "register_operand")))]
+   "TARGET_VECTOR && can_create_pseudo_p ()"
+   "#"
+   "&& 1"
+   [(const_int 0)]
+{
+  insn_code icode = code_for_pred_mulh (, mode);
+  rtx ops[] = {operands[0], operands[1], operands[2], operands[3], operands[4],
+   gen_int_mode (GET_MODE_NUNITS (mode), Pmode)};
+  riscv_vector::expand_cond_len_binop (icode, ops);
+   DONE;
+}
+[(set_attr "type" "vector")])
+
 ;; 
=
 ;; Combine extend + binop to widen_binop
 ;; 
=
@@ -1172,7 +1194,6 @@
 }
 [(set_attr "type" "vfwmul")])
 
-
 ;; 
=
 ;; Misc combine patterns
 ;; 
=
diff --git a/gcc/config/riscv/autovec.md b/gcc/config/riscv/autovec.md
index e9dd40af935..b4ac22bb97b 100644
--- a/gcc/config/riscv/autovec.md
+++ b/gcc/config/riscv/autovec.md
@@ -1569,9 +1569,9 @@
 ;; - vmulhu.vv
 ;; -
 
-(define_insn_and_split "smul3_highpart"
+(define_insn_and_split "3_highpart"
   [(set (match_operand:VFULLI 0 "register_operand")
-(smul_highpart:VFULLI
+(mulh:VFULLI
   (match_operand:VFULLI 1 "register_operand")
   (match_operand:VFULLI 2 "register_operand")))]
   "TARGET_VECTOR && can_create_pseudo_p ()"
@@ -1579,23 +1579,7 @@
   "&& 1"
   [(const_int 0)]
 {
-  insn_code icode = code_for_pred_mulh (UNSPEC_VMULHS, mode);
-  riscv_vector::emit_vlmax_insn (icode, riscv_vector::BINARY_OP, operands);
-  DONE;
-}
-[(set_attr "type" "vimul")])
-
-(define_insn_and_split "umul3_highpart"
-  [(set (match_operand:VFULLI 0 "register_operand")
-(umul_highpart:VFULLI
-  (match_operand:VFULLI 1 "register_operand")
-  (match_operand:VFULLI 2 "register_operand")))]
-  "TARGET_VECTOR && can_create_pseudo_p ()"
-  "#"
-  "&& 1"
-  [(const_int 0)]
-{
-  insn_code icode = code_for_pred_mulh (UNSPEC_VMULHU, mode);
+  insn_code icode = code_for_pred_mulh (, mode);
   riscv_vector::emit_vlmax_insn (icode, riscv_vector::BINARY_OP, operands);
   DONE;
 }
diff --git a/gcc/config/riscv/vector-iterators.md 
b/gcc/config/riscv/vector-iterators.md
index 2f7f7cbe08c..e70a9bc5c74 100644
--- a/gcc/config/riscv/vector-iterators.md
+++ b/gcc/config/riscv/vector-iterators.md
@@ -2354,6 +2354,10 @@
 (define_code_iterator sat_int_plus_binop [ss_plus us_plus])
 (define_code_iterator sat_int_minus_binop [ss_minus us_minus])
 

[PATCH] RISC-V: Support cond vnsrl/vnsra

2023-09-12 Thread Lehua Ding
This patch adds combine patterns to combine vnsra.w[vxi] + vcond_mask
into a masked vnsra.w[vxi].

gcc/ChangeLog:

* config/riscv/autovec-opt.md 
(*cond_vtrunc):
New combine pattern.
(*cond_trunc): Ditto.

gcc/testsuite/ChangeLog:

* gcc.target/riscv/rvv/autovec/cond/cond_narrow_shift-1.c: New test.
* gcc.target/riscv/rvv/autovec/cond/cond_narrow_shift-2.c: New test.
* gcc.target/riscv/rvv/autovec/cond/cond_narrow_shift-3.c: New test.
* gcc.target/riscv/rvv/autovec/cond/cond_narrow_shift_run-1.c: New test.
* gcc.target/riscv/rvv/autovec/cond/cond_narrow_shift_run-2.c: New test.
* gcc.target/riscv/rvv/autovec/cond/cond_narrow_shift_run-3.c: New test.

---
 gcc/config/riscv/autovec-opt.md   | 46 +++
 .../rvv/autovec/cond/cond_narrow_shift-1.c| 27 +++
 .../rvv/autovec/cond/cond_narrow_shift-2.c| 30 
 .../rvv/autovec/cond/cond_narrow_shift-3.c| 30 
 .../autovec/cond/cond_narrow_shift_run-1.c| 29 
 .../autovec/cond/cond_narrow_shift_run-2.c| 30 
 .../autovec/cond/cond_narrow_shift_run-3.c| 31 +
 7 files changed, 223 insertions(+)
 create mode 100644 
gcc/testsuite/gcc.target/riscv/rvv/autovec/cond/cond_narrow_shift-1.c
 create mode 100644 
gcc/testsuite/gcc.target/riscv/rvv/autovec/cond/cond_narrow_shift-2.c
 create mode 100644 
gcc/testsuite/gcc.target/riscv/rvv/autovec/cond/cond_narrow_shift-3.c
 create mode 100644 
gcc/testsuite/gcc.target/riscv/rvv/autovec/cond/cond_narrow_shift_run-1.c
 create mode 100644 
gcc/testsuite/gcc.target/riscv/rvv/autovec/cond/cond_narrow_shift_run-2.c
 create mode 100644 
gcc/testsuite/gcc.target/riscv/rvv/autovec/cond/cond_narrow_shift_run-3.c

diff --git a/gcc/config/riscv/autovec-opt.md b/gcc/config/riscv/autovec-opt.md
index f759525f96b..0d2721f0b29 100644
--- a/gcc/config/riscv/autovec-opt.md
+++ b/gcc/config/riscv/autovec-opt.md
@@ -924,6 +924,52 @@
DONE;
 })
 
+;; Combine vnsra + vcond_mask
+(define_insn_and_split 
"*cond_vtrunc"
+  [(set (match_operand: 0 "register_operand")
+ (if_then_else:
+   (match_operand: 1 "register_operand")
+   (truncate:
+ (any_shiftrt:VWEXTI
+   (match_operand:VWEXTI 2 "register_operand")
+  (any_extend:VWEXTI
+ (match_operand: 3 "vector_shift_operand"
+   (match_operand: 4 "register_operand")))]
+  "TARGET_VECTOR && can_create_pseudo_p ()"
+  "#"
+  "&& 1"
+  [(const_int 0)]
+{
+  insn_code icode = code_for_pred_narrow (, mode);
+  rtx ops[] = {operands[0], operands[1], operands[2], operands[3], operands[4],
+   gen_int_mode (GET_MODE_NUNITS (mode), Pmode)};
+  riscv_vector::expand_cond_len_binop (icode, ops);
+  DONE;
+}
+ [(set_attr "type" "vnshift")])
+
+(define_insn_and_split "*cond_trunc"
+  [(set (match_operand: 0 "register_operand")
+ (if_then_else:
+   (match_operand: 1 "register_operand")
+   (truncate:
+ (any_shiftrt:VWEXTI
+   (match_operand:VWEXTI 2 "register_operand")
+  (match_operand: 3 "csr_operand")))
+   (match_operand: 4 "register_operand")))]
+  "TARGET_VECTOR && can_create_pseudo_p ()"
+  "#"
+  "&& 1"
+  [(const_int 0)]
+{
+  insn_code icode = code_for_pred_narrow_scalar (, 
mode);
+  rtx ops[] = {operands[0], operands[1], operands[2], gen_lowpart (Pmode, 
operands[3]),
+   operands[4], gen_int_mode (GET_MODE_NUNITS (mode), 
Pmode)};
+  riscv_vector::expand_cond_len_binop (icode, ops);
+  DONE;
+}
+ [(set_attr "type" "vnshift")])
+
 ;; 
=
 ;; Combine extend + binop to widen_binop
 ;; 
=
diff --git 
a/gcc/testsuite/gcc.target/riscv/rvv/autovec/cond/cond_narrow_shift-1.c 
b/gcc/testsuite/gcc.target/riscv/rvv/autovec/cond/cond_narrow_shift-1.c
new file mode 100644
index 000..d068110a8a8
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/cond/cond_narrow_shift-1.c
@@ -0,0 +1,27 @@
+/* { dg-do compile } */
+/* { dg-additional-options "-march=rv32gcv_zvfh -mabi=ilp32d 
--param=riscv-autovec-preference=scalable -fno-vect-cost-model" } */
+
+#include 
+
+#define DEF_LOOP(TYPE1, TYPE2) 
\
+  void __attribute__ ((noipa)) 
\
+  test_##TYPE1##_##TYPE2 (TYPE2 *__restrict r, TYPE2 *__restrict a,
  \
+   TYPE1 *__restrict b, int n)\
+  {
\
+for (int i = 0; i < n; ++i)
\
+  r[i] = a[i] > 20 ? (TYPE2) (b[i] >> 3) : r[i];   
\
+  }
+
+#define TEST_ALL(T)
\
+  T (int16_t, int8_t)  

[PATCH] RISC-V: Support cond vfsgnj.vv autovec pattern

2023-09-12 Thread Lehua Ding
This patch adds combine patterns to combine vfsgnj.vv + vcond_mask
into a masked vfsgnj.vv. vfsgnjx.vv cannot currently be produced in the
midend; we will send another patch to address this issue.
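
For illustration (my sketch; the new cond_copysign tests are the real
examples), the kind of conditional copysign loop that should now become a
masked vfsgnj.vv:

  void
  f (float *r, float *a, float *b, int *pred, int n)
  {
    for (int i = 0; i < n; i++)
      r[i] = pred[i] ? __builtin_copysignf (a[i], b[i]) : r[i];
  }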

gcc/ChangeLog:

* config/riscv/autovec-opt.md (*copysign_neg): Move.
(*cond_copysign): New combine pattern.
* config/riscv/riscv-v.cc (needs_fp_rounding): Extend.

gcc/testsuite/ChangeLog:

* gcc.target/riscv/rvv/autovec/cond/cond_copysign-run.c: New test.
* gcc.target/riscv/rvv/autovec/cond/cond_copysign-rv32gcv.c: New test.
* gcc.target/riscv/rvv/autovec/cond/cond_copysign-rv64gcv.c: New test.
* gcc.target/riscv/rvv/autovec/cond/cond_copysign-template.h: New test.
* gcc.target/riscv/rvv/autovec/cond/cond_copysign-zvfh-run.c: New test.

---
 gcc/config/riscv/autovec-opt.md   | 68 +
 gcc/config/riscv/riscv-v.cc   |  4 +-
 .../rvv/autovec/cond/cond_copysign-run.c  | 99 +++
 .../rvv/autovec/cond/cond_copysign-rv32gcv.c  | 12 +++
 .../rvv/autovec/cond/cond_copysign-rv64gcv.c  | 12 +++
 .../rvv/autovec/cond/cond_copysign-template.h | 81 +++
 .../rvv/autovec/cond/cond_copysign-zvfh-run.c | 93 +
 7 files changed, 349 insertions(+), 20 deletions(-)
 create mode 100644 
gcc/testsuite/gcc.target/riscv/rvv/autovec/cond/cond_copysign-run.c
 create mode 100644 
gcc/testsuite/gcc.target/riscv/rvv/autovec/cond/cond_copysign-rv32gcv.c
 create mode 100644 
gcc/testsuite/gcc.target/riscv/rvv/autovec/cond/cond_copysign-rv64gcv.c
 create mode 100644 
gcc/testsuite/gcc.target/riscv/rvv/autovec/cond/cond_copysign-template.h
 create mode 100644 
gcc/testsuite/gcc.target/riscv/rvv/autovec/cond/cond_copysign-zvfh-run.c

diff --git a/gcc/config/riscv/autovec-opt.md b/gcc/config/riscv/autovec-opt.md
index 58e80044f1e..f759525f96b 100644
--- a/gcc/config/riscv/autovec-opt.md
+++ b/gcc/config/riscv/autovec-opt.md
@@ -609,6 +609,10 @@
(set_attr "mode" "")
(set (attr "frm_mode") (symbol_ref "riscv_vector::FRM_DYN"))])
 
+;; 
=
+;; Combine op + vmerge to cond_op
+;; 
=
+
 ;; Combine  and vcond_mask generated by midend into cond_len_
 ;; Currently supported operations:
 ;;   abs(FP)
@@ -651,25 +655,6 @@
   DONE;
 })
 
-;; Combine vlmax neg and UNSPEC_VCOPYSIGN
-(define_insn_and_split "*copysign_neg"
-  [(set (match_operand:VF 0 "register_operand")
-(neg:VF
-  (unspec:VF [
-(match_operand:VF 1 "register_operand")
-(match_operand:VF 2 "register_operand")
-  ] UNSPEC_VCOPYSIGN)))]
-  "TARGET_VECTOR && can_create_pseudo_p ()"
-  "#"
-  "&& 1"
-  [(const_int 0)]
-{
-  riscv_vector::emit_vlmax_insn (code_for_pred_ncopysign (mode),
-  riscv_vector::BINARY_OP, operands);
-  DONE;
-}
-[(set_attr "type" "vector")])
-
 ;; Combine sign_extend/zero_extend(vf2) and vcond_mask
 (define_insn_and_split "*cond_"
   [(set (match_operand:VWEXTI 0 "register_operand")
@@ -918,6 +903,27 @@
 }
 [(set_attr "type" "vector")])
 
+;; Combine vfsgnj.vv + vcond_mask
+(define_insn_and_split "*cond_copysign"
+   [(set (match_operand:VF 0 "register_operand")
+(if_then_else:VF
+  (match_operand: 1 "register_operand")
+  (unspec:VF
+   [(match_operand:VF 2 "register_operand")
+(match_operand:VF 3 "register_operand")] UNSPEC_VCOPYSIGN)
+  (match_operand:VF 4 "register_operand")))]
+   "TARGET_VECTOR && can_create_pseudo_p ()"
+   "#"
+   "&& 1"
+   [(const_int 0)]
+{
+  insn_code icode = code_for_pred (UNSPEC_VCOPYSIGN, mode);
+  rtx ops[] = {operands[0], operands[1], operands[2], operands[3], operands[4],
+   gen_int_mode (GET_MODE_NUNITS (mode), Pmode)};
+  riscv_vector::expand_cond_len_binop (icode, ops);
+   DONE;
+})
+
 ;; 
=
 ;; Combine extend + binop to widen_binop
 ;; 
=
@@ -1119,3 +1125,27 @@
   DONE;
 }
 [(set_attr "type" "vfwmul")])
+
+
+;; 
=
+;; Misc combine patterns
+;; 
=
+
+;; Combine vlmax neg and UNSPEC_VCOPYSIGN
+(define_insn_and_split "*copysign_neg"
+  [(set (match_operand:VF 0 "register_operand")
+(neg:VF
+  (unspec:VF [
+(match_operand:VF 1 "register_operand")
+(match_operand:VF 2 "register_operand")
+  ] UNSPEC_VCOPYSIGN)))]
+  "TARGET_VECTOR && can_create_pseudo_p ()"
+  "#"
+  "&& 1"
+  [(const_int 0)]
+{
+  riscv_vector::emit_vlmax_insn (code_for_pred_ncopysign (mode),
+  riscv_vector::BINARY_OP, operands);
+  DONE;
+}
+[(set_attr "type" "vector")])
diff 

Re: [PATCH] [11/12/13/14 Regression] ABI break in _Hash_node_value_base since GCC 11 [PR 111050]

2023-09-12 Thread Jonathan Wakely via Gcc-patches
On Mon, 11 Sept 2023 at 18:19, François Dumont  wrote:
>
>
> On 11/09/2023 13:51, Jonathan Wakely wrote:
> > On Sun, 10 Sept 2023 at 14:57, François Dumont via Libstdc++
> >  wrote:
> >> Following confirmation of the fix by TC here is the patch where I'm
> >> simply adding a 'constexpr' on _M_next().
> >>
> >> Please let me know this ChangeLog entry is correct. I would prefer this
> >> patch to be assigned to 'TC' with me as co-author but I don't know how
> >> to do such a thing. Unless I need to change my user git identity to do so ?
> > Sam already explained that, but please check with Tim how he wants to
> > be credited, if at all. He doesn't have a copyright assignment, and
> > hasn't added a DCO sign-off to the patch, but it's small enough to not
> > need it as this is the first contribution credited to him.
> >
> >
> >>   libstdc++: Add constexpr qualification to _Hash_node::_M_next()
> > What has this constexpr addition got to do with the ABI change and the
> > always_inline attributes?
> >
> > It certainly doesn't seem like it should be the summary line of the
> > git commit message.
>
> Oops, sorry, that's what I had started to do before Tim submitted anything.
>
> Here is latest version:

No patch attached, and the ChangeLog below still mentions the constexpr.

I've pinged Tim via another channel to ask him about the author attribution.


>
> Author: TC 
> Date:   Wed Sep 6 19:31:55 2023 +0200
>
>  libstdc++: Force inline on _Hash_node_value_base methods to fix abi
> (PR111050)
>
> https://gcc.gnu.org/git/?p=gcc.git;a=commit;h=1b6f0476837205932613ddb2b3429a55c26c409d
>  changed _Hash_node_value_base to no longer derive from
> _Hash_node_base, which means
>  that its member functions expect _M_storage to be at a different
> offset. So explosions
>  result if an out-of-line definition is emitted for any of the
> member functions (say,
>  in a non-optimized build) and the resulting object file is then
> linked with code built
> using an older version of GCC/libstdc++.
>
>  libstdc++-v3/ChangeLog:
>
>  PR libstdc++/111050
>  * include/bits/hashtable_policy.h
>  (_Hash_node_value_base<>::_M_valptr(),
> _Hash_node_value_base<>::_M_v())
>  Add [[__gnu__::__always_inline__]].
>  (_Hash_node<>::_M_next()): Add constexpr.
>
>  Co-authored-by: François Dumont 
>
> Ok for you TC (Tim ?) ?
>
>
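
For reference, a rough sketch of the shape of the fix under discussion,
simplified and reproduced from memory (the class and member names are the
real ones named above, but the buffer type and accessor are filled in from
the usual libstdc++ conventions), so treat it as illustrative of the change
rather than the exact patch:

  #include <ext/aligned_buffer.h>

  template<typename _Value>
    struct _Hash_node_value_base
    {
      typedef _Value value_type;

      __gnu_cxx::__aligned_buffer<_Value> _M_storage;

      // Forcing these trivial accessors inline avoids emitting out-of-line
      // definitions that hard-code the new offset of _M_storage, which is
      // what broke linking against objects built with the old layout.
      [[__gnu__::__always_inline__]]
      _Value*
      _M_valptr() noexcept
      { return _M_storage._M_ptr(); }

      [[__gnu__::__always_inline__]]
      const _Value*
      _M_valptr() const noexcept
      { return _M_storage._M_ptr(); }
    };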



Re: [PATCH] ggc, jit: forcibly clear GTY roots in jit

2023-09-12 Thread Antoni Boucher via Gcc-patches
It seems to not be enough to fix the issue.
Let me find out what's missing from my patch.

On Tue, 2023-09-12 at 11:35 +0200, Richard Biener via Jit wrote:
> On Wed, Sep 6, 2023 at 3:41 PM David Malcolm via Gcc-patches
>  wrote:
> > 
> > As part of Antoyo's work on supporting LTO in rustc_codegen_gcc, he
> > noticed an ICE inside libgccjit when compiling certain rust files.
> > 
> > Debugging libgccjit showed that outdated information from a
> > previous
> > in-memory compile was referring to ad-hoc locations in the previous
> > compile's line_table.
> > 
> > The issue turned out to be the function decls in
> > internal_fn_fnspec_array
> > from the previous compile keeping alive the symtab nodes for these
> > functions, and from this finding other functions in the previous
> > compile, walking their CFGs, and finding ad-hoc data pointers in an
> > edge
> > with a location_t using ad-hoc data from the previous line_table
> > instance, and thus a use-after-free ICE attempting to use this ad-
> > hoc
> > data.
> > 
> > Previously in toplev::finalize we've fixed global state "piecemeal"
> > by
> > calling out to individual source_name_cc_finalize functions. 
> > However,
> > it occurred to me that we have run-time information on where the
> > GTY-marked pointers are.
> > 
> > Hence this patch takes something of a "big hammer" approach by
> > adding a
> > new ggc_common_finalize that walks the GC roots, zeroing all of the
> > pointers.  I stepped through this in the debugger and observed
> > that, in
> > particular, this correctly zeroes the internal_fn_fnspec_array at
> > the end
> > of a libgccjit compile.  Antoyo reports that this fixes the ICE for
> > him.
> > Doing so uncovered an ICE with libgccjit in dwarf2cfi.cc due to
> > reuse of
> > global variables from the previous compile, which this patch also
> > fixes.
> > 
> > I noticed that in ggc_mark_roots when clearing deletable roots we
> > only
> > clear the initial element in each gcc_root_tab_t.  This looks like
> > a
> > latent bug to me, which the patch fixes.  That said, there don't
> > seem to
> > be any deletable roots where the number of elements != 1.
> > 
> > Successfully bootstrapped & regrtested on x86_64-pc-linux-gnu.
> > 
> > OK for trunk?
> 
> OK.
> 
> Thanks,
> Richard.
> 
> > Thanks
> > Dave
> > 
> > gcc/ChangeLog:
> >     * dwarf2cfi.cc (dwarf2cfi_cc_finalize): New.
> >     * dwarf2out.h (dwarf2cfi_cc_finalize): New decl.
> >     * ggc-common.cc (ggc_mark_roots): Multiply by rti->nelt
> > when
> >     clearing the deletable gcc_root_tab_t.
> >     (ggc_common_finalize): New.
> >     * ggc.h (ggc_common_finalize): New decl.
> >     * toplev.cc (toplev::finalize): Call dwarf2cfi_cc_finalize
> > and
> >     ggc_common_finalize.
> > ---
> >  gcc/dwarf2cfi.cc  |  9 +
> >  gcc/dwarf2out.h   |  1 +
> >  gcc/ggc-common.cc | 23 ++-
> >  gcc/ggc.h |  2 ++
> >  gcc/toplev.cc |  3 +++
> >  5 files changed, 37 insertions(+), 1 deletion(-)
> > 
> > diff --git a/gcc/dwarf2cfi.cc b/gcc/dwarf2cfi.cc
> > index ddc728f4ad00..f1777c0a4cf1 100644
> > --- a/gcc/dwarf2cfi.cc
> > +++ b/gcc/dwarf2cfi.cc
> > @@ -3822,4 +3822,13 @@ make_pass_dwarf2_frame (gcc::context *ctxt)
> >    return new pass_dwarf2_frame (ctxt);
> >  }
> > 
> > +void dwarf2cfi_cc_finalize ()
> > +{
> > +  add_cfi_insn = NULL;
> > +  add_cfi_vec = NULL;
> > +  cur_trace = NULL;
> > +  cur_row = NULL;
> > +  cur_cfa = NULL;
> > +}
> > +
> >  #include "gt-dwarf2cfi.h"
> > diff --git a/gcc/dwarf2out.h b/gcc/dwarf2out.h
> > index 870b56a6a372..61a996050ff9 100644
> > --- a/gcc/dwarf2out.h
> > +++ b/gcc/dwarf2out.h
> > @@ -419,6 +419,7 @@ struct fixed_point_type_info
> >  } scale_factor;
> >  };
> > 
> > +void dwarf2cfi_cc_finalize (void);
> >  void dwarf2out_cc_finalize (void);
> > 
> >  /* Some DWARF internals are exposed for the needs of DWARF-based
> > debug
> > diff --git a/gcc/ggc-common.cc b/gcc/ggc-common.cc
> > index bed7a9d4d021..95803fa95a17 100644
> > --- a/gcc/ggc-common.cc
> > +++ b/gcc/ggc-common.cc
> > @@ -86,7 +86,7 @@ ggc_mark_roots (void)
> > 
> >    for (rt = gt_ggc_deletable_rtab; *rt; rt++)
> >  for (rti = *rt; rti->base != NULL; rti++)
> > -  memset (rti->base, 0, rti->stride);
> > +  memset (rti->base, 0, rti->stride * rti->nelt);
> > 
> >    for (rt = gt_ggc_rtab; *rt; rt++)
> >  ggc_mark_root_tab (*rt);
> > @@ -1293,3 +1293,24 @@ report_heap_memory_use ()
> >  SIZE_AMOUNT (MALLINFO_FN ().arena));
> >  #endif
> >  }
> > +
> > +/* Forcibly clear all GTY roots.  */
> > +
> > +void
> > +ggc_common_finalize ()
> > +{
> > +  const struct ggc_root_tab *const *rt;
> > +  const_ggc_root_tab_t rti;
> > +
> > +  for (rt = gt_ggc_deletable_rtab; *rt; rt++)
> > +    for (rti = *rt; rti->base != NULL; rti++)
> > +  memset (rti->base, 0, rti->stride * rti->nelt);
> > +
> > +  for (rt = gt_ggc_rtab; *rt; rt++)
> > +    for (rti = *rt; rti->base != NULL; rti++)
> > +  memset 

Re: [PATCH V6] RISC-V: Enable vec_int testsuite for RVV VLA vectorization

2023-09-12 Thread Robin Dapp via Gcc-patches
The current status (for rv64gcv) is:

=== gcc tests ===

Running target unix/-march=rv64gcv
XPASS: gcc.dg/vect/bb-slp-subgroups-3.c -flto -ffat-lto-objects  
scan-tree-dump-times slp2 "optimized: basic block" 2
XPASS: gcc.dg/vect/bb-slp-subgroups-3.c scan-tree-dump-times slp2 "optimized: 
basic block" 2
XPASS: gcc.dg/vect/no-scevccp-outer-16.c scan-tree-dump-times vect "OUTER LOOP 
VECTORIZED." 1
XPASS: gcc.dg/vect/no-scevccp-outer-17.c scan-tree-dump-times vect "OUTER LOOP 
VECTORIZED." 1
XPASS: gcc.dg/vect/no-scevccp-outer-19.c scan-tree-dump-times vect "OUTER LOOP 
VECTORIZED." 1
XPASS: gcc.dg/vect/no-scevccp-outer-21.c scan-tree-dump-times vect "OUTER LOOP 
VECTORIZED." 1
FAIL: gcc.dg/vect/no-scevccp-outer-7.c scan-tree-dump-times vect 
"vect_recog_widen_mult_pattern: detected" 1
FAIL: gcc.dg/vect/no-scevccp-vect-iv-3.c scan-tree-dump-times vect 
"vect_recog_widen_sum_pattern: detected" 1
FAIL: gcc.dg/vect/pr57705.c -flto -ffat-lto-objects  scan-tree-dump-times vect 
"vectorized 1 loop" 2
FAIL: gcc.dg/vect/pr57705.c scan-tree-dump-times vect "vectorized 1 loop" 2
FAIL: gcc.dg/vect/pr65518.c -flto -ffat-lto-objects  scan-tree-dump-times vect 
"vectorized 0 loops in function" 2
FAIL: gcc.dg/vect/pr65518.c scan-tree-dump-times vect "vectorized 0 loops in 
function" 2
FAIL: gcc.dg/vect/slp-1.c -flto -ffat-lto-objects  scan-tree-dump-times vect 
"vectorizing stmts using SLP" 4
FAIL: gcc.dg/vect/slp-1.c scan-tree-dump-times vect "vectorizing stmts using 
SLP" 4
FAIL: gcc.dg/vect/slp-12a.c -flto -ffat-lto-objects  scan-tree-dump-times vect 
"vectorizing stmts using SLP" 1
FAIL: gcc.dg/vect/slp-12a.c scan-tree-dump-times vect "vectorizing stmts using 
SLP" 1
FAIL: gcc.dg/vect/slp-16.c -flto -ffat-lto-objects  scan-tree-dump-times vect 
"vectorizing stmts using SLP" 2
FAIL: gcc.dg/vect/slp-16.c scan-tree-dump-times vect "vectorizing stmts using 
SLP" 2
FAIL: gcc.dg/vect/slp-34-big-array.c -flto -ffat-lto-objects  
scan-tree-dump-times vect "vectorizing stmts using SLP" 2
FAIL: gcc.dg/vect/slp-34-big-array.c scan-tree-dump-times vect "vectorizing 
stmts using SLP" 2
FAIL: gcc.dg/vect/slp-34.c -flto -ffat-lto-objects  scan-tree-dump-times vect 
"vectorizing stmts using SLP" 2
FAIL: gcc.dg/vect/slp-34.c scan-tree-dump-times vect "vectorizing stmts using 
SLP" 2
FAIL: gcc.dg/vect/slp-35.c -flto -ffat-lto-objects  scan-tree-dump-times vect 
"vectorizing stmts using SLP" 1
FAIL: gcc.dg/vect/slp-35.c scan-tree-dump-times vect "vectorizing stmts using 
SLP" 1
XPASS: gcc.dg/vect/slp-reduc-3.c -flto -ffat-lto-objects  scan-tree-dump-times 
vect "vectorizing stmts using SLP" 1
XPASS: gcc.dg/vect/slp-reduc-3.c scan-tree-dump-times vect "vectorizing stmts 
using SLP" 1
FAIL: gcc.dg/vect/slp-reduc-4.c -flto -ffat-lto-objects  scan-tree-dump vect 
"vectorizing stmts using SLP"
FAIL: gcc.dg/vect/slp-reduc-4.c scan-tree-dump vect "vectorizing stmts using 
SLP"
FAIL: gcc.dg/vect/slp-reduc-7.c -flto -ffat-lto-objects execution test
FAIL: gcc.dg/vect/slp-reduc-7.c execution test
XPASS: gcc.dg/vect/vect-24.c -flto -ffat-lto-objects  scan-tree-dump-times vect 
"vectorized 3 loops" 1
XPASS: gcc.dg/vect/vect-24.c scan-tree-dump-times vect "vectorized 3 loops" 1
FAIL: gcc.dg/vect/vect-alias-check-4.c  (test for warnings, line 34)
FAIL: gcc.dg/vect/vect-alias-check-4.c  at line 19 (test for warnings, line 17)
FAIL: gcc.dg/vect/vect-alias-check-4.c  at line 27 (test for warnings, line 25)
FAIL: gcc.dg/vect/vect-alias-check-4.c (test for excess errors)
FAIL: gcc.dg/vect/vect-alias-check-4.c -flto -ffat-lto-objects  (test for 
warnings, line 34)
FAIL: gcc.dg/vect/vect-alias-check-4.c -flto -ffat-lto-objects  at line 19 
(test for warnings, line 17)
FAIL: gcc.dg/vect/vect-alias-check-4.c -flto -ffat-lto-objects  at line 27 
(test for warnings, line 25)
FAIL: gcc.dg/vect/vect-alias-check-4.c -flto -ffat-lto-objects (test for excess 
errors)
FAIL: gcc.dg/vect/vect-bic-bitmask-12.c -flto -ffat-lto-objects  scan-tree-dump 
dce7 "<=s*.+{ 255,.+}"
FAIL: gcc.dg/vect/vect-bic-bitmask-12.c scan-tree-dump dce7 "<=s*.+{ 
255,.+}"
FAIL: gcc.dg/vect/vect-bic-bitmask-23.c -flto -ffat-lto-objects  scan-tree-dump 
dce7 "<=s*.+{ 255, 15, 1, 65535 }"
FAIL: gcc.dg/vect/vect-bic-bitmask-23.c scan-tree-dump dce7 "<=s*.+{ 255, 
15, 1, 65535 }"
FAIL: gcc.dg/vect/vect-multitypes-11.c -flto -ffat-lto-objects  
scan-tree-dump-times vect "vectorized 1 loops" 1
FAIL: gcc.dg/vect/vect-multitypes-11.c scan-tree-dump-times vect "vectorized 1 
loops" 1

All of these are well understood.  For slp-reduc-7.c there is already a fix
posted and we will need a vsetvl pass fix after that.

Therefore, I'm going to push this to the trunk.

Note there are also a number of fortran vect failures that we haven't
looked at yet:

=== gfortran tests ===

Running target unix/-march=rv64gcv
FAIL: gfortran.dg/vect/O3-bb-slp-1.f   -O  (test for excess errors)
FAIL: gfortran.dg/vect/O3-bb-slp-2.f   -O  (test for excess errors)
FAIL: 

[PATCH 2/2] MATCH: Move `X <= MAX(X, Y)` before `MIN (X, C1) < C2` pattern

2023-09-12 Thread Andrew Pinski via Gcc-patches
Matching C1 as C2 here decreases how many other simplifications are needed
to reach the final answer, so try the `X <= MAX(X, Y)` pattern first.
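
As a rough source-level illustration (not part of the patch; exact folding
depends on types and flags), the two patterns involved behave like this:

  /* X <= MAX (X, Y) -> true  */
  int f (int x, int y)
  {
    return x <= (x > y ? x : y);   /* folds to 1 */
  }

  /* MIN (X, C1) < C2 -> X < C2 || C1 < C2  */
  int g (int x)
  {
    return (x < 3 ? x : 3) < 5;    /* 3 < 5 is true, so this also folds to 1 */
  }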

OK? Bootstrapped and tested on x86_64-linux-gnu.

gcc/ChangeLog:

* match.pd (`X <= MAX(X, Y)`):
Move before `MIN (X, C1) < C2` pattern.
---
 gcc/match.pd | 15 ---
 1 file changed, 8 insertions(+), 7 deletions(-)

diff --git a/gcc/match.pd b/gcc/match.pd
index 36e3da4841b..34b67df784e 100644
--- a/gcc/match.pd
+++ b/gcc/match.pd
@@ -3931,13 +3931,6 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
(if (wi::lt_p (wi::to_wide (@1), wi::to_wide (@2),
  TYPE_SIGN (TREE_TYPE (@0
 (cmp @0 @2)
-/* MIN (X, C1) < C2 -> X < C2 || C1 < C2  */
-(for minmax (min min max max min min max max)
- cmp(lt  le  gt  ge  gt  ge  lt  le )
- comb   (bit_ior bit_ior bit_ior bit_ior bit_and bit_and bit_and bit_and)
- (simplify
-  (cmp (minmax @0 INTEGER_CST@1) INTEGER_CST@2)
-  (comb (cmp @0 @2) (cmp @1 @2
 
 /* X <= MAX(X, Y) -> true
X > MAX(X, Y) -> false 
@@ -3949,6 +3942,14 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
   (cmp:c @0 (minmax:c @0 @1))
   { constant_boolean_node (cmp == GE_EXPR || cmp == LE_EXPR, type); } ))
 
+/* MIN (X, C1) < C2 -> X < C2 || C1 < C2  */
+(for minmax (min min max max min min max max)
+ cmp(lt  le  gt  ge  gt  ge  lt  le )
+ comb   (bit_ior bit_ior bit_ior bit_ior bit_and bit_and bit_and bit_and)
+ (simplify
+  (cmp (minmax @0 INTEGER_CST@1) INTEGER_CST@2)
+  (comb (cmp @0 @2) (cmp @1 @2
+
 /* Undo fancy ways of writing max/min or other ?: expressions, like
a - ((a - b) & -(a < b))  and  a - (a - b) * (a < b) into (a < b) ? b : a.
People normally use ?: and that is what we actually try to optimize.  */
-- 
2.31.1



[PATCH 17/19] aarch64: Explicitly record probe registers in frame info

2023-09-12 Thread Richard Sandiford via Gcc-patches
The stack frame is currently divided into three areas:

A: the area above the hard frame pointer
B: the SVE saves below the hard frame pointer
C: the outgoing arguments

If the stack frame is allocated in one chunk, the allocation needs a
probe if the frame size is >= guard_size - 1KiB.  In addition, if the
function is not a leaf function, it must probe an address no more than
1KiB above the outgoing SP.  We ensured the second condition by

(1) using single-chunk allocations for non-leaf functions only if
the link register save slot is within 512 bytes of the bottom
of the frame; and

(2) using the link register save as a probe (meaning, for instance,
that it can't be individually shrink wrapped)

If instead the stack is allocated in multiple chunks, then:

* an allocation involving only the outgoing arguments (C above) requires
  a probe if the allocation size is > 1KiB

* any other allocation requires a probe if the allocation size
  is >= guard_size - 1KiB

* second and subsequent allocations require the previous allocation
  to probe at the bottom of the allocated area, regardless of the size
  of that previous allocation

The final point means that, unlike for single allocations,
it can be necessary to have both a non-SVE register probe and
an SVE register probe.  For example:

* allocate A, probe using a non-SVE register save
* allocate B, probe using an SVE register save
* allocate C

The non-SVE register used in this case was again the link register.
It was previously used even if the link register save slot was some
bytes above the bottom of the non-SVE register saves, but an earlier
patch avoided that by putting the link register save slot first.

As a belt-and-braces fix, this patch explicitly records which
probe registers we're using and allows the non-SVE probe to be
whichever register comes first (as for SVE).

The patch also avoids unnecessary probes in sve/pcs/stack_clash_3.c.
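
As a rough sketch of the first two rules above (assumed constants,
illustrative only; this is not the code in aarch64.cc):

  #include <stdint.h>

  /* Assumed for illustration: a 64KiB guard region and the 1KiB of
     unprobed space the caller is allowed to leave at SP.  */
  #define GUARD_SIZE            (64 * 1024)
  #define GUARD_USED_BY_CALLER  1024

  /* Does one chunk of a multi-chunk frame allocation need a probe?  */
  static bool
  chunk_needs_probe (int64_t chunk_size, bool outgoing_args_only)
  {
    if (outgoing_args_only)
      /* Area C (outgoing arguments) on its own: probe if > 1KiB.  */
      return chunk_size > GUARD_USED_BY_CALLER;

    /* Any other chunk: probe if >= guard_size - 1KiB.  */
    return chunk_size >= GUARD_SIZE - GUARD_USED_BY_CALLER;
  }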

gcc/
* config/aarch64/aarch64.h (aarch64_frame::sve_save_and_probe)
(aarch64_frame::hard_fp_save_and_probe): New fields.
* config/aarch64/aarch64.cc (aarch64_layout_frame): Initialize them.
Rather than asserting that a leaf function saves LR, instead assert
that a leaf function saves something.
(aarch64_get_separate_components): Prevent the chosen probe
registers from being individually shrink-wrapped.
(aarch64_allocate_and_probe_stack_space): Remove workaround for
probe registers that aren't at the bottom of the previous allocation.

gcc/testsuite/
* gcc.target/aarch64/sve/pcs/stack_clash_3.c: Avoid redundant probes.
---
 gcc/config/aarch64/aarch64.cc | 68 +++
 gcc/config/aarch64/aarch64.h  |  8 +++
 .../aarch64/sve/pcs/stack_clash_3.c   |  6 +-
 3 files changed, 64 insertions(+), 18 deletions(-)

diff --git a/gcc/config/aarch64/aarch64.cc b/gcc/config/aarch64/aarch64.cc
index bcb879ba94b..3c7c476c4c6 100644
--- a/gcc/config/aarch64/aarch64.cc
+++ b/gcc/config/aarch64/aarch64.cc
@@ -8510,15 +8510,11 @@ aarch64_layout_frame (void)
&& !crtl->abi->clobbers_full_reg_p (regno))
   frame.reg_offset[regno] = SLOT_REQUIRED;
 
-  /* With stack-clash, LR must be saved in non-leaf functions.  The saving of
- LR counts as an implicit probe which allows us to maintain the invariant
- described in the comment at expand_prologue.  */
-  gcc_assert (crtl->is_leaf
- || maybe_ne (frame.reg_offset[R30_REGNUM], SLOT_NOT_REQUIRED));
 
   poly_int64 offset = crtl->outgoing_args_size;
   gcc_assert (multiple_p (offset, STACK_BOUNDARY / BITS_PER_UNIT));
   frame.bytes_below_saved_regs = offset;
+  frame.sve_save_and_probe = INVALID_REGNUM;
 
   /* Now assign stack slots for the registers.  Start with the predicate
  registers, since predicate LDR and STR have a relatively small
@@ -8526,6 +8522,8 @@ aarch64_layout_frame (void)
   for (regno = P0_REGNUM; regno <= P15_REGNUM; regno++)
 if (known_eq (frame.reg_offset[regno], SLOT_REQUIRED))
   {
+   if (frame.sve_save_and_probe == INVALID_REGNUM)
+ frame.sve_save_and_probe = regno;
frame.reg_offset[regno] = offset;
offset += BYTES_PER_SVE_PRED;
   }
@@ -8563,6 +8561,8 @@ aarch64_layout_frame (void)
 for (regno = V0_REGNUM; regno <= V31_REGNUM; regno++)
   if (known_eq (frame.reg_offset[regno], SLOT_REQUIRED))
{
+ if (frame.sve_save_and_probe == INVALID_REGNUM)
+   frame.sve_save_and_probe = regno;
  frame.reg_offset[regno] = offset;
  offset += vector_save_size;
}
@@ -8572,10 +8572,18 @@ aarch64_layout_frame (void)
   frame.below_hard_fp_saved_regs_size = offset - frame.bytes_below_saved_regs;
   bool saves_below_hard_fp_p
 = maybe_ne (frame.below_hard_fp_saved_regs_size, 0);
+  gcc_assert (!saves_below_hard_fp_p
+ || (frame.sve_save_and_probe != INVALID_REGNUM
+ && known_eq 

[PATCH 19/19] aarch64: Make stack smash canary protect saved registers

2023-09-12 Thread Richard Sandiford via Gcc-patches
AArch64 normally puts the saved registers near the bottom of the frame,
immediately above any dynamic allocations.  But this means that a
stack-smash attack on those dynamic allocations could overwrite the
saved registers without needing to reach as far as the stack smash
canary.

The same thing could also happen for variable-sized arguments that are
passed by value, since those are allocated before a call and popped on
return.

This patch avoids that by putting the locals (and thus the canary) below
the saved registers when stack smash protection is active.

The patch fixes CVE-2023-4039.
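
A hedged illustration of the kind of function affected (hypothetical
example, not taken from the patch or its tests):

  /* With the old layout, the alloca'd buffer sits below the saved LR/FP,
     so writing past its end can reach them without ever touching the
     stack-protector canary, which lives up in the locals area.  With the
     new layout the canary (and the locals) sit below the saved registers,
     so the overflow cannot reach them without also corrupting the canary.  */
  void
  copy_to_dynamic_buffer (const char *src, unsigned long n, unsigned long len)
  {
    char *buf = (char *) __builtin_alloca (len);
    __builtin_memcpy (buf, src, n);   /* n > len smashes upwards */
  }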

gcc/
* config/aarch64/aarch64.cc (aarch64_save_regs_above_locals_p):
New function.
(aarch64_layout_frame): Use it to decide whether locals should
go above or below the saved registers.
(aarch64_expand_prologue): Update stack layout comment.
Emit a stack tie after the final adjustment.

gcc/testsuite/
* gcc.target/aarch64/stack-protector-8.c: New test.
* gcc.target/aarch64/stack-protector-9.c: Likewise.
---
 gcc/config/aarch64/aarch64.cc | 46 +++--
 .../gcc.target/aarch64/stack-protector-8.c| 95 +++
 .../gcc.target/aarch64/stack-protector-9.c| 33 +++
 3 files changed, 168 insertions(+), 6 deletions(-)
 create mode 100644 gcc/testsuite/gcc.target/aarch64/stack-protector-8.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/stack-protector-9.c

diff --git a/gcc/config/aarch64/aarch64.cc b/gcc/config/aarch64/aarch64.cc
index 51e57370807..3739a44bfd9 100644
--- a/gcc/config/aarch64/aarch64.cc
+++ b/gcc/config/aarch64/aarch64.cc
@@ -8433,6 +8433,20 @@ aarch64_needs_frame_chain (void)
   return aarch64_use_frame_pointer;
 }
 
+/* Return true if the current function should save registers above
+   the locals area, rather than below it.  */
+
+static bool
+aarch64_save_regs_above_locals_p ()
+{
+  /* When using stack smash protection, make sure that the canary slot
+ comes between the locals and the saved registers.  Otherwise,
+ it would be possible for a carefully sized smash attack to change
+ the saved registers (particularly LR and FP) without reaching the
+ canary.  */
+  return crtl->stack_protect_guard;
+}
+
 /* Mark the registers that need to be saved by the callee and calculate
the size of the callee-saved registers area and frame record (both FP
and LR may be omitted).  */
@@ -8444,6 +8458,7 @@ aarch64_layout_frame (void)
   poly_int64 vector_save_size = GET_MODE_SIZE (vector_save_mode);
   bool frame_related_fp_reg_p = false;
   aarch64_frame  = cfun->machine->frame;
+  poly_int64 top_of_locals = -1;
 
   frame.emit_frame_chain = aarch64_needs_frame_chain ();
 
@@ -8510,9 +8525,16 @@ aarch64_layout_frame (void)
&& !crtl->abi->clobbers_full_reg_p (regno))
   frame.reg_offset[regno] = SLOT_REQUIRED;
 
+  bool regs_at_top_p = aarch64_save_regs_above_locals_p ();
 
   poly_int64 offset = crtl->outgoing_args_size;
   gcc_assert (multiple_p (offset, STACK_BOUNDARY / BITS_PER_UNIT));
+  if (regs_at_top_p)
+{
+  offset += get_frame_size ();
+  offset = aligned_upper_bound (offset, STACK_BOUNDARY / BITS_PER_UNIT);
+  top_of_locals = offset;
+}
   frame.bytes_below_saved_regs = offset;
   frame.sve_save_and_probe = INVALID_REGNUM;
 
@@ -8652,15 +8674,18 @@ aarch64_layout_frame (void)
  at expand_prologue.  */
   gcc_assert (crtl->is_leaf || maybe_ne (saved_regs_size, 0));
 
-  offset += get_frame_size ();
-  offset = aligned_upper_bound (offset, STACK_BOUNDARY / BITS_PER_UNIT);
-  auto top_of_locals = offset;
-
+  if (!regs_at_top_p)
+{
+  offset += get_frame_size ();
+  offset = aligned_upper_bound (offset, STACK_BOUNDARY / BITS_PER_UNIT);
+  top_of_locals = offset;
+}
   offset += frame.saved_varargs_size;
   gcc_assert (multiple_p (offset, STACK_BOUNDARY / BITS_PER_UNIT));
   frame.frame_size = offset;
 
   frame.bytes_above_hard_fp = frame.frame_size - frame.bytes_below_hard_fp;
+  gcc_assert (known_ge (top_of_locals, 0));
   frame.bytes_above_locals = frame.frame_size - top_of_locals;
 
   frame.initial_adjust = 0;
@@ -9979,10 +10004,10 @@ aarch64_epilogue_uses (int regno)
|  for register varargs |
|   |
+---+
-   |  local variables  | <-- frame_pointer_rtx
+   |  local variables (1)  | <-- frame_pointer_rtx
|   |
+---+
-   |  padding  |
+   |  padding (1)  |
+---+
|  callee-saved registers   |
+---+
@@ -9994,6 +10019,10 @@ aarch64_epilogue_uses (int regno)
+---+
|  SVE predicate registers  |
+---+
+   |  local variables (2)  

[PATCH 16/19] aarch64: Simplify probe of final frame allocation

2023-09-12 Thread Richard Sandiford via Gcc-patches
Previous patches ensured that the final frame allocation only needs
a probe when the size is strictly greater than 1KiB.  It's therefore
safe to use the normal 1024 probe offset in all cases.

The main motivation for doing this is to simplify the code and
reduce the number of special cases.

gcc/
* config/aarch64/aarch64.cc (aarch64_allocate_and_probe_stack_space):
Always probe the residual allocation at offset 1024, asserting
that that is in range.

gcc/testsuite/
* gcc.target/aarch64/stack-check-prologue-17.c: Expect the probe
to be at offset 1024 rather than offset 0.
* gcc.target/aarch64/stack-check-prologue-18.c: Likewise.
* gcc.target/aarch64/stack-check-prologue-19.c: Likewise.
---
 gcc/config/aarch64/aarch64.cc| 12 
 .../gcc.target/aarch64/stack-check-prologue-17.c |  2 +-
 .../gcc.target/aarch64/stack-check-prologue-18.c |  4 ++--
 .../gcc.target/aarch64/stack-check-prologue-19.c |  4 ++--
 4 files changed, 9 insertions(+), 13 deletions(-)

diff --git a/gcc/config/aarch64/aarch64.cc b/gcc/config/aarch64/aarch64.cc
index 383b32f2078..bcb879ba94b 100644
--- a/gcc/config/aarch64/aarch64.cc
+++ b/gcc/config/aarch64/aarch64.cc
@@ -9887,16 +9887,12 @@ aarch64_allocate_and_probe_stack_space (rtx temp1, rtx 
temp2,
  are still safe.  */
   if (residual)
 {
-  HOST_WIDE_INT residual_probe_offset = guard_used_by_caller;
+  gcc_assert (guard_used_by_caller + byte_sp_alignment <= size);
+
   /* If we're doing final adjustments, and we've done any full page
 allocations then any residual needs to be probed.  */
   if (final_adjustment_p && rounded_size != 0)
min_probe_threshold = 0;
-  /* If doing a small final adjustment, we always probe at offset 0.
-This is done to avoid issues when the final adjustment is smaller
-than the probing offset.  */
-  else if (final_adjustment_p && rounded_size == 0)
-   residual_probe_offset = 0;
 
   aarch64_sub_sp (temp1, temp2, residual, frame_related_p);
   if (residual >= min_probe_threshold)
@@ -9907,8 +9903,8 @@ aarch64_allocate_and_probe_stack_space (rtx temp1, rtx 
temp2,
 HOST_WIDE_INT_PRINT_DEC " bytes, probing will be required."
 "\n", residual);
 
-   emit_stack_probe (plus_constant (Pmode, stack_pointer_rtx,
-residual_probe_offset));
+ emit_stack_probe (plus_constant (Pmode, stack_pointer_rtx,
+  guard_used_by_caller));
  emit_insn (gen_blockage ());
}
 }
diff --git a/gcc/testsuite/gcc.target/aarch64/stack-check-prologue-17.c 
b/gcc/testsuite/gcc.target/aarch64/stack-check-prologue-17.c
index 0d8a25d73a2..f0ec1389771 100644
--- a/gcc/testsuite/gcc.target/aarch64/stack-check-prologue-17.c
+++ b/gcc/testsuite/gcc.target/aarch64/stack-check-prologue-17.c
@@ -33,7 +33,7 @@ int test1(int z) {
 ** ...
 ** str x30, \[sp\]
 ** sub sp, sp, #1040
-** str xzr, \[sp\]
+** str xzr, \[sp, #?1024\]
 ** cbnzw0, .*
 ** bl  g
 ** ...
diff --git a/gcc/testsuite/gcc.target/aarch64/stack-check-prologue-18.c 
b/gcc/testsuite/gcc.target/aarch64/stack-check-prologue-18.c
index 82447d20fff..6383bec5ebc 100644
--- a/gcc/testsuite/gcc.target/aarch64/stack-check-prologue-18.c
+++ b/gcc/testsuite/gcc.target/aarch64/stack-check-prologue-18.c
@@ -9,7 +9,7 @@ void g();
 ** ...
 ** str x30, \[sp\]
 ** sub sp, sp, #4064
-** str xzr, \[sp\]
+** str xzr, \[sp, #?1024\]
 ** cbnzw0, .*
 ** bl  g
 ** ...
@@ -50,7 +50,7 @@ int test1(int z) {
 ** ...
 ** str x30, \[sp\]
 ** sub sp, sp, #1040
-** str xzr, \[sp\]
+** str xzr, \[sp, #?1024\]
 ** cbnzw0, .*
 ** bl  g
 ** ...
diff --git a/gcc/testsuite/gcc.target/aarch64/stack-check-prologue-19.c 
b/gcc/testsuite/gcc.target/aarch64/stack-check-prologue-19.c
index 73ac3e4e4eb..562039b5e9b 100644
--- a/gcc/testsuite/gcc.target/aarch64/stack-check-prologue-19.c
+++ b/gcc/testsuite/gcc.target/aarch64/stack-check-prologue-19.c
@@ -9,7 +9,7 @@ void g();
 ** ...
 ** str x30, \[sp\]
 ** sub sp, sp, #4064
-** str xzr, \[sp\]
+** str xzr, \[sp, #?1024\]
 ** cbnzw0, .*
 ** bl  g
 ** ...
@@ -50,7 +50,7 @@ int test1(int z) {
 ** ...
 ** str x30, \[sp\]
 ** sub sp, sp, #1040
-** str xzr, \[sp\]
+** str xzr, \[sp, #?1024\]
 ** cbnzw0, .*
 ** bl  g
 ** ...
-- 
2.25.1



[PATCH 08/19] aarch64: Rename locals_offset to bytes_above_locals

2023-09-12 Thread Richard Sandiford via Gcc-patches
locals_offset was described as:

  /* Offset from the base of the frame (incomming SP) to the
 top of the locals area.  This value is always a multiple of
 STACK_BOUNDARY.  */

This is implicitly an “upside down” view of the frame: the incoming
SP is at offset 0, and anything N bytes below the incoming SP is at
offset N (rather than -N).

However, reg_offset instead uses a “right way up” view; that is,
it views offsets in address terms.  Something above X is at a
positive offset from X and something below X is at a negative
offset from X.

Also, even on FRAME_GROWS_DOWNWARD targets like AArch64,
target-independent code views offsets in address terms too:
locals are allocated at negative offsets to virtual_stack_vars.

It seems confusing to have *_offset fields of the same structure
using different polarities like this.  This patch tries to avoid
that by renaming locals_offset to bytes_above_locals.
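
For example, with 16 bytes of saved varargs at the top of the frame, the
top of the locals area is 16 bytes below the incoming SP; the stored value
is still 16 (it is frame.saved_varargs_size in both cases), but the name
bytes_above_locals now describes it in the same address terms as the other
fields.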

gcc/
* config/aarch64/aarch64.h (aarch64_frame::locals_offset): Rename to...
(aarch64_frame::bytes_above_locals): ...this.
* config/aarch64/aarch64.cc (aarch64_layout_frame)
(aarch64_initial_elimination_offset): Update accordingly.
---
 gcc/config/aarch64/aarch64.cc | 6 +++---
 gcc/config/aarch64/aarch64.h  | 6 +++---
 2 files changed, 6 insertions(+), 6 deletions(-)

diff --git a/gcc/config/aarch64/aarch64.cc b/gcc/config/aarch64/aarch64.cc
index 25b5fb243a6..bcd1dec6f51 100644
--- a/gcc/config/aarch64/aarch64.cc
+++ b/gcc/config/aarch64/aarch64.cc
@@ -8637,7 +8637,7 @@ aarch64_layout_frame (void)
  STACK_BOUNDARY / BITS_PER_UNIT));
   frame.frame_size = saved_regs_and_above + frame.bytes_below_saved_regs;
 
-  frame.locals_offset = frame.saved_varargs_size;
+  frame.bytes_above_locals = frame.saved_varargs_size;
 
   frame.initial_adjust = 0;
   frame.final_adjust = 0;
@@ -12854,13 +12854,13 @@ aarch64_initial_elimination_offset (unsigned from, 
unsigned to)
return frame.hard_fp_offset;
 
   if (from == FRAME_POINTER_REGNUM)
-   return frame.hard_fp_offset - frame.locals_offset;
+   return frame.hard_fp_offset - frame.bytes_above_locals;
 }
 
   if (to == STACK_POINTER_REGNUM)
 {
   if (from == FRAME_POINTER_REGNUM)
-   return frame.frame_size - frame.locals_offset;
+   return frame.frame_size - frame.bytes_above_locals;
 }
 
   return frame.frame_size;
diff --git a/gcc/config/aarch64/aarch64.h b/gcc/config/aarch64/aarch64.h
index 46dd981b85c..3382f819e72 100644
--- a/gcc/config/aarch64/aarch64.h
+++ b/gcc/config/aarch64/aarch64.h
@@ -790,10 +790,10 @@ struct GTY (()) aarch64_frame
  always a multiple of STACK_BOUNDARY.  */
   poly_int64 bytes_below_hard_fp;
 
-  /* Offset from the base of the frame (incomming SP) to the
- top of the locals area.  This value is always a multiple of
+  /* The number of bytes between the top of the locals area and the top
+ of the frame (the incomming SP).  This value is always a multiple of
  STACK_BOUNDARY.  */
-  poly_int64 locals_offset;
+  poly_int64 bytes_above_locals;
 
   /* Offset from the base of the frame (incomming SP) to the
  hard_frame_pointer.  This value is always a multiple of
-- 
2.25.1



[PATCH 18/19] aarch64: Remove below_hard_fp_saved_regs_size

2023-09-12 Thread Richard Sandiford via Gcc-patches
After previous patches, it's no longer necessary to store
saved_regs_size and below_hard_fp_saved_regs_size in the frame info.
All measurements instead use the top or bottom of the frame as
reference points.

gcc/
* config/aarch64/aarch64.h (aarch64_frame::saved_regs_size)
(aarch64_frame::below_hard_fp_saved_regs_size): Delete.
* config/aarch64/aarch64.cc (aarch64_layout_frame): Update accordingly.
---
 gcc/config/aarch64/aarch64.cc | 45 ---
 gcc/config/aarch64/aarch64.h  |  7 --
 2 files changed, 21 insertions(+), 31 deletions(-)

diff --git a/gcc/config/aarch64/aarch64.cc b/gcc/config/aarch64/aarch64.cc
index 3c7c476c4c6..51e57370807 100644
--- a/gcc/config/aarch64/aarch64.cc
+++ b/gcc/config/aarch64/aarch64.cc
@@ -8569,9 +8569,8 @@ aarch64_layout_frame (void)
 
   /* OFFSET is now the offset of the hard frame pointer from the bottom
  of the callee save area.  */
-  frame.below_hard_fp_saved_regs_size = offset - frame.bytes_below_saved_regs;
-  bool saves_below_hard_fp_p
-= maybe_ne (frame.below_hard_fp_saved_regs_size, 0);
+  auto below_hard_fp_saved_regs_size = offset - frame.bytes_below_saved_regs;
+  bool saves_below_hard_fp_p = maybe_ne (below_hard_fp_saved_regs_size, 0);
   gcc_assert (!saves_below_hard_fp_p
  || (frame.sve_save_and_probe != INVALID_REGNUM
  && known_eq (frame.reg_offset[frame.sve_save_and_probe],
@@ -8641,9 +8640,8 @@ aarch64_layout_frame (void)
 
   offset = aligned_upper_bound (offset, STACK_BOUNDARY / BITS_PER_UNIT);
 
-  frame.saved_regs_size = offset - frame.bytes_below_saved_regs;
-  gcc_assert (known_eq (frame.saved_regs_size,
-   frame.below_hard_fp_saved_regs_size)
+  auto saved_regs_size = offset - frame.bytes_below_saved_regs;
+  gcc_assert (known_eq (saved_regs_size, below_hard_fp_saved_regs_size)
  || (frame.hard_fp_save_and_probe != INVALID_REGNUM
  && known_eq (frame.reg_offset[frame.hard_fp_save_and_probe],
   frame.bytes_below_hard_fp)));
@@ -8652,7 +8650,7 @@ aarch64_layout_frame (void)
  The saving of the bottommost register counts as an implicit probe,
  which allows us to maintain the invariant described in the comment
  at expand_prologue.  */
-  gcc_assert (crtl->is_leaf || maybe_ne (frame.saved_regs_size, 0));
+  gcc_assert (crtl->is_leaf || maybe_ne (saved_regs_size, 0));
 
   offset += get_frame_size ();
   offset = aligned_upper_bound (offset, STACK_BOUNDARY / BITS_PER_UNIT);
@@ -8709,7 +8707,7 @@ aarch64_layout_frame (void)
 
   HOST_WIDE_INT const_size, const_below_saved_regs, const_above_fp;
   HOST_WIDE_INT const_saved_regs_size;
-  if (known_eq (frame.saved_regs_size, 0))
+  if (known_eq (saved_regs_size, 0))
 frame.initial_adjust = frame.frame_size;
   else if (frame.frame_size.is_constant (_size)
   && const_size < max_push_offset
@@ -8722,7 +8720,7 @@ aarch64_layout_frame (void)
   frame.callee_adjust = const_size;
 }
   else if (frame.bytes_below_saved_regs.is_constant (_below_saved_regs)
-  && frame.saved_regs_size.is_constant (_saved_regs_size)
+  && saved_regs_size.is_constant (_saved_regs_size)
   && const_below_saved_regs + const_saved_regs_size < 512
   /* We could handle this case even with data below the saved
  registers, provided that that data left us with valid offsets
@@ -8741,8 +8739,7 @@ aarch64_layout_frame (void)
   frame.initial_adjust = frame.frame_size;
 }
   else if (saves_below_hard_fp_p
-  && known_eq (frame.saved_regs_size,
-   frame.below_hard_fp_saved_regs_size))
+  && known_eq (saved_regs_size, below_hard_fp_saved_regs_size))
 {
   /* Frame in which all saves are SVE saves:
 
@@ -8764,7 +8761,7 @@ aarch64_layout_frame (void)
 [save SVE registers relative to SP]
 sub sp, sp, bytes_below_saved_regs  */
   frame.callee_adjust = const_above_fp;
-  frame.sve_callee_adjust = frame.below_hard_fp_saved_regs_size;
+  frame.sve_callee_adjust = below_hard_fp_saved_regs_size;
   frame.final_adjust = frame.bytes_below_saved_regs;
 }
   else
@@ -8779,7 +8776,7 @@ aarch64_layout_frame (void)
 [save SVE registers relative to SP]
 sub sp, sp, bytes_below_saved_regs  */
   frame.initial_adjust = frame.bytes_above_hard_fp;
-  frame.sve_callee_adjust = frame.below_hard_fp_saved_regs_size;
+  frame.sve_callee_adjust = below_hard_fp_saved_regs_size;
   frame.final_adjust = frame.bytes_below_saved_regs;
 }
 
@@ -9985,17 +9982,17 @@ aarch64_epilogue_uses (int regno)
|  local variables  | <-- frame_pointer_rtx
|   |
+---+
-   |  padding  | \
-   +---+  |
-   |  callee-saved registers   |  | frame.saved_regs_size
-   

[PATCH 14/19] aarch64: Tweak stack clash boundary condition

2023-09-12 Thread Richard Sandiford via Gcc-patches
The AArch64 ABI says that, when stack clash protection is used,
there can be a maximum of 1KiB of unprobed space at sp on entry
to a function.  Therefore, we need to probe when allocating
>= guard_size - 1KiB of data (>= rather than >).  This is what
GCC does.

If an allocation is exactly guard_size bytes, it is enough to allocate
those bytes and probe once at offset 1024.  It isn't possible to use a
single probe at any other offset: higher would complicate later code,
by leaving more unprobed space than usual, while lower would risk
leaving an entire page unprobed.  For simplicity, the code probes all
allocations at offset 1024.

Some register saves also act as probes.  If we need to allocate
more space below the last such register save probe, we need to
probe the allocation if it is > 1KiB.  Again, this allocation is
then sometimes (but not always) probed at offset 1024.  This sort of
allocation is currently only used for outgoing arguments, which are
rarely this big.

However, the code also probed if this final outgoing-arguments
allocation was == 1KiB, rather than just > 1KiB.  This isn't
necessary, since the register save then probes at offset 1024
as required.  Continuing to probe allocations of exactly 1KiB
would complicate later patches.
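
For example, in the new test below, test1's final outgoing-arguments
allocation is exactly 1024 bytes: the "str x30, [sp]" save then sits 1KiB
above the final SP and already satisfies the requirement, so no "str xzr"
probe is emitted.  test2's final allocation is 1040 bytes, so it still gets
an explicit probe.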

gcc/
* config/aarch64/aarch64.cc (aarch64_allocate_and_probe_stack_space):
Don't probe final allocations that are exactly 1KiB in size (after
unprobed space above the final allocation has been deducted).

gcc/testsuite/
* gcc.target/aarch64/stack-check-prologue-17.c: New test.
---
 gcc/config/aarch64/aarch64.cc |  4 +-
 .../aarch64/stack-check-prologue-17.c | 55 +++
 2 files changed, 58 insertions(+), 1 deletion(-)
 create mode 100644 gcc/testsuite/gcc.target/aarch64/stack-check-prologue-17.c

diff --git a/gcc/config/aarch64/aarch64.cc b/gcc/config/aarch64/aarch64.cc
index e40ccc7d1cf..b942bf3de4a 100644
--- a/gcc/config/aarch64/aarch64.cc
+++ b/gcc/config/aarch64/aarch64.cc
@@ -9697,9 +9697,11 @@ aarch64_allocate_and_probe_stack_space (rtx temp1, rtx 
temp2,
   HOST_WIDE_INT guard_size
 = 1 << param_stack_clash_protection_guard_size;
   HOST_WIDE_INT guard_used_by_caller = STACK_CLASH_CALLER_GUARD;
+  HOST_WIDE_INT byte_sp_alignment = STACK_BOUNDARY / BITS_PER_UNIT;
+  gcc_assert (multiple_p (poly_size, byte_sp_alignment));
   HOST_WIDE_INT min_probe_threshold
 = (final_adjustment_p
-   ? guard_used_by_caller
+   ? guard_used_by_caller + byte_sp_alignment
: guard_size - guard_used_by_caller);
   /* When doing the final adjustment for the outgoing arguments, take into
  account any unprobed space there is above the current SP.  There are
diff --git a/gcc/testsuite/gcc.target/aarch64/stack-check-prologue-17.c 
b/gcc/testsuite/gcc.target/aarch64/stack-check-prologue-17.c
new file mode 100644
index 000..0d8a25d73a2
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/stack-check-prologue-17.c
@@ -0,0 +1,55 @@
+/* { dg-options "-O2 -fstack-clash-protection -fomit-frame-pointer --param 
stack-clash-protection-guard-size=12" } */
+/* { dg-final { check-function-bodies "**" "" } } */
+
+void f(int, ...);
+void g();
+
+/*
+** test1:
+** ...
+** str x30, \[sp\]
+** sub sp, sp, #1024
+** cbnzw0, .*
+** bl  g
+** ...
+*/
+int test1(int z) {
+  __uint128_t x = 0;
+  int y[0x400];
+  if (z)
+{
+  f(0, 0, 0, 0, 0, 0, 0, ,
+   x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x,
+   x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x,
+   x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x,
+   x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x);
+}
+  g();
+  return 1;
+}
+
+/*
+** test2:
+** ...
+** str x30, \[sp\]
+** sub sp, sp, #1040
+** str xzr, \[sp\]
+** cbnzw0, .*
+** bl  g
+** ...
+*/
+int test2(int z) {
+  __uint128_t x = 0;
+  int y[0x400];
+  if (z)
+{
+  f(0, 0, 0, 0, 0, 0, 0, ,
+   x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x,
+   x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x,
+   x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x,
+   x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x,
+   x);
+}
+  g();
+  return 1;
+}
-- 
2.25.1



[PATCH 04/19] aarch64: Add bytes_below_saved_regs to frame info

2023-09-12 Thread Richard Sandiford via Gcc-patches
The frame layout code currently hard-codes the assumption that
the number of bytes below the saved registers is equal to the
size of the outgoing arguments.  This patch abstracts that
value into a new field of aarch64_frame.

gcc/
* config/aarch64/aarch64.h (aarch64_frame::bytes_below_saved_regs): New
field.
* config/aarch64/aarch64.cc (aarch64_layout_frame): Initialize it,
and use it instead of crtl->outgoing_args_size.
(aarch64_get_separate_components): Use bytes_below_saved_regs instead
of outgoing_args_size.
(aarch64_process_components): Likewise.
---
 gcc/config/aarch64/aarch64.cc | 71 ++-
 gcc/config/aarch64/aarch64.h  |  5 +++
 2 files changed, 41 insertions(+), 35 deletions(-)

diff --git a/gcc/config/aarch64/aarch64.cc b/gcc/config/aarch64/aarch64.cc
index 34d0ccc9a67..49c2fbedd14 100644
--- a/gcc/config/aarch64/aarch64.cc
+++ b/gcc/config/aarch64/aarch64.cc
@@ -8517,6 +8517,8 @@ aarch64_layout_frame (void)
   gcc_assert (crtl->is_leaf
  || maybe_ne (frame.reg_offset[R30_REGNUM], SLOT_NOT_REQUIRED));
 
+  frame.bytes_below_saved_regs = crtl->outgoing_args_size;
+
   /* Now assign stack slots for the registers.  Start with the predicate
  registers, since predicate LDR and STR have a relatively small
  offset range.  These saves happen below the hard frame pointer.  */
@@ -8621,18 +8623,18 @@ aarch64_layout_frame (void)
 
   poly_int64 varargs_and_saved_regs_size = offset + frame.saved_varargs_size;
 
-  poly_int64 above_outgoing_args
+  poly_int64 saved_regs_and_above
 = aligned_upper_bound (varargs_and_saved_regs_size
   + get_frame_size (),
   STACK_BOUNDARY / BITS_PER_UNIT);
 
   frame.hard_fp_offset
-= above_outgoing_args - frame.below_hard_fp_saved_regs_size;
+= saved_regs_and_above - frame.below_hard_fp_saved_regs_size;
 
   /* Both these values are already aligned.  */
-  gcc_assert (multiple_p (crtl->outgoing_args_size,
+  gcc_assert (multiple_p (frame.bytes_below_saved_regs,
  STACK_BOUNDARY / BITS_PER_UNIT));
-  frame.frame_size = above_outgoing_args + crtl->outgoing_args_size;
+  frame.frame_size = saved_regs_and_above + frame.bytes_below_saved_regs;
 
   frame.locals_offset = frame.saved_varargs_size;
 
@@ -8676,7 +8678,7 @@ aarch64_layout_frame (void)
   else if (frame.wb_pop_candidate1 != INVALID_REGNUM)
 max_push_offset = 256;
 
-  HOST_WIDE_INT const_size, const_outgoing_args_size, const_fp_offset;
+  HOST_WIDE_INT const_size, const_below_saved_regs, const_fp_offset;
   HOST_WIDE_INT const_saved_regs_size;
   if (known_eq (frame.saved_regs_size, 0))
 frame.initial_adjust = frame.frame_size;
@@ -8684,31 +8686,31 @@ aarch64_layout_frame (void)
   && const_size < max_push_offset
   && known_eq (frame.hard_fp_offset, const_size))
 {
-  /* Simple, small frame with no outgoing arguments:
+  /* Simple, small frame with no data below the saved registers.
 
 stp reg1, reg2, [sp, -frame_size]!
 stp reg3, reg4, [sp, 16]  */
   frame.callee_adjust = const_size;
 }
-  else if (crtl->outgoing_args_size.is_constant (_outgoing_args_size)
+  else if (frame.bytes_below_saved_regs.is_constant (_below_saved_regs)
   && frame.saved_regs_size.is_constant (_saved_regs_size)
-  && const_outgoing_args_size + const_saved_regs_size < 512
-  /* We could handle this case even with outgoing args, provided
- that the number of args left us with valid offsets for all
- predicate and vector save slots.  It's such a rare case that
- it hardly seems worth the effort though.  */
-  && (!saves_below_hard_fp_p || const_outgoing_args_size == 0)
+  && const_below_saved_regs + const_saved_regs_size < 512
+  /* We could handle this case even with data below the saved
+ registers, provided that that data left us with valid offsets
+ for all predicate and vector save slots.  It's such a rare
+ case that it hardly seems worth the effort though.  */
+  && (!saves_below_hard_fp_p || const_below_saved_regs == 0)
   && !(cfun->calls_alloca
&& frame.hard_fp_offset.is_constant (_fp_offset)
&& const_fp_offset < max_push_offset))
 {
-  /* Frame with small outgoing arguments:
+  /* Frame with small area below the saved registers:
 
 sub sp, sp, frame_size
-stp reg1, reg2, [sp, outgoing_args_size]
-stp reg3, reg4, [sp, outgoing_args_size + 16]  */
+stp reg1, reg2, [sp, bytes_below_saved_regs]
+stp reg3, reg4, [sp, bytes_below_saved_regs + 16]  */
   frame.initial_adjust = frame.frame_size;
-  frame.callee_offset = const_outgoing_args_size;
+  frame.callee_offset = const_below_saved_regs;
 }
   else if (saves_below_hard_fp_p
   && known_eq 

[PATCH 15/19] aarch64: Put LR save probe in first 16 bytes

2023-09-12 Thread Richard Sandiford via Gcc-patches
-fstack-clash-protection uses the save of LR as a probe for the next
allocation.  The next allocation could be:

* another part of the static frame, e.g. when allocating SVE save slots
  or outgoing arguments

* an alloca in the same function

* an allocation made by a callee function

However, when -fomit-frame-pointer is used, the LR save slot is placed
above the other GPR save slots.  It could therefore be up to 80 bytes
above the base of the GPR save area (which is also the hard fp address).

aarch64_allocate_and_probe_stack_space took this into account when
deciding how much subsequent space could be allocated without needing
a probe.  However, it interacted badly with:

  /* If doing a small final adjustment, we always probe at offset 0.
 This is done to avoid issues when LR is not at position 0 or when
 the final adjustment is smaller than the probing offset.  */
  else if (final_adjustment_p && rounded_size == 0)
residual_probe_offset = 0;

which forces any allocation that is smaller than the guard page size
to be probed at offset 0 rather than the usual offset 1024.  It was
therefore possible to construct cases in which we had:

* a probe using LR at SP + 80 bytes (or some other value >= 16)
* an allocation of the guard page size - 16 bytes
* a probe at SP + 0

which allocates guard page size + 64 consecutive unprobed bytes.

This patch requires the LR probe to be in the first 16 bytes of the
save area when stack clash protection is active.  Doing it
unconditionally would cause code-quality regressions.

Putting LR before other registers prevents push/pop allocation
when shadow call stacks are enabled, since LR is restored
separately from the other callee-saved registers.

The new comment doesn't say that the probe register is required
to be LR, since a later patch removes that restriction.

gcc/
* config/aarch64/aarch64.cc (aarch64_layout_frame): Ensure that
the LR save slot is in the first 16 bytes of the register save area.
Only form STP/LDP push/pop candidates if both registers are valid.
(aarch64_allocate_and_probe_stack_space): Remove workaround for
when LR was not in the first 16 bytes.

gcc/testsuite/
* gcc.target/aarch64/stack-check-prologue-18.c: New test.
* gcc.target/aarch64/stack-check-prologue-19.c: Likewise.
* gcc.target/aarch64/stack-check-prologue-20.c: Likewise.
---
 gcc/config/aarch64/aarch64.cc |  72 ++---
 .../aarch64/stack-check-prologue-18.c | 100 ++
 .../aarch64/stack-check-prologue-19.c | 100 ++
 .../aarch64/stack-check-prologue-20.c |   3 +
 4 files changed, 233 insertions(+), 42 deletions(-)
 create mode 100644 gcc/testsuite/gcc.target/aarch64/stack-check-prologue-18.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/stack-check-prologue-19.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/stack-check-prologue-20.c

diff --git a/gcc/config/aarch64/aarch64.cc b/gcc/config/aarch64/aarch64.cc
index b942bf3de4a..383b32f2078 100644
--- a/gcc/config/aarch64/aarch64.cc
+++ b/gcc/config/aarch64/aarch64.cc
@@ -8573,26 +8573,34 @@ aarch64_layout_frame (void)
   bool saves_below_hard_fp_p
 = maybe_ne (frame.below_hard_fp_saved_regs_size, 0);
   frame.bytes_below_hard_fp = offset;
+
+  auto allocate_gpr_slot = [&](unsigned int regno)
+{
+  frame.reg_offset[regno] = offset;
+  if (frame.wb_push_candidate1 == INVALID_REGNUM)
+   frame.wb_push_candidate1 = regno;
+  else if (frame.wb_push_candidate2 == INVALID_REGNUM)
+   frame.wb_push_candidate2 = regno;
+  offset += UNITS_PER_WORD;
+};
+
   if (frame.emit_frame_chain)
 {
   /* FP and LR are placed in the linkage record.  */
-  frame.reg_offset[R29_REGNUM] = offset;
-  frame.wb_push_candidate1 = R29_REGNUM;
-  frame.reg_offset[R30_REGNUM] = offset + UNITS_PER_WORD;
-  frame.wb_push_candidate2 = R30_REGNUM;
-  offset += 2 * UNITS_PER_WORD;
+  allocate_gpr_slot (R29_REGNUM);
+  allocate_gpr_slot (R30_REGNUM);
 }
+  else if (flag_stack_clash_protection
+  && known_eq (frame.reg_offset[R30_REGNUM], SLOT_REQUIRED))
+/* Put the LR save slot first, since it makes a good choice of probe
+   for stack clash purposes.  The idea is that the link register usually
+   has to be saved before a call anyway, and so we lose little by
+   stopping it from being individually shrink-wrapped.  */
+allocate_gpr_slot (R30_REGNUM);
 
   for (regno = R0_REGNUM; regno <= R30_REGNUM; regno++)
 if (known_eq (frame.reg_offset[regno], SLOT_REQUIRED))
-  {
-   frame.reg_offset[regno] = offset;
-   if (frame.wb_push_candidate1 == INVALID_REGNUM)
- frame.wb_push_candidate1 = regno;
-   else if (frame.wb_push_candidate2 == INVALID_REGNUM)
- frame.wb_push_candidate2 = regno;
-   offset += UNITS_PER_WORD;
-  }
+  allocate_gpr_slot 

[PATCH 13/19] aarch64: Minor initial adjustment tweak

2023-09-12 Thread Richard Sandiford via Gcc-patches
This patch just changes a calculation of initial_adjust
to one that makes it slightly more obvious that the total
adjustment is frame.frame_size.

gcc/
* config/aarch64/aarch64.cc (aarch64_layout_frame): Tweak
calculation of initial_adjust for frames in which all saves
are SVE saves.
---
 gcc/config/aarch64/aarch64.cc | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/gcc/config/aarch64/aarch64.cc b/gcc/config/aarch64/aarch64.cc
index 9578592d256..e40ccc7d1cf 100644
--- a/gcc/config/aarch64/aarch64.cc
+++ b/gcc/config/aarch64/aarch64.cc
@@ -8714,11 +8714,10 @@ aarch64_layout_frame (void)
 {
   /* Frame in which all saves are SVE saves:
 
-sub sp, sp, hard_fp_offset + below_hard_fp_saved_regs_size
+sub sp, sp, frame_size - bytes_below_saved_regs
 save SVE registers relative to SP
 sub sp, sp, bytes_below_saved_regs  */
-  frame.initial_adjust = (frame.bytes_above_hard_fp
- + frame.below_hard_fp_saved_regs_size);
+  frame.initial_adjust = frame.frame_size - frame.bytes_below_saved_regs;
   frame.final_adjust = frame.bytes_below_saved_regs;
 }
   else if (frame.bytes_above_hard_fp.is_constant (_above_fp)
-- 
2.25.1



[PATCH 10/19] aarch64: Tweak frame_size comment

2023-09-12 Thread Richard Sandiford via Gcc-patches
This patch fixes another case in which a value was described with
an “upside-down” view.

gcc/
* config/aarch64/aarch64.h (aarch64_frame::frame_size): Tweak comment.
---
 gcc/config/aarch64/aarch64.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/gcc/config/aarch64/aarch64.h b/gcc/config/aarch64/aarch64.h
index 4a4de9c044e..92965eced0a 100644
--- a/gcc/config/aarch64/aarch64.h
+++ b/gcc/config/aarch64/aarch64.h
@@ -800,8 +800,8 @@ struct GTY (()) aarch64_frame
  STACK_BOUNDARY.  */
   poly_int64 bytes_above_hard_fp;
 
-  /* The size of the frame.  This value is the offset from base of the
- frame (incomming SP) to the stack_pointer.  This value is always
+  /* The size of the frame, i.e. the number of bytes between the bottom
+ of the outgoing arguments and the incoming SP.  This value is always
  a multiple of STACK_BOUNDARY.  */
   poly_int64 frame_size;
 
-- 
2.25.1



[PATCH 03/19] aarch64: Explicitly handle frames with no saved registers

2023-09-12 Thread Richard Sandiford via Gcc-patches
If a frame has no saved registers, it can be allocated in one go.
There is no need to treat the areas below and above the saved
registers as separate.

And if we allocate the frame in one go, it should be allocated
as the initial_adjust rather than the final_adjust.  This allows the
frame size to grow to guard_size - guard_used_by_caller before a stack
probe is needed.  (A frame with no register saves is necessarily a
leaf frame.)

This is a no-op as things stand, since a leaf function will have
no outgoing arguments, and so all the frame will be above where
the saved registers normally go.
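
For example, assuming the usual 64KiB stack-clash guard size and the 1KiB
caller allowance, such a register-save-free leaf frame can now grow to
guard_size - guard_used_by_caller (63KiB) before its single initial_adjust
allocation needs a probe.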

gcc/
* config/aarch64/aarch64.cc (aarch64_layout_frame): Explicitly
allocate the frame in one go if there are no saved registers.
---
 gcc/config/aarch64/aarch64.cc | 8 +---
 1 file changed, 5 insertions(+), 3 deletions(-)

diff --git a/gcc/config/aarch64/aarch64.cc b/gcc/config/aarch64/aarch64.cc
index 9fb94623693..34d0ccc9a67 100644
--- a/gcc/config/aarch64/aarch64.cc
+++ b/gcc/config/aarch64/aarch64.cc
@@ -8678,9 +8678,11 @@ aarch64_layout_frame (void)
 
   HOST_WIDE_INT const_size, const_outgoing_args_size, const_fp_offset;
   HOST_WIDE_INT const_saved_regs_size;
-  if (frame.frame_size.is_constant (_size)
-  && const_size < max_push_offset
-  && known_eq (frame.hard_fp_offset, const_size))
+  if (known_eq (frame.saved_regs_size, 0))
+frame.initial_adjust = frame.frame_size;
+  else if (frame.frame_size.is_constant (_size)
+  && const_size < max_push_offset
+  && known_eq (frame.hard_fp_offset, const_size))
 {
   /* Simple, small frame with no outgoing arguments:
 
-- 
2.25.1



[PATCH 11/19] aarch64: Measure reg_offset from the bottom of the frame

2023-09-12 Thread Richard Sandiford via Gcc-patches
reg_offset was measured from the bottom of the saved register area.
This made perfect sense with the original layout, since the bottom
of the saved register area was also the hard frame pointer address.
It became slightly less obvious with SVE, since we save SVE
registers below the hard frame pointer, but it still made sense.

However, if we want to allow different frame layouts, it's more
convenient and obvious to measure reg_offset from the bottom of
the frame.  After previous patches, it's also a slight simplification
in its own right.
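
As a concrete (illustrative) example: with 32 bytes of outgoing arguments,
a register that used to be recorded at reg_offset 0, the bottom of the
saved-register area, is now recorded at reg_offset 32, and the save/restore
code can use frame.reg_offset[regno] - bytes_below_sp directly instead of
first adding bytes_below_saved_regs.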

gcc/
* config/aarch64/aarch64.h (aarch64_frame): Add comment above
reg_offset.
* config/aarch64/aarch64.cc (aarch64_layout_frame): Walk offsets
from the bottom of the frame, rather than the bottom of the saved
register area.  Measure reg_offset from the bottom of the frame
rather than the bottom of the saved register area.
(aarch64_save_callee_saves): Update accordingly.
(aarch64_restore_callee_saves): Likewise.
(aarch64_get_separate_components): Likewise.
(aarch64_process_components): Likewise.
---
 gcc/config/aarch64/aarch64.cc | 53 ---
 gcc/config/aarch64/aarch64.h  |  3 ++
 2 files changed, 27 insertions(+), 29 deletions(-)

diff --git a/gcc/config/aarch64/aarch64.cc b/gcc/config/aarch64/aarch64.cc
index 7d642d06871..ca2e6af5d12 100644
--- a/gcc/config/aarch64/aarch64.cc
+++ b/gcc/config/aarch64/aarch64.cc
@@ -8439,7 +8439,6 @@ aarch64_needs_frame_chain (void)
 static void
 aarch64_layout_frame (void)
 {
-  poly_int64 offset = 0;
   int regno, last_fp_reg = INVALID_REGNUM;
   machine_mode vector_save_mode = aarch64_reg_save_mode (V8_REGNUM);
   poly_int64 vector_save_size = GET_MODE_SIZE (vector_save_mode);
@@ -8517,7 +8516,9 @@ aarch64_layout_frame (void)
   gcc_assert (crtl->is_leaf
  || maybe_ne (frame.reg_offset[R30_REGNUM], SLOT_NOT_REQUIRED));
 
-  frame.bytes_below_saved_regs = crtl->outgoing_args_size;
+  poly_int64 offset = crtl->outgoing_args_size;
+  gcc_assert (multiple_p (offset, STACK_BOUNDARY / BITS_PER_UNIT));
+  frame.bytes_below_saved_regs = offset;
 
   /* Now assign stack slots for the registers.  Start with the predicate
  registers, since predicate LDR and STR have a relatively small
@@ -8529,7 +8530,8 @@ aarch64_layout_frame (void)
offset += BYTES_PER_SVE_PRED;
   }
 
-  if (maybe_ne (offset, 0))
+  poly_int64 saved_prs_size = offset - frame.bytes_below_saved_regs;
+  if (maybe_ne (saved_prs_size, 0))
 {
   /* If we have any vector registers to save above the predicate registers,
 the offset of the vector register save slots need to be a multiple
@@ -8547,10 +8549,10 @@ aarch64_layout_frame (void)
offset = aligned_upper_bound (offset, STACK_BOUNDARY / BITS_PER_UNIT);
   else
{
- if (known_le (offset, vector_save_size))
-   offset = vector_save_size;
- else if (known_le (offset, vector_save_size * 2))
-   offset = vector_save_size * 2;
+ if (known_le (saved_prs_size, vector_save_size))
+   offset = frame.bytes_below_saved_regs + vector_save_size;
+ else if (known_le (saved_prs_size, vector_save_size * 2))
+   offset = frame.bytes_below_saved_regs + vector_save_size * 2;
  else
gcc_unreachable ();
}
@@ -8567,9 +8569,10 @@ aarch64_layout_frame (void)
 
   /* OFFSET is now the offset of the hard frame pointer from the bottom
  of the callee save area.  */
-  bool saves_below_hard_fp_p = maybe_ne (offset, 0);
-  frame.below_hard_fp_saved_regs_size = offset;
-  frame.bytes_below_hard_fp = offset + frame.bytes_below_saved_regs;
+  frame.below_hard_fp_saved_regs_size = offset - frame.bytes_below_saved_regs;
+  bool saves_below_hard_fp_p
+= maybe_ne (frame.below_hard_fp_saved_regs_size, 0);
+  frame.bytes_below_hard_fp = offset;
   if (frame.emit_frame_chain)
 {
   /* FP and LR are placed in the linkage record.  */
@@ -8620,9 +8623,10 @@ aarch64_layout_frame (void)
 
   offset = aligned_upper_bound (offset, STACK_BOUNDARY / BITS_PER_UNIT);
 
-  frame.saved_regs_size = offset;
+  frame.saved_regs_size = offset - frame.bytes_below_saved_regs;
 
-  poly_int64 varargs_and_saved_regs_size = offset + frame.saved_varargs_size;
+  poly_int64 varargs_and_saved_regs_size
+= frame.saved_regs_size + frame.saved_varargs_size;
 
   poly_int64 saved_regs_and_above
 = aligned_upper_bound (varargs_and_saved_regs_size
@@ -9144,9 +9148,7 @@ aarch64_save_callee_saves (poly_int64 bytes_below_sp,
 
   machine_mode mode = aarch64_reg_save_mode (regno);
   reg = gen_rtx_REG (mode, regno);
-  offset = (frame.reg_offset[regno]
-   + frame.bytes_below_saved_regs
-   - bytes_below_sp);
+  offset = frame.reg_offset[regno] - bytes_below_sp;
   rtx base_rtx = stack_pointer_rtx;
   poly_int64 sp_offset = offset;
 
@@ -9253,9 +9255,7 @@ 

[PATCH 09/19] aarch64: Rename hard_fp_offset to bytes_above_hard_fp

2023-09-12 Thread Richard Sandiford via Gcc-patches
Similarly to the previous locals_offset patch, hard_fp_offset
was described as:

  /* Offset from the base of the frame (incomming SP) to the
 hard_frame_pointer.  This value is always a multiple of
 STACK_BOUNDARY.  */
  poly_int64 hard_fp_offset;

which again took an “upside-down” view: higher offsets meant lower
addresses.  This patch renames the field to bytes_above_hard_fp instead.

gcc/
* config/aarch64/aarch64.h (aarch64_frame::hard_fp_offset): Rename
to...
(aarch64_frame::bytes_above_hard_fp): ...this.
* config/aarch64/aarch64.cc (aarch64_layout_frame)
(aarch64_expand_prologue): Update accordingly.
(aarch64_initial_elimination_offset): Likewise.
---
 gcc/config/aarch64/aarch64.cc | 26 +-
 gcc/config/aarch64/aarch64.h  |  6 +++---
 2 files changed, 16 insertions(+), 16 deletions(-)

diff --git a/gcc/config/aarch64/aarch64.cc b/gcc/config/aarch64/aarch64.cc
index bcd1dec6f51..7d642d06871 100644
--- a/gcc/config/aarch64/aarch64.cc
+++ b/gcc/config/aarch64/aarch64.cc
@@ -8629,7 +8629,7 @@ aarch64_layout_frame (void)
   + get_frame_size (),
   STACK_BOUNDARY / BITS_PER_UNIT);
 
-  frame.hard_fp_offset
+  frame.bytes_above_hard_fp
 = saved_regs_and_above - frame.below_hard_fp_saved_regs_size;
 
   /* Both these values are already aligned.  */
@@ -8678,13 +8678,13 @@ aarch64_layout_frame (void)
   else if (frame.wb_pop_candidate1 != INVALID_REGNUM)
 max_push_offset = 256;
 
-  HOST_WIDE_INT const_size, const_below_saved_regs, const_fp_offset;
+  HOST_WIDE_INT const_size, const_below_saved_regs, const_above_fp;
   HOST_WIDE_INT const_saved_regs_size;
   if (known_eq (frame.saved_regs_size, 0))
 frame.initial_adjust = frame.frame_size;
   else if (frame.frame_size.is_constant (&const_size)
   && const_size < max_push_offset
-  && known_eq (frame.hard_fp_offset, const_size))
+  && known_eq (frame.bytes_above_hard_fp, const_size))
 {
   /* Simple, small frame with no data below the saved registers.
 
@@ -8701,8 +8701,8 @@ aarch64_layout_frame (void)
  case that it hardly seems worth the effort though.  */
   && (!saves_below_hard_fp_p || const_below_saved_regs == 0)
   && !(cfun->calls_alloca
-   && frame.hard_fp_offset.is_constant (&const_fp_offset)
-   && const_fp_offset < max_push_offset))
+   && frame.bytes_above_hard_fp.is_constant (&const_above_fp)
+   && const_above_fp < max_push_offset))
 {
   /* Frame with small area below the saved registers:
 
@@ -8720,12 +8720,12 @@ aarch64_layout_frame (void)
 sub sp, sp, hard_fp_offset + below_hard_fp_saved_regs_size
 save SVE registers relative to SP
 sub sp, sp, bytes_below_saved_regs  */
-  frame.initial_adjust = (frame.hard_fp_offset
+  frame.initial_adjust = (frame.bytes_above_hard_fp
  + frame.below_hard_fp_saved_regs_size);
   frame.final_adjust = frame.bytes_below_saved_regs;
 }
-  else if (frame.hard_fp_offset.is_constant (&const_fp_offset)
-  && const_fp_offset < max_push_offset)
+  else if (frame.bytes_above_hard_fp.is_constant (&const_above_fp)
+  && const_above_fp < max_push_offset)
 {
   /* Frame with large area below the saved registers, or with SVE saves,
 but with a small area above:
@@ -8735,7 +8735,7 @@ aarch64_layout_frame (void)
 [sub sp, sp, below_hard_fp_saved_regs_size]
 [save SVE registers relative to SP]
 sub sp, sp, bytes_below_saved_regs  */
-  frame.callee_adjust = const_fp_offset;
+  frame.callee_adjust = const_above_fp;
   frame.sve_callee_adjust = frame.below_hard_fp_saved_regs_size;
   frame.final_adjust = frame.bytes_below_saved_regs;
 }
@@ -8750,7 +8750,7 @@ aarch64_layout_frame (void)
 [sub sp, sp, below_hard_fp_saved_regs_size]
 [save SVE registers relative to SP]
 sub sp, sp, bytes_below_saved_regs  */
-  frame.initial_adjust = frame.hard_fp_offset;
+  frame.initial_adjust = frame.bytes_above_hard_fp;
   frame.sve_callee_adjust = frame.below_hard_fp_saved_regs_size;
   frame.final_adjust = frame.bytes_below_saved_regs;
 }
@@ -10118,7 +10118,7 @@ aarch64_expand_prologue (void)
 {
   /* The offset of the frame chain record (if any) from the current SP.  */
   poly_int64 chain_offset = (initial_adjust + callee_adjust
-- frame.hard_fp_offset);
+- frame.bytes_above_hard_fp);
   gcc_assert (known_ge (chain_offset, 0));
 
   if (callee_adjust == 0)
@@ -12851,10 +12851,10 @@ aarch64_initial_elimination_offset (unsigned from, 
unsigned to)
   if (to == HARD_FRAME_POINTER_REGNUM)
 {
   if (from == ARG_POINTER_REGNUM)
-   return frame.hard_fp_offset;
+   return frame.bytes_above_hard_fp;
 
   if (from == FRAME_POINTER_REGNUM)

[PATCH 1/2] MATCH: [PR111364] Add some more minmax cmp operand simplifications

2023-09-12 Thread Andrew Pinski via Gcc-patches
This adds a few more minmax cmp operand simplifications which were missed 
before.
`MIN(a,b) < a` -> `a > b`
`MIN(a,b) >= a` -> `a <= b`
`MAX(a,b) > a` -> `a < b`
`MAX(a,b) <= a` -> `a >= b`
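
As a small illustration (not from the patch itself; the function name is
made up), the first of these now fires on code like:

  /* MIN (a, b) < a is true exactly when b < a, so after this patch the
     whole body folds to "return a > b;".  */
  int
  min_lt (int a, int b)
  {
    int t = a < b ? a : b;   /* becomes a MIN_EXPR */
    return t < a;
  }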

OK? Bootstrapped and tested on x86_64-linux-gnu.

Note gcc.dg/pr96708-negative.c needed to be updated to remove the
check for MIN/MAX as they have been optimized (correctly) away.

PR tree-optimization/111364

gcc/ChangeLog:

* match.pd (`MIN (X, Y) == X`): Extend
to min/lt, min/ge, max/gt, max/le.

gcc/testsuite/ChangeLog:

* gcc.c-torture/execute/minmaxcmp-1.c: New test.
* gcc.dg/tree-ssa/minmaxcmp-2.c: New test.
* gcc.dg/pr96708-negative.c: Update testcase.
* gcc.dg/pr96708-positive.c: Add comment about `return 0`.
---
 gcc/match.pd  |  8 +--
 .../gcc.c-torture/execute/minmaxcmp-1.c   | 51 +++
 gcc/testsuite/gcc.dg/pr96708-negative.c   |  4 +-
 gcc/testsuite/gcc.dg/pr96708-positive.c   |  1 +
 gcc/testsuite/gcc.dg/tree-ssa/minmaxcmp-2.c   | 30 +++
 5 files changed, 89 insertions(+), 5 deletions(-)
 create mode 100644 gcc/testsuite/gcc.c-torture/execute/minmaxcmp-1.c
 create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/minmaxcmp-2.c

diff --git a/gcc/match.pd b/gcc/match.pd
index 51985c1bad4..36e3da4841b 100644
--- a/gcc/match.pd
+++ b/gcc/match.pd
@@ -3902,9 +3902,11 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
   (maxmin @0 (bit_not @1
 
 /* MIN (X, Y) == X -> X <= Y  */
-(for minmax (min min max max)
- cmp(eq  ne  eq  ne )
- out(le  gt  ge  lt )
+/* MIN (X, Y) < X -> X > Y  */
+/* MIN (X, Y) >= X -> X <= Y  */
+(for minmax (min min min min max max max max)
+ cmp(eq  ne  lt  ge  eq  ne  gt  le )
+ out(le  gt  gt  le  ge  lt  lt  ge )
  (simplify
   (cmp:c (minmax:c @0 @1) @0)
   (if (ANY_INTEGRAL_TYPE_P (TREE_TYPE (@0)))
diff --git a/gcc/testsuite/gcc.c-torture/execute/minmaxcmp-1.c 
b/gcc/testsuite/gcc.c-torture/execute/minmaxcmp-1.c
new file mode 100644
index 000..6705a053768
--- /dev/null
+++ b/gcc/testsuite/gcc.c-torture/execute/minmaxcmp-1.c
@@ -0,0 +1,51 @@
+#define func(vol, op1, op2)\
+_Bool op1##_##op2##_##vol (int a, int b)   \
+{  \
+ vol int x = op_##op1(a, b);   \
+ return op_##op2(x, a);\
+}
+
+#define op_lt(a, b) ((a) < (b))
+#define op_le(a, b) ((a) <= (b))
+#define op_eq(a, b) ((a) == (b))
+#define op_ne(a, b) ((a) != (b))
+#define op_gt(a, b) ((a) > (b))
+#define op_ge(a, b) ((a) >= (b))
+#define op_min(a, b) ((a) < (b) ? (a) : (b))
+#define op_max(a, b) ((a) > (b) ? (a) : (b))
+
+
+#define funcs(a) \
+ a(min,lt) \
+ a(max,lt) \
+ a(min,gt) \
+ a(max,gt) \
+ a(min,le) \
+ a(max,le) \
+ a(min,ge) \
+ a(max,ge) \
+ a(min,ne) \
+ a(max,ne) \
+ a(min,eq) \
+ a(max,eq)
+
+#define funcs1(a,b) \
+func(,a,b) \
+func(volatile,a,b)
+
+funcs(funcs1)
+
+#define test(op1,op2)   \
+do {\
+  if (op1##_##op2##_(x,y) != op1##_##op2##_volatile(x,y))   \
+__builtin_abort();  \
+} while(0);
+
+int main()
+{
+  for(int x = -10; x < 10; x++)
+for(int y = -10; y < 10; y++)
+{
+funcs(test)
+}
+}
diff --git a/gcc/testsuite/gcc.dg/pr96708-negative.c 
b/gcc/testsuite/gcc.dg/pr96708-negative.c
index 91964d3b971..c9c1aa85558 100644
--- a/gcc/testsuite/gcc.dg/pr96708-negative.c
+++ b/gcc/testsuite/gcc.dg/pr96708-negative.c
@@ -42,7 +42,7 @@ int main()
 return 0;
 }
 
-/* { dg-final { scan-tree-dump-times "MAX_EXPR" 2 "optimized" } } */
-/* { dg-final { scan-tree-dump-times "MIN_EXPR" 2 "optimized" } } */
+/* Even though test[1-4] originally has MIN/MAX, those can be optimized away
+   into just comparing a and b arguments. */
 /* { dg-final { scan-tree-dump-times "return 0;" 1 "optimized" } } */
 /* { dg-final { scan-tree-dump-not { "return 1;" } "optimized" } } */
diff --git a/gcc/testsuite/gcc.dg/pr96708-positive.c 
b/gcc/testsuite/gcc.dg/pr96708-positive.c
index 65af85344b6..12c5fedfd30 100644
--- a/gcc/testsuite/gcc.dg/pr96708-positive.c
+++ b/gcc/testsuite/gcc.dg/pr96708-positive.c
@@ -42,6 +42,7 @@ int main()
 return 0;
 }
 
+/* Note main has one `return 0`. */
 /* { dg-final { scan-tree-dump-times "return 0;" 3 "optimized" } } */
 /* { dg-final { scan-tree-dump-times "return 1;" 2 "optimized" } } */
 /* { dg-final { scan-tree-dump-not { "MAX_EXPR" } "optimized" } } */
diff --git a/gcc/testsuite/gcc.dg/tree-ssa/minmaxcmp-2.c 
b/gcc/testsuite/gcc.dg/tree-ssa/minmaxcmp-2.c
new file mode 100644
index 000..f64a9253cfb
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/tree-ssa/minmaxcmp-2.c
@@ -0,0 +1,30 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -fdump-tree-original" } */
+/* PR tree-optimization/111364 */
+
+#define min1(a, b) ((a) < (b) ? (a) : (b))
+#define max1(a, b) ((a) > (b) ? (a) : (b))
+
+int minlt(int a, int b)
+{
+return min1(a, b) < a; // b < a or a > b

[PATCH 06/19] aarch64: Tweak aarch64_save/restore_callee_saves

2023-09-12 Thread Richard Sandiford via Gcc-patches
aarch64_save_callee_saves and aarch64_restore_callee_saves took
a parameter called start_offset that gives the offset of the
bottom of the saved register area from the current stack pointer.
However, it's more convenient for later patches if we use the
bottom of the entire frame as the reference point, rather than
the bottom of the saved registers.

Doing that removes the need for the callee_offset field.
Other than that, this is not a win on its own.  It only really
makes sense in combination with the follow-on patches.

gcc/
* config/aarch64/aarch64.h (aarch64_frame::callee_offset): Delete.
* config/aarch64/aarch64.cc (aarch64_layout_frame): Remove
callee_offset handling.
(aarch64_save_callee_saves): Replace the start_offset parameter
with a bytes_below_sp parameter.
(aarch64_restore_callee_saves): Likewise.
(aarch64_expand_prologue): Update accordingly.
(aarch64_expand_epilogue): Likewise.
---
 gcc/config/aarch64/aarch64.cc | 56 +--
 gcc/config/aarch64/aarch64.h  |  4 ---
 2 files changed, 28 insertions(+), 32 deletions(-)

diff --git a/gcc/config/aarch64/aarch64.cc b/gcc/config/aarch64/aarch64.cc
index 58dd8946232..2c218c90906 100644
--- a/gcc/config/aarch64/aarch64.cc
+++ b/gcc/config/aarch64/aarch64.cc
@@ -8643,7 +8643,6 @@ aarch64_layout_frame (void)
   frame.final_adjust = 0;
   frame.callee_adjust = 0;
   frame.sve_callee_adjust = 0;
-  frame.callee_offset = 0;
 
   frame.wb_pop_candidate1 = frame.wb_push_candidate1;
   frame.wb_pop_candidate2 = frame.wb_push_candidate2;
@@ -8711,7 +8710,6 @@ aarch64_layout_frame (void)
 stp reg1, reg2, [sp, bytes_below_saved_regs]
 stp reg3, reg4, [sp, bytes_below_saved_regs + 16]  */
   frame.initial_adjust = frame.frame_size;
-  frame.callee_offset = const_below_saved_regs;
 }
   else if (saves_below_hard_fp_p
   && known_eq (frame.saved_regs_size,
@@ -9112,12 +9110,13 @@ aarch64_add_cfa_expression (rtx_insn *insn, rtx reg,
 }
 
 /* Emit code to save the callee-saved registers from register number START
-   to LIMIT to the stack at the location starting at offset START_OFFSET,
-   skipping any write-back candidates if SKIP_WB is true.  HARD_FP_VALID_P
-   is true if the hard frame pointer has been set up.  */
+   to LIMIT to the stack.  The stack pointer is currently BYTES_BELOW_SP
+   bytes above the bottom of the static frame.  Skip any write-back
+   candidates if SKIP_WB is true.  HARD_FP_VALID_P is true if the hard
+   frame pointer has been set up.  */
 
 static void
-aarch64_save_callee_saves (poly_int64 start_offset,
+aarch64_save_callee_saves (poly_int64 bytes_below_sp,
   unsigned start, unsigned limit, bool skip_wb,
   bool hard_fp_valid_p)
 {
@@ -9145,7 +9144,9 @@ aarch64_save_callee_saves (poly_int64 start_offset,
 
   machine_mode mode = aarch64_reg_save_mode (regno);
   reg = gen_rtx_REG (mode, regno);
-  offset = start_offset + frame.reg_offset[regno];
+  offset = (frame.reg_offset[regno]
+   + frame.bytes_below_saved_regs
+   - bytes_below_sp);
   rtx base_rtx = stack_pointer_rtx;
   poly_int64 sp_offset = offset;
 
@@ -9156,9 +9157,7 @@ aarch64_save_callee_saves (poly_int64 start_offset,
   else if (GP_REGNUM_P (regno)
   && (!offset.is_constant (_offset) || const_offset >= 512))
{
- gcc_assert (known_eq (start_offset, 0));
- poly_int64 fp_offset
-   = frame.below_hard_fp_saved_regs_size;
+ poly_int64 fp_offset = frame.bytes_below_hard_fp - bytes_below_sp;
  if (hard_fp_valid_p)
base_rtx = hard_frame_pointer_rtx;
  else
@@ -9222,12 +9221,13 @@ aarch64_save_callee_saves (poly_int64 start_offset,
 }
 
 /* Emit code to restore the callee registers from register number START
-   up to and including LIMIT.  Restore from the stack offset START_OFFSET,
-   skipping any write-back candidates if SKIP_WB is true.  Write the
-   appropriate REG_CFA_RESTORE notes into CFI_OPS.  */
+   up to and including LIMIT.  The stack pointer is currently BYTES_BELOW_SP
+   bytes above the bottom of the static frame.  Skip any write-back
+   candidates if SKIP_WB is true.  Write the appropriate REG_CFA_RESTORE
+   notes into CFI_OPS.  */
 
 static void
-aarch64_restore_callee_saves (poly_int64 start_offset, unsigned start,
+aarch64_restore_callee_saves (poly_int64 bytes_below_sp, unsigned start,
  unsigned limit, bool skip_wb, rtx *cfi_ops)
 {
  aarch64_frame &frame = cfun->machine->frame;
@@ -9253,7 +9253,9 @@ aarch64_restore_callee_saves (poly_int64 start_offset, 
unsigned start,
 
   machine_mode mode = aarch64_reg_save_mode (regno);
   reg = gen_rtx_REG (mode, regno);
-  offset = start_offset + frame.reg_offset[regno];
+  offset = (frame.reg_offset[regno]
+   + frame.bytes_below_saved_regs
+   - 

[PATCH 02/19] aarch64: Avoid a use of callee_offset

2023-09-12 Thread Richard Sandiford via Gcc-patches
When we emit the frame chain, i.e. when we reach Here in this statement
of aarch64_expand_prologue:

  if (emit_frame_chain)
{
  // Here
  ...
}

the stack is in one of two states:

- We've allocated up to the frame chain, but no more.

- We've allocated the whole frame, and the frame chain is within easy
  reach of the new SP.

The offset of the frame chain from the current SP is available
in aarch64_frame as callee_offset.  It is also available as the
chain_offset local variable, where the latter is calculated from other
data.  (However, chain_offset is not always equal to callee_offset when
!emit_frame_chain, so chain_offset isn't redundant.)

In c600df9a4060da3c6121ff4d0b93f179eafd69d1 I switched to using
chain_offset for the initialisation of the hard frame pointer:

   aarch64_add_offset (Pmode, hard_frame_pointer_rtx,
- stack_pointer_rtx, callee_offset,
+ stack_pointer_rtx, chain_offset,
  tmp1_rtx, tmp0_rtx, frame_pointer_needed);

But the later REG_CFA_ADJUST_CFA handling still used callee_offset.

I think the difference is harmless, but it's more logical for the
CFA note to be in sync, and it's more convenient for later patches
if it uses chain_offset.

gcc/
* config/aarch64/aarch64.cc (aarch64_expand_prologue): Use
chain_offset rather than callee_offset.
---
 gcc/config/aarch64/aarch64.cc | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

diff --git a/gcc/config/aarch64/aarch64.cc b/gcc/config/aarch64/aarch64.cc
index b91f77d7b1f..9fb94623693 100644
--- a/gcc/config/aarch64/aarch64.cc
+++ b/gcc/config/aarch64/aarch64.cc
@@ -10034,7 +10034,6 @@ aarch64_expand_prologue (void)
   poly_int64 initial_adjust = frame.initial_adjust;
   HOST_WIDE_INT callee_adjust = frame.callee_adjust;
   poly_int64 final_adjust = frame.final_adjust;
-  poly_int64 callee_offset = frame.callee_offset;
   poly_int64 sve_callee_adjust = frame.sve_callee_adjust;
   poly_int64 below_hard_fp_saved_regs_size
 = frame.below_hard_fp_saved_regs_size;
@@ -10147,8 +10146,7 @@ aarch64_expand_prologue (void)
 implicit.  */
  if (!find_reg_note (insn, REG_CFA_ADJUST_CFA, NULL_RTX))
{
- rtx src = plus_constant (Pmode, stack_pointer_rtx,
-  callee_offset);
+ rtx src = plus_constant (Pmode, stack_pointer_rtx, chain_offset);
  add_reg_note (insn, REG_CFA_ADJUST_CFA,
gen_rtx_SET (hard_frame_pointer_rtx, src));
}
-- 
2.25.1



[PATCH 12/19] aarch64: Simplify top of frame allocation

2023-09-12 Thread Richard Sandiford via Gcc-patches
After previous patches, it no longer really makes sense to allocate
the top of the frame in terms of varargs_and_saved_regs_size and
saved_regs_and_above.

gcc/
* config/aarch64/aarch64.cc (aarch64_layout_frame): Simplify
the allocation of the top of the frame.
---
 gcc/config/aarch64/aarch64.cc | 23 ---
 1 file changed, 8 insertions(+), 15 deletions(-)

diff --git a/gcc/config/aarch64/aarch64.cc b/gcc/config/aarch64/aarch64.cc
index ca2e6af5d12..9578592d256 100644
--- a/gcc/config/aarch64/aarch64.cc
+++ b/gcc/config/aarch64/aarch64.cc
@@ -8625,23 +8625,16 @@ aarch64_layout_frame (void)
 
   frame.saved_regs_size = offset - frame.bytes_below_saved_regs;
 
-  poly_int64 varargs_and_saved_regs_size
-= frame.saved_regs_size + frame.saved_varargs_size;
-
-  poly_int64 saved_regs_and_above
-= aligned_upper_bound (varargs_and_saved_regs_size
-  + get_frame_size (),
-  STACK_BOUNDARY / BITS_PER_UNIT);
-
-  frame.bytes_above_hard_fp
-= saved_regs_and_above - frame.below_hard_fp_saved_regs_size;
+  offset += get_frame_size ();
+  offset = aligned_upper_bound (offset, STACK_BOUNDARY / BITS_PER_UNIT);
+  auto top_of_locals = offset;
 
-  /* Both these values are already aligned.  */
-  gcc_assert (multiple_p (frame.bytes_below_saved_regs,
- STACK_BOUNDARY / BITS_PER_UNIT));
-  frame.frame_size = saved_regs_and_above + frame.bytes_below_saved_regs;
+  offset += frame.saved_varargs_size;
+  gcc_assert (multiple_p (offset, STACK_BOUNDARY / BITS_PER_UNIT));
+  frame.frame_size = offset;
 
-  frame.bytes_above_locals = frame.saved_varargs_size;
+  frame.bytes_above_hard_fp = frame.frame_size - frame.bytes_below_hard_fp;
+  frame.bytes_above_locals = frame.frame_size - top_of_locals;
 
   frame.initial_adjust = 0;
   frame.final_adjust = 0;
-- 
2.25.1



[PATCH 07/19] aarch64: Only calculate chain_offset if there is a chain

2023-09-12 Thread Richard Sandiford via Gcc-patches
After previous patches, it is no longer necessary to calculate
a chain_offset in cases where there is no chain record.

gcc/
* config/aarch64/aarch64.cc (aarch64_expand_prologue): Move the
calculation of chain_offset into the emit_frame_chain block.
---
 gcc/config/aarch64/aarch64.cc | 10 +-
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/gcc/config/aarch64/aarch64.cc b/gcc/config/aarch64/aarch64.cc
index 2c218c90906..25b5fb243a6 100644
--- a/gcc/config/aarch64/aarch64.cc
+++ b/gcc/config/aarch64/aarch64.cc
@@ -10111,16 +10111,16 @@ aarch64_expand_prologue (void)
   if (callee_adjust != 0)
 aarch64_push_regs (reg1, reg2, callee_adjust);
 
-  /* The offset of the frame chain record (if any) from the current SP.  */
-  poly_int64 chain_offset = (initial_adjust + callee_adjust
-- frame.hard_fp_offset);
-  gcc_assert (known_ge (chain_offset, 0));
-
   /* The offset of the current SP from the bottom of the static frame.  */
   poly_int64 bytes_below_sp = frame_size - initial_adjust - callee_adjust;
 
   if (emit_frame_chain)
 {
+  /* The offset of the frame chain record (if any) from the current SP.  */
+  poly_int64 chain_offset = (initial_adjust + callee_adjust
+- frame.hard_fp_offset);
+  gcc_assert (known_ge (chain_offset, 0));
+
   if (callee_adjust == 0)
{
  reg1 = R29_REGNUM;
-- 
2.25.1



[PATCH 05/19] aarch64: Add bytes_below_hard_fp to frame info

2023-09-12 Thread Richard Sandiford via Gcc-patches
Following on from the previous bytes_below_saved_regs patch, this one
records the number of bytes that are below the hard frame pointer.
This eventually replaces below_hard_fp_saved_regs_size.

If a frame pointer is not needed, the epilogue adds final_adjust
to the stack pointer before restoring registers:

 aarch64_add_sp (tmp1_rtx, tmp0_rtx, final_adjust, true);

Therefore, if the epilogue needs to restore the stack pointer from
the hard frame pointer, the directly corresponding offset is:

 -bytes_below_hard_fp + final_adjust

i.e. go from the hard frame pointer to the bottom of the frame,
then add the same amount as if we were using the stack pointer
from the outset.
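
As a made-up worked example: with 32 bytes of outgoing arguments and
16 bytes of saves below the hard frame pointer, bytes_below_hard_fp is 48;
if final_adjust is 32, the epilogue sets SP to hard FP - 48 + 32, i.e.
16 bytes below the hard frame pointer, which is exactly the position the
prologue had reached just before its final stack adjustment.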

gcc/
* config/aarch64/aarch64.h (aarch64_frame::bytes_below_hard_fp): New
field.
* config/aarch64/aarch64.cc (aarch64_layout_frame): Initialize it.
(aarch64_expand_epilogue): Use it instead of
below_hard_fp_saved_regs_size.
---
 gcc/config/aarch64/aarch64.cc | 6 +++---
 gcc/config/aarch64/aarch64.h  | 5 +
 2 files changed, 8 insertions(+), 3 deletions(-)

diff --git a/gcc/config/aarch64/aarch64.cc b/gcc/config/aarch64/aarch64.cc
index 49c2fbedd14..58dd8946232 100644
--- a/gcc/config/aarch64/aarch64.cc
+++ b/gcc/config/aarch64/aarch64.cc
@@ -8569,6 +8569,7 @@ aarch64_layout_frame (void)
  of the callee save area.  */
   bool saves_below_hard_fp_p = maybe_ne (offset, 0);
   frame.below_hard_fp_saved_regs_size = offset;
+  frame.bytes_below_hard_fp = offset + frame.bytes_below_saved_regs;
   if (frame.emit_frame_chain)
 {
   /* FP and LR are placed in the linkage record.  */
@@ -10220,8 +10221,7 @@ aarch64_expand_epilogue (bool for_sibcall)
   poly_int64 final_adjust = frame.final_adjust;
   poly_int64 callee_offset = frame.callee_offset;
   poly_int64 sve_callee_adjust = frame.sve_callee_adjust;
-  poly_int64 below_hard_fp_saved_regs_size
-= frame.below_hard_fp_saved_regs_size;
+  poly_int64 bytes_below_hard_fp = frame.bytes_below_hard_fp;
   unsigned reg1 = frame.wb_pop_candidate1;
   unsigned reg2 = frame.wb_pop_candidate2;
   unsigned int last_gpr = (frame.is_scs_enabled
@@ -10279,7 +10279,7 @@ aarch64_expand_epilogue (bool for_sibcall)
is restored on the instruction doing the writeback.  */
 aarch64_add_offset (Pmode, stack_pointer_rtx,
hard_frame_pointer_rtx,
-   -callee_offset - below_hard_fp_saved_regs_size,
+   -bytes_below_hard_fp + final_adjust,
tmp1_rtx, tmp0_rtx, callee_adjust == 0);
   else
  /* The case where we need to re-use the register here is very rare, so
diff --git a/gcc/config/aarch64/aarch64.h b/gcc/config/aarch64/aarch64.h
index 83939991eb1..75fd3b59b0d 100644
--- a/gcc/config/aarch64/aarch64.h
+++ b/gcc/config/aarch64/aarch64.h
@@ -785,6 +785,11 @@ struct GTY (()) aarch64_frame
  are saved below the hard frame pointer.  */
   poly_int64 below_hard_fp_saved_regs_size;
 
+  /* The number of bytes between the bottom of the static frame (the bottom
+ of the outgoing arguments) and the hard frame pointer.  This value is
+ always a multiple of STACK_BOUNDARY.  */
+  poly_int64 bytes_below_hard_fp;
+
   /* Offset from the base of the frame (incomming SP) to the
  top of the locals area.  This value is always a multiple of
  STACK_BOUNDARY.  */
-- 
2.25.1



[PATCH 01/19] aarch64: Use local frame vars in shrink-wrapping code

2023-09-12 Thread Richard Sandiford via Gcc-patches
aarch64_layout_frame uses a shorthand for referring to
cfun->machine->frame:

  aarch64_frame &frame = cfun->machine->frame;

This patch does the same for some other heavy users of the structure.
No functional change intended.

gcc/
* config/aarch64/aarch64.cc (aarch64_save_callee_saves): Use
a local shorthand for cfun->machine->frame.
(aarch64_restore_callee_saves, aarch64_get_separate_components):
(aarch64_process_components): Likewise.
(aarch64_allocate_and_probe_stack_space): Likewise.
(aarch64_expand_prologue, aarch64_expand_epilogue): Likewise.
(aarch64_layout_frame): Use existing shorthand for one more case.
---
 gcc/config/aarch64/aarch64.cc | 123 ++
 1 file changed, 64 insertions(+), 59 deletions(-)

diff --git a/gcc/config/aarch64/aarch64.cc b/gcc/config/aarch64/aarch64.cc
index 37d414021ca..b91f77d7b1f 100644
--- a/gcc/config/aarch64/aarch64.cc
+++ b/gcc/config/aarch64/aarch64.cc
@@ -8651,7 +8651,7 @@ aarch64_layout_frame (void)
   frame.is_scs_enabled
 = (!crtl->calls_eh_return
&& sanitize_flags_p (SANITIZE_SHADOW_CALL_STACK)
-   && known_ge (cfun->machine->frame.reg_offset[LR_REGNUM], 0));
+   && known_ge (frame.reg_offset[LR_REGNUM], 0));
 
   /* When shadow call stack is enabled, the scs_pop in the epilogue will
  restore x30, and we don't need to pop x30 again in the traditional
@@ -9117,6 +9117,7 @@ aarch64_save_callee_saves (poly_int64 start_offset,
   unsigned start, unsigned limit, bool skip_wb,
   bool hard_fp_valid_p)
 {
+  aarch64_frame &frame = cfun->machine->frame;
   rtx_insn *insn;
   unsigned regno;
   unsigned regno2;
@@ -9131,8 +9132,8 @@ aarch64_save_callee_saves (poly_int64 start_offset,
   bool frame_related_p = aarch64_emit_cfi_for_reg_p (regno);
 
   if (skip_wb
- && (regno == cfun->machine->frame.wb_push_candidate1
- || regno == cfun->machine->frame.wb_push_candidate2))
+ && (regno == frame.wb_push_candidate1
+ || regno == frame.wb_push_candidate2))
continue;
 
   if (cfun->machine->reg_is_wrapped_separately[regno])
@@ -9140,7 +9141,7 @@ aarch64_save_callee_saves (poly_int64 start_offset,
 
   machine_mode mode = aarch64_reg_save_mode (regno);
   reg = gen_rtx_REG (mode, regno);
-  offset = start_offset + cfun->machine->frame.reg_offset[regno];
+  offset = start_offset + frame.reg_offset[regno];
   rtx base_rtx = stack_pointer_rtx;
   poly_int64 sp_offset = offset;
 
@@ -9153,7 +9154,7 @@ aarch64_save_callee_saves (poly_int64 start_offset,
{
  gcc_assert (known_eq (start_offset, 0));
  poly_int64 fp_offset
-   = cfun->machine->frame.below_hard_fp_saved_regs_size;
+   = frame.below_hard_fp_saved_regs_size;
  if (hard_fp_valid_p)
base_rtx = hard_frame_pointer_rtx;
  else
@@ -9175,8 +9176,7 @@ aarch64_save_callee_saves (poly_int64 start_offset,
  && (regno2 = aarch64_next_callee_save (regno + 1, limit)) <= limit
  && !cfun->machine->reg_is_wrapped_separately[regno2]
  && known_eq (GET_MODE_SIZE (mode),
-  cfun->machine->frame.reg_offset[regno2]
-  - cfun->machine->frame.reg_offset[regno]))
+  frame.reg_offset[regno2] - frame.reg_offset[regno]))
{
  rtx reg2 = gen_rtx_REG (mode, regno2);
  rtx mem2;
@@ -9226,6 +9226,7 @@ static void
 aarch64_restore_callee_saves (poly_int64 start_offset, unsigned start,
  unsigned limit, bool skip_wb, rtx *cfi_ops)
 {
+  aarch64_frame &frame = cfun->machine->frame;
   unsigned regno;
   unsigned regno2;
   poly_int64 offset;
@@ -9242,13 +9243,13 @@ aarch64_restore_callee_saves (poly_int64 start_offset, 
unsigned start,
   rtx reg, mem;
 
   if (skip_wb
- && (regno == cfun->machine->frame.wb_pop_candidate1
- || regno == cfun->machine->frame.wb_pop_candidate2))
+ && (regno == frame.wb_pop_candidate1
+ || regno == frame.wb_pop_candidate2))
continue;
 
   machine_mode mode = aarch64_reg_save_mode (regno);
   reg = gen_rtx_REG (mode, regno);
-  offset = start_offset + cfun->machine->frame.reg_offset[regno];
+  offset = start_offset + frame.reg_offset[regno];
   rtx base_rtx = stack_pointer_rtx;
   if (mode == VNx2DImode && BYTES_BIG_ENDIAN)
aarch64_adjust_sve_callee_save_base (mode, base_rtx, anchor_reg,
@@ -9259,8 +9260,7 @@ aarch64_restore_callee_saves (poly_int64 start_offset, 
unsigned start,
  && (regno2 = aarch64_next_callee_save (regno + 1, limit)) <= limit
  && !cfun->machine->reg_is_wrapped_separately[regno2]
  && known_eq (GET_MODE_SIZE (mode),
-  cfun->machine->frame.reg_offset[regno2]
-  - cfun->machine->frame.reg_offset[regno]))
+  

[PATCH 00/19] aarch64: Fix -fstack-protector issue

2023-09-12 Thread Richard Sandiford via Gcc-patches
This series of patches fixes deficiencies in GCC's -fstack-protector
implementation for AArch64 when using dynamically allocated stack space.
This is CVE-2023-4039.  See:

https://developer.arm.com/Arm%20Security%20Center/GCC%20Stack%20Protector%20Vulnerability%20AArch64
https://github.com/metaredteam/external-disclosures/security/advisories/GHSA-x7ch-h5rf-w2mf

for more details.

The fix is to put the saved registers above the locals area when
-fstack-protector is used.
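
As a rough sketch of the shape that is affected (illustrative only, not
taken from the advisory or the new tests; "use" is a hypothetical helper):

  void use (char *, char *);

  /* With -fstack-protector-strong the canary sits next to "buf", but an
     overflow of the dynamically allocated block could previously reach
     the saved registers without touching the canary.  */
  void
  f (const char *src, unsigned long n)
  {
    char buf[64];
    char *p = __builtin_alloca (n);
    __builtin_strcpy (p, src);
    use (p, buf);
  }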

The series also fixes a stack-clash problem that I found while working
on the CVE.  In unpatched sources, the stack-clash problem would only
trigger for unrealistic numbers of arguments (8K 64-bit arguments, or an
equivalent).  But it would be a more significant issue with the new
-fstack-protector frame layout.  It's therefore important that both
problems are fixed together.

Some reorganisation of the code seemed necessary to fix the problems in a
cleanish way.  The series is therefore quite long, but only a handful of
patches should have any effect on code generation.

See the individual patches for a detailed description.

Tested on aarch64-linux-gnu. Pushed to trunk and to all active branches.
I've also pushed backports to GCC 7+ to vendors/ARM/heads/CVE-2023-4039.

Richard Sandiford (19):
  aarch64: Use local frame vars in shrink-wrapping code
  aarch64: Avoid a use of callee_offset
  aarch64: Explicitly handle frames with no saved registers
  aarch64: Add bytes_below_saved_regs to frame info
  aarch64: Add bytes_below_hard_fp to frame info
  aarch64: Tweak aarch64_save/restore_callee_saves
  aarch64: Only calculate chain_offset if there is a chain
  aarch64: Rename locals_offset to bytes_above_locals
  aarch64: Rename hard_fp_offset to bytes_above_hard_fp
  aarch64: Tweak frame_size comment
  aarch64: Measure reg_offset from the bottom of the frame
  aarch64: Simplify top of frame allocation
  aarch64: Minor initial adjustment tweak
  aarch64: Tweak stack clash boundary condition
  aarch64: Put LR save probe in first 16 bytes
  aarch64: Simplify probe of final frame allocation
  aarch64: Explicitly record probe registers in frame info
  aarch64: Remove below_hard_fp_saved_regs_size
  aarch64: Make stack smash canary protect saved registers

 gcc/config/aarch64/aarch64.cc | 518 ++
 gcc/config/aarch64/aarch64.h  |  44 +-
 .../aarch64/stack-check-prologue-17.c |  55 ++
 .../aarch64/stack-check-prologue-18.c | 100 
 .../aarch64/stack-check-prologue-19.c | 100 
 .../aarch64/stack-check-prologue-20.c |   3 +
 .../gcc.target/aarch64/stack-protector-8.c|  95 
 .../gcc.target/aarch64/stack-protector-9.c|  33 ++
 .../aarch64/sve/pcs/stack_clash_3.c   |   6 +-
 9 files changed, 699 insertions(+), 255 deletions(-)
 create mode 100644 gcc/testsuite/gcc.target/aarch64/stack-check-prologue-17.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/stack-check-prologue-18.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/stack-check-prologue-19.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/stack-check-prologue-20.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/stack-protector-8.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/stack-protector-9.c

-- 
2.25.1



gcc-patches From rewriting mailman settings (Was: [Linaro-TCWG-CI] gcc patch #75674: FAIL: 68 regressions)

2023-09-12 Thread Mark Wielaard
Hi Maxim,

Adding Jeff to CC who is the official gcc-patches mailinglist admin.

On Tue, 2023-09-12 at 11:08 +0400, Maxim Kuvyrkov wrote:
> Normally, notifications from Linaro TCWG precommit CI are sent only to
> patch author and patch submitter.  In this case the sender was rewritten
> to "Benjamin Priour via Gcc-patches ",
> which was detected by Patchwork [1] as patch submitter.

BTW. Really looking forward to your talk at Cauldron about this!

> Is "From:" re-write on gcc-patches@ mailing list a side-effect of [2]?
> I see that some, but not all messages to gcc-patches@ have their
> "From:" re-written.
> 
> Also, do you know if re-write of "From:" on gcc-patches@ is expected?

Yes, it is expected for emails that come from domains with a dmarc
policy. That is because the current settings of the gcc-patches
mailinglist might slightly alter the message or headers in a way that
invalidates the DKIM signature. Without From rewriting those messages
would be bounced by recipients that check the dmarc policy/dkim
signature.

As you noticed the glibc hackers have recently worked together with the
sourceware overseers to upgrade mailman and alter the postfix and the
libc-alpha mailinglist setting so it doesn't require From rewriting
anymore (the message and header aren't altered anymore to invalidate
the DKIM signatures).

We (Jeff or anyone else with mailman admin privs) could use the same
settings for gcc-patches. The settings that need to be set are in that
bug:

- subject_prefix (general): (empty)
- from_is_list (general): No
- anonymous_list (general): No
- first_strip_reply_to (general): No
- reply_goes_to_list (general): Poster
- reply_to_address (general): (empty)
- include_sender_header (general): No
- drop_cc (general): No
- msg_header (nondigest): (empty)
- msg_footer (nondigest): (empty)
- scrub_nondigest (nondigest): No
- dmarc_moderation_action (privacy): Accept
- filter_content (contentfilter): No

The only visible change (apart from no more From rewriting) is that
HTML multi-parts aren't scrubbed anymore (that would be a message
altering issue). The html part is still scrubbed from the
inbox.sourceware.org archive, so b4 works just fine. But I don't know
what patchwork.sourceware.org does with HTML attachments. Of course
people really shouldn't send HTML attachments to gcc-patches, so maybe
this is no real problem.

Let me know if you want Jeff (or me or one of the other overseers) make
the above changes to the gcc-patches mailman settings.

Cheers,

Mark

> [1] https://patchwork.sourceware.org/project/gcc/list/
> [2] https://sourceware.org/bugzilla/show_bug.cgi?id=29713



[PATCH] libgomp, nvptx, amdgcn: parallel reverse offload

2023-09-12 Thread Andrew Stubbs

Hi all,

This patch implements parallel execution of OpenMP reverse offload kernels.

The first problem was that GPU device kernels may request reverse 
offload (via the "ancestor" clause) once for each running offload thread 
-- of which there may be thousands -- and the existing implementation 
ran each request serially, whilst blocking all other I/O from that 
device kernel.


The second problem was that the NVPTX plugin runs the reverse offload 
kernel in the context of whichever host thread sees the request first, 
regardless of which kernel originated the request. This is probably 
logically harmless, but may lead to surprising timing when it blocks the 
wrong kernel from exiting until the reverse offload is done. It was also 
only capable of receiving and processing a single request at a time, 
across all running kernels. (GCN did not have these problems.)


Both problems are now solved by making the reverse offload requests 
asynchronous. The host threads still receive the requests in the same 
way, but instead of running them inline the request is queued for 
execution later in another thread. The requests are then consumed from 
the message passing buffer immediately (allowing I/O to continue, in the 
case of GCN). The device threads that sent requests are still blocked 
waiting for the completion signal, but any other threads may continue as 
usual.


The queued requests are processed by a thread pool created on demand and 
limited by a new environment variable GOMP_REVERSE_OFFLOAD_THREADS. By 
this means reverse offload should become much less of a bottleneck.
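
For reference, a rough sketch of the kind of construct whose host side can
now run in parallel (not one of the new testcases):

  /* Build with offloading enabled; run with e.g.
     GOMP_REVERSE_OFFLOAD_THREADS=4 to let up to four host threads
     service these regions concurrently.  */
  #include <stdio.h>

  #pragma omp requires reverse_offload

  int
  main (void)
  {
  #pragma omp target teams distribute parallel for
    for (int i = 0; i < 1024; i++)
      {
        /* Reverse offload: this block runs back on the host.  */
  #pragma omp target device (ancestor: 1)
        printf ("host work for device iteration %d\n", i);
      }
    return 0;
  }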


In the process of this work I have found and fixed a couple of 
target-specific issues. NVPTX asynchronous streams were independent of 
each other, but still synchronous w.r.t. the default NULL stream. Some 
GCN devices (at least gfx908) seem to have a race condition in the 
message passing system whereby the cache write-back triggered by 
__ATOMIC_RELEASE can complete after the atomically written value becomes visible.


OK for mainline?

Andrew

libgomp: parallel reverse offload

Extend OpenMP reverse offload support to allow running the host kernels
on multiple threads.  The device plugin API for reverse offload is now made
non-blocking, meaning that running the host kernel in the wrong device
context is no longer a problem.  The NVPTX message passing interface now
uses a ring buffer approximately matching GCN.

include/ChangeLog:

* gomp-constants.h (GOMP_VERSION): Bump.

libgomp/ChangeLog:

* config/gcn/target.c (GOMP_target_ext): Add "signal" field.
Fix atomics race condition.
* config/nvptx/libgomp-nvptx.h (REV_OFFLOAD_QUEUE_SIZE): New define.
(struct rev_offload): Implement ring buffer.
* config/nvptx/target.c (GOMP_target_ext): Likewise.
* env.c (initialize_env): Read GOMP_REVERSE_OFFLOAD_THREADS.
* libgomp-plugin.c (GOMP_PLUGIN_target_rev): Replace "aq" parameter
with "signal" and "use_aq".
* libgomp-plugin.h (GOMP_PLUGIN_target_rev): Likewise.
* libgomp.h (gomp_target_rev): Likewise.
* plugin/plugin-gcn.c (process_reverse_offload): Add "signal".
(console_output): Pass signal value through.
* plugin/plugin-nvptx.c (GOMP_OFFLOAD_openacc_async_construct):
Attach new threads to the numbered device.
Change the flag to CU_STREAM_NON_BLOCKING.
(GOMP_OFFLOAD_run): Implement ring-buffer and remove signalling.
* target.c (gomp_target_rev): Rename to ...
(gomp_target_rev_internal): ... this, and change "dev_num" to
"devicep".
(gomp_target_rev_worker_thread): New function.
(gomp_target_rev): New function (old name).
* libgomp.texi: Document GOMP_REVERSE_OFFLOAD_THREADS.
* testsuite/libgomp.c/reverse-offload-threads-1.c: New test.
* testsuite/libgomp.c/reverse-offload-threads-2.c: New test.

diff --git a/include/gomp-constants.h b/include/gomp-constants.h
index 8d4e8e81303..7ce07508e9d 100644
--- a/include/gomp-constants.h
+++ b/include/gomp-constants.h
@@ -314,7 +314,7 @@ enum gomp_map_kind
 /* Versions of libgomp and device-specific plugins.  GOMP_VERSION
should be incremented whenever an ABI-incompatible change is introduced
to the plugin interface defined in libgomp/libgomp.h.  */
-#define GOMP_VERSION   2
+#define GOMP_VERSION   3
 #define GOMP_VERSION_NVIDIA_PTX 1
 #define GOMP_VERSION_GCN 3
 
diff --git a/libgomp/config/gcn/target.c b/libgomp/config/gcn/target.c
index ea5eb1ff5ed..906b04ca41e 100644
--- a/libgomp/config/gcn/target.c
+++ b/libgomp/config/gcn/target.c
@@ -103,19 +103,38 @@ GOMP_target_ext (int device, void (*fn) (void *), size_t 
mapnum,
   <= (index - 1024))
   asm ("s_sleep 64");
 
+  /* In theory, it should be enough to write "written" with __ATOMIC_RELEASE,
+ and have the rest of the data flushed to memory automatically, but some
+ devices (gfx908) seem to have a race condition where the flushed data
+ 

[PATCH 14/13] libstdc++: Re-initialize static data files used by tests

2023-09-12 Thread Jonathan Wakely via Gcc-patches
This fixes the problem observed with some filebuf tests.

The "@require@" string seems a bit hacky, as I don't know why that
string is in the tests in the first place ... but it is there, so this
works.

-- >8 --

Some tests rely on text files with specific content being present in the
test directory. Because the tests modify those files, running the same
test more than once in the same directory will FAIL because the content
of the file is not in the expected state.

This uses a "@require@" marker that happens to be present in those tests
to decide when we need to copy the original files into the test dir
again, so that repeated tests always see the initial file content.
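
(For reference, the marker is a comment of roughly this shape near the top
of the affected tests, e.g. 27_io/basic_filebuf/seekoff/char/1-io.cc:

  // @require@ %-*.tst %-*.txt
  // @diff@ %-*.tst %-*.txt

so searching for "@require@" is an indirect but reasonable way to spot
tests that depend on the copied data files.)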

libstdc++-v3/ChangeLog:

* testsuite/lib/libstdc++.exp (v3-init-data-files): New proc.
(libstdc++_init): Use v3-init-data-files.
(v3-dg-runtest): Use v3-init-data-files to update test data
files for repeated tests.
---
 libstdc++-v3/testsuite/lib/libstdc++.exp | 24 ++--
 1 file changed, 22 insertions(+), 2 deletions(-)

diff --git a/libstdc++-v3/testsuite/lib/libstdc++.exp 
b/libstdc++-v3/testsuite/lib/libstdc++.exp
index 2c497707184..daace4c1d59 100644
--- a/libstdc++-v3/testsuite/lib/libstdc++.exp
+++ b/libstdc++-v3/testsuite/lib/libstdc++.exp
@@ -102,6 +102,12 @@ proc v3-copy-files {srcfiles} {
 }
 }
 
+proc v3-init-data-files { } {
+global srcdir
+v3-copy-files [glob -nocomplain "$srcdir/data/*.tst"]
+v3-copy-files [glob -nocomplain "$srcdir/data/*.txt"]
+}
+
 # Called once, during runtest.exp setup.
 proc libstdc++_init { testfile } {
 global env
@@ -159,8 +165,7 @@ proc libstdc++_init { testfile } {
 set dg-do-what-default run
 
 # Copy any required data files.
-v3-copy-files [glob -nocomplain "$srcdir/data/*.tst"]
-v3-copy-files [glob -nocomplain "$srcdir/data/*.txt"]
+v3-init-data-files
 
 set ld_library_path_tmp ""
 
@@ -556,11 +561,26 @@ proc v3-dg-runtest { testcases flags default-extra-flags 
} {
set option_list { "" }
}
 
+   # Some tests (e.g. 27_io/basic_filebuf/seek{off,pos}/char/[12]-io.cc)
+   # rely on text files with specific data being present in the test dir.
+   # Because the tests modify those files, running the same test a second
+   # time will FAIL due to the files not being in their initial state.
+   # We rely on the fact that those files contain a "@require@" comment
+   # to trigger creating fresh copies of the files for repeated tests.
+   if [search_for $test "@require@"] {
+   set need_fresh_data_files [llength $option_list]
+   } else {
+   set need_fresh_data_files 0
+   }
+
set nshort [file tail [file dirname $test]]/[file tail $test]
 
foreach flags_t $option_list {
verbose "Testing $nshort, $flags $flags_t" 1
dg-test $test "$flags $flags_t" ${default-extra-flags}
+   if { $need_fresh_data_files > 1 } {
+   v3-init-data-files
+   }
}
 }
 }
-- 
2.41.0



Re: [PATCH V2] RISC-V: Support VECTOR BOOL vcond_mask optab[PR111337]

2023-09-12 Thread Robin Dapp via Gcc-patches
The PR thing needs to be moved but I can commit it.

Regards
 Robin



Re: Re: [PATCH] RISC-V: Support VECTOR BOOL vcond_mask optab[PR111337]

2023-09-12 Thread juzhe.zh...@rivai.ai
Ok add it in V2:

https://gcc.gnu.org/pipermail/gcc-patches/2023-September/630048.html 



juzhe.zh...@rivai.ai
 
From: Robin Dapp
Date: 2023-09-12 21:29
To: Juzhe-Zhong; gcc-patches
CC: rdapp.gcc; kito.cheng; kito.cheng; jeffreyalaw
Subject: Re: [PATCH] RISC-V: Support VECTOR BOOL vcond_mask optab[PR111337]
Maybe you want to add PR target/111337 to the changelog?
 
The rest LGTM.
 
Regards
Robin
 


[PATCH V2] RISC-V: Support VECTOR BOOL vcond_mask optab[PR111337]

2023-09-12 Thread Juzhe-Zhong
   PR target/111337

As this PR: https://gcc.gnu.org/bugzilla/show_bug.cgi?id=111337

We support VECTOR BOOL vcond_mask to fix this following ICE:
0x1a9e309 gimple_expand_vec_cond_expr
../../../../gcc/gcc/gimple-isel.cc:283
0x1a9ea56 execute
../../../../gcc/gcc/gimple-isel.cc:390
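
For the record, the new expander open-codes the mask select using the usual
identity (a sketch of the semantics, not part of the patch):

  result[i] = cond[i] ? op1[i] : op2[i]
            = (op1[i] & cond[i]) | (op2[i] & ~cond[i])

where operands[1]/operands[2] are the two mask values and operands[3] is
the condition mask, which is what maps onto the vmand.mm/vmnot.m/vmor.mm
sequence mentioned in the pattern comment below.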

gcc/ChangeLog:

* config/riscv/autovec.md (vcond_mask_<mode>): New pattern.

---
 gcc/config/riscv/autovec.md | 34 ++
 1 file changed, 34 insertions(+)

diff --git a/gcc/config/riscv/autovec.md b/gcc/config/riscv/autovec.md
index e9dd40af935..50c0104550b 100644
--- a/gcc/config/riscv/autovec.md
+++ b/gcc/config/riscv/autovec.md
@@ -565,6 +565,40 @@
   [(set_attr "type" "vector")]
 )
 
+;; -
+;;  [BOOL] Select based on masks
+;; -
+;; Includes merging patterns for:
+;; - vmand.mm
+;; - vmor.mm
+;; - vmnot.m
+;; -
+
+(define_expand "vcond_mask_<mode>"
+  [(match_operand:VB 0 "register_operand")
+   (match_operand:VB 1 "register_operand")
+   (match_operand:VB 2 "register_operand")
+   (match_operand:VB 3 "register_operand")]
+  "TARGET_VECTOR"
+  {
+/* mask1 = operands[3] & operands[1].  */
+    rtx mask1 = expand_binop (<MODE>mode, and_optab, operands[1],
+ operands[3], NULL_RTX, 0,
+ OPTAB_DIRECT);
+/* mask2 = ~operands[3] & operands[2].  */
+    rtx inverse = expand_unop (<MODE>mode, one_cmpl_optab, operands[3],
+  NULL_RTX, 0);
+    rtx mask2 = expand_binop (<MODE>mode, and_optab, operands[2],
+ inverse, NULL_RTX, 0,
+ OPTAB_DIRECT);
+/* result = mask1 | mask2.  */
+    rtx result = expand_binop (<MODE>mode, ior_optab, mask1,
+  mask2, NULL_RTX, 0,
+  OPTAB_DIRECT);
+emit_move_insn (operands[0], result);
+DONE;
+  })
+
 ;; -
 ;;  [INT,FP] Comparisons
 ;; -
-- 
2.36.3



Re: [PATCH] RISC-V: Support VECTOR BOOL vcond_mask optab[PR111337]

2023-09-12 Thread Robin Dapp via Gcc-patches
Maybe you want to add PR target/111337 to the changelog?

The rest LGTM.

Regards
 Robin


RE: [PATCH v1] RISC-V: Remove unused structure in cost model

2023-09-12 Thread Li, Pan2 via Gcc-patches
Committed, thanks Jeff.

Pan

-Original Message-
From: Jeff Law  
Sent: Tuesday, September 12, 2023 9:12 PM
To: Li, Pan2 ; gcc-patches@gcc.gnu.org
Cc: Wang, Yanzhang ; kito.ch...@gmail.com; 
juzhe.zh...@rivai.ai
Subject: Re: [PATCH v1] RISC-V: Remove unused structure in cost model



On 9/12/23 07:02, Pan Li via Gcc-patches wrote:
> From: Pan Li 
> 
> The struct range is unused, remove it.
> 
> gcc/ChangeLog:
> 
>   * config/riscv/riscv-vector-costs.h (struct range): Removed.
OK
jeff


[PATCH] RISC-V: Support VECTOR BOOL vcond_mask optab[PR111337]

2023-09-12 Thread Juzhe-Zhong
As this PR: https://gcc.gnu.org/bugzilla/show_bug.cgi?id=111337

We support VECTOR BOOL vcond_mask to fix this following ICE:
0x1a9e309 gimple_expand_vec_cond_expr
../../../../gcc/gcc/gimple-isel.cc:283
0x1a9ea56 execute
../../../../gcc/gcc/gimple-isel.cc:390

gcc/ChangeLog:

* config/riscv/autovec.md (@vcond_mask_<mode>): New pattern.

---
 gcc/config/riscv/autovec.md | 34 ++
 1 file changed, 34 insertions(+)

diff --git a/gcc/config/riscv/autovec.md b/gcc/config/riscv/autovec.md
index e9dd40af935..45a70f16ee1 100644
--- a/gcc/config/riscv/autovec.md
+++ b/gcc/config/riscv/autovec.md
@@ -565,6 +565,40 @@
   [(set_attr "type" "vector")]
 )
 
+;; -
+;;  [BOOL] Select based on masks
+;; -
+;; Includes merging patterns for:
+;; - vmand.mm
+;; - vmor.mm
+;; - vmnot.m
+;; -
+
+(define_expand "@vcond_mask_<mode>"
+  [(match_operand:VB 0 "register_operand")
+   (match_operand:VB 1 "register_operand")
+   (match_operand:VB 2 "register_operand")
+   (match_operand:VB 3 "register_operand")]
+  "TARGET_VECTOR"
+  {
+/* mask1 = operands[3] & operands[1].  */
+    rtx mask1 = expand_binop (<MODE>mode, and_optab, operands[1],
+ operands[3], NULL_RTX, 0,
+ OPTAB_DIRECT);
+/* mask2 = ~operands[3] & operands[2].  */
+    rtx inverse = expand_unop (<MODE>mode, one_cmpl_optab, operands[3],
+  NULL_RTX, 0);
+    rtx mask2 = expand_binop (<MODE>mode, and_optab, operands[2],
+ inverse, NULL_RTX, 0,
+ OPTAB_DIRECT);
+/* result = mask1 | mask2.  */
+    rtx result = expand_binop (<MODE>mode, ior_optab, mask1,
+  mask2, NULL_RTX, 0,
+  OPTAB_DIRECT);
+emit_move_insn (operands[0], result);
+DONE;
+  })
+
 ;; -
 ;;  [INT,FP] Comparisons
 ;; -
-- 
2.36.3



Re: [PATCH 3/3] libstdc++: Fix std::not_fn perfect forwarding [PR111327]

2023-09-12 Thread Jonathan Wakely via Gcc-patches
On Tue, 12 Sept 2023 at 02:11, Patrick Palka via Libstdc++
 wrote:
>
> The previous patch fixed perfect forwarding for std::bind_front.
> This patch fixes the same issue for std::not_fn.
>
> Tested on x86_64-pc-linux-gnu, does this look OK for trunk and
> perhaps 13?

Yes for both, thanks.

>
> PR libstdc++/111327
>
> libstdc++-v3/ChangeLog:
>
> * include/std/functional (_GLIBCXX_NOT_FN_CALL_OP): Also define
> a deleted fallback operator() overload.  Constrain both the
> main and deleted overloads accordingly.
> * testsuite/20_util/function_objects/not_fn/111327.cc: New test.
> ---
>  libstdc++-v3/include/std/functional   | 10 +--
>  .../20_util/function_objects/not_fn/111327.cc | 29 +++
>  2 files changed, 37 insertions(+), 2 deletions(-)
>  create mode 100644 
> libstdc++-v3/testsuite/20_util/function_objects/not_fn/111327.cc
>
> diff --git a/libstdc++-v3/include/std/functional 
> b/libstdc++-v3/include/std/functional
> index c50b9e4d365..9551e38dfdb 100644
> --- a/libstdc++-v3/include/std/functional
> +++ b/libstdc++-v3/include/std/functional
> @@ -1061,7 +1061,8 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
>// forwarding _M_fn and the function arguments with the same 
> qualifiers,
>// and deducing the return type and exception-specification.
>  #define _GLIBCXX_NOT_FN_CALL_OP( _QUALS )  \
> -  template<typename... _Args>  \
> +  template<typename... _Args,  \
> +   typename = enable_if_t<__is_invocable<_Fn _QUALS,  \
> + _Args...>::value>> \
> _GLIBCXX20_CONSTEXPR\
> decltype(_S_not<__inv_res_t<_Fn _QUALS, _Args...>>())   \
> operator()(_Args&&... __args) _QUALS\
> @@ -1070,7 +1071,12 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
> {   \
>   return !std::__invoke(std::forward< _Fn _QUALS >(_M_fn),  \
> std::forward<_Args>(__args)...);\
> -   }
> +   }   \
> +   \
> +  template +  typename = enable_if_t _Args...>::value>> \
> +   void operator()(_Args&&... __args) _QUALS = delete;
> +
>_GLIBCXX_NOT_FN_CALL_OP( & )
>_GLIBCXX_NOT_FN_CALL_OP( const & )
>_GLIBCXX_NOT_FN_CALL_OP( && )
> diff --git a/libstdc++-v3/testsuite/20_util/function_objects/not_fn/111327.cc 
> b/libstdc++-v3/testsuite/20_util/function_objects/not_fn/111327.cc
> new file mode 100644
> index 000..93e00ee8057
> --- /dev/null
> +++ b/libstdc++-v3/testsuite/20_util/function_objects/not_fn/111327.cc
> @@ -0,0 +1,29 @@
> +// PR libstdc++/111327 - std::bind_front (and std::not_fn) doesn't perfectly
> +// forward according to value category of the call wrapper object
> +// { dg-do compile { target c++17 } }
> +
> +#include <functional>
> +#include <utility>
> +
> +struct F {
> +  void operator()(...) & = delete;
> +  bool operator()(...) const &;
> +};
> +
> +struct G {
> +  void operator()(...) && = delete;
> +  bool operator()(...) const &&;
> +};
> +
> +int main() {
> +  auto f = std::not_fn(F{});
> +  f(); // { dg-error "deleted" }
> +  std::move(f)();
> +  std::as_const(f)();
> +  std::move(std::as_const(f))();
> +
> +  auto g = std::not_fn(G{});
> +  g(); // { dg-error "deleted" }
> +  std::move(g)(); // { dg-error "deleted" }
> +  std::move(std::as_const(g))();
> +}
> --
> 2.42.0.158.g94e83dcf5b
>



Re: [PATCH 2/3] libstdc++: Fix std::bind_front perfect forwarding [PR111327]

2023-09-12 Thread Jonathan Wakely via Gcc-patches
On Tue, 12 Sept 2023 at 02:09, Patrick Palka via Libstdc++
 wrote:
>
> In order to properly implement a perfect forwarding call wrapper
> (before 'deducing this' at least) we need a total of 8 operator()
> overloads, 4 main ones and 4 deleted ones for each const/ref qual pair,
> as described in section 5.5 of P0847R6.  Otherwise the wrapper may
> not perfectly forward according to the value category and constness
> of the wrapped object.  This patch fixes this bug in std::bind_front.
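
(Side note, not part of the patch: the dummy "requires true" constraints
below make the real call operators more constrained than the new deleted
catch-all overloads, so they win overload resolution whenever the wrapped
callable is invocable; when it is not, the real overload drops out via
SFINAE in its return type and the deleted one is chosen, giving a clear
"use of deleted function" diagnostic.  A minimal stand-alone illustration
of the tie-break, with hypothetical names:

  template<class T> void f(T) requires true;   // preferred when both match
  template<class T> void f(T) = delete;        // fallback only
)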

OK for trunk, thanks.

>
> PR libstdc++/111327
>
> libstdc++-v3/ChangeLog:
>
> * include/std/functional (_Bind_front::operator()): Add deleted
> fallback overloads for each const/ref qualifier pair.  Give the
> main overloads dummy constraints to make them more specialized
> than the deleted overloads.
> * testsuite/20_util/function_objects/bind_front/111327.cc: New test.
> ---
>  libstdc++-v3/include/std/functional   | 16 
>  .../function_objects/bind_front/111327.cc | 41 +++
>  2 files changed, 57 insertions(+)
>  create mode 100644 
> libstdc++-v3/testsuite/20_util/function_objects/bind_front/111327.cc
>
> diff --git a/libstdc++-v3/include/std/functional 
> b/libstdc++-v3/include/std/functional
> index 7d1b890bb4e..c50b9e4d365 100644
> --- a/libstdc++-v3/include/std/functional
> +++ b/libstdc++-v3/include/std/functional
> @@ -938,6 +938,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
>~_Bind_front() = default;
>
>    template<typename... _CallArgs>
> +   requires true
> constexpr
> invoke_result_t<_Fd&, _BoundArgs&..., _CallArgs...>
> operator()(_CallArgs&&... __call_args) &
> @@ -948,6 +949,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
> }
>
>    template<typename... _CallArgs>
> +   requires true
> constexpr
> invoke_result_t<const _Fd&, const _BoundArgs&..., _CallArgs...>
> operator()(_CallArgs&&... __call_args) const &
> @@ -959,6 +961,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
> }
>
>    template<typename... _CallArgs>
> +   requires true
> constexpr
> invoke_result_t<_Fd, _BoundArgs..., _CallArgs...>
> operator()(_CallArgs&&... __call_args) &&
> @@ -969,6 +972,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
> }
>
>    template<typename... _CallArgs>
> +   requires true
> constexpr
> invoke_result_t<const _Fd, const _BoundArgs..., _CallArgs...>
> operator()(_CallArgs&&... __call_args) const &&
> @@ -979,6 +983,18 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
>   std::forward<_CallArgs>(__call_args)...);
> }
>
> +  template<typename... _CallArgs>
> +   void operator()(_CallArgs&&...) & = delete;
> +
> +  template<typename... _CallArgs>
> +   void operator()(_CallArgs&&...) const & = delete;
> +
> +  template<typename... _CallArgs>
> +   void operator()(_CallArgs&&...) && = delete;
> +
> +  template<typename... _CallArgs>
> +   void operator()(_CallArgs&&...) const && = delete;
> +
>  private:
>using _BoundIndices = index_sequence_for<_BoundArgs...>;
>
> diff --git 
> a/libstdc++-v3/testsuite/20_util/function_objects/bind_front/111327.cc 
> b/libstdc++-v3/testsuite/20_util/function_objects/bind_front/111327.cc
> new file mode 100644
> index 000..6eb51994476
> --- /dev/null
> +++ b/libstdc++-v3/testsuite/20_util/function_objects/bind_front/111327.cc
> @@ -0,0 +1,41 @@
> +// PR libstdc++/111327 - std::bind_front doesn't perfectly forward according
> +// to value category of the call wrapper object
> +// { dg-options "-std=gnu++20" }
> +// { dg-do compile { target c++20 } }
> +
> +#include 
> +#include <functional>
> +#include <utility>
> +struct F {
> +  void operator()(...) & = delete;
> +  void operator()(...) const &;
> +};
> +
> +struct G {
> +  void operator()(...) && = delete;
> +  void operator()(...) const &&;
> +};
> +
> +int main() {
> +  auto f0 = std::bind_front(F{});
> +  f0(); // { dg-error "deleted" }
> +  std::move(f0)();
> +  std::as_const(f0)();
> +  std::move(std::as_const(f0))();
> +
> +  auto g0 = std::bind_front(G{});
> +  g0(); // { dg-error "deleted" }
> +  std::move(g0)(); // { dg-error "deleted" }
> +  std::move(std::as_const(g0))();
> +
> +  auto f1 = std::bind_front(F{}, 42);
> +  f1(); // { dg-error "deleted" }
> +  std::move(f1)();
> +  std::as_const(f1)();
> +  std::move(std::as_const(f1))();
> +
> +  auto g1 = std::bind_front(G{}, 42);
> +  g1(); // { dg-error "deleted" }
> +  std::move(g1)(); // { dg-error "deleted" }
> +  std::move(std::as_const(g1))();
> +}
> --
> 2.42.0.158.g94e83dcf5b
>



Re: [PATCH 1/3] libstdc++: Remove std::bind_front specialization for no bound args

2023-09-12 Thread Jonathan Wakely via Gcc-patches
On Tue, 12 Sept 2023 at 13:46, Patrick Palka via Libstdc++
 wrote:
>
> On Mon, 11 Sep 2023, Patrick Palka wrote:
>
> > This specialization for the case of no bound args, added by
> > r13-4214-gcbd05ca5ab1231, seems to be mostly obsoleted by
> > r13-5033-ge2eab3c4edb6aa which added [[no_unique_address]] to the
> > main template's data members.  And the compile time advantage of
> > avoiding an empty tuple and index_sequence seems minimal.  Removing this
> > specialization also means we don't have to fix the PR111327 bug in
> > another place.
>
> FWIW I don't feel strongly about removing this specialization.  If we
> keep it We'd at least be able to reuse it for std::bind_back, and it
> wouldn't be hard to fix the PR111327 bug in its implementation.

Yeah, I'm ambivalent. But since you've got a patch to fix 111327 ready
which doesn't include this specialization, let's remove it.

The empty std::tuple is at least already explicitly specialized, so I
agree its overhead probably isn't very significant.

OK for trunk. I'm not sure if we should change it in gcc-13 now though.

>
> >
> >   PR libstdc++/111327
> >
> > libstdc++-v3/ChangeLog:
> >
> >   * include/std/functional (_Bind_front0): Remove.
> >   (_Bind_front_t): Adjust.
> > ---
> >  libstdc++-v3/include/std/functional | 63 +
> >  1 file changed, 1 insertion(+), 62 deletions(-)
> >
> > diff --git a/libstdc++-v3/include/std/functional 
> > b/libstdc++-v3/include/std/functional
> > index 60d4d1f3dd2..7d1b890bb4e 100644
> > --- a/libstdc++-v3/include/std/functional
> > +++ b/libstdc++-v3/include/std/functional
> > @@ -996,69 +996,8 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
> >[[no_unique_address]] std::tuple<_BoundArgs...> _M_bound_args;
> >  };
> >
> > -  // Avoid the overhead of an empty tuple<> if there are no bound args.
> > -  template
> > -struct _Bind_front0
> > -{
> > -  static_assert(is_move_constructible_v<_Fd>);
> > -
> > -  // First parameter is to ensure this constructor is never used
> > -  // instead of the copy/move constructor.
> > -  template
> > - explicit constexpr
> > - _Bind_front0(int, _Fn&& __fn)
> > - noexcept(is_nothrow_constructible_v<_Fd, _Fn>)
> > - : _M_fd(std::forward<_Fn>(__fn))
> > - { }
> > -
> > -  _Bind_front0(const _Bind_front0&) = default;
> > -  _Bind_front0(_Bind_front0&&) = default;
> > -  _Bind_front0& operator=(const _Bind_front0&) = default;
> > -  _Bind_front0& operator=(_Bind_front0&&) = default;
> > -  ~_Bind_front0() = default;
> > -
> > -  template
> > - constexpr
> > - invoke_result_t<_Fd&, _CallArgs...>
> > - operator()(_CallArgs&&... __call_args) &
> > - noexcept(is_nothrow_invocable_v<_Fd&, _CallArgs...>)
> > - { return std::invoke(_M_fd, std::forward<_CallArgs>(__call_args)...); 
> > }
> > -
> > -  template
> > - constexpr
> > - invoke_result_t
> > - operator()(_CallArgs&&... __call_args) const &
> > - noexcept(is_nothrow_invocable_v)
> > - { return std::invoke(_M_fd, std::forward<_CallArgs>(__call_args)...); 
> > }
> > -
> > -  template
> > - constexpr
> > - invoke_result_t<_Fd, _CallArgs...>
> > - operator()(_CallArgs&&... __call_args) &&
> > - noexcept(is_nothrow_invocable_v<_Fd, _CallArgs...>)
> > - {
> > -   return std::invoke(std::move(_M_fd),
> > -  std::forward<_CallArgs>(__call_args)...);
> > - }
> > -
> > -  template
> > - constexpr
> > - invoke_result_t
> > - operator()(_CallArgs&&... __call_args) const &&
> > - noexcept(is_nothrow_invocable_v)
> > - {
> > -   return std::invoke(std::move(_M_fd),
> > -  std::forward<_CallArgs>(__call_args)...);
> > - }
> > -
> > -private:
> > -  [[no_unique_address]] _Fd _M_fd;
> > -};
> > -
> >template
> > -using _Bind_front_t
> > -  = __conditional_t>,
> > - _Bind_front, decay_t<_Args>...>>;
> > +using _Bind_front_t = _Bind_front, decay_t<_Args>...>;
> >
> >/** Create call wrapper by partial application of arguments to function.
> > *
> > --
> > 2.42.0.158.g94e83dcf5b
> >
> >
>



Re: [PATCH v1] RISC-V: Remove unused structure in cost model

2023-09-12 Thread Jeff Law via Gcc-patches




On 9/12/23 07:02, Pan Li via Gcc-patches wrote:

From: Pan Li 

The struct range is unused; remove it.

gcc/ChangeLog:

* config/riscv/riscv-vector-costs.h (struct range): Removed.

OK
jeff


Re: [PATCH 00/13] libstdc++: Add support for running tests with multiple -std options

2023-09-12 Thread Jonathan Wakely via Gcc-patches
On Mon, 11 Sept 2023 at 17:37, Jonathan Wakely via Libstdc++ wrote:
>
> This patch series replicates the behaviour of the g++ testsuite, so that
> libstdc++ tests can easily be run for multiple different -std options in
> a single testsuite run.  As described in the updated docs, the -std
> options to use for every test can be overridden by setting v3_std_list
> in ~/.dejagnurc or $DEJAGNU, or setting $GLIBCXX_TESTSUITE_STDS in the
> environment.  If not overridden, the default is just to run with
> -std=gnu++17 (so that we don't increase the time taken for a full
> testsuite run).
>
> Tests that require a newer standard than C++17 will default to that
> newer standard and C++26, so e.g. std::format tests will be run with
> both -std=gnu++20 and -std=gnu++26.  This does increase the number of
> tests, but only for the subset of tests for C++20/23/26 features.  If
> this is too costly for testers, we can change that (this might be
> needed, because the C++20 tests for std::ranges and std::format are
> particularly slow to compile).
>
> Because a correct default will be chosen for tests that require
> something newer than C++17, we no longer need dg-options "-std=gnu++20"
> or similar in any tests.  Removing the explicit -std option allows the
> test to be run for later standards via the v3_std_list settings, so that
> we can verify that C++20 features still work in C++23 and C++26, for
> example.  This change already found some tests which failed when run
> with a later standard (see r14-3771-gf12e26f3496275).
>
> Patches 2-13 in the series remove those unnecessary dg-options from
> about half the relevant tests, but there are more than 500 others that
> still need adjusting.
>
> We can remove files like testsuite/std/format/functions/format_c++23.cc
> which only exist to duplicate existing tests with a different -std
> option.  We can remove that file now, and rely on format.cc being run
> with multiple -std options by libstdc++ maintainers.
>
> It might also be useful to add a 'make check-quick' target which runs a
> small subset of smoke tests with every standard version in v3_std_list.
> This would be a suitable target for CI bots and for packagers who want
> to verify that a build of GCC is functional, without running the entire
> libstdc++ testsuite.

There's a problem with this change. Some of our tests fail if they're
run more than once.

We have some static data files which are copied into the test
directory by libstdc++_init at the start of the run. But some tests
modify those files, so if the same test gets run multiple times, the
file is no longer in the expected state after the first test.

This only shows up when overriding the list of -std modes to include
more than one option. The tests pass on the first run, and fail for
subsequent ones:

Running /home/test/src/gcc/libstdc++-v3/testsuite/libstdc++-dg/conformance.exp
...
PASS: 27_io/basic_filebuf/seekoff/char/1-io.cc  -std=gnu++98 (test for
excess errors)
PASS: 27_io/basic_filebuf/seekoff/char/1-io.cc  -std=gnu++98 execution test
PASS: 27_io/basic_filebuf/seekoff/char/1-io.cc  -std=gnu++11 (test for
excess errors)
FAIL: 27_io/basic_filebuf/seekoff/char/1-io.cc  -std=gnu++11 execution test
PASS: 27_io/basic_filebuf/seekoff/char/1-io.cc  -std=gnu++14 (test for
excess errors)
FAIL: 27_io/basic_filebuf/seekoff/char/1-io.cc  -std=gnu++14 execution test
PASS: 27_io/basic_filebuf/seekoff/char/1-io.cc  -std=gnu++17 (test for
excess errors)
FAIL: 27_io/basic_filebuf/seekoff/char/1-io.cc  -std=gnu++17 execution test
PASS: 27_io/basic_filebuf/seekoff/char/1-io.cc  -std=gnu++20 (test for
excess errors)
FAIL: 27_io/basic_filebuf/seekoff/char/1-io.cc  -std=gnu++20 execution test
PASS: 27_io/basic_filebuf/seekoff/char/1-io.cc  -std=gnu++23 (test for
excess errors)
FAIL: 27_io/basic_filebuf/seekoff/char/1-io.cc  -std=gnu++23 execution test

We either need to copy the data files again after each test, or
rewrite the tests to be idempotent.
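
As a rough illustration of the second option, a test written along these
lines recreates its own input file on every run instead of depending on the
pristine copy made by libstdc++_init, so repeating it with several -std
options starts from the same state each time.  The file name and contents
here are invented; this is not the actual 1-io.cc testcase.

#include <fstream>
#include <cassert>

int main()
{
  const char* name = "seekoff-scratch.txt";   // invented scratch file name

  {
    // Recreate the file with known contents instead of relying on a
    // pre-copied data file that a previous run may have modified.
    std::ofstream out(name, std::ios::trunc);
    out << "0123456789\n";
  }

  std::filebuf fb;
  fb.open(name, std::ios::in | std::ios::out);
  assert(fb.is_open());

  // ... exercise seekoff/overflow as the real test does ...
  assert(fb.pubseekoff(5, std::ios::beg) == std::filebuf::pos_type(5));

  fb.close();
}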



[PATCH v1] RISC-V: Remove unused structure in cost model

2023-09-12 Thread Pan Li via Gcc-patches
From: Pan Li 

The struct range is unused, remove it.

gcc/ChangeLog:

* config/riscv/riscv-vector-costs.h (struct range): Removed.

Signed-off-by: Pan Li 
---
 gcc/config/riscv/riscv-vector-costs.h | 7 ---
 1 file changed, 7 deletions(-)

diff --git a/gcc/config/riscv/riscv-vector-costs.h 
b/gcc/config/riscv/riscv-vector-costs.h
index 7f120b79619..7b5814a4cff 100644
--- a/gcc/config/riscv/riscv-vector-costs.h
+++ b/gcc/config/riscv/riscv-vector-costs.h
@@ -40,13 +40,6 @@ struct autovec_info
   bool end_p;
 };
 
-struct range
-{
-  unsigned int pt;
-  bool start;
-  unsigned int nregs;
-};
-
 /* rvv-specific vector costs.  */
 class costs : public vector_costs
 {
-- 
2.34.1



Re: [PATCH 1/3] libstdc++: Remove std::bind_front specialization for no bound args

2023-09-12 Thread Patrick Palka via Gcc-patches
On Mon, 11 Sep 2023, Patrick Palka wrote:

> This specialization for the case of no bound args, added by
> r13-4214-gcbd05ca5ab1231, seems to be mostly obsoleted by
> r13-5033-ge2eab3c4edb6aa which added [[no_unique_address]] to the
> main template's data members.  And the compile time advantage of
> avoiding an empty tuple and index_sequence seems minimal.  Removing this
> specialization also means we don't have to fix the PR111327 bug in
> another place.

FWIW I don't feel strongly about removing this specialization.  If we
keep it we'd at least be able to reuse it for std::bind_back, and it
wouldn't be hard to fix the PR111327 bug in its implementation.
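
For context, a small self-contained example (not part of the patch, needs
-std=gnu++20) showing that the zero-bound-args case keeps working through
the primary _Bind_front template once the specialization is gone; only the
internal representation changes, not the user-visible behaviour:

#include <functional>
#include <cassert>

static int add(int a, int b) { return a + b; }

int main()
{
  auto f0 = std::bind_front(add);      // no bound args (was _Bind_front0)
  auto f1 = std::bind_front(add, 40);  // one bound arg

  assert(f0(40, 2) == 42);
  assert(f1(2) == 42);
}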

> 
>   PR libstdc++/111327
> 
> libstdc++-v3/ChangeLog:
> 
>   * include/std/functional (_Bind_front0): Remove.
>   (_Bind_front_t): Adjust.
> ---
>  libstdc++-v3/include/std/functional | 63 +
>  1 file changed, 1 insertion(+), 62 deletions(-)
> 
> diff --git a/libstdc++-v3/include/std/functional 
> b/libstdc++-v3/include/std/functional
> index 60d4d1f3dd2..7d1b890bb4e 100644
> --- a/libstdc++-v3/include/std/functional
> +++ b/libstdc++-v3/include/std/functional
> @@ -996,69 +996,8 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
>[[no_unique_address]] std::tuple<_BoundArgs...> _M_bound_args;
>  };
>  
> -  // Avoid the overhead of an empty tuple<> if there are no bound args.
> -  template
> -struct _Bind_front0
> -{
> -  static_assert(is_move_constructible_v<_Fd>);
> -
> -  // First parameter is to ensure this constructor is never used
> -  // instead of the copy/move constructor.
> -  template
> - explicit constexpr
> - _Bind_front0(int, _Fn&& __fn)
> - noexcept(is_nothrow_constructible_v<_Fd, _Fn>)
> - : _M_fd(std::forward<_Fn>(__fn))
> - { }
> -
> -  _Bind_front0(const _Bind_front0&) = default;
> -  _Bind_front0(_Bind_front0&&) = default;
> -  _Bind_front0& operator=(const _Bind_front0&) = default;
> -  _Bind_front0& operator=(_Bind_front0&&) = default;
> -  ~_Bind_front0() = default;
> -
> -  template
> - constexpr
> - invoke_result_t<_Fd&, _CallArgs...>
> - operator()(_CallArgs&&... __call_args) &
> - noexcept(is_nothrow_invocable_v<_Fd&, _CallArgs...>)
> - { return std::invoke(_M_fd, std::forward<_CallArgs>(__call_args)...); }
> -
> -  template
> - constexpr
> - invoke_result_t
> - operator()(_CallArgs&&... __call_args) const &
> - noexcept(is_nothrow_invocable_v)
> - { return std::invoke(_M_fd, std::forward<_CallArgs>(__call_args)...); }
> -
> -  template
> - constexpr
> - invoke_result_t<_Fd, _CallArgs...>
> - operator()(_CallArgs&&... __call_args) &&
> - noexcept(is_nothrow_invocable_v<_Fd, _CallArgs...>)
> - {
> -   return std::invoke(std::move(_M_fd),
> -  std::forward<_CallArgs>(__call_args)...);
> - }
> -
> -  template
> - constexpr
> - invoke_result_t
> - operator()(_CallArgs&&... __call_args) const &&
> - noexcept(is_nothrow_invocable_v)
> - {
> -   return std::invoke(std::move(_M_fd),
> -  std::forward<_CallArgs>(__call_args)...);
> - }
> -
> -private:
> -  [[no_unique_address]] _Fd _M_fd;
> -};
> -
>template
> -using _Bind_front_t
> -  = __conditional_t>,
> - _Bind_front, decay_t<_Args>...>>;
> +using _Bind_front_t = _Bind_front, decay_t<_Args>...>;
>  
>/** Create call wrapper by partial application of arguments to function.
> *
> -- 
> 2.42.0.158.g94e83dcf5b
> 
> 



[committed] contrib: Quote variable in test expression [PR111360]

2023-09-12 Thread Jonathan Wakely via Gcc-patches
Committed as obvious.

-- >8 --

Without the quotes some shells will always return true and some will
print an error. It should be quoted so that a null variable works as
intended.

contrib/ChangeLog:

PR other/111360
* gcc_update: Quote variable.
---
 contrib/gcc_update | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/contrib/gcc_update b/contrib/gcc_update
index 1d7bfab4935..cda2bdb0df9 100755
--- a/contrib/gcc_update
+++ b/contrib/gcc_update
@@ -343,7 +343,7 @@ case $vcs_type in
revision=`$GCC_GIT log -n1 --pretty=tformat:%h`
r=`$GCC_GIT describe --all --match 'basepoints/gcc-[0-9]*' HEAD \
   | sed -n 
's,^\(tags/\)\?basepoints/gcc-\([0-9]\+\)-\([0-9]\+\)-g[0-9a-f]*$,r\2-\3,p;s,^\(tags/\)\?basepoints/gcc-\([0-9]\+\)$,r\2-0,p'`;
-   if test -n $r; then
+   if test -n "$r"; then
o=`$GCC_GIT config --get gcc-config.upstream`;
rr=`echo $r | sed -n 
's,^r\([0-9]\+\)-[0-9]\+\(-g[0-9a-f]\+\)\?$,\1,p'`;
if $GCC_GIT rev-parse --verify --quiet 
${o:-origin}/releases/gcc-$rr >/dev/null; then
-- 
2.41.0



[committed] libstdc++: Format Python code according to PEP8

2023-09-12 Thread Jonathan Wakely via Gcc-patches
Tested x86_64-linux. Pushed to trunk.

-- >8 --

These files were filtered through autopep8 to reformat them more
conventionally.

libstdc++-v3/ChangeLog:

* python/libstdcxx/v6/printers.py: Reformat.
* python/libstdcxx/v6/xmethods.py: Likewise.
---
 libstdc++-v3/python/libstdcxx/v6/printers.py | 651 +++
 libstdc++-v3/python/libstdcxx/v6/xmethods.py |  58 +-
 2 files changed, 446 insertions(+), 263 deletions(-)

diff --git a/libstdc++-v3/python/libstdcxx/v6/printers.py 
b/libstdc++-v3/python/libstdcxx/v6/printers.py
index 37a447b514b..c0056de2565 100644
--- a/libstdc++-v3/python/libstdcxx/v6/printers.py
+++ b/libstdc++-v3/python/libstdcxx/v6/printers.py
@@ -18,10 +18,12 @@
 import gdb
 import itertools
 import re
-import sys, os, errno
+import sys
+import os
+import errno
 import datetime
 
-### Python 2 + Python 3 compatibility code
+# Python 2 + Python 3 compatibility code
 
 # Resources about compatibility:
 #
@@ -38,7 +40,7 @@ import datetime
 # 
 
 if sys.version_info[0] > 2:
-### Python 3 stuff
+# Python 3 stuff
 Iterator = object
 # Python 3 folds these into the normal functions.
 imap = map
@@ -47,7 +49,7 @@ if sys.version_info[0] > 2:
 long = int
 _utc_timezone = datetime.timezone.utc
 else:
-### Python 2 stuff
+# Python 2 stuff
 class Iterator:
 """Compatibility mixin for iterators
 
@@ -98,6 +100,8 @@ except ImportError:
 # Starting with the type ORIG, search for the member type NAME.  This
 # handles searching upward through superclasses.  This is needed to
 # work around http://sourceware.org/bugzilla/show_bug.cgi?id=13615.
+
+
 def find_type(orig, name):
 typ = orig.strip_typedefs()
 while True:
@@ -116,8 +120,10 @@ def find_type(orig, name):
 else:
 raise ValueError("Cannot find type %s::%s" % (str(orig), name))
 
+
 _versioned_namespace = '__8::'
 
+
 def lookup_templ_spec(templ, *args):
 """
 Lookup template specialization templ
@@ -139,6 +145,8 @@ def lookup_templ_spec(templ, *args):
 
 # Use this to find container node types instead of find_type,
 # see https://gcc.gnu.org/bugzilla/show_bug.cgi?id=91997 for details.
+
+
 def lookup_node_type(nodename, containertype):
 """
 Lookup specialization of template NODENAME corresponding to CONTAINERTYPE.
@@ -168,6 +176,7 @@ def lookup_node_type(nodename, containertype):
 pass
 return None
 
+
 def is_member_of_namespace(typ, *namespaces):
 """
 Test whether a type is a member of one of the specified namespaces.
@@ -181,6 +190,7 @@ def is_member_of_namespace(typ, *namespaces):
 return True
 return False
 
+
 def is_specialization_of(x, template_name):
 """
 Test whether a type is a specialization of the named class template.
@@ -195,12 +205,14 @@ def is_specialization_of(x, template_name):
 return re.match('^std::(%s)?%s<.*>$' % (_versioned_namespace, 
template_name), x) is not None
 return re.match('^std::%s<.*>$' % template_name, x) is not None
 
+
 def strip_versioned_namespace(typename):
 global _versioned_namespace
 if _versioned_namespace:
 return typename.replace(_versioned_namespace, '')
 return typename
 
+
 def strip_inline_namespaces(type_str):
 "Remove known inline namespaces from the canonical name of a type."
 type_str = strip_versioned_namespace(type_str)
@@ -212,6 +224,7 @@ def strip_inline_namespaces(type_str):
 type_str = type_str.replace(fs_ns+'v1::', fs_ns)
 return type_str
 
+
 def get_template_arg_list(type_obj):
 "Return a type's template arguments as a list"
 n = 0
@@ -223,6 +236,7 @@ def get_template_arg_list(type_obj):
 return template_args
 n += 1
 
+
 class SmartPtrIterator(Iterator):
 "An iterator for smart pointer types with a single 'child' value"
 
@@ -238,28 +252,29 @@ class SmartPtrIterator(Iterator):
 self.val, val = None, self.val
 return ('get()', val)
 
+
 class SharedPointerPrinter:
 "Print a shared_ptr, weak_ptr, atomic, or atomic"
 
-def __init__ (self, typename, val):
+def __init__(self, typename, val):
 self.typename = strip_versioned_namespace(typename)
 self.val = val
 self.pointer = val['_M_ptr']
 
-def children (self):
+def children(self):
 return SmartPtrIterator(self.pointer)
 
 # Return the _Sp_counted_base<>* that holds the refcounts.
-def _get_refcounts (self):
+def _get_refcounts(self):
 if self.typename == 'std::atomic':
 # A tagged pointer is stored as uintptr_t.
 ptr_val = self.val['_M_refcount']['_M_val']['_M_i']
-ptr_val = ptr_val - (ptr_val % 2) # clear lock bit
+ptr_val = ptr_val - (ptr_val % 2)  # clear lock bit
 ptr_type = find_type(self.val['_M_refcount'].type, 'pointer')
 return ptr_val.cast(ptr_type)
 

RE: [PATCH V5] RISC-V: Support Dynamic LMUL Cost model

2023-09-12 Thread Li, Pan2 via Gcc-patches
Committed, thanks Robin.

Pan

-Original Message-
From: Gcc-patches On Behalf Of Robin Dapp via Gcc-patches
Sent: Tuesday, September 12, 2023 7:07 PM
To: Juzhe-Zhong ; gcc-patches@gcc.gnu.org
Cc: kito.ch...@sifive.com; kito.ch...@gmail.com
Subject: Re: [PATCH V5] RISC-V: Support Dynamic LMUL Cost model

LGTM.  We should just keep in mind the restrictions discussed in the
other thread.

Regards
 Robin


Re: [PATCH] small _BitInt tweaks

2023-09-12 Thread Jakub Jelinek via Gcc-patches
On Tue, Sep 12, 2023 at 10:27:18AM +, Richard Biener wrote:
> On Mon, 11 Sep 2023, Jakub Jelinek wrote:
> > And, also I think it is undesirable when being asked for signed_type_for
> > of unsigned _BitInt(1) (which is valid) to get signed _BitInt(1) (which is
> > invalid, the standard only allows signed _BitInt(2) and larger), so the
> > patch returns 1-bit signed INTEGER_TYPE for those cases.
> 
> I think the last bit is a bit surprising - do the frontends use
> signed_or_unsigned_type_for and would they be confused if getting
> back an INTEGER_TYPE here?

I see a single c-family/c-pretty-print.cc use of signed_or_unsigned_type_for
and none of signed_type_for in the C/C++ FEs (unsigned_type_for is used in a
couple of spots, but that isn't affected), c_common_signed_type
or c_common_signed_or_unsigned_type is used more than that, but I still
think it is mostly used for warning stuff and similar or when called with
some specific types like sizetype.  I don't think the FE uses (or should
use) those functions to decide e.g. on types of expressions etc., that is
what common_type etc. are for.
And, for the very small precisions the distinction between BITINT_TYPE and
INTEGER_TYPE should be limited to just loads/stores from memory (in case
there are different rules for what to do with padding bits in those cases if
any) and on function arguments/return values, I think none of this is really
affected by those signed_type_for/c_common_signed_type results.

And by ensuring we never create 1-bit signed BITINT_TYPE e.g. the backends
don't need to worry about them.
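
To make the proposed invariant concrete, here is a hedged sketch in terms of
GCC's internal tree API (not code from the posted patch; the function name
is invented, and the usual GCC includes are assumed to be available):

#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "tree.h"

/* Sketch only: T is assumed to be the (valid) unsigned _BitInt(1) type;
   its signed counterpart should come back as a 1-bit signed INTEGER_TYPE,
   never as an (invalid) signed _BitInt(1).  */
static void
verify_signed_for_unsigned_bitint1 (tree t)
{
  gcc_assert (TREE_CODE (t) == BITINT_TYPE
              && TYPE_UNSIGNED (t)
              && TYPE_PRECISION (t) == 1);

  tree s = signed_type_for (t);

  gcc_assert (TREE_CODE (s) == INTEGER_TYPE
              && !TYPE_UNSIGNED (s)
              && TYPE_PRECISION (s) == 1);
}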

But I admit I don't feel strongly about that.

Joseph, what do you think about this?

Jakub



libgo: Consider '--with-build-sysroot=[...]' for target libraries' build-tree testing (instead of build-time 'CC' etc.) [PR109951] (was: [PATCH 3/4] libgo/test: Fix compilation for build sysroot)

2023-09-12 Thread Thomas Schwinge
Hi!

On 2019-11-11T18:12:44+, "Maciej W. Rozycki"  wrote:
> Fix a problem with the libgo testsuite using a method to determine the
> compiler to use resulting in the tool being different from one the
> library has been built with, and causing a catastrophic failure from the
> lack of a suitable `--sysroot=' option where the `--with-build-sysroot='
> configuration option has been used to build the compiler resulting in
> the inability to link executables.
>
> Address this problem by providing a DejaGNU configuration file defining
> the compiler to use, via the GOC_UNDER_TEST TCL variable, set from $GOC
> by autoconf, which will have all the required options set for the target
> compiler to build executables in the environment configured

As we've found, this is conceptually problematic, as discussed in
"Consider '--with-build-sysroot=[...]' for target libraries' build-tree
testing (instead of build-time 'CC' etc.) [PR109951]".

I therefore suggest applying to libgo conceptually the same changes
as I've just pushed for libgomp in
"libgomp: Consider '--with-build-sysroot=[...]' for target libraries'
build-tree testing (instead of build-time 'CC' etc.) [PR91884, PR109951]".

OK to push (via Ian/Go upstream) the attached
"libgo: Consider '--with-build-sysroot=[...]' for target libraries'
build-tree testing (instead of build-time 'CC' etc.) [PR109951]"?

By the way, I've tested this one via hard-coding
'libgo/configure.ac:USE_DEJAGNU' to 'yes', and observing that my
"quick hack to replicate the original requirement"
('internal_error ("MISSING SYSROOT");') no longer triggers.


Regards
 Thomas


From 81a73112e3d0b43c240c7c9040c24d68c2739bf3 Mon Sep 17 00:00:00 2001
From: Thomas Schwinge 
Date: Mon, 11 Sep 2023 16:55:24 +0200
Subject: [PATCH] libgo: Consider '--with-build-sysroot=[...]' for target
 libraries' build-tree testing (instead of build-time 'CC' etc.) [PR109951]

Similar to commit fb5d27be272b71fb9026224535fc73f125ce3be7
"libgomp: Consider '--with-build-sysroot=[...]' for target libraries' build-tree testing (instead of build-time 'CC' etc.) [PR91884, PR109951]",
this is commit b72813a68c943643a6241418f27aa8b9d4614647
"libgo: fix DejaGNU testsuite compiler when using build sysroot" done
differently, avoiding build-tree testing use of any random gunk that may
appear in build-time 'GOC'.

	PR testsuite/109951
	libgo/
	* configure.ac: 'AC_SUBST(SYSROOT_CFLAGS_FOR_TARGET)'.
	* Makefile.in: Regenerate.
	* configure: Likewise.
	* testsuite/Makefile.in: Likewise.
	* testsuite/lib/libgo.exp (libgo_init): If
	'--with-build-sysroot=[...]' was specified, use it for build-tree
	testing.
	* testsuite/libgo-test-support.exp.in (GOC_UNDER_TEST): Don't set.
	(SYSROOT_CFLAGS_FOR_TARGET): Set.
---
 libgo/Makefile.in | 1 +
 libgo/configure   | 7 +--
 libgo/configure.ac| 2 ++
 libgo/testsuite/Makefile.in   | 1 +
 libgo/testsuite/lib/libgo.exp | 8 
 libgo/testsuite/libgo-test-support.exp.in | 2 +-
 6 files changed, 18 insertions(+), 3 deletions(-)

diff --git a/libgo/Makefile.in b/libgo/Makefile.in
index 40340bfb7a5..8dcb6d6a354 100644
--- a/libgo/Makefile.in
+++ b/libgo/Makefile.in
@@ -474,6 +474,7 @@ SPLIT_STACK = @SPLIT_STACK@
 STRINGOPS_FLAG = @STRINGOPS_FLAG@
 STRIP = @STRIP@
 STRUCT_EPOLL_EVENT_FD_OFFSET = @STRUCT_EPOLL_EVENT_FD_OFFSET@
+SYSROOT_CFLAGS_FOR_TARGET = @SYSROOT_CFLAGS_FOR_TARGET@
 USE_DEJAGNU = @USE_DEJAGNU@
 VERSION = @VERSION@
 WARN_FLAGS = @WARN_FLAGS@
diff --git a/libgo/configure b/libgo/configure
index a607dbff68e..2f1609b42b5 100755
--- a/libgo/configure
+++ b/libgo/configure
@@ -633,6 +633,7 @@ ac_subst_vars='am__EXEEXT_FALSE
 am__EXEEXT_TRUE
 LTLIBOBJS
 LIBOBJS
+SYSROOT_CFLAGS_FOR_TARGET
 HAVE_STATIC_LINK_FALSE
 HAVE_STATIC_LINK_TRUE
 HAVE_STAT_TIMESPEC_FALSE
@@ -11544,7 +11545,7 @@ else
   lt_dlunknown=0; lt_dlno_uscore=1; lt_dlneed_uscore=2
   lt_status=$lt_dlunknown
   cat > conftest.$ac_ext <<_LT_EOF
-#line 11547 "configure"
+#line 11548 "configure"
 #include "confdefs.h"
 
 #if HAVE_DLFCN_H
@@ -11650,7 +11651,7 @@ else
   lt_dlunknown=0; lt_dlno_uscore=1; lt_dlneed_uscore=2
   lt_status=$lt_dlunknown
   cat > conftest.$ac_ext <<_LT_EOF
-#line 11653 "configure"
+#line 11654 "configure"
 #include "confdefs.h"
 
 #if HAVE_DLFCN_H
@@ -16147,6 +16148,8 @@ else
 fi
 
 
+
+
 cat >confcache <<\_ACEOF
 # This file is a shell script that caches the results of configure
 # tests run on this system so they can be shared between configure
diff --git a/libgo/configure.ac b/libgo/configure.ac
index 
