Re: [PATCH v3 4/9] LoongArch:Added support for SX vector floating-point instructions.

2023-09-10 Thread Xi Ruoyao via Gcc-patches
The subject should be "Add tests for SX vector floating-point
instructions".  The "support" has already been added.

Likewise for patches 5-9.


-- 
Xi Ruoyao 
School of Aerospace Science and Technology, Xidian University


[PATCH v3 7/9] LoongArch:Add vector arithmetic addition vsadd instruction.

2023-09-10 Thread Xiaolong Chen
gcc/testsuite/ChangeLog:

* gcc.target/loongarch/vector/lsx/lsx-vsadd-1.c: New test.
* gcc.target/loongarch/vector/lsx/lsx-vsadd-2.c: New test.
---
 .../loongarch/vector/lsx/lsx-vsadd-1.c| 335 +
 .../loongarch/vector/lsx/lsx-vsadd-2.c| 345 ++
 2 files changed, 680 insertions(+)
 create mode 100644 gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vsadd-1.c
 create mode 100644 gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vsadd-2.c

diff --git a/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vsadd-1.c 
b/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vsadd-1.c
new file mode 100644
index 000..1bc27c983bb
--- /dev/null
+++ b/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vsadd-1.c
@@ -0,0 +1,335 @@
+/* { dg-do run } */
+/* { dg-options "-mlsx -w -fno-strict-aliasing" } */
+#include "../simd_correctness_check.h"
+#include <lsxintrin.h>
+
+int
+main ()
+{
+  __m128i __m128i_op0, __m128i_op1, __m128i_op2, __m128i_out, __m128i_result;
+  __m128 __m128_op0, __m128_op1, __m128_op2, __m128_out, __m128_result;
+  __m128d __m128d_op0, __m128d_op1, __m128d_op2, __m128d_out, __m128d_result;
+
+  int int_op0, int_op1, int_op2, int_out, int_result, i = 1, fail;
+  long int long_op0, long_op1, long_op2, lont_out, lont_result;
+  long int long_int_out, long_int_result;
+  unsigned int unsigned_int_out, unsigned_int_result;
+  unsigned long int unsigned_long_int_out, unsigned_long_int_result;
+
+  *((unsigned long *)&__m128i_op0[1]) = 0x;
+  *((unsigned long *)&__m128i_op0[0]) = 0x;
+  *((unsigned long *)&__m128i_op1[1]) = 0x;
+  *((unsigned long *)&__m128i_op1[0]) = 0x;
+  *((unsigned long *)&__m128i_result[1]) = 0x;
+  *((unsigned long *)&__m128i_result[0]) = 0x;
+  __m128i_out = __lsx_vsadd_b (__m128i_op0, __m128i_op1);
+  ASSERTEQ_64 (__LINE__, __m128i_result, __m128i_out);
+
+  *((unsigned long *)&__m128i_op0[1]) = 0x;
+  *((unsigned long *)&__m128i_op0[0]) = 0x;
+  *((unsigned long *)&__m128i_op1[1]) = 0x;
+  *((unsigned long *)&__m128i_op1[0]) = 0x;
+  *((unsigned long *)&__m128i_result[1]) = 0x;
+  *((unsigned long *)&__m128i_result[0]) = 0xfefefefefefefefe;
+  __m128i_out = __lsx_vsadd_b (__m128i_op0, __m128i_op1);
+  ASSERTEQ_64 (__LINE__, __m128i_result, __m128i_out);
+
+  *((unsigned long *)&__m128i_op0[1]) = 0x;
+  *((unsigned long *)&__m128i_op0[0]) = 0x;
+  *((unsigned long *)&__m128i_op1[1]) = 0x3c992b2e;
+  *((unsigned long *)&__m128i_op1[0]) = 0x730f;
+  *((unsigned long *)&__m128i_result[1]) = 0x3c992b2e;
+  *((unsigned long *)&__m128i_result[0]) = 0x730f;
+  __m128i_out = __lsx_vsadd_b (__m128i_op0, __m128i_op1);
+  ASSERTEQ_64 (__LINE__, __m128i_result, __m128i_out);
+
+  *((unsigned long *)&__m128i_op0[1]) = 0x;
+  *((unsigned long *)&__m128i_op0[0]) = 0x;
+  *((unsigned long *)&__m128i_op1[1]) = 0x;
+  *((unsigned long *)&__m128i_op1[0]) = 0x;
+  *((unsigned long *)&__m128i_result[1]) = 0x;
+  *((unsigned long *)&__m128i_result[0]) = 0x;
+  __m128i_out = __lsx_vsadd_b (__m128i_op0, __m128i_op1);
+  ASSERTEQ_64 (__LINE__, __m128i_result, __m128i_out);
+
+  *((unsigned long *)&__m128i_op0[1]) = 0x;
+  *((unsigned long *)&__m128i_op0[0]) = 0x;
+  *((unsigned long *)&__m128i_op1[1]) = 0x;
+  *((unsigned long *)&__m128i_op1[0]) = 0x;
+  *((unsigned long *)&__m128i_result[1]) = 0x;
+  *((unsigned long *)&__m128i_result[0]) = 0x;
+  __m128i_out = __lsx_vsadd_b (__m128i_op0, __m128i_op1);
+  ASSERTEQ_64 (__LINE__, __m128i_result, __m128i_out);
+
+  *((unsigned long *)&__m128i_op0[1]) = 0x7fff7fff;
+  *((unsigned long *)&__m128i_op0[0]) = 0x;
+  *((unsigned long *)&__m128i_op1[1]) = 0x;
+  *((unsigned long *)&__m128i_op1[0]) = 0x2bfd9461;
+  *((unsigned long *)&__m128i_result[1]) = 0x7fff7fff;
+  *((unsigned long *)&__m128i_result[0]) = 0x2bfd9461;
+  __m128i_out = __lsx_vsadd_b (__m128i_op0, __m128i_op1);
+  ASSERTEQ_64 (__LINE__, __m128i_result, __m128i_out);
+
+  *((unsigned long *)&__m128i_op0[1]) = 0x00d3012acc56f9bb;
+  *((unsigned long *)&__m128i_op0[0]) = 0x1021;
+  *((unsigned long *)&__m128i_op1[1]) = 0x;
+  *((unsigned long *)&__m128i_op1[0]) = 0x;
+  *((unsigned long *)&__m128i_result[1]) = 0x00d3012acc56f9bb;
+  *((unsigned long *)&__m128i_result[0]) = 0x1021;
+  __m128i_out = __lsx_vsadd_b (__m128i_op0, __m128i_op1);
+  ASSERTEQ_64 (__LINE__, __m128i_result, __m128i_out);
+
+  *((unsigned long *)&__m128i_op0[1]) = 0x1000;
+  *((unsigned long *)&__m128i_op0[0]) = 

[PATCH v3 3/9] LoongArch: Add tests for Loongson SX builtin functions.

2023-09-10 Thread Xiaolong Chen
gcc/testsuite/ChangeLog:

* gcc.target/loongarch/vector/lsx/lsx-builtin.c: New test.
---
 .../loongarch/vector/lsx/lsx-builtin.c| 5038 +
 1 file changed, 5038 insertions(+)
 create mode 100644 gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-builtin.c

diff --git a/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-builtin.c 
b/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-builtin.c
new file mode 100644
index 000..dcc8f9211bd
--- /dev/null
+++ b/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-builtin.c
@@ -0,0 +1,5038 @@
+/* Test builtins for LOONGARCH LSX ASE instructions */
+/* { dg-do compile } */
+/* { dg-options "-mlsx" } */
+/* { dg-final { scan-assembler-times "lsx_vsll_b:.*vsll\\.b.*lsx_vsll_b" 1 } }
+ */
+/* { dg-final { scan-assembler-times "lsx_vsll_h:.*vsll\\.h.*lsx_vsll_h" 1 } }
+ */
+/* { dg-final { scan-assembler-times "lsx_vsll_w:.*vsll\\.w.*lsx_vsll_w" 1 } }
+ */
+/* { dg-final { scan-assembler-times "lsx_vsll_d:.*vsll\\.d.*lsx_vsll_d" 1 } }
+ */
+/* { dg-final { scan-assembler-times "lsx_vslli_b:.*vslli\\.b.*lsx_vslli_b" 1 }
+ * } */
+/* { dg-final { scan-assembler-times "lsx_vslli_h:.*vslli\\.h.*lsx_vslli_h" 1 }
+ * } */
+/* { dg-final { scan-assembler-times "lsx_vslli_w:.*vslli\\.w.*lsx_vslli_w" 1 }
+ * } */
+/* { dg-final { scan-assembler-times "lsx_vslli_d:.*vslli\\.d.*lsx_vslli_d" 1 }
+ * } */
+/* { dg-final { scan-assembler-times "lsx_vsra_b:.*vsra\\.b.*lsx_vsra_b" 1 } }
+ */
+/* { dg-final { scan-assembler-times "lsx_vsra_h:.*vsra\\.h.*lsx_vsra_h" 1 } }
+ */
+/* { dg-final { scan-assembler-times "lsx_vsra_w:.*vsra\\.w.*lsx_vsra_w" 1 } }
+ */
+/* { dg-final { scan-assembler-times "lsx_vsra_d:.*vsra\\.d.*lsx_vsra_d" 1 } }
+ */
+/* { dg-final { scan-assembler-times "lsx_vsrai_b:.*vsrai\\.b.*lsx_vsrai_b" 1 }
+ * } */
+/* { dg-final { scan-assembler-times "lsx_vsrai_h:.*vsrai\\.h.*lsx_vsrai_h" 1 }
+ * } */
+/* { dg-final { scan-assembler-times "lsx_vsrai_w:.*vsrai\\.w.*lsx_vsrai_w" 1 }
+ * } */
+/* { dg-final { scan-assembler-times "lsx_vsrai_d:.*vsrai\\.d.*lsx_vsrai_d" 1 }
+ * } */
+/* { dg-final { scan-assembler-times "lsx_vsrar_b:.*vsrar\\.b.*lsx_vsrar_b" 1 }
+ * } */
+/* { dg-final { scan-assembler-times "lsx_vsrar_h:.*vsrar\\.h.*lsx_vsrar_h" 1 }
+ * } */
+/* { dg-final { scan-assembler-times "lsx_vsrar_w:.*vsrar\\.w.*lsx_vsrar_w" 1 }
+ * } */
+/* { dg-final { scan-assembler-times "lsx_vsrar_d:.*vsrar\\.d.*lsx_vsrar_d" 1 }
+ * } */
+/* { dg-final { scan-assembler-times "lsx_vsrari_b:.*vsrari\\.b.*lsx_vsrari_b"
+ * 1 } } */
+/* { dg-final { scan-assembler-times "lsx_vsrari_h:.*vsrari\\.h.*lsx_vsrari_h"
+ * 1 } } */
+/* { dg-final { scan-assembler-times "lsx_vsrari_w:.*vsrari\\.w.*lsx_vsrari_w"
+ * 1 } } */
+/* { dg-final { scan-assembler-times "lsx_vsrari_d:.*vsrari\\.d.*lsx_vsrari_d"
+ * 1 } } */
+/* { dg-final { scan-assembler-times "lsx_vsrl_b:.*vsrl\\.b.*lsx_vsrl_b" 1 } }
+ */
+/* { dg-final { scan-assembler-times "lsx_vsrl_h:.*vsrl\\.h.*lsx_vsrl_h" 1 } }
+ */
+/* { dg-final { scan-assembler-times "lsx_vsrl_w:.*vsrl\\.w.*lsx_vsrl_w" 1 } }
+ */
+/* { dg-final { scan-assembler-times "lsx_vsrl_d:.*vsrl\\.d.*lsx_vsrl_d" 1 } }
+ */
+/* { dg-final { scan-assembler-times "lsx_vsrli_b:.*vsrli\\.b.*lsx_vsrli_b" 1 }
+ * } */
+/* { dg-final { scan-assembler-times "lsx_vsrli_h:.*vsrli\\.h.*lsx_vsrli_h" 1 }
+ * } */
+/* { dg-final { scan-assembler-times "lsx_vsrli_w:.*vsrli\\.w.*lsx_vsrli_w" 1 }
+ * } */
+/* { dg-final { scan-assembler-times "lsx_vsrli_d:.*vsrli\\.d.*lsx_vsrli_d" 1 }
+ * } */
+/* { dg-final { scan-assembler-times "lsx_vsrlr_b:.*vsrlr\\.b.*lsx_vsrlr_b" 1 }
+ * } */
+/* { dg-final { scan-assembler-times "lsx_vsrlr_h:.*vsrlr\\.h.*lsx_vsrlr_h" 1 }
+ * } */
+/* { dg-final { scan-assembler-times "lsx_vsrlr_w:.*vsrlr\\.w.*lsx_vsrlr_w" 1 }
+ * } */
+/* { dg-final { scan-assembler-times "lsx_vsrlr_d:.*vsrlr\\.d.*lsx_vsrlr_d" 1 }
+ * } */
+/* { dg-final { scan-assembler-times "lsx_vsrlri_b:.*vsrlri\\.b.*lsx_vsrlri_b"
+ * 1 } } */
+/* { dg-final { scan-assembler-times "lsx_vsrlri_h:.*vsrlri\\.h.*lsx_vsrlri_h"
+ * 1 } } */
+/* { dg-final { scan-assembler-times "lsx_vsrlri_w:.*vsrlri\\.w.*lsx_vsrlri_w"
+ * 1 } } */
+/* { dg-final { scan-assembler-times "lsx_vsrlri_d:.*vsrlri\\.d.*lsx_vsrlri_d"
+ * 1 } } */
+/* { dg-final { scan-assembler-times
+ * "lsx_vbitclr_b:.*vbitclr\\.b.*lsx_vbitclr_b" 1 } } */
+/* { dg-final { scan-assembler-times
+ * "lsx_vbitclr_h:.*vbitclr\\.h.*lsx_vbitclr_h" 1 } } */
+/* { dg-final { scan-assembler-times
+ * "lsx_vbitclr_w:.*vbitclr\\.w.*lsx_vbitclr_w" 1 } } */
+/* { dg-final { scan-assembler-times
+ * "lsx_vbitclr_d:.*vbitclr\\.d.*lsx_vbitclr_d" 1 } } */
+/* { dg-final { scan-assembler-times
+ * "lsx_vbitclri_b:.*vbitclri\\.b.*lsx_vbitclri_b" 1 } } */
+/* { dg-final { scan-assembler-times
+ * "lsx_vbitclri_h:.*vbitclri\\.h.*lsx_vbitclri_h" 1 } } */
+/* { dg-final { scan-assembler-times
+ * "lsx_vbitclri_w:.*vbitclri\\.w.*lsx_vbitclri_w" 1 } } */
+/* { dg-final { scan-assembler-times
+ * 

[PATCH v3 1/9] LoongArch: Add tests of -mstrict-align option.

2023-09-10 Thread Xiaolong Chen
gcc/testsuite/ChangeLog:

* gcc.target/loongarch/strict-align.c: New test.
---
 gcc/testsuite/gcc.target/loongarch/strict-align.c | 12 
 1 file changed, 12 insertions(+)
 create mode 100644 gcc/testsuite/gcc.target/loongarch/strict-align.c

diff --git a/gcc/testsuite/gcc.target/loongarch/strict-align.c 
b/gcc/testsuite/gcc.target/loongarch/strict-align.c
new file mode 100644
index 000..040d849584b
--- /dev/null
+++ b/gcc/testsuite/gcc.target/loongarch/strict-align.c
@@ -0,0 +1,12 @@
+/* { dg-do compile } */
+/* { dg-options "-Ofast -mstrict-align -mlasx" } */
+/* { dg-final { scan-assembler-not "vfadd.s" } } */
+
+void
+foo (float *restrict x, float *restrict y)
+{
+  x[0] = x[0] + y[0];
+  x[1] = x[1] + y[1];
+  x[2] = x[2] + y[2];
+  x[3] = x[3] + y[3];
+}
-- 
2.20.1



[PATCH v3 2/9] LoongArch: Add testsuite framework for Loongson SX/ASX.

2023-09-10 Thread Xiaolong Chen
gcc/testsuite/ChangeLog:

* gcc.target/loongarch/vector/loongarch-vector.exp: New test.
* gcc.target/loongarch/vector/simd_correctness_check.h: New test.
---
 .../loongarch/vector/loongarch-vector.exp | 42 +++
 .../loongarch/vector/simd_correctness_check.h | 54 +++
 2 files changed, 96 insertions(+)
 create mode 100644 
gcc/testsuite/gcc.target/loongarch/vector/loongarch-vector.exp
 create mode 100644 
gcc/testsuite/gcc.target/loongarch/vector/simd_correctness_check.h

diff --git a/gcc/testsuite/gcc.target/loongarch/vector/loongarch-vector.exp 
b/gcc/testsuite/gcc.target/loongarch/vector/loongarch-vector.exp
new file mode 100644
index 000..f33bad82cb2
--- /dev/null
+++ b/gcc/testsuite/gcc.target/loongarch/vector/loongarch-vector.exp
@@ -0,0 +1,42 @@
+#Copyright(C) 2021 - 2023 Free Software Foundation, Inc.
+
+#This program is free software; you can redistribute it and / or modify
+#it under the terms of the GNU General Public License as published by
+#the Free Software Foundation; either version 3 of the License, or
+#(at your option) any later version.
+#
+#This program is distributed in the hope that it will be useful,
+#but WITHOUT ANY WARRANTY; without even the implied warranty of
+#MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.See the
+#GNU General Public License for more details.
+#
+#You should have received a copy of the GNU General Public License
+#along with GCC; see the file COPYING3.If not see
+# <http://www.gnu.org/licenses/>.
+
+#GCC testsuite that uses the `dg.exp' driver.
+
+#Exit immediately if this isn't a LoongArch target.
+if ![istarget loongarch*-*-*] then {
+return
+}
+
+#Load support procs.
+load_lib gcc-dg.exp
+
+#If a testcase doesn't have special options, use these.
+global DEFAULT_CFLAGS
+if ![info exists DEFAULT_CFLAGS] then {
+set DEFAULT_CFLAGS " -mlasx"
+}
+
+#Initialize `dg'.
+dg-init
+
+#Main loop.
+dg-runtest [lsort [glob -nocomplain $srcdir/$subdir/lsx/*.\[cS\]]] \
+   "" $DEFAULT_CFLAGS
+dg-runtest [lsort [glob -nocomplain $srcdir/$subdir/lasx/*.\[cS\]]] \
+   "" $DEFAULT_CFLAGS
+# All done.
+dg-finish
diff --git a/gcc/testsuite/gcc.target/loongarch/vector/simd_correctness_check.h 
b/gcc/testsuite/gcc.target/loongarch/vector/simd_correctness_check.h
new file mode 100644
index 000..eb7fbd59cc7
--- /dev/null
+++ b/gcc/testsuite/gcc.target/loongarch/vector/simd_correctness_check.h
@@ -0,0 +1,54 @@
+#include <stdio.h>
+#include <stdlib.h>
+#include <assert.h>
+
+#define ASSERTEQ_64(line, ref, res)   \
+  do  \
+{ \
+  int fail = 0;   \
+  for (size_t i = 0; i < sizeof (res) / sizeof (res[0]); ++i) \
+{ \
+  long *temp_ref = &ref[i], *temp_res = &res[i];  \
+  if (abs (*temp_ref - *temp_res) > 0)\
+{ \
+  printf (" error: %s at line %ld , expected " #ref   \
+  "[%ld]:0x%lx, got: 0x%lx\n",\
+  __FILE__, line, i, *temp_ref, *temp_res);   \
+  fail = 1;   \
+} \
+} \
+  if (fail == 1)  \
+abort (); \
+} \
+  while (0)
+
+#define ASSERTEQ_32(line, ref, res)   \
+  do  \
+{ \
+  int fail = 0;   \
+  for (size_t i = 0; i < sizeof (res) / sizeof (res[0]); ++i) \
+{ \
+  int *temp_ref = &ref[i], *temp_res = &res[i];   \
+  if (abs (*temp_ref - *temp_res) > 0)\
+{ \
+  printf (" error: %s at line %ld , expected " #ref   \
+  "[%ld]:0x%x, got: 0x%x\n",  \
+  __FILE__, line, i, *temp_ref, *temp_res);   \
+  fail = 1;   \
+}

[PATCH v3 0/9] Added support for SX/LSX vector instructions.

2023-09-10 Thread Xiaolong Chen
v2 -> v3:
  Standardize the code using the GNU format.

  In order to better test the function of the vector instruction, the 128 and 
256 
bit test cases are further split according to the function of the instruction.

Xiaolong Chen (9):
  LoongArch: Add tests of -mstrict-align option.
  LoongArch: Add testsuite framework for Loongson SX/ASX.
  LoongArch: Add tests for Loongson SX builtin functions.
  LoongArch:Added support for SX vector floating-point instructions.
  LoongArch:Add SX instructions for vector arithmetic addition
operations.
  LoongArch:Add vector subtraction arithmetic operation SX instruction.
  LoongArch:Add vector arithmetic addition vsadd instruction.
  LoongArch:Added SX vector arithmetic multiplication instruction.
  LoongArch:Add SX instructions for vector arithmetic operations other
than multiplication, addition, and subtraction.

 .../gcc.target/loongarch/strict-align.c   |   12 +
 .../loongarch/vector/loongarch-vector.exp |   42 +
 .../loongarch/vector/lsx/lsx-builtin.c| 5038 +
 .../loongarch/vector/lsx/lsx-vadd.c   |  416 ++
 .../loongarch/vector/lsx/lsx-vadda.c  |  344 ++
 .../loongarch/vector/lsx/lsx-vaddi.c  |  251 +
 .../loongarch/vector/lsx/lsx-vaddwev-1.c  |  335 ++
 .../loongarch/vector/lsx/lsx-vaddwev-2.c  |  344 ++
 .../loongarch/vector/lsx/lsx-vaddwev-3.c  |  425 ++
 .../loongarch/vector/lsx/lsx-vaddwod-1.c  |  408 ++
 .../loongarch/vector/lsx/lsx-vaddwod-2.c  |  344 ++
 .../loongarch/vector/lsx/lsx-vaddwod-3.c  |  237 +
 .../loongarch/vector/lsx/lsx-vavg-1.c |  398 ++
 .../loongarch/vector/lsx/lsx-vavg-2.c |  308 +
 .../loongarch/vector/lsx/lsx-vavgr-1.c|  299 +
 .../loongarch/vector/lsx/lsx-vavgr-2.c|  317 ++
 .../loongarch/vector/lsx/lsx-vdiv-1.c |  299 +
 .../loongarch/vector/lsx/lsx-vdiv-2.c |  254 +
 .../loongarch/vector/lsx/lsx-vexth-1.c|  342 ++
 .../loongarch/vector/lsx/lsx-vexth-2.c|  182 +
 .../loongarch/vector/lsx/lsx-vfcvt-1.c|  398 ++
 .../loongarch/vector/lsx/lsx-vfcvt-2.c|  278 +
 .../loongarch/vector/lsx/lsx-vffint-1.c   |  161 +
 .../loongarch/vector/lsx/lsx-vffint-2.c   |  264 +
 .../loongarch/vector/lsx/lsx-vffint-3.c   |  102 +
 .../loongarch/vector/lsx/lsx-vfrint_d.c   |  230 +
 .../loongarch/vector/lsx/lsx-vfrint_s.c   |  350 ++
 .../loongarch/vector/lsx/lsx-vftint-1.c   |  349 ++
 .../loongarch/vector/lsx/lsx-vftint-2.c   |  695 +++
 .../loongarch/vector/lsx/lsx-vftint-3.c   | 1028 
 .../loongarch/vector/lsx/lsx-vftint-4.c   |  345 ++
 .../loongarch/vector/lsx/lsx-vhaddw-1.c   |  488 ++
 .../loongarch/vector/lsx/lsx-vhaddw-2.c   |  452 ++
 .../loongarch/vector/lsx/lsx-vhsubw-1.c   |  327 ++
 .../loongarch/vector/lsx/lsx-vhsubw-2.c   |  353 ++
 .../loongarch/vector/lsx/lsx-vldi.c   |   61 +
 .../loongarch/vector/lsx/lsx-vmadd.c  |  450 ++
 .../loongarch/vector/lsx/lsx-vmaddwev-1.c |  472 ++
 .../loongarch/vector/lsx/lsx-vmaddwev-2.c |  383 ++
 .../loongarch/vector/lsx/lsx-vmaddwev-3.c |  383 ++
 .../loongarch/vector/lsx/lsx-vmaddwod-1.c |  372 ++
 .../loongarch/vector/lsx/lsx-vmaddwod-2.c |  438 ++
 .../loongarch/vector/lsx/lsx-vmaddwod-3.c |  460 ++
 .../loongarch/vector/lsx/lsx-vmax-1.c |  317 ++
 .../loongarch/vector/lsx/lsx-vmax-2.c |  362 ++
 .../loongarch/vector/lsx/lsx-vmaxi-1.c|  279 +
 .../loongarch/vector/lsx/lsx-vmaxi-2.c|  223 +
 .../loongarch/vector/lsx/lsx-vmin-1.c |  434 ++
 .../loongarch/vector/lsx/lsx-vmin-2.c |  344 ++
 .../loongarch/vector/lsx/lsx-vmini-1.c|  314 +
 .../loongarch/vector/lsx/lsx-vmini-2.c|  216 +
 .../loongarch/vector/lsx/lsx-vmskgez.c|  119 +
 .../loongarch/vector/lsx/lsx-vmskltz.c|  321 ++
 .../loongarch/vector/lsx/lsx-vmsknz.c |  104 +
 .../loongarch/vector/lsx/lsx-vmsub.c  |  461 ++
 .../loongarch/vector/lsx/lsx-vmuh-1.c |  353 ++
 .../loongarch/vector/lsx/lsx-vmuh-2.c |  372 ++
 .../loongarch/vector/lsx/lsx-vmul.c   |  282 +
 .../loongarch/vector/lsx/lsx-vmulwev-1.c  |  434 ++
 .../loongarch/vector/lsx/lsx-vmulwev-2.c  |  344 ++
 .../loongarch/vector/lsx/lsx-vmulwev-3.c  |  245 +
 .../loongarch/vector/lsx/lsx-vmulwod-1.c  |  272 +
 .../loongarch/vector/lsx/lsx-vmulwod-2.c  |  282 +
 .../loongarch/vector/lsx/lsx-vmulwod-3.c  |  308 +
 .../loongarch/vector/lsx/lsx-vneg.c   |  321 ++
 .../loongarch/vector/lsx/lsx-vsadd-1.c|  335 ++
 .../loongarch/vector/lsx/lsx-vsadd-2.c|  345 ++
 .../loongarch/vector/lsx/lsx-vsat-1.c |  231 +
 .../loongarch/vector/lsx/lsx-vsat-2.c |  272 +
 .../loongarch/vector/lsx/lsx-vsigncov.c   |  425 ++
 .../loongarch/vector/lsx/lsx-vssub-1.c|  398 ++
 .../loongarch/vector/lsx/lsx-vssub-2.c|  408 ++
 

Re: [PATCH] RISC-V: Enable RVV scalable vectorization by default[PR111311]

2023-09-10 Thread juzhe.zh...@rivai.ai
Ping this patch.

I think it's time to enable scalable vectorization by default and do the whole 
regression every time (except vect.exp that we didn't enable yet)

Update current FAILs status:

Real FAILS (ICE and execution FAIL):

FAIL: gcc.dg/pr70252.c (internal compiler error: in 
gimple_expand_vec_cond_expr, at gimple-isel.cc:284)
FAIL: gcc.dg/pr70252.c (test for excess errors)
FAIL: gcc.dg/pr92301.c execution test

Robin is working on these 3 issues, and they will be solved soon.

FAIL: g++.dg/torture/vshuf-v4df.C   -O2 -flto -fno-use-linker-plugin 
-flto-partition=none  (internal compiler error: in as_a, at machmode.h:381)
FAIL: g++.dg/torture/vshuf-v4df.C   -O2 -flto -fno-use-linker-plugin 
-flto-partition=none  (test for excess errors)
FAIL: g++.dg/torture/vshuf-v4df.C   -O2 -flto -fuse-linker-plugin 
-fno-fat-lto-objects  (internal compiler error: in as_a, at machmode.h:381)
FAIL: g++.dg/torture/vshuf-v4df.C   -O2 -flto -fuse-linker-plugin 
-fno-fat-lto-objects  (test for excess errors)
This is a long-known issue that I have mentioned many times; we need help with 
LTO since it's caused by the mode bits extension.

The rest bogus FAILs:
FAIL: gcc.dg/unroll-8.c scan-rtl-dump loop2_unroll "Not unrolling loop, doesn't 
roll"
FAIL: gcc.dg/unroll-8.c scan-rtl-dump loop2_unroll "likely upper bound: 6"
FAIL: gcc.dg/unroll-8.c scan-rtl-dump loop2_unroll "realistic bound: -1"
FAIL: gcc.dg/var-expand1.c scan-rtl-dump loop2_unroll "Expanding Accumulator"
FAIL: gcc.dg/tree-ssa/cunroll-16.c scan-tree-dump cunroll "optimized: loop with 
[0-9]+ iterations completely unrolled"
FAIL: gcc.dg/tree-ssa/cunroll-16.c scan-tree-dump-not optimized "foo"
FAIL: gcc.dg/tree-ssa/forwprop-40.c scan-tree-dump-times optimized 
"BIT_FIELD_REF" 0
FAIL: gcc.dg/tree-ssa/forwprop-40.c scan-tree-dump-times optimized 
"BIT_INSERT_EXPR" 0
FAIL: gcc.dg/tree-ssa/forwprop-41.c scan-tree-dump-times optimized 
"BIT_FIELD_REF" 0
FAIL: gcc.dg/tree-ssa/forwprop-41.c scan-tree-dump-times optimized 
"BIT_INSERT_EXPR" 1
FAIL: gcc.dg/tree-ssa/gen-vect-11b.c scan-tree-dump-times vect "vectorized 0 
loops" 1
FAIL: gcc.dg/tree-ssa/gen-vect-11c.c scan-tree-dump-times vect "vectorized 0 
loops" 1
FAIL: gcc.dg/tree-ssa/gen-vect-26.c scan-tree-dump-times vect "Alignment of 
access forced using peeling" 1
FAIL: gcc.dg/tree-ssa/gen-vect-28.c scan-tree-dump-times vect "Alignment of 
access forced using peeling" 1
FAIL: gcc.dg/tree-ssa/loop-bound-1.c scan-tree-dump ivopts "bounded by 254"
FAIL: gcc.dg/tree-ssa/loop-bound-2.c scan-tree-dump ivopts "bounded by 254"
FAIL: gcc.dg/tree-ssa/predcom-2.c scan-tree-dump-times pcom "Unrolling 2 
times." 2
FAIL: gcc.dg/tree-ssa/predcom-4.c scan-tree-dump-times pcom "Combination" 1
FAIL: gcc.dg/tree-ssa/predcom-4.c scan-tree-dump-times pcom "Unrolling 3 
times." 1
FAIL: gcc.dg/tree-ssa/predcom-5.c scan-tree-dump-times pcom "Combination" 2
FAIL: gcc.dg/tree-ssa/predcom-5.c scan-tree-dump-times pcom "Unrolling 3 
times." 1
FAIL: gcc.dg/tree-ssa/predcom-9.c scan-tree-dump pcom "Executing predictive 
commoning without unrolling"
FAIL: gcc.dg/tree-ssa/reassoc-46.c scan-tree-dump-times optimized 
"(?:vect_)?sum_[\\d._]+ = (?:(?:vect_)?_[\\d._]+ \\+ 
(?:vect_)?sum_[\\d._]+|(?:v   ect_)?sum_[\\d._]+ \\+ (?:vect_)?_[\\d._]+)" 1
FAIL: gcc.dg/tree-ssa/scev-10.c scan-tree-dump-times ivopts "  
Type:\\tREFERENCE ADDRESS\n" 1
FAIL: gcc.dg/tree-ssa/scev-11.c scan-tree-dump-times ivopts "  
Type:\\tREFERENCE ADDRESS\n" 2
FAIL: gcc.dg/tree-ssa/scev-14.c scan-tree-dump ivopts "Overflowness wrto loop 
niter:\tNo-overflow"
FAIL: gcc.dg/tree-ssa/scev-9.c scan-tree-dump-times ivopts "  Type:\\tREFERENCE 
ADDRESS\n" 1
FAIL: gcc.dg/tree-ssa/split-path-11.c scan-tree-dump-times split-paths "join 
point for if-convertable half-diamond" 1

These are bogus dump FAILs and I have confirmed each of them 100%; we have the 
same behavior as SVE.

So is this patch ok for trunk ?



juzhe.zh...@rivai.ai
 
From: Juzhe-Zhong
Date: 2023-09-07 15:28
To: gcc-patches
CC: kito.cheng; kito.cheng; jeffreyalaw; rdapp.gcc; Juzhe-Zhong
Subject: [PATCH] RISC-V: Enable RVV scalable vectorization by default[PR111311]
This patch is not ready but they all will be fixed very soon.
 
gcc/ChangeLog:
 
* config/riscv/riscv.opt: Set default as scalable vectorization.
 
---
gcc/config/riscv/riscv.opt | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
 
diff --git a/gcc/config/riscv/riscv.opt b/gcc/config/riscv/riscv.opt
index 98f342348b7..bf2eca08221 100644
--- a/gcc/config/riscv/riscv.opt
+++ b/gcc/config/riscv/riscv.opt
@@ -292,7 +292,7 @@ EnumValue
Enum(riscv_autovec_preference) String(fixed-vlmax) Value(RVV_FIXED_VLMAX)
-param=riscv-autovec-preference=
-Target RejectNegative Joined Enum(riscv_autovec_preference) 
Var(riscv_autovec_preference) Init(NO_AUTOVEC)
+Target RejectNegative Joined Enum(riscv_autovec_preference) 
Var(riscv_autovec_preference) Init(RVV_SCALABLE)
-param=riscv-autovec-preference= Set the preference of 
auto-vectorization in the RISC-V port.
Enum
-- 
2.36.3
 


[PATCH] RISC-V: Use dominance analysis in global vsetvl elimination

2023-09-10 Thread Juzhe-Zhong
I found that it's more reasonable to use existing dominance analysis.

gcc/ChangeLog:

* config/riscv/riscv-vsetvl.cc 
(pass_vsetvl::global_eliminate_vsetvl_insn): Use dominance analysis.
(pass_vsetvl::init): Ditto.
(pass_vsetvl::done): Ditto.

---
 gcc/config/riscv/riscv-vsetvl.cc | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/gcc/config/riscv/riscv-vsetvl.cc b/gcc/config/riscv/riscv-vsetvl.cc
index 134b97737ae..f81361c4ccd 100644
--- a/gcc/config/riscv/riscv-vsetvl.cc
+++ b/gcc/config/riscv/riscv-vsetvl.cc
@@ -4054,7 +4054,7 @@ pass_vsetvl::global_eliminate_vsetvl_insn (const bb_info 
*bb) const
 }
 
   /* Step1: Reshape the VL/VTYPE status to make sure everything compatible.  */
-  hash_set<basic_block> pred_cfg_bbs = get_all_predecessors (cfg_bb);
+  auto_vec<basic_block> pred_cfg_bbs = get_dominated_by (CDI_POST_DOMINATORS, 
cfg_bb);
   FOR_EACH_EDGE (e, ei, cfg_bb->preds)
 {
   sbitmap avout = m_vector_manager->vector_avout[e->src->index];
@@ -4243,6 +4243,7 @@ pass_vsetvl::init (void)
 {
   /* Initialization of RTL_SSA.  */
   calculate_dominance_info (CDI_DOMINATORS);
+  calculate_dominance_info (CDI_POST_DOMINATORS);
   df_analyze ();
   crtl->ssa = new function_info (cfun);
 }
@@ -4264,6 +4265,7 @@ pass_vsetvl::done (void)
 {
   /* Finalization of RTL_SSA.  */
   free_dominance_info (CDI_DOMINATORS);
+  free_dominance_info (CDI_POST_DOMINATORS);
   if (crtl->ssa->perform_pending_updates ())
cleanup_cfg (0);
   delete crtl->ssa;
-- 
2.36.3



[Committed V2] RISC-V: Add VLS modes VEC_PERM support[PR111311]

2023-09-10 Thread Juzhe-Zhong
This patch adds VLS modes VEC_PERM support, which fixes the following
FAILs in https://gcc.gnu.org/bugzilla/show_bug.cgi?id=111311:

FAIL: gcc.dg/tree-ssa/forwprop-40.c scan-tree-dump-times optimized 
"BIT_FIELD_REF" 0
FAIL: gcc.dg/tree-ssa/forwprop-40.c scan-tree-dump-times optimized 
"BIT_INSERT_EXPR" 0
FAIL: gcc.dg/tree-ssa/forwprop-41.c scan-tree-dump-times optimized 
"BIT_FIELD_REF" 0
FAIL: gcc.dg/tree-ssa/forwprop-41.c scan-tree-dump-times optimized 
"BIT_INSERT_EXPR" 1

These FAILs are fixed after this patch.

gcc/ChangeLog:

* config/riscv/autovec.md: Add VLS modes.
* config/riscv/riscv-protos.h (cmp_lmul_le_one): New function.
(cmp_lmul_gt_one): Ditto.
* config/riscv/riscv-v.cc (cmp_lmul_le_one): Ditto.
(cmp_lmul_gt_one): Ditto.
* config/riscv/riscv.cc (riscv_print_operand): Add VLS modes.
(riscv_vectorize_vec_perm_const): Ditto.
* config/riscv/vector-iterators.md: Ditto.
* config/riscv/vector.md: Ditto.

gcc/testsuite/ChangeLog:

* gcc.target/riscv/rvv/autovec/partial/slp-1.c: Adapt test.
* gcc.target/riscv/rvv/autovec/partial/slp-16.c: Ditto.
* gcc.target/riscv/rvv/autovec/partial/slp-17.c: Ditto.
* gcc.target/riscv/rvv/autovec/partial/slp-3.c: Ditto.
* gcc.target/riscv/rvv/autovec/partial/slp-5.c: Ditto.
* gcc.target/riscv/rvv/autovec/vls/compress-1.c: New test.
* gcc.target/riscv/rvv/autovec/vls/compress-2.c: New test.
* gcc.target/riscv/rvv/autovec/vls/compress-3.c: New test.
* gcc.target/riscv/rvv/autovec/vls/compress-4.c: New test.
* gcc.target/riscv/rvv/autovec/vls/compress-5.c: New test.
* gcc.target/riscv/rvv/autovec/vls/compress-6.c: New test.
* gcc.target/riscv/rvv/autovec/vls/merge-1.c: New test.
* gcc.target/riscv/rvv/autovec/vls/merge-2.c: New test.
* gcc.target/riscv/rvv/autovec/vls/merge-3.c: New test.
* gcc.target/riscv/rvv/autovec/vls/merge-4.c: New test.
* gcc.target/riscv/rvv/autovec/vls/merge-5.c: New test.
* gcc.target/riscv/rvv/autovec/vls/merge-6.c: New test.
* gcc.target/riscv/rvv/autovec/vls/merge-7.c: New test.
* gcc.target/riscv/rvv/autovec/vls/perm-1.c: New test.
* gcc.target/riscv/rvv/autovec/vls/perm-2.c: New test.
* gcc.target/riscv/rvv/autovec/vls/perm-3.c: New test.
* gcc.target/riscv/rvv/autovec/vls/perm-4.c: New test.
* gcc.target/riscv/rvv/autovec/vls/perm-5.c: New test.
* gcc.target/riscv/rvv/autovec/vls/perm-6.c: New test.
* gcc.target/riscv/rvv/autovec/vls/perm-7.c: New test.

---
 gcc/config/riscv/autovec.md   |   6 +-
 gcc/config/riscv/riscv-protos.h   |   2 +
 gcc/config/riscv/riscv-v.cc   |  22 ++
 gcc/config/riscv/riscv.cc |   4 +-
 gcc/config/riscv/vector-iterators.md  | 289 -
 gcc/config/riscv/vector.md| 302 +-
 .../riscv/rvv/autovec/partial/slp-1.c |   2 +-
 .../riscv/rvv/autovec/partial/slp-16.c|   2 +-
 .../riscv/rvv/autovec/partial/slp-17.c|   2 +-
 .../riscv/rvv/autovec/partial/slp-3.c |   2 +-
 .../riscv/rvv/autovec/partial/slp-5.c |   2 +-
 .../riscv/rvv/autovec/vls/compress-1.c|   6 +
 .../riscv/rvv/autovec/vls/compress-2.c|   7 +
 .../riscv/rvv/autovec/vls/compress-3.c|   7 +
 .../riscv/rvv/autovec/vls/compress-4.c|   7 +
 .../riscv/rvv/autovec/vls/compress-5.c|   6 +
 .../riscv/rvv/autovec/vls/compress-6.c|   6 +
 .../riscv/rvv/autovec/vls/merge-1.c   |   6 +
 .../riscv/rvv/autovec/vls/merge-2.c   |   6 +
 .../riscv/rvv/autovec/vls/merge-3.c   |   6 +
 .../riscv/rvv/autovec/vls/merge-4.c   |   6 +
 .../riscv/rvv/autovec/vls/merge-5.c   |   6 +
 .../riscv/rvv/autovec/vls/merge-6.c   |   6 +
 .../riscv/rvv/autovec/vls/merge-7.c   |   6 +
 .../gcc.target/riscv/rvv/autovec/vls/perm-1.c |   6 +
 .../gcc.target/riscv/rvv/autovec/vls/perm-2.c |   6 +
 .../gcc.target/riscv/rvv/autovec/vls/perm-3.c |   6 +
 .../gcc.target/riscv/rvv/autovec/vls/perm-4.c |   8 +
 .../gcc.target/riscv/rvv/autovec/vls/perm-5.c |   6 +
 .../gcc.target/riscv/rvv/autovec/vls/perm-6.c |   6 +
 .../gcc.target/riscv/rvv/autovec/vls/perm-7.c |   6 +
 31 files changed, 584 insertions(+), 176 deletions(-)
 create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/autovec/vls/compress-1.c
 create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/autovec/vls/compress-2.c
 create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/autovec/vls/compress-3.c
 create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/autovec/vls/compress-4.c
 create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/autovec/vls/compress-5.c
 create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/autovec/vls/compress-6.c
 create mode 100644 

Re: Re: [PATCH] RISC-V: Add VLS modes VEC_PERM support[PR111311]

2023-09-10 Thread juzhe.zh...@rivai.ai
Sure. Thanks kito.



juzhe.zh...@rivai.ai
 
From: Kito Cheng
Date: 2023-09-11 10:57
To: juzhe.zh...@rivai.ai
CC: gcc-patches; Kito.cheng
Subject: Re: Re: [PATCH] RISC-V: Add VLS modes VEC_PERM support[PR111311]
OK, but could you split this patch into two patches? pre-approved for both.
 
On Mon, Sep 11, 2023 at 10:36 AM juzhe.zh...@rivai.ai
 wrote:
>
> >> Should we also add loads and stores as well?
> >> and just make sure this is also necessary for the fix and not sneaky, 
> >> right?
>
> No, we don't need loads/stores. Since this following handling codes:
> (define_insn_and_split "*mov_lra"
>   [(set (match_operand:VLS_AVL_REG 0 "reg_or_mem_operand" "=vr, m,vr")
>   (match_operand:VLS_AVL_REG 1 "reg_or_mem_operand" "  m,vr,vr"))
>(clobber (match_scratch:P 2 "=,,X"))]
>   "TARGET_VECTOR && (lra_in_progress || reload_completed)
>&& (register_operand (operands[0], mode)
>|| register_operand (operands[1], mode))"
>   "#"
>   "&& reload_completed"
>   [(const_int 0)]
> {
>   if (REG_P (operands[0]) && REG_P (operands[1]))
>   emit_insn (gen_rtx_SET (operands[0], operands[1]));
>   else
> {
>   emit_move_insn (operands[2], gen_int_mode (GET_MODE_NUNITS 
> (mode),
>  Pmode));
>   unsigned insn_flags
> = GET_MODE_CLASS (mode) == MODE_VECTOR_BOOL
>  ? riscv_vector::UNARY_MASK_OP
>  : riscv_vector::UNARY_OP;
>   riscv_vector::emit_nonvlmax_insn (code_for_pred_mov 
> (mode),
>   insn_flags, operands, operands[2]);
> }
>   DONE;
> }
>   [(set_attr "type" "vmov")]
> )
>
> We split special case use emit_insn (gen_rtx_SET (operands[0], operands[1]));
>
> Missing this pattern will cause ICE but current testcases didn't produce such 
> issues.
> This issue is recognized after I support this pattern.
>
>
>
> juzhe.zh...@rivai.ai
>
> From: Kito Cheng
> Date: 2023-09-11 10:18
> To: Juzhe-Zhong
> CC: gcc-patches; kito.cheng
> Subject: Re: [PATCH] RISC-V: Add VLS modes VEC_PERM support[PR111311]
> > diff --git a/gcc/config/riscv/autovec-vls.md 
> > b/gcc/config/riscv/autovec-vls.md
> > index d208b418e5f..6f48f7d6232 100644
> > --- a/gcc/config/riscv/autovec-vls.md
> > +++ b/gcc/config/riscv/autovec-vls.md
> > @@ -148,6 +148,14 @@
> >[(set_attr "type" "vmov")
> > (set_attr "mode" "")])
> >
> > +(define_insn "*mov<mode>_vls"
> > +  [(set (match_operand:VLSB 0 "register_operand" "=vr")
> > +   (match_operand:VLSB 1 "register_operand" " vr"))]
> > +  "TARGET_VECTOR"
> > +  "vmv1r.v\t%0,%1"
> > +  [(set_attr "type" "vmov")
> > +   (set_attr "mode" "<MODE>")])
>
> Should we also add loads and stores as well?
> and just make sure this is also necessary for the fix and not sneaky, right?
>
> > +
> >  (define_expand "movmisalign"
> >[(set (match_operand:VLS 0 "nonimmediate_operand")
> > (match_operand:VLS 1 "general_operand"))]
>
 


[Committed] RISC-V: Add missing VLS mask bool mode reg -> reg patterns

2023-09-10 Thread Juzhe-Zhong
Committed.

gcc/ChangeLog:

* config/riscv/autovec-vls.md (*mov<mode>_vls): New pattern.
* config/riscv/vector-iterators.md: New iterator

---
 gcc/config/riscv/autovec-vls.md  |  8 
 gcc/config/riscv/vector-iterators.md | 15 +++
 2 files changed, 23 insertions(+)

diff --git a/gcc/config/riscv/autovec-vls.md b/gcc/config/riscv/autovec-vls.md
index d208b418e5f..6f48f7d6232 100644
--- a/gcc/config/riscv/autovec-vls.md
+++ b/gcc/config/riscv/autovec-vls.md
@@ -148,6 +148,14 @@
   [(set_attr "type" "vmov")
(set_attr "mode" "")])
 
+(define_insn "*mov_vls"
+  [(set (match_operand:VLSB 0 "register_operand" "=vr")
+   (match_operand:VLSB 1 "register_operand" " vr"))]
+  "TARGET_VECTOR"
+  "vmv1r.v\t%0,%1"
+  [(set_attr "type" "vmov")
+   (set_attr "mode" "")])
+
 (define_expand "movmisalign"
   [(set (match_operand:VLS 0 "nonimmediate_operand")
(match_operand:VLS 1 "general_operand"))]
diff --git a/gcc/config/riscv/vector-iterators.md 
b/gcc/config/riscv/vector-iterators.md
index a98ed9fcbb6..5694c0c8f37 100644
--- a/gcc/config/riscv/vector-iterators.md
+++ b/gcc/config/riscv/vector-iterators.md
@@ -2425,6 +2425,21 @@
   (V256DF "TARGET_VECTOR_VLS && TARGET_VECTOR_ELEN_FP_64 && TARGET_MIN_VLEN >= 
2048")
   (V512DF "TARGET_VECTOR_VLS && TARGET_VECTOR_ELEN_FP_64 && TARGET_MIN_VLEN >= 
4096")])
 
+(define_mode_iterator VLSB [
+  (V1BI "TARGET_VECTOR_VLS")
+  (V2BI "TARGET_VECTOR_VLS")
+  (V4BI "TARGET_VECTOR_VLS")
+  (V8BI "TARGET_VECTOR_VLS")
+  (V16BI "TARGET_VECTOR_VLS")
+  (V32BI "TARGET_VECTOR_VLS")
+  (V64BI "TARGET_VECTOR_VLS && TARGET_MIN_VLEN >= 64")
+  (V128BI "TARGET_VECTOR_VLS && TARGET_MIN_VLEN >= 128")
+  (V256BI "TARGET_VECTOR_VLS && TARGET_MIN_VLEN >= 256")
+  (V512BI "TARGET_VECTOR_VLS && TARGET_MIN_VLEN >= 512")
+  (V1024BI "TARGET_VECTOR_VLS && TARGET_MIN_VLEN >= 1024")
+  (V2048BI "TARGET_VECTOR_VLS && TARGET_MIN_VLEN >= 2048")
+  (V4096BI "TARGET_VECTOR_VLS && TARGET_MIN_VLEN >= 4096")])
+
 ;; VLS modes that has NUNITS < 32.
 (define_mode_iterator VLS_AVL_IMM [
   (V1QI "TARGET_VECTOR_VLS")
-- 
2.36.3



Re: Re: [PATCH] RISC-V: Add VLS modes VEC_PERM support[PR111311]

2023-09-10 Thread Kito Cheng via Gcc-patches
OK, but could you split this patch into two patches? pre-approved for both.

On Mon, Sep 11, 2023 at 10:36 AM juzhe.zh...@rivai.ai
 wrote:
>
> >> Should we also add loads and stores as well?
> >> and just make sure this is also necessary for the fix and not sneaky, 
> >> right?
>
> No, we don't need loads/stores. Since this following handling codes:
> (define_insn_and_split "*mov_lra"
>   [(set (match_operand:VLS_AVL_REG 0 "reg_or_mem_operand" "=vr, m,vr")
>   (match_operand:VLS_AVL_REG 1 "reg_or_mem_operand" "  m,vr,vr"))
>(clobber (match_scratch:P 2 "=,,X"))]
>   "TARGET_VECTOR && (lra_in_progress || reload_completed)
>&& (register_operand (operands[0], mode)
>|| register_operand (operands[1], mode))"
>   "#"
>   "&& reload_completed"
>   [(const_int 0)]
> {
>   if (REG_P (operands[0]) && REG_P (operands[1]))
>   emit_insn (gen_rtx_SET (operands[0], operands[1]));
>   else
> {
>   emit_move_insn (operands[2], gen_int_mode (GET_MODE_NUNITS 
> (mode),
>  Pmode));
>   unsigned insn_flags
> = GET_MODE_CLASS (mode) == MODE_VECTOR_BOOL
>  ? riscv_vector::UNARY_MASK_OP
>  : riscv_vector::UNARY_OP;
>   riscv_vector::emit_nonvlmax_insn (code_for_pred_mov 
> (mode),
>   insn_flags, operands, operands[2]);
> }
>   DONE;
> }
>   [(set_attr "type" "vmov")]
> )
>
> We split special case use emit_insn (gen_rtx_SET (operands[0], operands[1]));
>
> Missing this pattern will cause ICE but current testcases didn't produce such 
> issues.
> This issue is recognized after I support this pattern.
>
>
>
> juzhe.zh...@rivai.ai
>
> From: Kito Cheng
> Date: 2023-09-11 10:18
> To: Juzhe-Zhong
> CC: gcc-patches; kito.cheng
> Subject: Re: [PATCH] RISC-V: Add VLS modes VEC_PERM support[PR111311]
> > diff --git a/gcc/config/riscv/autovec-vls.md 
> > b/gcc/config/riscv/autovec-vls.md
> > index d208b418e5f..6f48f7d6232 100644
> > --- a/gcc/config/riscv/autovec-vls.md
> > +++ b/gcc/config/riscv/autovec-vls.md
> > @@ -148,6 +148,14 @@
> >[(set_attr "type" "vmov")
> > (set_attr "mode" "")])
> >
> > +(define_insn "*mov_vls"
> > +  [(set (match_operand:VLSB 0 "register_operand" "=vr")
> > +   (match_operand:VLSB 1 "register_operand" " vr"))]
> > +  "TARGET_VECTOR"
> > +  "vmv1r.v\t%0,%1"
> > +  [(set_attr "type" "vmov")
> > +   (set_attr "mode" "")])
>
> Should we also add loads and stores as well?
> and just make sure this is also necessary for the fix and not sneaky, right?
>
> > +
> >  (define_expand "movmisalign"
> >[(set (match_operand:VLS 0 "nonimmediate_operand")
> > (match_operand:VLS 1 "general_operand"))]
>


Re: Re: [PATCH] RISC-V: Add VLS modes VEC_PERM support[PR111311]

2023-09-10 Thread juzhe.zh...@rivai.ai
>> Should we also add loads and stores as well?
>> and just make sure this is also necessary for the fix and not sneaky, right?

No, we don't need loads/stores, because of the following handling code:
(define_insn_and_split "*mov_lra"
  [(set (match_operand:VLS_AVL_REG 0 "reg_or_mem_operand" "=vr, m,vr")
  (match_operand:VLS_AVL_REG 1 "reg_or_mem_operand" "  m,vr,vr"))
   (clobber (match_scratch:P 2 "=,,X"))]
  "TARGET_VECTOR && (lra_in_progress || reload_completed)
   && (register_operand (operands[0], mode)
   || register_operand (operands[1], mode))"
  "#"
  "&& reload_completed"
  [(const_int 0)]
{
  if (REG_P (operands[0]) && REG_P (operands[1]))
  emit_insn (gen_rtx_SET (operands[0], operands[1]));
  else
{
  emit_move_insn (operands[2], gen_int_mode (GET_MODE_NUNITS 
(mode),
 Pmode));
  unsigned insn_flags
= GET_MODE_CLASS (mode) == MODE_VECTOR_BOOL
 ? riscv_vector::UNARY_MASK_OP
 : riscv_vector::UNARY_OP;
  riscv_vector::emit_nonvlmax_insn (code_for_pred_mov 
(mode),
  insn_flags, operands, operands[2]);
}
  DONE;
}
  [(set_attr "type" "vmov")]
)

In the split, the special (reg -> reg) case uses
emit_insn (gen_rtx_SET (operands[0], operands[1]));

Missing this pattern will cause an ICE, but the current testcases didn't expose
the issue.
I only noticed this issue after I added support for this pattern.



juzhe.zh...@rivai.ai
 
From: Kito Cheng
Date: 2023-09-11 10:18
To: Juzhe-Zhong
CC: gcc-patches; kito.cheng
Subject: Re: [PATCH] RISC-V: Add VLS modes VEC_PERM support[PR111311]
> diff --git a/gcc/config/riscv/autovec-vls.md b/gcc/config/riscv/autovec-vls.md
> index d208b418e5f..6f48f7d6232 100644
> --- a/gcc/config/riscv/autovec-vls.md
> +++ b/gcc/config/riscv/autovec-vls.md
> @@ -148,6 +148,14 @@
>[(set_attr "type" "vmov")
> (set_attr "mode" "")])
>
> +(define_insn "*mov_vls"
> +  [(set (match_operand:VLSB 0 "register_operand" "=vr")
> +   (match_operand:VLSB 1 "register_operand" " vr"))]
> +  "TARGET_VECTOR"
> +  "vmv1r.v\t%0,%1"
> +  [(set_attr "type" "vmov")
> +   (set_attr "mode" "")])
 
Should we also add loads and stores as well?
and just make sure this is also necessary for the fix and not sneaky, right?
 
> +
>  (define_expand "movmisalign"
>[(set (match_operand:VLS 0 "nonimmediate_operand")
> (match_operand:VLS 1 "general_operand"))]
 


Re: [PATCH] MATCH: [PR111346] `X CMP MINMAX` pattern missing :c on CMP

2023-09-10 Thread Jeff Law via Gcc-patches




On 9/10/23 20:18, Andrew Pinski via Gcc-patches wrote:

I noticed this while working on other MINMAX optimizations. It was
hard to find a simplified testcase though because it was dependent on
the ssa name versions. Adding the `:c` to cmp allows the pattern to
be match for the case where minmax as the first operand of the comparison
rather than the second.

Committed as obvious after a bootstrap/test on x86_64-linux-gnu.

PR tree-optimization/111346

gcc/ChangeLog:

* match.pd (`X CMP MINMAX`): Add `:c` on the cmp part
of the pattern

gcc/testsuite/ChangeLog:

* gcc.dg/tree-ssa/minmaxcmp-1.c: New test.

OK
jeff


Re: [PATCH] RISC-V: Add VLS modes VEC_PERM support[PR111311]

2023-09-10 Thread Kito Cheng via Gcc-patches
> diff --git a/gcc/config/riscv/autovec-vls.md b/gcc/config/riscv/autovec-vls.md
> index d208b418e5f..6f48f7d6232 100644
> --- a/gcc/config/riscv/autovec-vls.md
> +++ b/gcc/config/riscv/autovec-vls.md
> @@ -148,6 +148,14 @@
>[(set_attr "type" "vmov")
> (set_attr "mode" "")])
>
> +(define_insn "*mov_vls"
> +  [(set (match_operand:VLSB 0 "register_operand" "=vr")
> +   (match_operand:VLSB 1 "register_operand" " vr"))]
> +  "TARGET_VECTOR"
> +  "vmv1r.v\t%0,%1"
> +  [(set_attr "type" "vmov")
> +   (set_attr "mode" "")])

Should we also add loads and stores as well?
and just make sure this is also necessary for the fix and not sneaky, right?

> +
>  (define_expand "movmisalign"
>[(set (match_operand:VLS 0 "nonimmediate_operand")
> (match_operand:VLS 1 "general_operand"))]


[PATCH] MATCH: [PR111346] `X CMP MINMAX` pattern missing :c on CMP

2023-09-10 Thread Andrew Pinski via Gcc-patches
I noticed this while working on other MINMAX optimizations. It was
hard to find a simplified testcase though because it was dependent on
the ssa name versions. Adding the `:c` to cmp allows the pattern to
be matched for the case where minmax is the first operand of the comparison
rather than the second.

Committed as obvious after a bootstrap/test on x86_64-linux-gnu.

PR tree-optimization/111346

gcc/ChangeLog:

* match.pd (`X CMP MINMAX`): Add `:c` on the cmp part
of the pattern

gcc/testsuite/ChangeLog:

* gcc.dg/tree-ssa/minmaxcmp-1.c: New test.
---
 gcc/match.pd|  2 +-
 gcc/testsuite/gcc.dg/tree-ssa/minmaxcmp-1.c | 39 +
 2 files changed, 40 insertions(+), 1 deletion(-)
 create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/minmaxcmp-1.c

diff --git a/gcc/match.pd b/gcc/match.pd
index c7b6db4b543..a60fe04885e 100644
--- a/gcc/match.pd
+++ b/gcc/match.pd
@@ -3942,7 +3942,7 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
 (for minmax (min min max max )
  cmp(ge  lt  le  gt  )
  (simplify
-  (cmp @0 (minmax:c @0 @1))
+  (cmp:c @0 (minmax:c @0 @1))
   { constant_boolean_node (cmp == GE_EXPR || cmp == LE_EXPR, type); } ))
 
 /* Undo fancy ways of writing max/min or other ?: expressions, like
diff --git a/gcc/testsuite/gcc.dg/tree-ssa/minmaxcmp-1.c 
b/gcc/testsuite/gcc.dg/tree-ssa/minmaxcmp-1.c
new file mode 100644
index 000..0706c026076
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/tree-ssa/minmaxcmp-1.c
@@ -0,0 +1,39 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -fdump-tree-optimized -fdump-tree-original" } */
+/* PR tree-optimization/111346 */
+
+int f();
+int g();
+
+_Bool test1(int a, int b)
+{
+return ((a > b) ? a : b) >= a; // return 1;
+}
+_Bool test1_(int a, int b)
+{
+return a <= ((a > b) ? a : b); // return 1;
+}
+/* test1 and test1_ should be able to optimize to `return 1;` during fold.  */
+/* { dg-final { scan-tree-dump-times "return 1;" 2 "original" } } */
+/* { dg-final { scan-tree-dump-not " MAX_EXPR " "original" } } */
+
+_Bool test2(int a, int b)
+{
+a = f();
+a = g();
+int t = a;
+if (t < b) t = b;
+return t >= a; // return 1;
+}
+
+_Bool test2_(int a, int b)
+{
+a = g();
+int t = a;
+if (t < b) t = b;
+return t >= a; // return 1;
+}
+
+/* All of these should be optimized to just be the function calls and `return 
1;` */
+/* { dg-final { scan-tree-dump-times "return 1;" 4 "optimized" } } */
+/* { dg-final { scan-tree-dump-not " MAX_EXPR " "optimized" } } */
-- 
2.31.1



Re: [PATCH] analyzer: implement symbolic value support for CPython plugin's refcnt checker [PR107646]

2023-09-10 Thread Eric Feng via Gcc-patches
On Thu, Sep 7, 2023 at 1:28 PM David Malcolm  wrote:

> On Mon, 2023-09-04 at 22:13 -0400, Eric Feng wrote:
>
> > Hi Dave,
>
> Hi Eric, thanks for the patch.
>
> >
> > Recently I've been working on symbolic value support for the reference
> > count checker. I've attached a patch for it below; let me know it looks
> > OK for trunk. Thanks!
> >
> > Best,
> > Eric
> >
> > ---
> >
> > This patch enhances the reference count checker in the CPython plugin by
> > adding support for symbolic values. Whereas previously we were only able
> > to check the reference count of PyObject* objects created in the scope
> > of the function; we are now able to emit diagnostics on reference count
> > mismatch of objects that were, for example, passed in as a function
> > parameter.
> >
> > rc6.c:6:10: warning: expected ‘obj’ to have reference count: N + ‘1’ but
> ob_refcnt field is N + ‘2’
> > 6 |   return obj;
> >   |  ^~~
>
> [...snip...]
>
> >  create mode 100644
> gcc/testsuite/gcc.dg/plugin/cpython-plugin-test-refcnt.c
> >
> > diff --git a/gcc/testsuite/gcc.dg/plugin/analyzer_cpython_plugin.c
> b/gcc/testsuite/gcc.dg/plugin/analyzer_cpython_plugin.c
> > index bf1982e79c3..d7ecd7fce09 100644
> > --- a/gcc/testsuite/gcc.dg/plugin/analyzer_cpython_plugin.c
> > +++ b/gcc/testsuite/gcc.dg/plugin/analyzer_cpython_plugin.c
> > @@ -314,17 +314,20 @@ public:
> >{
> >  diagnostic_metadata m;
> >  bool warned;
> > -// just assuming constants for now
> > -auto actual_refcnt
> > - = m_actual_refcnt->dyn_cast_constant_svalue ()->get_constant ();
> > -auto ob_refcnt = m_ob_refcnt->dyn_cast_constant_svalue
> ()->get_constant ();
> > -warned = warning_meta (rich_loc, m, get_controlling_option (),
> > -"expected %qE to have "
> > -"reference count: %qE but ob_refcnt field is:
> %qE",
> > -m_reg_tree, actual_refcnt, ob_refcnt);
> > -
> > -// location_t loc = rich_loc->get_loc ();
> > -// foo (loc);
> > +
> > +const auto *actual_refcnt_constant
> > + = m_actual_refcnt->dyn_cast_constant_svalue ();
> > +const auto *ob_refcnt_constant =
> m_ob_refcnt->dyn_cast_constant_svalue ();
> > +if (!actual_refcnt_constant || !ob_refcnt_constant)
> > +  return false;
> > +
> > +auto actual_refcnt = actual_refcnt_constant->get_constant ();
> > +auto ob_refcnt = ob_refcnt_constant->get_constant ();
> > +warned = warning_meta (
> > + rich_loc, m, get_controlling_option (),
> > + "expected %qE to have "
> > + "reference count: N + %qE but ob_refcnt field is N + %qE",
> > + m_reg_tree, actual_refcnt, ob_refcnt);
> >  return warned;
>
> I know you're emulating the old behavior I implemented way back in
> cpychecker, but I don't like that behavior :(
>
> Specifically, although the patch improves the behavior for symbolic
> values, it regresses the precision of wording for the concrete values
> case.  If we have e.g. a concrete ob_refcnt of 2, whereas we only have
> 1 pointer, then it's more readable to say:
>
>   warning: expected ‘obj’ to have reference count: ‘1’ but ob_refcnt
> field is ‘2’
>
> than:
>
>   warning: expected ‘obj’ to have reference count: N + ‘1’ but ob_refcnt
> field is N + ‘2’
>
> ...and we shouldn't quote concrete numbers, the message should be:
>
>   warning: expected ‘obj’ to have reference count of 1 but ob_refcnt field
> is 2


> or better:
>
>   warning: ‘*obj’ is pointed to by 1 pointer but 'ob_refcnt' field is 2
>
>
> Can you move the unwrapping of the svalue from the tests below into the
> emit vfunc?  That way the m_actual_refcnt doesn't have to be a
> constant_svalue; you could have logic in the emit vfunc to print
> readable messages based on what kind of svalue it is.
>
> Rather than 'N', it might be better to say 'initial'; how about:
>
>   warning: ‘*obj’ is pointed to by 0 additional pointers but 'ob_refcnt'
> field has increased by 1
>   warning: ‘*obj’ is pointed to by 1 additional pointer but 'ob_refcnt'
> field has increased by 2
>   warning: ‘*obj’ is pointed to by 1 additional pointer but 'ob_refcnt'
> field is unchanged
>   warning: ‘*obj’ is pointed to by 2 additional pointers but 'ob_refcnt'
> field has decreased by 1
>   warning: ‘*obj’ is pointed to by 1 fewer pointers but 'ob_refcnt' field
> is unchanged
>
> and similar?
>

That makes sense to me as well (indeed I was just emulating the old
behavior)! Will experiment and keep you posted on a revised patch with this
in mind.  This is somewhat of a minor detail but can we emit ‘*obj’ as
bolded text in the diagnostic message? Currently, I can emit this
(including the asterisk) like so: '*%E'. But unlike using %qE, it doesn't
bold the body of the single quotations. Is this possible?

>
> Maybe have a flag that tracks whether we're talking about a concrete
> value that's absolute versus a concrete value that's relative to the
> initial value?
>
>
> [...snip...]
>
>
> > @@ -369,6 +368,19 @@ 

[PATCH] Remove constraint modifier % for fcmaddcph/fmaddcph/fcmulcph since there're not commutative.

2023-09-10 Thread liuhongt via Gcc-patches
Here's the patch I've committed.
The patch also removes % for vfmaddcph.

gcc/ChangeLog:

PR target/111306
PR target/111335
* config/i386/sse.md (int_comm): New int_attr.
(fma__):
Remove % for Complex conjugate operations since they're not
commutative.
(fma___pair): Ditto.
(___mask): Ditto.
(cmul3): Ditto.

gcc/testsuite/ChangeLog:

* gcc.target/i386/pr111306.c: New test.
---
 gcc/config/i386/sse.md   | 16 ---
 gcc/testsuite/gcc.target/i386/pr111306.c | 36 
 2 files changed, 48 insertions(+), 4 deletions(-)
 create mode 100644 gcc/testsuite/gcc.target/i386/pr111306.c

diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md
index 6d3ae8dea0c..14615999394 100644
--- a/gcc/config/i386/sse.md
+++ b/gcc/config/i386/sse.md
@@ -6480,6 +6480,14 @@ (define_int_attr complexpairopname
[(UNSPEC_COMPLEX_FMA_PAIR "fmaddc")
 (UNSPEC_COMPLEX_FCMA_PAIR "fcmaddc")])
 
+(define_int_attr int_comm
+   [(UNSPEC_COMPLEX_FMA "")
+(UNSPEC_COMPLEX_FMA_PAIR "")
+(UNSPEC_COMPLEX_FCMA "")
+(UNSPEC_COMPLEX_FCMA_PAIR "")
+(UNSPEC_COMPLEX_FMUL "%")
+(UNSPEC_COMPLEX_FCMUL "")])
+
 (define_int_attr conj_op
[(UNSPEC_COMPLEX_FMA "")
 (UNSPEC_COMPLEX_FCMA "_conj")
@@ -6593,7 +6601,7 @@ (define_expand "cmla4"
 (define_insn "fma__"
   [(set (match_operand:VHF_AVX512VL 0 "register_operand" "=")
(unspec:VHF_AVX512VL
- [(match_operand:VHF_AVX512VL 1 "" "%v")
+ [(match_operand:VHF_AVX512VL 1 "" "v")
   (match_operand:VHF_AVX512VL 2 "" 
"")
   (match_operand:VHF_AVX512VL 3 "" "0")]
   UNSPEC_COMPLEX_F_C_MA))]
@@ -6658,7 +,7 @@ (define_insn_and_split 
"fma___fma_zero"
 (define_insn "fma___pair"
  [(set (match_operand:VF1_AVX512VL 0 "register_operand" "=")
(unspec:VF1_AVX512VL
-[(match_operand:VF1_AVX512VL 1 "vector_operand" "%v")
+[(match_operand:VF1_AVX512VL 1 "vector_operand" "v")
  (match_operand:VF1_AVX512VL 2 "bcst_vector_operand" "vmBr")
  (match_operand:VF1_AVX512VL 3 "vector_operand" "0")]
  UNSPEC_COMPLEX_F_C_MA_PAIR))]
@@ -6727,7 +6735,7 @@ (define_insn 
"___mask"
   [(set (match_operand:VHF_AVX512VL 0 "register_operand" "=")
(vec_merge:VHF_AVX512VL
  (unspec:VHF_AVX512VL
-   [(match_operand:VHF_AVX512VL 1 "nonimmediate_operand" "%v")
+   [(match_operand:VHF_AVX512VL 1 "nonimmediate_operand" "v")
 (match_operand:VHF_AVX512VL 2 "nonimmediate_operand" 
"")
 (match_operand:VHF_AVX512VL 3 "register_operand" "0")]
 UNSPEC_COMPLEX_F_C_MA)
@@ -6752,7 +6760,7 @@ (define_expand "cmul3"
 (define_insn "__"
   [(set (match_operand:VHF_AVX512VL 0 "register_operand" "=")
  (unspec:VHF_AVX512VL
-   [(match_operand:VHF_AVX512VL 1 "nonimmediate_operand" "%v")
+   [(match_operand:VHF_AVX512VL 1 "nonimmediate_operand" "v")
 (match_operand:VHF_AVX512VL 2 "nonimmediate_operand" 
"")]
 UNSPEC_COMPLEX_F_C_MUL))]
   "TARGET_AVX512FP16 && "
diff --git a/gcc/testsuite/gcc.target/i386/pr111306.c 
b/gcc/testsuite/gcc.target/i386/pr111306.c
new file mode 100644
index 000..541725ebdad
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr111306.c
@@ -0,0 +1,36 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -mavx512fp16 -mavx512vl" } */
+/* { dg-require-effective-target avx512fp16 } */
+
+#define AVX512FP16
+#include "avx512f-helper.h"
+
+__attribute__((optimize("O2"),noipa))
+void func1(_Float16 *a, _Float16 *b, int n, _Float16 *c) {
+  __m512h rA = _mm512_loadu_ph(a);
+  for (int i = 0; i < n; i += 32) {
+__m512h rB = _mm512_loadu_ph(b + i);
+_mm512_storeu_ph(c + i, _mm512_fcmul_pch(rB, rA));
+  }
+}
+
+void
+test_512 (void)
+{
+  int n = 32;
+  _Float16 a[n], b[n], c[n];
+  _Float16 exp[n];
+  for (int i = 1; i <= n; i++) {
+a[i - 1] = i & 1 ? -i : i;
+b[i - 1] = i;
+  }
+
+  func1(a, b, n, c);
+  for (int i = 0; i < n / 32; i += 2) {
+if (c[i] != a[i] * b[i] + a[i+1] * b[i+1]
+   || c[i+1] != a[i] * b[i+1] - a[i+1]*b[i])
+  __builtin_abort ();
+}
+}
+
+
-- 
2.31.1



RE: [PATCH] RISC-V: Expand fixed-vlmax/vls vector permutation in targethook

2023-09-10 Thread Li, Pan2 via Gcc-patches
Committed, thanks Jeff.

Pan

-Original Message-
From: Gcc-patches  On Behalf 
Of Jeff Law via Gcc-patches
Sent: Sunday, September 10, 2023 9:38 PM
To: Juzhe-Zhong ; gcc-patches@gcc.gnu.org
Cc: kito.ch...@sifive.com; kito.ch...@gmail.com
Subject: Re: [PATCH] RISC-V: Expand fixed-vlmax/vls vector permutation in 
targethook



On 9/9/23 20:33, Juzhe-Zhong wrote:
> When debugging FAIL: gcc.dg/pr92301.c execution test.
> Realize a vls vector permutation situation failed to vectorize since early 
> return false:
> 
> -  /* For constant size indices, we dont't need to handle it here.
> - Just leave it to vec_perm.  */
> -  if (d->perm.length ().is_constant ())
> -return false;
> 
> To avoid more potential failed vectorization case. Now expand it in 
> targethook.
> 
> gcc/ChangeLog:
> 
>   * config/riscv/riscv-v.cc (shuffle_generic_patterns): Expand 
> fixed-vlmax/vls vector permutation.
OK.
jeff


RE: [PATCH V2] RISC-V: Avoid unnecessary slideup in compress pattern of vec_perm

2023-09-10 Thread Li, Pan2 via Gcc-patches
Committed, thanks Jeff.

Pan

-Original Message-
From: Gcc-patches  On Behalf 
Of Jeff Law via Gcc-patches
Sent: Sunday, September 10, 2023 11:25 PM
To: Juzhe-Zhong ; gcc-patches@gcc.gnu.org
Cc: kito.ch...@sifive.com; kito.ch...@gmail.com
Subject: Re: [PATCH V2] RISC-V: Avoid unnecessary slideup in compress pattern 
of vec_perm



On 9/10/23 08:07, Juzhe-Zhong wrote:
> gcc/ChangeLog:
> 
>   * config/riscv/riscv-v.cc (shuffle_compress_patterns): Avoid 
> unnecessary slideup.
OK
jeff


[PATCH 2/2] libstdc++: Add dg-require-thread-fence in several tests

2023-09-10 Thread Christophe Lyon via Gcc-patches
Some targets like arm-eabi with newlib and default settings rely on
__sync_synchronize() to ensure synchronization.  Newlib does not
implement it by default, to make users aware they have to take special
care.

This makes a few tests fail to link.

This patch requires the missing thread-fence effective target in the
tests that need it, making them UNSUPPORTED instead of FAIL and
UNRESOLVED.

2023-09-10  Christophe Lyon  

libstdc++-v3/
* testsuite/20_util/to_address/debug.cc: Require thread-fence effective 
target.
* testsuite/21_strings/basic_string/cons/char/self_move.cc: Likewise.
* testsuite/21_strings/basic_string/debug/1_neg.cc: Likewise.
* testsuite/21_strings/basic_string/debug/2_neg.cc: Likewise.
* testsuite/21_strings/basic_string/debug/find1_neg.cc: Likewise.
* testsuite/21_strings/basic_string/debug/find2_neg.cc: Likewise.
* testsuite/21_strings/basic_string/hash/debug.cc: Likewise.
* testsuite/21_strings/basic_string/requirements/citerators.cc: 
Likewise.
* testsuite/21_strings/basic_string/requirements/exception/basic.cc: 
Likewise.
* 
testsuite/21_strings/basic_string/requirements/exception/generation_prohibited.cc:
Likewise.
* 
testsuite/21_strings/basic_string/requirements/exception/propagation_consistent.cc:
Likewise.
* testsuite/21_strings/debug/shrink_to_fit.cc: Likewise.
* testsuite/23_containers/array/debug/back1_neg.cc: Likewise.
* testsuite/23_containers/array/debug/back2_neg.cc: Likewise.
* testsuite/23_containers/array/debug/front1_neg.cc: Likewise.
* testsuite/23_containers/array/debug/front2_neg.cc: Likewise.
* testsuite/23_containers/array/debug/square_brackets_operator1_neg.cc: 
Likewise.
* testsuite/23_containers/array/debug/square_brackets_operator2_neg.cc: 
Likewise.
* testsuite/23_containers/deque/cons/self_move.cc: Likewise.
* testsuite/23_containers/deque/debug/98466.cc: Likewise.
* testsuite/23_containers/deque/debug/assign4_neg.cc: Likewise.
* testsuite/23_containers/deque/debug/construct4_neg.cc: Likewise.
* testsuite/23_containers/deque/debug/insert4_neg.cc: Likewise.
* testsuite/23_containers/deque/debug/invalidation/1.cc: Likewise.
* testsuite/23_containers/deque/debug/invalidation/2.cc: Likewise.
* testsuite/23_containers/deque/debug/invalidation/3.cc: Likewise.
* testsuite/23_containers/deque/debug/invalidation/4.cc: Likewise.
* testsuite/23_containers/forward_list/cons/self_move.cc: Likewise.
* testsuite/23_containers/forward_list/debug/construct4_neg.cc: 
Likewise.
* testsuite/23_containers/forward_list/debug/move_assign_neg.cc: 
Likewise.
* testsuite/23_containers/forward_list/debug/move_neg.cc: Likewise.
* testsuite/23_containers/list/cons/self_move.cc: Likewise.
* testsuite/23_containers/list/debug/assign4_neg.cc: Likewise.
* testsuite/23_containers/list/debug/construct4_neg.cc: Likewise.
* testsuite/23_containers/list/debug/insert4_neg.cc: Likewise.
* testsuite/23_containers/list/debug/invalidation/1.cc: Likewise.
* testsuite/23_containers/list/debug/invalidation/2.cc: Likewise.
* testsuite/23_containers/list/debug/invalidation/3.cc: Likewise.
* testsuite/23_containers/list/debug/invalidation/4.cc: Likewise.
* testsuite/23_containers/map/debug/construct4_neg.cc: Likewise.
* testsuite/23_containers/map/debug/construct5_neg.cc: Likewise.
* testsuite/23_containers/map/debug/insert4_neg.cc: Likewise.
* testsuite/23_containers/map/debug/invalidation/1.cc: Likewise.
* testsuite/23_containers/map/debug/invalidation/2.cc: Likewise.
* testsuite/23_containers/map/debug/move_assign_neg.cc: Likewise.
* testsuite/23_containers/map/debug/move_neg.cc: Likewise.
* testsuite/23_containers/map/modifiers/erase/end_neg.cc: Likewise.
* testsuite/23_containers/map/modifiers/insert/16813.cc: Likewise.
* testsuite/23_containers/multimap/debug/construct4_neg.cc: Likewise.
* testsuite/23_containers/multimap/debug/construct5_neg.cc: Likewise.
* testsuite/23_containers/multimap/debug/insert4_neg.cc: Likewise.
* testsuite/23_containers/multimap/debug/invalidation/1.cc: Likewise.
* testsuite/23_containers/multimap/debug/invalidation/2.cc: Likewise.
* testsuite/23_containers/multimap/debug/move_assign_neg.cc: Likewise.
* testsuite/23_containers/multimap/debug/move_neg.cc: Likewise.
* testsuite/23_containers/multiset/debug/construct4_neg.cc: Likewise.
* testsuite/23_containers/multiset/debug/construct5_neg.cc: Likewise.
* testsuite/23_containers/multiset/debug/insert4_neg.cc: Likewise.
* testsuite/23_containers/multiset/debug/invalidation/1.cc: Likewise.
* 

[PATCH 1/2] testsuite: Add and use thread_fence effective-target

2023-09-10 Thread Christophe Lyon via Gcc-patches
Some targets like arm-eabi with newlib and default settings rely on
__sync_synchronize() to ensure synchronization.  Newlib does not
implement it by default, to make users aware they have to take special
care.

This makes a few tests fail to link.

This patch adds a new thread_fence effective target (similar to the
corresponding one in libstdc++ testsuite), and uses it in the tests
that need it, making them UNSUPPORTED instead of FAIL and UNRESOLVED.

2023-09-10  Christophe Lyon  

gcc/
* doc/sourcebuild.texi (Other attributes): Document thread_fence
effective-target.

gcc/testsuite/
* g++.dg/init/array54.C: Require thread_fence.
* gcc.dg/c2x-nullptr-1.c: Likewise.
* gcc.dg/pr103721-2.c: Likewise.
* lib/target-supports.exp (check_effective_target_thread_fence):
New.
---
 gcc/doc/sourcebuild.texi  |  4 
 gcc/testsuite/g++.dg/init/array54.C   |  1 +
 gcc/testsuite/gcc.dg/c2x-nullptr-1.c  |  1 +
 gcc/testsuite/gcc.dg/pr103721-2.c |  1 +
 gcc/testsuite/lib/target-supports.exp | 12 
 5 files changed, 19 insertions(+)

diff --git a/gcc/doc/sourcebuild.texi b/gcc/doc/sourcebuild.texi
index 1a78b3c1abb..a5f61c29f3b 100644
--- a/gcc/doc/sourcebuild.texi
+++ b/gcc/doc/sourcebuild.texi
@@ -2860,6 +2860,10 @@ Compiler has been configured to support link-time 
optimization (LTO).
 Compiler and linker support link-time optimization relocatable linking
 with @option{-r} and @option{-flto} options.
 
+@item thread_fence
+Target implements @code{__atomic_thread_fence} without relying on
+non-implemented @code{__sync_synchronize()}.
+
 @item naked_functions
 Target supports the @code{naked} function attribute.
 
diff --git a/gcc/testsuite/g++.dg/init/array54.C 
b/gcc/testsuite/g++.dg/init/array54.C
index f6be350ba72..5241e451d6d 100644
--- a/gcc/testsuite/g++.dg/init/array54.C
+++ b/gcc/testsuite/g++.dg/init/array54.C
@@ -1,5 +1,6 @@
 // PR c++/90947
 // { dg-do run { target c++11 } }
+// { dg-require-effective-target thread_fence }
 
 #include 
 
diff --git a/gcc/testsuite/gcc.dg/c2x-nullptr-1.c 
b/gcc/testsuite/gcc.dg/c2x-nullptr-1.c
index 4e440234d52..97a31c27409 100644
--- a/gcc/testsuite/gcc.dg/c2x-nullptr-1.c
+++ b/gcc/testsuite/gcc.dg/c2x-nullptr-1.c
@@ -1,5 +1,6 @@
 /* Test valid usage of C23 nullptr.  */
 /* { dg-do run } */
+// { dg-require-effective-target thread_fence }
 /* { dg-options "-std=c2x -pedantic-errors -Wall -Wextra -Wno-unused-variable" 
} */
 
 #include 
diff --git a/gcc/testsuite/gcc.dg/pr103721-2.c 
b/gcc/testsuite/gcc.dg/pr103721-2.c
index aefa1f0f147..e059b1cfc2d 100644
--- a/gcc/testsuite/gcc.dg/pr103721-2.c
+++ b/gcc/testsuite/gcc.dg/pr103721-2.c
@@ -1,4 +1,5 @@
 // { dg-do run }
+// { dg-require-effective-target thread_fence }
 // { dg-options "-O2" }
 
 extern void abort ();
diff --git a/gcc/testsuite/lib/target-supports.exp 
b/gcc/testsuite/lib/target-supports.exp
index d353cc0aaf0..7ac9e7530cc 100644
--- a/gcc/testsuite/lib/target-supports.exp
+++ b/gcc/testsuite/lib/target-supports.exp
@@ -9107,6 +9107,18 @@ proc check_effective_target_sync_char_short { } {
 || [check_effective_target_mips_llsc] }}]
 }
 
+# Return 1 if thread_fence does not rely on __sync_synchronize
+# library function
+
+proc check_effective_target_thread_fence {} {
+return [check_no_compiler_messages thread_fence executable {
+   int main () {
+   __atomic_thread_fence (__ATOMIC_SEQ_CST);
+   return 0;
+   }
+} ""]
+}
+
 # Return 1 if the target uses a ColdFire FPU.
 
 proc check_effective_target_coldfire_fpu { } {
-- 
2.34.1



[PATCH v2] swap: Fix incorrect lane extraction by vec_extract() [PR106770]

2023-09-10 Thread Surya Kumari Jangala via Gcc-patches
swap: Fix incorrect lane extraction by vec_extract() [PR106770]

In the routine rs6000_analyze_swaps(), special handling of swappable
instructions is done even if the webs that contain the swappable
instructions are not optimized, i.e., the webs do not contain any
permuting load/store instructions along with the associated register
swap instructions. Doing special handling in such webs will result in
the extracted lane being adjusted unnecessarily for vec_extract.

Another issue is that existing code treats non-permuting loads/stores
as special swappables. Non-permuting loads/stores (that have not yet
been split into a permuting load/store and a swap) are handled by
converting them into a permuting load/store (which effectively removes
the swap). As a result, if special swappables are handled only in webs
containing permuting loads/stores, then non-optimal code is generated
for non-permuting loads/stores.

Hence, in this patch, all webs containing either permuting loads/
stores or non-permuting loads/stores are marked as requiring special
handling of swappables. Swaps associated with permuting loads/stores
are marked for removal, and non-permuting loads/stores are converted to
permuting loads/stores. Then the special swappables in the webs are
fixed up.

Another issue with always handling swappable instructions is that it is
incorrect to do so in webs where loads/stores on quad word aligned
addresses are changed to lvx/stvx. Similarly, in webs where
swap(load(vector constant)) instructions are replaced with
load(swapped vector constant), the swappable instructions should not be
modified.

2023-09-10  Surya Kumari Jangala  

gcc/
PR rtl-optimization/106770
* config/rs6000/rs6000-p8swap.cc (non_permuting_mem_insn): New
function.
(handle_non_permuting_mem_insn): New function.
(rs6000_analyze_swaps): Handle swappable instructions only in
certain webs.
(web_requires_special_handling): New instance variable.
(handle_special_swappables): Remove handling of non-permuting
load/store instructions.

gcc/testsuite/
PR rtl-optimization/106770
* gcc.target/powerpc/pr106770.c: New test.
---

diff --git a/gcc/config/rs6000/rs6000-p8swap.cc 
b/gcc/config/rs6000/rs6000-p8swap.cc
index 0388b9bd736..3a695aa1318 100644
--- a/gcc/config/rs6000/rs6000-p8swap.cc
+++ b/gcc/config/rs6000/rs6000-p8swap.cc
@@ -179,6 +179,13 @@ class swap_web_entry : public web_entry_base
   unsigned int special_handling : 4;
   /* Set if the web represented by this entry cannot be optimized.  */
   unsigned int web_not_optimizable : 1;
+  /* Set if the swappable insns in the web represented by this entry
+ have to be fixed. Swappable insns have to be fixed in :
+   - webs containing permuting loads/stores and the swap insns
+in such webs have been marked for removal
+   - webs where non-permuting loads/stores have been converted
+to permuting loads/stores  */
+  unsigned int web_requires_special_handling : 1;
   /* Set if this insn should be deleted.  */
   unsigned int will_delete : 1;
 };
@@ -1468,14 +1475,6 @@ handle_special_swappables (swap_web_entry *insn_entry, 
unsigned i)
   if (dump_file)
fprintf (dump_file, "Adjusting subreg in insn %d\n", i);
   break;
-case SH_NOSWAP_LD:
-  /* Convert a non-permuting load to a permuting one.  */
-  permute_load (insn);
-  break;
-case SH_NOSWAP_ST:
-  /* Convert a non-permuting store to a permuting one.  */
-  permute_store (insn);
-  break;
 case SH_EXTRACT:
   /* Change the lane on an extract operation.  */
   adjust_extract (insn);
@@ -2401,6 +2400,25 @@ recombine_lvx_stvx_patterns (function *fun)
   free (to_delete);
 }
 
+/* Return true if insn is a non-permuting load/store.  */
+static bool
+non_permuting_mem_insn (swap_web_entry *insn_entry, unsigned int i)
+{
+  return (insn_entry[i].special_handling == SH_NOSWAP_LD ||
+ insn_entry[i].special_handling == SH_NOSWAP_ST);
+}
+
+/* Convert a non-permuting load/store insn to a permuting one.  */
+static void
+handle_non_permuting_mem_insn (swap_web_entry *insn_entry, unsigned int i)
+{
+  rtx_insn *insn = insn_entry[i].insn;
+  if (insn_entry[i].special_handling == SH_NOSWAP_LD)
+permute_load (insn);
+  else if (insn_entry[i].special_handling == SH_NOSWAP_ST)
+permute_store (insn);
+}
+
 /* Main entry point for this pass.  */
 unsigned int
 rs6000_analyze_swaps (function *fun)
@@ -2624,25 +2642,56 @@ rs6000_analyze_swaps (function *fun)
   dump_swap_insn_table (insn_entry);
 }
 
-  /* For each load and store in an optimizable web (which implies
- the loads and stores are permuting), find the associated
- register swaps and mark them for removal.  Due to various
- optimizations we may mark the same swap more than once.  Also
- perform special handling for swappable insns that require it.  */
+  /* There are two kinds of optimizations 

PING^4: [PATCH] rtl-optimization/110939 Really fix narrow comparison of memory and constant

2023-09-10 Thread Xi Ruoyao via Gcc-patches
Ping.

> > > On Thu, Aug 10, 2023 at 03:04:03PM +0200, Stefan Schulze Frielinghaus 
> > > wrote:
> > > > In the former fix in commit 41ef5a34161356817807be3a2e51fbdbe575ae85 I
> > > > completely missed the fact that the normal form of a generated constant 
> > > > for a
> > > > mode with fewer bits than in HOST_WIDE_INT is a sign extended version 
> > > > of the
> > > > actual constant.  This even holds true for unsigned constants.
> > > > 
> > > > Fixed by masking out the upper bits for the incoming constant and sign
> > > > extending the resulting unsigned constant.
> > > > 
> > > > Bootstrapped and regtested on x64 and s390x.  Ok for mainline?
> > > > 
> > > > While reading existing optimizations in combine I stumbled across two
> > > > optimizations where either my intuition about the representation of
> > > > unsigned integers via a const_int rtx is wrong, which then in turn would
> > > > probably also mean that this patch is wrong, or that the optimizations
> > > > are missed sometimes.  In other words in the following I would assume
> > > > that the upper bits are masked out:
> > > > 
> > > > diff --git a/gcc/combine.cc b/gcc/combine.cc
> > > > index 468b7fde911..80c4ff0fbaf 100644
> > > > --- a/gcc/combine.cc
> > > > +++ b/gcc/combine.cc
> > > > @@ -11923,7 +11923,7 @@ simplify_compare_const (enum rtx_code code, 
> > > > machine_mode mode,
> > > >    /* (unsigned) < 0x8000 is equivalent to >= 0.  */
> > > >    else if (is_a  (mode, _mode)
> > > >    && GET_MODE_PRECISION (int_mode) - 1 < 
> > > > HOST_BITS_PER_WIDE_INT
> > > > -  && ((unsigned HOST_WIDE_INT) const_op
> > > > +  && (((unsigned HOST_WIDE_INT) const_op & GET_MODE_MASK 
> > > > (int_mode))
> > > >    == HOST_WIDE_INT_1U << (GET_MODE_PRECISION 
> > > > (int_mode) - 1)))
> > > >     {
> > > >   const_op = 0;
> > > > @@ -11962,7 +11962,7 @@ simplify_compare_const (enum rtx_code code, 
> > > > machine_mode mode,
> > > >    /* (unsigned) >= 0x8000 is equivalent to < 0.  */
> > > >    else if (is_a  (mode, _mode)
> > > >    && GET_MODE_PRECISION (int_mode) - 1 < 
> > > > HOST_BITS_PER_WIDE_INT
> > > > -  && ((unsigned HOST_WIDE_INT) const_op
> > > > +  && (((unsigned HOST_WIDE_INT) const_op & GET_MODE_MASK 
> > > > (int_mode))
> > > >    == HOST_WIDE_INT_1U << (GET_MODE_PRECISION 
> > > > (int_mode) - 1)))
> > > >     {
> > > >   const_op = 0;
> > > > 
> > > > For example, while bootstrapping on x64 the optimization is missed since
> > > > a LTU comparison in QImode is done and the constant equals
> > > > 0xff80.
> > > > 
> > > > Sorry for inlining another patch, but I would really like to make sure
> > > > that my understanding is correct, now, before I come up with another
> > > > patch.  Thus it would be great if someone could shed some light on this.
> > > > 
> > > > gcc/ChangeLog:
> > > > 
> > > > * combine.cc (simplify_compare_const): Properly handle unsigned
> > > > constants while narrowing comparison of memory and constants.
> > > > ---
> > > >  gcc/combine.cc | 19 ++-
> > > >  1 file changed, 10 insertions(+), 9 deletions(-)
> > > > 
> > > > diff --git a/gcc/combine.cc b/gcc/combine.cc
> > > > index e46d202d0a7..468b7fde911 100644
> > > > --- a/gcc/combine.cc
> > > > +++ b/gcc/combine.cc
> > > > @@ -12003,14 +12003,15 @@ simplify_compare_const (enum rtx_code code, 
> > > > machine_mode mode,
> > > >    && !MEM_VOLATILE_P (op0)
> > > >    /* The optimization makes only sense for constants which are big 
> > > > enough
> > > >  so that we have a chance to chop off something at all.  */
> > > > -  && (unsigned HOST_WIDE_INT) const_op > 0xff
> > > > -  /* Bail out, if the constant does not fit into INT_MODE.  */
> > > > -  && (unsigned HOST_WIDE_INT) const_op
> > > > -    < ((HOST_WIDE_INT_1U << (GET_MODE_PRECISION (int_mode) - 1) << 
> > > > 1) - 1)
> > > > +  && ((unsigned HOST_WIDE_INT) const_op & GET_MODE_MASK 
> > > > (int_mode)) > 0xff
> > > >    /* Ensure that we do not overflow during normalization.  */
> > > > -  && (code != GTU || (unsigned HOST_WIDE_INT) const_op < 
> > > > HOST_WIDE_INT_M1U))
> > > > +  && (code != GTU
> > > > + || ((unsigned HOST_WIDE_INT) const_op & GET_MODE_MASK 
> > > > (int_mode))
> > > > +    < HOST_WIDE_INT_M1U)
> > > > +  && trunc_int_for_mode (const_op, int_mode) == const_op)
> > > >  {
> > > > -  unsigned HOST_WIDE_INT n = (unsigned HOST_WIDE_INT) const_op;
> > > > +  unsigned HOST_WIDE_INT n
> > > > +   = (unsigned HOST_WIDE_INT) const_op & GET_MODE_MASK (int_mode);
> > > >    enum rtx_code adjusted_code;
> > > >  
> > > >    /* Normalize code to either LEU or GEU.  */
> > > > @@ -12051,15 +12052,15 @@ simplify_compare_const (enum rtx_code code, 
> > > > machine_mode mode,
> > > > 

Re: [PATCH] Fix PR 111331: wrong code for `a > 28 ? MIN : 29`

2023-09-10 Thread Jeff Law via Gcc-patches




On 9/8/23 06:39, Andrew Pinski via Gcc-patches wrote:

The problem here is after r6-7425-ga9fee7cdc3c62d0e51730,
the comparison to see if the transformation could be done was using the
wrong value. Instead of checking whether the inner value was LE (for MIN, or
GE for MAX) the outer value, it was comparing the inner value to the value used
in the comparison, which was wrong.
The match pattern copied the same logic mistake when they were added in
r14-1411-g17cca3c43e2f49 .

OK? Bootstrapped and tested on x86_64-linux-gnu.

gcc/ChangeLog:

PR tree-optimization/111331
* match.pd (`(a CMP CST1) ? max : a`):
Fix the LE/GE comparison to the correct value.
* tree-ssa-phiopt.cc (minmax_replacement):
Fix the LE/GE comparison for the
`(a CMP CST1) ? max : a` optimization.

gcc/testsuite/ChangeLog:

PR tree-optimization/111331
* gcc.c-torture/execute/pr111331-1.c: New test.
* gcc.c-torture/execute/pr111331-2.c: New test.
* gcc.c-torture/execute/pr111331-3.c: New test.

OK
jeff


Re: [PATCH] RISC-V Add Types to Un-Typed Thead Instructions:

2023-09-10 Thread Jeff Law via Gcc-patches




On 8/31/23 11:36, Edwin Lu wrote:

Related Discussion:
https://inbox.sourceware.org/gcc-patches/12fb5088-3f28-0a69-de1e-f387371a5...@gmail.com/

This patch updates the THEAD instructions to ensure that no insn is left
without a type attribute.

Tested for regressions using rv32/64 multilib for linux/newlib.

gcc/Changelog:

* config/riscv/thead.md: Update types
OK.  The first could arguably be "multi", but both instructions it 
generates appear to be move/conversions, so "fmove" is reasonable as well.


Ok for the trunk.  And I think that should allow us to turn on the 
assertion, right?


jeff


Re: [PATCH] [11/12/13/14 Regression] ABI break in _Hash_node_value_base since GCC 11 [PR 111050]

2023-09-10 Thread Sam James via Gcc-patches


François Dumont via Gcc-patches  writes:

> Following confirmation of the fix by TC here is the patch where I'm
> simply adding a 'constexpr' on _M_next().
>
> Please let me know if this ChangeLog entry is correct. I would prefer
> this patch to be assigned to 'TC' with me as co-author but I don't
> know how to do such a thing. Unless I need to change my user git
> identity to do so ?

git commit --author="TC " --amend

>
>     libstdc++: Add constexpr qualification to _Hash_node::_M_next()
>
> https://gcc.gnu.org/git/?p=gcc.git;a=commit;h=1b6f0476837205932613ddb2b3429a55c26c409d
>     changed _Hash_node_value_base to no longer derive from
> _Hash_node_base, which means
>     that its member functions expect _M_storage to be at a different
> offset. So explosions
>     result if an out-of-line definition is emitted for any of the
> member functions (say,
>     in a non-optimized build) and the resulting object file is then
> linked with code built
>     using older version of GCC/libstdc++.
>
>     libstdc++-v3/ChangeLog:
>
>     * include/bits/hashtable_policy.h
>     (_Hash_node_value_base<>::_M_valptr(),
> _Hash_node_value_base<>::_M_v())
>     Add [[__gnu__::__always_inline__]].
>     (_Hash_node<>::_M_next()): Add constexpr.
>
>     Co-authored-by: TC 
>
> Ok to commit and backport to GCC 11, 12, 13 branches ?
>
> François
>
> [2. text/x-patch; pr111050.patch]...



Re: [PATCH V2] RISC-V: Avoid unnecessary slideup in compress pattern of vec_perm

2023-09-10 Thread Jeff Law via Gcc-patches




On 9/10/23 08:07, Juzhe-Zhong wrote:

gcc/ChangeLog:

* config/riscv/riscv-v.cc (shuffle_compress_patterns): Avoid 
unnecessary slideup.

OK
jeff


Re: Re: [PATCH] RISC-V: Avoid unnecessary slideup in compress pattern of vec_perm

2023-09-10 Thread 钟居哲
Address comment: [PATCH V2] RISC-V: Avoid unnecessary slideup in compress 
pattern of vec_perm (gnu.org)



juzhe.zh...@rivai.ai
 
From: Jeff Law
Date: 2023-09-10 21:34
To: Juzhe-Zhong; gcc-patches
CC: kito.cheng; kito.cheng; rdapp.gcc
Subject: Re: [PATCH] RISC-V: Avoid unnecessary slideup in compress pattern of 
vec_perm
 
 
On 9/9/23 21:55, Juzhe-Zhong wrote:
> If all elements of a const vector are the same, the slideup is unnecessary.
> 
> gcc/ChangeLog:
> 
> * config/riscv/riscv-v.cc (shuffle_compress_patterns): Avoid unnecessary 
> slideup.
> 
> ---
>   gcc/config/riscv/riscv-v.cc | 2 +-
>   1 file changed, 1 insertion(+), 1 deletion(-)
> 
> diff --git a/gcc/config/riscv/riscv-v.cc b/gcc/config/riscv/riscv-v.cc
> index bee60de1d26..7ef884907b8 100644
> --- a/gcc/config/riscv/riscv-v.cc
> +++ b/gcc/config/riscv/riscv-v.cc
> @@ -2697,7 +2697,7 @@ shuffle_compress_patterns (struct expand_vec_perm_d *d)
> rtx mask = force_reg (mask_mode, builder.build ());
>   
> rtx merge = d->op1;
> -  if (need_slideup_p)
> +  if (need_slideup_p && !const_vec_duplicate_p (d->op1))
>   {
> int slideup_cnt = vlen - (d->perm[vlen - 1].to_constant () % vlen) - 
> 1;
> rtx ops[] = {d->target, d->op1, gen_int_mode (slideup_cnt, Pmode)};
Would it be better to adjust how we compute need_slideup_p to check 
!const_vec_duplicate_p (d->op1) instead of doing it here?
 
That way the name "need_slideup_p" stays consistent with the intent of 
the code.  It would also mean we wouldn't need to duplicate the 
additional check if we wanted to model the use of slideup in the cost 
calculations.
 
Jeff
 


[pushed] Darwin: Partial reversion of r14-3648 (Inits Section).

2023-09-10 Thread Iain Sandoe via Gcc-patches
Tested on x86_64-darwin21 and i686-darwin9 with both dwarfutils and
llvm-based dsymutil implementations.  Pushed to trunk, thanks
Iain

--- 8< ---

Although the Darwin ABI places both hot and cold partitions in the same
section (the linker can partition by name), this does not work with the
current dwarf2out implementation.

Since we do see global initialization code getting hot/cold splits, this
patch places the cold parts into text_cold, and keeps the hot part in
the correct Init section per ABI.

TODO: figure out a way to allow us to match the ABI fully.

gcc/ChangeLog:

* config/darwin.cc (darwin_function_section): Place unlikely
executed global init code into the standard cold section.

Signed-off-by: Iain Sandoe 
---
 gcc/config/darwin.cc | 15 +--
 1 file changed, 9 insertions(+), 6 deletions(-)

diff --git a/gcc/config/darwin.cc b/gcc/config/darwin.cc
index 95d6194cf22..154a2b2755a 100644
--- a/gcc/config/darwin.cc
+++ b/gcc/config/darwin.cc
@@ -3893,19 +3893,22 @@ darwin_function_section (tree decl, enum node_frequency 
freq,
   if (decl && DECL_SECTION_NAME (decl) != NULL)
 return get_named_section (decl, NULL, 0);
 
+  /* We always put unlikely executed stuff in the cold section; we have to put
+ this ahead of the global init section, since partitioning within a section
+ breaks some assumptions made in the DWARF handling.  */
+  if (freq == NODE_FREQUENCY_UNLIKELY_EXECUTED)
+return (use_coal) ? darwin_sections[text_cold_coal_section]
+ : darwin_sections[text_cold_section];
+
   /* Intercept functions in global init; these are placed in separate sections.
- FIXME: there should be some neater way to do this.  */
+ FIXME: there should be some neater way to do this, FIXME we should be able
+ to partition within a section.  */
   if (DECL_NAME (decl)
   && (startswith (IDENTIFIER_POINTER (DECL_NAME (decl)), "_GLOBAL__sub_I")
  || startswith (IDENTIFIER_POINTER (DECL_NAME (decl)),
 "__static_initialization_and_destruction")))
 return  darwin_sections[static_init_section];
 
-  /* We always put unlikely executed stuff in the cold section.  */
-  if (freq == NODE_FREQUENCY_UNLIKELY_EXECUTED)
-return (use_coal) ? darwin_sections[text_cold_coal_section]
- : darwin_sections[text_cold_section];
-
   /* If we have LTO *and* feedback information, then let LTO handle
  the function ordering, it makes a better job (for normal, hot,
  startup and exit - hence the bailout for cold above).  */
-- 
2.39.2 (Apple Git-143)



Re: [PATCH V2] RISC-V: Avoid unnecessary slideup in compress pattern of vec_perm

2023-09-10 Thread 钟居哲
Address comment: [PATCH V2] RISC-V: Avoid unnecessary slideup in compress 
pattern of vec_perm (gnu.org)



juzhe.zh...@rivai.ai
 
From: Juzhe-Zhong
Date: 2023-09-10 22:07
To: gcc-patches
CC: kito.cheng; kito.cheng; jeffreyalaw; rdapp.gcc; Juzhe-Zhong
Subject: [PATCH V2] RISC-V: Avoid unnecessary slideup in compress pattern of 
vec_perm
gcc/ChangeLog:
 
* config/riscv/riscv-v.cc (shuffle_compress_patterns): Avoid unnecessary 
slideup.
 
---
gcc/config/riscv/riscv-v.cc | 3 ++-
1 file changed, 2 insertions(+), 1 deletion(-)
 
diff --git a/gcc/config/riscv/riscv-v.cc b/gcc/config/riscv/riscv-v.cc
index bee60de1d26..3cd1f61de0e 100644
--- a/gcc/config/riscv/riscv-v.cc
+++ b/gcc/config/riscv/riscv-v.cc
@@ -2647,7 +2647,8 @@ shuffle_compress_patterns (struct expand_vec_perm_d *d)
For index = { 0, 2, 5, 6}, we need to slide op1 up before
we apply compress approach.  */
-  bool need_slideup_p = maybe_ne (d->perm[vlen - 1], 2 * vec_len - 1);
+  bool need_slideup_p = maybe_ne (d->perm[vlen - 1], 2 * vec_len - 1)
+ && !const_vec_duplicate_p (d->op1);
   /* If we leave it directly be handled by general gather,
  the code sequence will be:
-- 
2.36.3
 


[PATCH V2] RISC-V: Avoid unnecessary slideup in compress pattern of vec_perm

2023-09-10 Thread Juzhe-Zhong
gcc/ChangeLog:

* config/riscv/riscv-v.cc (shuffle_compress_patterns): Avoid 
unnecessary slideup.

---
 gcc/config/riscv/riscv-v.cc | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/gcc/config/riscv/riscv-v.cc b/gcc/config/riscv/riscv-v.cc
index bee60de1d26..3cd1f61de0e 100644
--- a/gcc/config/riscv/riscv-v.cc
+++ b/gcc/config/riscv/riscv-v.cc
@@ -2647,7 +2647,8 @@ shuffle_compress_patterns (struct expand_vec_perm_d *d)
 
For index = { 0, 2, 5, 6}, we need to slide op1 up before
we apply compress approach.  */
-  bool need_slideup_p = maybe_ne (d->perm[vlen - 1], 2 * vec_len - 1);
+  bool need_slideup_p = maybe_ne (d->perm[vlen - 1], 2 * vec_len - 1)
+   && !const_vec_duplicate_p (d->op1);
 
   /* If we leave it directly be handled by general gather,
  the code sequence will be:
-- 
2.36.3



[PATCH] [11/12/13/14 Regression] ABI break in _Hash_node_value_base since GCC 11 [PR 111050]

2023-09-10 Thread François Dumont via Gcc-patches
Following confirmation of the fix by TC here is the patch where I'm 
simply adding a 'constexpr' on _M_next().


Please let me know if this ChangeLog entry is correct. I would prefer this 
patch to be assigned to 'TC' with me as co-author but I don't know how 
to do such a thing. Unless I need to change my user git identity to do so ?


    libstdc++: Add constexpr qualification to _Hash_node::_M_next()

https://gcc.gnu.org/git/?p=gcc.git;a=commit;h=1b6f0476837205932613ddb2b3429a55c26c409d
    changed _Hash_node_value_base to no longer derive from 
_Hash_node_base, which means
    that its member functions expect _M_storage to be at a different 
offset. So explosions
    result if an out-of-line definition is emitted for any of the 
member functions (say,
    in a non-optimized build) and the resulting object file is then 
linked with code built

    using older version of GCC/libstdc++.

    libstdc++-v3/ChangeLog:

    * include/bits/hashtable_policy.h
    (_Hash_node_value_base<>::_M_valptr(), 
_Hash_node_value_base<>::_M_v())

    Add [[__gnu__::__always_inline__]].
    (_Hash_node<>::_M_next()): Add constexpr.

    Co-authored-by: TC 

Ok to commit and backport to GCC 11, 12, 13 branches ?

François

diff --git a/libstdc++-v3/include/bits/hashtable_policy.h b/libstdc++-v3/include/bits/hashtable_policy.h
index 347d468ea86..101c5eb639c 100644
--- a/libstdc++-v3/include/bits/hashtable_policy.h
+++ b/libstdc++-v3/include/bits/hashtable_policy.h
@@ -327,18 +327,22 @@ namespace __detail
 
   __gnu_cxx::__aligned_buffer<_Value> _M_storage;
 
+  [[__gnu__::__always_inline__]]
   _Value*
   _M_valptr() noexcept
   { return _M_storage._M_ptr(); }
 
+  [[__gnu__::__always_inline__]]
   const _Value*
   _M_valptr() const noexcept
   { return _M_storage._M_ptr(); }
 
+  [[__gnu__::__always_inline__]]
   _Value&
   _M_v() noexcept
   { return *_M_valptr(); }
 
+  [[__gnu__::__always_inline__]]
   const _Value&
   _M_v() const noexcept
   { return *_M_valptr(); }
@@ -372,7 +376,7 @@ namespace __detail
 : _Hash_node_base
 , _Hash_node_value<_Value, _Cache_hash_code>
 {
-  _Hash_node*
+  constexpr _Hash_node*
   _M_next() const noexcept
   { return static_cast<_Hash_node*>(this->_M_nxt); }
 };


Re: [PATCH] RISC-V: Expand fixed-vlmax/vls vector permutation in targethook

2023-09-10 Thread Jeff Law via Gcc-patches




On 9/9/23 20:33, Juzhe-Zhong wrote:

While debugging "FAIL: gcc.dg/pr92301.c execution test", I realized that a
VLS vector permutation failed to vectorize because of this early
"return false":

-  /* For constant size indices, we dont't need to handle it here.
- Just leave it to vec_perm.  */
-  if (d->perm.length ().is_constant ())
-return false;

To avoid more potential vectorization failures, expand it in the target hook instead.

gcc/ChangeLog:

* config/riscv/riscv-v.cc (shuffle_generic_patterns): Expand 
fixed-vlmax/vls vector permutation.

OK.
jeff


Re: [PATCH] RISC-V: Avoid unnecessary slideup in compress pattern of vec_perm

2023-09-10 Thread Jeff Law via Gcc-patches




On 9/9/23 21:55, Juzhe-Zhong wrote:

If all elements of a const vector are the same, the slideup is unnecessary.

gcc/ChangeLog:

* config/riscv/riscv-v.cc (shuffle_compress_patterns): Avoid 
unnecessary slideup.

---
  gcc/config/riscv/riscv-v.cc | 2 +-
  1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/gcc/config/riscv/riscv-v.cc b/gcc/config/riscv/riscv-v.cc
index bee60de1d26..7ef884907b8 100644
--- a/gcc/config/riscv/riscv-v.cc
+++ b/gcc/config/riscv/riscv-v.cc
@@ -2697,7 +2697,7 @@ shuffle_compress_patterns (struct expand_vec_perm_d *d)
rtx mask = force_reg (mask_mode, builder.build ());
  
rtx merge = d->op1;

-  if (need_slideup_p)
+  if (need_slideup_p && !const_vec_duplicate_p (d->op1))
  {
int slideup_cnt = vlen - (d->perm[vlen - 1].to_constant () % vlen) - 1;
rtx ops[] = {d->target, d->op1, gen_int_mode (slideup_cnt, Pmode)};
Would it be better to adjust how we compute need_slideup_p to check 
!const_vec_duplicate_p (d->op1) instead of doing it here?


That way the name "need_slideup_p" stays consistent with the intent of 
the code.  It would also mean we wouldn't need to duplicate the 
additional check if we wanted to model the use of slideup in the cost 
calculations.


Jeff


[C PATCH 1/6 v2] c: reorganize recursive type checking

2023-09-10 Thread Martin Uecker via Gcc-patches


Thanks Joseph, below is a revised version of this patch
with slight additional changes to the comment of
tagged_types_tu_compatible_p.

ok for trunk? 

Martin

Am Mittwoch, dem 06.09.2023 um 20:59 + schrieb Joseph Myers:
> On Sat, 26 Aug 2023, Martin Uecker via Gcc-patches wrote:
> 
> > -static int
> > +static bool
> >  comp_target_types (location_t location, tree ttl, tree ttr)
> 
> The comment above this function should be updated to refer to returning 
> true, not to returning 1.  And other comments on common_pointer_type and 
> inside that function should be updated to refer to comp_target_types 
> returning true, not nonzero.
> 
> > @@ -1395,17 +1382,13 @@ free_all_tagged_tu_seen_up_to (const struct 
> > tagged_tu_seen_cache *tu_til)
> >  
> >  /* Return 1 if two 'struct', 'union', or 'enum' types T1 and T2 are
> > compatible.  If the two types are not the same (which has been
> > -   checked earlier), this can only happen when multiple translation
> > -   units are being compiled.  See C99 6.2.7 paragraph 1 for the exact
> > -   rules.  ENUM_AND_INT_P and DIFFERENT_TYPES_P are as in
> > -   comptypes_internal.  */
> > +   checked earlier).  */
> >  
> > -static int
> > +static bool
> >  tagged_types_tu_compatible_p (const_tree t1, const_tree t2,
> > - bool *enum_and_int_p, bool *different_types_p)
> > + struct comptypes_data* data)
> 
> Similarly, this comment should be updated for the new return type.  Also 
> the GNU style is "struct comptypes_data *data" with space before not after 
> '*'.
> 
> > @@ -1631,9 +1603,9 @@ tagged_types_tu_compatible_p (const_tree t1, 
> > const_tree t2,
> > Otherwise, the argument types must match.
> > ENUM_AND_INT_P and DIFFERENT_TYPES_P are as in comptypes_internal.  */
> >  
> > -static int
> > +static bool
> >  function_types_compatible_p (const_tree f1, const_tree f2,
> > -bool *enum_and_int_p, bool *different_types_p)
> > +struct comptypes_data *data)
> 
> Another comment to update for a changed return type.
> 
> >  /* Check two lists of types for compatibility, returning 0 for
> > -   incompatible, 1 for compatible, or 2 for compatible with
> > -   warning.  ENUM_AND_INT_P and DIFFERENT_TYPES_P are as in
> > -   comptypes_internal.  */
> > +   incompatible, 1 for compatible.  ENUM_AND_INT_P and
> > +   DIFFERENT_TYPES_P are as in comptypes_internal.  */
> >  
> > -static int
> > +static bool
> >  type_lists_compatible_p (const_tree args1, const_tree args2,
> > -bool *enum_and_int_p, bool *different_types_p)
> > +struct comptypes_data *data)
> 
> This one also needs updating to remove references to parameters that no 
> longer exist.
> 

c: reorganize recursive type checking

Reorganize recursive type checking to use a structure to
store information collected during the recursion and
returned to the caller (warning_needed, enum_and_int_p,
different_types_p).

gcc/c:
* c-typeck.cc (struct comptypes_data): Add structure.
(tagged_types_tu_compatible_p,
function_types_compatible_p, type_lists_compatible_p,
comptypes_internal): Add structure to interface, change
return type to bool, and adapt calls.
(comp_target_types): Change return type to bool.
(comptypes, comptypes_check_enum_int,
comptypes_check_different_types): Adapt calls.
---
 gcc/c/c-typeck.cc | 282 --
 1 file changed, 121 insertions(+), 161 deletions(-)

diff --git a/gcc/c/c-typeck.cc b/gcc/c/c-typeck.cc
index e2bfd2caf85..e55e887da14 100644
--- a/gcc/c/c-typeck.cc
+++ b/gcc/c/c-typeck.cc
@@ -90,12 +90,14 @@ static bool require_constant_elements;
 static bool require_constexpr_value;
 
 static tree qualify_type (tree, tree);
-static int tagged_types_tu_compatible_p (const_tree, const_tree, bool *,
-bool *);
-static int comp_target_types (location_t, tree, tree);
-static int function_types_compatible_p (const_tree, const_tree, bool *,
-   bool *);
-static int type_lists_compatible_p (const_tree, const_tree, bool *, bool *);
+struct comptypes_data;
+static bool tagged_types_tu_compatible_p (const_tree, const_tree,
+ struct comptypes_data *);
+static bool comp_target_types (location_t, tree, tree);
+static bool function_types_compatible_p (const_tree, const_tree,
+struct comptypes_data *);
+static bool type_lists_compatible_p (const_tree, const_tree,
+struct comptypes_data *);
 static tree lookup_field (tree, tree);
 static int convert_arguments (location_t, vec, tree,
  vec *, vec *, tree,
@@ -125,7 +127,8 @@ static tree find_init_member (tree, struct obstack *);
 static void readonly_warning (tree, enum lvalue_use);
 static int