[gcc r15-4284] testsuite/i386: Add vector sat_sub testcases [PR112600]

2024-10-12 Thread Uros Bizjak via Gcc-cvs
https://gcc.gnu.org/g:a564261245ad3002d53916e017b85939ace816a6

commit r15-4284-ga564261245ad3002d53916e017b85939ace816a6
Author: Uros Bizjak 
Date:   Sat Oct 12 10:04:03 2024 +0200

testsuite/i386: Add vector sat_sub testcases [PR112600]

PR middle-end/112600

gcc/testsuite/ChangeLog:

* gcc.target/i386/pr112600-4a.c: New test.
* gcc.target/i386/pr112600-4b.c: New test.

Diff:
---
 gcc/testsuite/gcc.target/i386/pr112600-4a.c | 25 +
 gcc/testsuite/gcc.target/i386/pr112600-4b.c | 25 +
 2 files changed, 50 insertions(+)

diff --git a/gcc/testsuite/gcc.target/i386/pr112600-4a.c 
b/gcc/testsuite/gcc.target/i386/pr112600-4a.c
new file mode 100644
index ..89c5ea611e04
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr112600-4a.c
@@ -0,0 +1,25 @@
+/* PR middle-end/112600 */
+/* { dg-do compile } */
+/* { dg-options "-O2 -ftree-vectorize -msse2" } */
+
+#define MIN -128
+#define MAX 127
+
+typedef char T;
+typedef unsigned char UT;
+
+void foo (T *out, T *op_1, T *op_2, int n)
+{
+  int i;
+
+  for (i = 0; i < n; i++)
+{
+  T x = op_1[i];
+  T y = op_2[i];
+  T dif = (UT) x - (UT) y;
+
+  out[i] = (x ^ y) >= 0 ? dif : (dif ^ x) >= 0 ? dif : x < 0 ? MIN : MAX;
+}
+}
+
+/* { dg-final { scan-assembler "psubsb" } } */
diff --git a/gcc/testsuite/gcc.target/i386/pr112600-4b.c 
b/gcc/testsuite/gcc.target/i386/pr112600-4b.c
new file mode 100644
index ..08644846e0d1
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr112600-4b.c
@@ -0,0 +1,25 @@
+/* PR middle-end/112600 */
+/* { dg-do compile } */
+/* { dg-options "-O2 -ftree-vectorize -msse2" } */
+
+#define MIN -32768
+#define MAX 32767
+
+typedef short T;
+typedef unsigned short UT;
+
+void foo (T *out, T *op_1, T *op_2, int n)
+{
+  int i;
+
+  for (i = 0; i < n; i++)
+{
+  T x = op_1[i];
+  T y = op_2[i];
+  T dif = (UT) x - (UT) y;
+
+  out[i] = (x ^ y) >= 0 ? dif : (dif ^ x) >= 0 ? dif : x < 0 ? MIN : MAX;
+}
+}
+
+/* { dg-final { scan-assembler "psubsw" } } */


[gcc r14-10723] i386: Modernize AMD processor types

2024-09-29 Thread Uros Bizjak via Gcc-cvs
https://gcc.gnu.org/g:4697543b765dbfaa9dc12be0537861e586e48202

commit r14-10723-g4697543b765dbfaa9dc12be0537861e586e48202
Author: Uros Bizjak 
Date:   Fri Sep 27 15:58:17 2024 +0200

i386: Modernize AMD processor types

Use iterative PTA definitions for members of the same AMD processor family.

Also, fix a couple of related M_CPU_TYPE/M_CPU_SUBTYPE inconsistencies.

No functional changes intended.

gcc/ChangeLog:

* config/i386/i386.h: Add PTA_BDVER1, PTA_BDVER2, PTA_BDVER3,
PTA_BDVER4, PTA_BTVER1 and PTA_BTVER2.
* common/config/i386/i386-common.cc (processor_alias_table)
<"bdver1">: Use PTA_BDVER1.
<"bdver2">: Use PTA_BDVER2.
<"bdver3">: Use PTA_BDVER3.
<"bdver4">: Use PTA_BDVER4.
<"btver1">: Use PTA_BTVER1.  Use M_CPU_TYPE (AMD_BTVER1).
<"btver2">: Use PTA_BTVER2.
<"shanghai">: Use M_CPU_SUBTYPE (AMDFAM10H_SHANGHAI).
<"istanbul">: Use M_CPU_SUBTYPE (AMDFAM10H_ISTANBUL).

(cherry picked from commit a72108920805a024b6bbee5acdd32914382c47a1)

Diff:
---
 gcc/common/config/i386/i386-common.cc | 46 +--
 gcc/config/i386/i386.h| 32 +++-
 2 files changed, 37 insertions(+), 41 deletions(-)

diff --git a/gcc/common/config/i386/i386-common.cc 
b/gcc/common/config/i386/i386-common.cc
index d578918dfb79..e535a3d516db 100644
--- a/gcc/common/config/i386/i386-common.cc
+++ b/gcc/common/config/i386/i386-common.cc
@@ -2395,34 +2395,16 @@ const pta processor_alias_table[] =
   | PTA_SSE3 | PTA_SSE4A | PTA_CX16 | PTA_ABM | PTA_PRFCHW | PTA_FXSR,
 M_CPU_SUBTYPE (AMDFAM10H_BARCELONA), P_PROC_DYNAMIC},
   {"bdver1", PROCESSOR_BDVER1, CPU_BDVER1,
-PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
-  | PTA_SSE4A | PTA_CX16 | PTA_ABM | PTA_SSSE3 | PTA_SSE4_1
-  | PTA_SSE4_2 | PTA_AES | PTA_PCLMUL | PTA_AVX | PTA_FMA4
-  | PTA_XOP | PTA_LWP | PTA_PRFCHW | PTA_FXSR | PTA_XSAVE,
-M_CPU_TYPE (AMDFAM15H_BDVER1), P_PROC_XOP},
+PTA_BDVER1,
+M_CPU_SUBTYPE (AMDFAM15H_BDVER1), P_PROC_XOP},
   {"bdver2", PROCESSOR_BDVER2, CPU_BDVER2,
-PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
-  | PTA_SSE4A | PTA_CX16 | PTA_ABM | PTA_SSSE3 | PTA_SSE4_1
-  | PTA_SSE4_2 | PTA_AES | PTA_PCLMUL | PTA_AVX | PTA_FMA4
-  | PTA_XOP | PTA_LWP | PTA_BMI | PTA_TBM | PTA_F16C
-  | PTA_FMA | PTA_PRFCHW | PTA_FXSR | PTA_XSAVE,
-M_CPU_TYPE (AMDFAM15H_BDVER2), P_PROC_FMA},
+PTA_BDVER2,
+M_CPU_SUBTYPE (AMDFAM15H_BDVER2), P_PROC_FMA},
   {"bdver3", PROCESSOR_BDVER3, CPU_BDVER3,
-PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
-  | PTA_SSE4A | PTA_CX16 | PTA_ABM | PTA_SSSE3 | PTA_SSE4_1
-  | PTA_SSE4_2 | PTA_AES | PTA_PCLMUL | PTA_AVX | PTA_FMA4
-  | PTA_XOP | PTA_LWP | PTA_BMI | PTA_TBM | PTA_F16C
-  | PTA_FMA | PTA_PRFCHW | PTA_FXSR | PTA_XSAVE
-  | PTA_XSAVEOPT | PTA_FSGSBASE,
+PTA_BDVER3,
 M_CPU_SUBTYPE (AMDFAM15H_BDVER3), P_PROC_FMA},
   {"bdver4", PROCESSOR_BDVER4, CPU_BDVER4,
-PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
-  | PTA_SSE4A | PTA_CX16 | PTA_ABM | PTA_SSSE3 | PTA_SSE4_1
-  | PTA_SSE4_2 | PTA_AES | PTA_PCLMUL | PTA_AVX | PTA_AVX2
-  | PTA_FMA4 | PTA_XOP | PTA_LWP | PTA_BMI | PTA_BMI2
-  | PTA_TBM | PTA_F16C | PTA_FMA | PTA_PRFCHW | PTA_FXSR
-  | PTA_XSAVE | PTA_XSAVEOPT | PTA_FSGSBASE | PTA_RDRND
-  | PTA_MOVBE | PTA_MWAITX,
+PTA_BDVER4,
 M_CPU_SUBTYPE (AMDFAM15H_BDVER4), P_PROC_AVX2},
   {"znver1", PROCESSOR_ZNVER1, CPU_ZNVER1,
 PTA_ZNVER1,
@@ -2440,16 +2422,10 @@ const pta processor_alias_table[] =
 PTA_ZNVER5,
 M_CPU_SUBTYPE (AMDFAM1AH_ZNVER5), P_PROC_AVX512F},
   {"btver1", PROCESSOR_BTVER1, CPU_GENERIC,
-PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
-  | PTA_SSSE3 | PTA_SSE4A | PTA_ABM | PTA_CX16 | PTA_PRFCHW
-  | PTA_FXSR | PTA_XSAVE,
-   M_CPU_SUBTYPE (AMDFAM15H_BDVER1), P_PROC_SSE4_A},
+PTA_BTVER1,
+M_CPU_TYPE (AMD_BTVER1), P_PROC_SSE4_A},
   {"btver2", PROCESSOR_BTVER2, CPU_BTVER2,
-PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
-  | PTA_SSSE3 | PTA_SSE4A | PTA_ABM | PTA_CX16 | PTA_SSE4_1
-  | PTA_SSE4_2 | PTA_AES | PTA_PCLMUL | PTA_AVX
-  | PTA_BMI | PTA_F16C | PTA_MOVBE | PTA_PRFCHW
-  | PTA_FXSR | PTA_XSAVE | PTA_XSAVEOPT,
+PTA_BTVER2,
 M_CPU_TYPE (AMD_BTVER2), P_PROC_BMI},
 
   {"generic", PROCESSOR_GENERIC, CPU_GENERIC,
@@ -2468,9 +2444,9 @@ const pta processor_alias_table[] =
   {"amdfam19h", PROCESSOR_GENERIC, CPU_GENERIC, 0,
 M_CPU_TYPE (AMDFAM19H), P_NONE},
   {"shanghai", PROCESSOR_GENERIC, CPU_GENERIC, 0,
-M_CPU_TYPE (AMDFAM10H_SHANGHAI), P_NONE},
+M_CPU_SUBTYPE (AMDFAM10H_SHANGHAI), P_NONE},
   {"istanbul", PROCESSOR_GENERIC, CPU_GENERIC, 0,
-M_CPU_TYPE (AMDFAM10H_ISTANBUL), P_NONE},
+M_CPU_SUBTYPE (AMDFAM10H_ISTANBUL), P_NONE},
 };
 
 /* NB: process

[gcc r15-3927] i386: Modernize AMD processor types

2024-09-27 Thread Uros Bizjak via Gcc-cvs
https://gcc.gnu.org/g:a72108920805a024b6bbee5acdd32914382c47a1

commit r15-3927-ga72108920805a024b6bbee5acdd32914382c47a1
Author: Uros Bizjak 
Date:   Fri Sep 27 15:58:17 2024 +0200

i386: Modernize AMD processor types

Use iterative PTA definitions for members of the same AMD processor family.

Also, fix a couple of related M_CPU_TYPE/M_CPU_SUBTYPE inconsistencies.

No functional changes intended.

gcc/ChangeLog:

* config/i386/i386.h: Add PTA_BDVER1, PTA_BDVER2, PTA_BDVER3,
PTA_BDVER4, PTA_BTVER1 and PTA_BTVER2.
* common/config/i386/i386-common.cc (processor_alias_table)
<"bdver1">: Use PTA_BDVER1.
<"bdver2">: Use PTA_BDVER2.
<"bdver3">: Use PTA_BDVER3.
<"bdver4">: Use PTA_BDVER4.
<"btver1">: Use PTA_BTVER1.  Use M_CPU_TYPE (AMD_BTVER1).
<"btver2">: Use PTA_BTVER2.
<"shanghai">: Use M_CPU_SUBTYPE (AMDFAM10H_SHANGHAI).
<"istanbul">: Use M_CPU_SUBTYPE (AMDFAM10H_ISTANBUL).

Diff:
---
 gcc/common/config/i386/i386-common.cc | 46 +--
 gcc/config/i386/i386.h| 32 +++-
 2 files changed, 37 insertions(+), 41 deletions(-)

diff --git a/gcc/common/config/i386/i386-common.cc 
b/gcc/common/config/i386/i386-common.cc
index fb744319b05e..3f2fc599009a 100644
--- a/gcc/common/config/i386/i386-common.cc
+++ b/gcc/common/config/i386/i386-common.cc
@@ -2348,34 +2348,16 @@ const pta processor_alias_table[] =
   | PTA_SSE3 | PTA_SSE4A | PTA_CX16 | PTA_ABM | PTA_PRFCHW | PTA_FXSR,
 M_CPU_SUBTYPE (AMDFAM10H_BARCELONA), P_PROC_DYNAMIC},
   {"bdver1", PROCESSOR_BDVER1, CPU_BDVER1,
-PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
-  | PTA_SSE4A | PTA_CX16 | PTA_ABM | PTA_SSSE3 | PTA_SSE4_1
-  | PTA_SSE4_2 | PTA_AES | PTA_PCLMUL | PTA_AVX | PTA_FMA4
-  | PTA_XOP | PTA_LWP | PTA_PRFCHW | PTA_FXSR | PTA_XSAVE,
-M_CPU_TYPE (AMDFAM15H_BDVER1), P_PROC_XOP},
+PTA_BDVER1,
+M_CPU_SUBTYPE (AMDFAM15H_BDVER1), P_PROC_XOP},
   {"bdver2", PROCESSOR_BDVER2, CPU_BDVER2,
-PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
-  | PTA_SSE4A | PTA_CX16 | PTA_ABM | PTA_SSSE3 | PTA_SSE4_1
-  | PTA_SSE4_2 | PTA_AES | PTA_PCLMUL | PTA_AVX | PTA_FMA4
-  | PTA_XOP | PTA_LWP | PTA_BMI | PTA_TBM | PTA_F16C
-  | PTA_FMA | PTA_PRFCHW | PTA_FXSR | PTA_XSAVE,
-M_CPU_TYPE (AMDFAM15H_BDVER2), P_PROC_FMA},
+PTA_BDVER2,
+M_CPU_SUBTYPE (AMDFAM15H_BDVER2), P_PROC_FMA},
   {"bdver3", PROCESSOR_BDVER3, CPU_BDVER3,
-PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
-  | PTA_SSE4A | PTA_CX16 | PTA_ABM | PTA_SSSE3 | PTA_SSE4_1
-  | PTA_SSE4_2 | PTA_AES | PTA_PCLMUL | PTA_AVX | PTA_FMA4
-  | PTA_XOP | PTA_LWP | PTA_BMI | PTA_TBM | PTA_F16C
-  | PTA_FMA | PTA_PRFCHW | PTA_FXSR | PTA_XSAVE
-  | PTA_XSAVEOPT | PTA_FSGSBASE,
+PTA_BDVER3,
 M_CPU_SUBTYPE (AMDFAM15H_BDVER3), P_PROC_FMA},
   {"bdver4", PROCESSOR_BDVER4, CPU_BDVER4,
-PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
-  | PTA_SSE4A | PTA_CX16 | PTA_ABM | PTA_SSSE3 | PTA_SSE4_1
-  | PTA_SSE4_2 | PTA_AES | PTA_PCLMUL | PTA_AVX | PTA_AVX2
-  | PTA_FMA4 | PTA_XOP | PTA_LWP | PTA_BMI | PTA_BMI2
-  | PTA_TBM | PTA_F16C | PTA_FMA | PTA_PRFCHW | PTA_FXSR
-  | PTA_XSAVE | PTA_XSAVEOPT | PTA_FSGSBASE | PTA_RDRND
-  | PTA_MOVBE | PTA_MWAITX,
+PTA_BDVER4,
 M_CPU_SUBTYPE (AMDFAM15H_BDVER4), P_PROC_AVX2},
   {"znver1", PROCESSOR_ZNVER1, CPU_ZNVER1,
 PTA_ZNVER1,
@@ -2393,16 +2375,10 @@ const pta processor_alias_table[] =
 PTA_ZNVER5,
 M_CPU_SUBTYPE (AMDFAM1AH_ZNVER5), P_PROC_AVX512F},
   {"btver1", PROCESSOR_BTVER1, CPU_GENERIC,
-PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
-  | PTA_SSSE3 | PTA_SSE4A | PTA_ABM | PTA_CX16 | PTA_PRFCHW
-  | PTA_FXSR | PTA_XSAVE,
-   M_CPU_SUBTYPE (AMDFAM15H_BDVER1), P_PROC_SSE4_A},
+PTA_BTVER1,
+M_CPU_TYPE (AMD_BTVER1), P_PROC_SSE4_A},
   {"btver2", PROCESSOR_BTVER2, CPU_BTVER2,
-PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
-  | PTA_SSSE3 | PTA_SSE4A | PTA_ABM | PTA_CX16 | PTA_SSE4_1
-  | PTA_SSE4_2 | PTA_AES | PTA_PCLMUL | PTA_AVX
-  | PTA_BMI | PTA_F16C | PTA_MOVBE | PTA_PRFCHW
-  | PTA_FXSR | PTA_XSAVE | PTA_XSAVEOPT,
+PTA_BTVER2,
 M_CPU_TYPE (AMD_BTVER2), P_PROC_BMI},
 
   {"generic", PROCESSOR_GENERIC, CPU_GENERIC,
@@ -2421,9 +2397,9 @@ const pta processor_alias_table[] =
   {"amdfam19h", PROCESSOR_GENERIC, CPU_GENERIC, 0,
 M_CPU_TYPE (AMDFAM19H), P_NONE},
   {"shanghai", PROCESSOR_GENERIC, CPU_GENERIC, 0,
-M_CPU_TYPE (AMDFAM10H_SHANGHAI), P_NONE},
+M_CPU_SUBTYPE (AMDFAM10H_SHANGHAI), P_NONE},
   {"istanbul", PROCESSOR_GENERIC, CPU_GENERIC, 0,
-M_CPU_TYPE (AMDFAM10H_ISTANBUL), P_NONE},
+M_CPU_SUBTYPE (AMDFAM10H_ISTANBUL), P_NONE},
 };
 
 /* NB: processor_alias_table stops at the "generic" entry.  */
diff --git a/gcc/config/i386/i

[gcc r15-3612] i386: Implement SAT_ADD for signed vector integers

2024-09-12 Thread Uros Bizjak via Gcc-cvs
https://gcc.gnu.org/g:19d751601d012bbe31512d26f968e75873a408ab

commit r15-3612-g19d751601d012bbe31512d26f968e75873a408ab
Author: Uros Bizjak 
Date:   Thu Sep 12 20:34:28 2024 +0200

i386: Implement SAT_ADD for signed vector integers

Enable V4QI, V2QI and V2HI mode signed saturated arithmetic insn patterns
and add a couple of testcases to test for PADDSB and PADDSW instructions.

PR target/112600

gcc/ChangeLog:

* config/i386/mmx.md (<insn><mode>3): Rename
from *<insn><mode>3.

gcc/testsuite/ChangeLog:

* gcc.target/i386/pr112600-3a.c: New test.
* gcc.target/i386/pr112600-3b.c: New test.

Diff:
---
 gcc/config/i386/mmx.md  |  2 +-
 gcc/testsuite/gcc.target/i386/pr112600-3a.c | 25 +
 gcc/testsuite/gcc.target/i386/pr112600-3b.c | 25 +
 3 files changed, 51 insertions(+), 1 deletion(-)

diff --git a/gcc/config/i386/mmx.md b/gcc/config/i386/mmx.md
index 2f8d958dd5f0..e88a06c441fa 100644
--- a/gcc/config/i386/mmx.md
+++ b/gcc/config/i386/mmx.md
@@ -3218,7 +3218,7 @@
(set_attr "type" "mmxadd,sseadd,sseadd")
(set_attr "mode" "DI,TI,TI")])
 
-(define_insn "*<insn><mode>3"
+(define_insn "<insn><mode>3"
   [(set (match_operand:VI_16_32 0 "register_operand" "=x,Yw")
 (sat_plusminus:VI_16_32
  (match_operand:VI_16_32 1 "register_operand" "0,Yw")
diff --git a/gcc/testsuite/gcc.target/i386/pr112600-3a.c 
b/gcc/testsuite/gcc.target/i386/pr112600-3a.c
new file mode 100644
index ..0c38659643da
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr112600-3a.c
@@ -0,0 +1,25 @@
+/* PR middle-end/112600 */
+/* { dg-do compile } */
+/* { dg-options "-O2 -ftree-vectorize -msse2" } */
+
+#define MIN -128
+#define MAX 127
+
+typedef char T;
+typedef unsigned char UT;
+
+void foo (T *out, T *op_1, T *op_2, int n)
+{
+  int i;
+
+  for (i = 0; i < n; i++)
+{
+  T x = op_1[i];
+  T y = op_2[i];
+  T sum = (UT) x + (UT) y;
+
+  out[i] = (x ^ y) < 0 ? sum : (sum ^ x) >= 0 ? sum : x < 0 ? MIN : MAX;
+}
+}
+
+/* { dg-final { scan-assembler "paddsb" } } */
diff --git a/gcc/testsuite/gcc.target/i386/pr112600-3b.c 
b/gcc/testsuite/gcc.target/i386/pr112600-3b.c
new file mode 100644
index ..746c422ceb94
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr112600-3b.c
@@ -0,0 +1,25 @@
+/* PR middle-end/112600 */
+/* { dg-do compile } */
+/* { dg-options "-O2 -ftree-vectorize -msse2" } */
+
+#define MIN -32768
+#define MAX 32767
+
+typedef short T;
+typedef unsigned short UT;
+
+void foo (T *out, T *op_1, T *op_2, int n)
+{
+  int i;
+
+  for (i = 0; i < n; i++)
+{
+  T x = op_1[i];
+  T y = op_2[i];
+  T sum = (UT) x + (UT) y;
+
+  out[i] = (x ^ y) < 0 ? sum : (sum ^ x) >= 0 ? sum : x < 0 ? MIN : MAX;
+}
+}
+
+/* { dg-final { scan-assembler "paddsw" } } */


[gcc r15-3604] i386: Use offsettable address constraint for double-word memory operands, part 2

2024-09-12 Thread Uros Bizjak via Gcc-cvs
https://gcc.gnu.org/g:8c01976b8e34eaa2483ab37d1bd18ebc5c8ada95

commit r15-3604-g8c01976b8e34eaa2483ab37d1bd18ebc5c8ada95
Author: Uros Bizjak 
Date:   Thu Sep 12 16:28:10 2024 +0200

i386: Use offsettable address constraint for double-word memory operands, 
part 2

Double-word memory operands are accessed as their high and low part, so the
memory location has to be offsettable.  Use "o" constraint instead of "m"
for double-word memory operands.

gcc/ChangeLog:

* config/i386/i386.md (*insvti_lowpart_1): Use "o" constraint
instead of "m" for double-word mode memory operands.

Diff:
---
 gcc/config/i386/i386.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md
index 8d269feee837..c04415149490 100644
--- a/gcc/config/i386/i386.md
+++ b/gcc/config/i386/i386.md
@@ -3685,7 +3685,7 @@
   [(set (match_operand:TI 0 "nonimmediate_operand" "=ro,r,r,&r")
(any_or_plus:TI
  (and:TI
-   (match_operand:TI 1 "nonimmediate_operand" "r,m,r,m")
+   (match_operand:TI 1 "nonimmediate_operand" "r,o,r,o")
(match_operand:TI 3 "const_scalar_int_operand" "n,n,n,n"))
  (zero_extend:TI
(match_operand:DI 2 "nonimmediate_operand" "r,r,m,m"]


[gcc r15-3552] i386: Use offsettable address constraint for double-word memory operands

2024-09-09 Thread Uros Bizjak via Gcc-cvs
https://gcc.gnu.org/g:1da79de5275de82bc810d2f8d70fbc98dbce3da5

commit r15-3552-g1da79de5275de82bc810d2f8d70fbc98dbce3da5
Author: Uros Bizjak 
Date:   Mon Sep 9 22:33:52 2024 +0200

i386: Use offsettable address constraint for double-word memory operands

Double-word memory operands are accessed as their high and low part, so the
memory location has to be offsettable.  Use "o" constraint instead of "m"
for double-word memory operands.

gcc/ChangeLog:

* config/i386/i386.md (*insvdi_lowpart_1): Use "o" constraint
instead of "m" for double-word mode memory operands.
(*add<dwi>3_doubleword_zext): Ditto.
(*addv<dwi>4_doubleword_1): Use "jO" constraint instead of "jM"
for double-word mode memory operands.

Diff:
---
 gcc/config/i386/i386.md | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md
index 0fae3c1eb878..8d269feee837 100644
--- a/gcc/config/i386/i386.md
+++ b/gcc/config/i386/i386.md
@@ -3707,7 +3707,7 @@
   [(set (match_operand:DI 0 "nonimmediate_operand" "=ro,r,r,&r")
(any_or_plus:DI
  (and:DI
-   (match_operand:DI 1 "nonimmediate_operand" "r,m,r,m")
+   (match_operand:DI 1 "nonimmediate_operand" "r,o,r,o")
(match_operand:DI 3 "const_int_operand" "n,n,n,n"))
  (zero_extend:DI
(match_operand:SI 2 "nonimmediate_operand" "r,r,m,m"]
@@ -6461,7 +6461,7 @@
(plus:
  (zero_extend:
(match_operand:DWIH 2 "nonimmediate_operand" "rm,r,rm,r"))
- (match_operand: 1 "nonimmediate_operand" "0,0,r,m")))
+ (match_operand: 1 "nonimmediate_operand" "0,0,r,o")))
(clobber (reg:CC FLAGS_REG))]
   "ix86_binary_operator_ok (UNKNOWN, mode, operands, TARGET_APX_NDD)"
   "#"
@@ -7703,7 +7703,7 @@
(eq:CCO
  (plus:
(sign_extend:
- (match_operand: 1 "nonimmediate_operand" "%0,rjM"))
+ (match_operand: 1 "nonimmediate_operand" "%0,rjO"))
(match_operand: 3 "const_scalar_int_operand" "n,n"))
  (sign_extend:
(plus:


[gcc r15-2419] i386/testsuite: Add testcase for fixed PR [PR51492]

2024-07-30 Thread Uros Bizjak via Gcc-cvs
https://gcc.gnu.org/g:8b737ec289da83e9e2a9672be0336980616e8932

commit r15-2419-g8b737ec289da83e9e2a9672be0336980616e8932
Author: Uros Bizjak 
Date:   Tue Jul 30 20:02:36 2024 +0200

i386/testsuite: Add testcase for fixed PR [PR51492]

PR target/51492

gcc/testsuite/ChangeLog:

* gcc.target/i386/pr51492.c: New test.

Diff:
---
 gcc/testsuite/gcc.target/i386/pr51492.c | 19 +++
 1 file changed, 19 insertions(+)

diff --git a/gcc/testsuite/gcc.target/i386/pr51492.c 
b/gcc/testsuite/gcc.target/i386/pr51492.c
new file mode 100644
index ..0892e0c79a7b
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr51492.c
@@ -0,0 +1,19 @@
+/* PR target/51492 */
+/* { dg-do compile } */
+/* { dg-options "-O2 -ftree-vectorize -msse2" } */
+
+#define SIZE 65536
+#define WSIZE 64
+unsigned short head[SIZE] __attribute__((aligned(64)));
+
+void
+f(void)
+{
+  for (unsigned n = 0; n < SIZE; ++n) {
+unsigned short m = head[n];
+head[n] = (unsigned short)(m >= WSIZE ? m-WSIZE : 0);
+  }
+}
+
+/* { dg-final { scan-assembler "psubusw" } } */
+/* { dg-final { scan-assembler-not "paddw" } } */


[gcc r15-2147] libatomic: Handle AVX+CX16 ZHAOXIN like Intel for 16b atomic [PR104688]

2024-07-18 Thread Uros Bizjak via Gcc-cvs
https://gcc.gnu.org/g:9846b0916c1a9b9f3e9df4657670ef4419617134

commit r15-2147-g9846b0916c1a9b9f3e9df4657670ef4419617134
Author: mayshao 
Date:   Thu Jul 18 22:43:00 2024 +0200

libatomic: Handle AVX+CX16 ZHAOXIN like Intel for 16b atomic [PR104688]

PR target/104688

libatomic/ChangeLog:

* config/x86/init.c (__libat_feat1_init): Don't clear
bit_AVX on ZHAOXIN CPUs.

Diff:
---
 libatomic/config/x86/init.c | 10 +++---
 1 file changed, 7 insertions(+), 3 deletions(-)

diff --git a/libatomic/config/x86/init.c b/libatomic/config/x86/init.c
index 26168d468324..c6ce997a5af4 100644
--- a/libatomic/config/x86/init.c
+++ b/libatomic/config/x86/init.c
@@ -41,11 +41,15 @@ __libat_feat1_init (void)
{
  /* Intel SDM guarantees that 16-byte VMOVDQA on 16-byte aligned
 address is atomic, and AMD is going to do something similar soon.
-We don't have a guarantee from vendors of other CPUs with AVX,
-like Zhaoxin and VIA.  */
+Zhaoxin also guarantees this.  We don't have a guarantee
+from vendors of other CPUs with AVX, like VIA.  */
+ unsigned int family = (eax >> 8) & 0x0f;
  unsigned int ecx2;
  __cpuid (0, eax, ebx, ecx2, edx);
- if (ecx2 != signature_INTEL_ecx && ecx2 != signature_AMD_ecx)
+ if (ecx2 != signature_INTEL_ecx
+ && ecx2 != signature_AMD_ecx
+ && !(ecx2 == signature_CENTAUR_ecx && family > 6)
+ && ecx2 != signature_SHANGHAI_ecx)
FEAT1_REGISTER &= ~bit_AVX;
}
 #endif


[gcc r15-2142] libatomic: Improve cpuid usage in __libat_feat1_init

2024-07-18 Thread Uros Bizjak via Gcc-cvs
https://gcc.gnu.org/g:f7d01e080a54ea94586c8847857e5aef17906519

commit r15-2142-gf7d01e080a54ea94586c8847857e5aef17906519
Author: Uros Bizjak 
Date:   Thu Jul 18 16:58:09 2024 +0200

libatomic: Improve cpuid usage in __libat_feat1_init

Check the result of __get_cpuid and process FEAT1_REGISTER only when
__get_cpuid returns success.  Use __cpuid instead of nested __get_cpuid.

libatomic/ChangeLog:

* config/x86/init.c (__libat_feat1_init): Check the result of
__get_cpuid and process FEAT1_REGISTER only when __get_cpuid
returns success.  Use __cpuid instead of nested __get_cpuid.

Diff:
---
 libatomic/config/x86/init.c | 28 +++-
 1 file changed, 15 insertions(+), 13 deletions(-)

diff --git a/libatomic/config/x86/init.c b/libatomic/config/x86/init.c
index a75be3f175c3..26168d468324 100644
--- a/libatomic/config/x86/init.c
+++ b/libatomic/config/x86/init.c
@@ -33,21 +33,23 @@ __libat_feat1_init (void)
 {
   unsigned int eax, ebx, ecx, edx;
   FEAT1_REGISTER = 0;
-  __get_cpuid (1, &eax, &ebx, &ecx, &edx);
-#ifdef __x86_64__
-  if ((FEAT1_REGISTER & (bit_AVX | bit_CMPXCHG16B))
-  == (bit_AVX | bit_CMPXCHG16B))
+  if (__get_cpuid (1, &eax, &ebx, &ecx, &edx))
 {
-  /* Intel SDM guarantees that 16-byte VMOVDQA on 16-byte aligned address
-is atomic, and AMD is going to do something similar soon.
-We don't have a guarantee from vendors of other CPUs with AVX,
-like Zhaoxin and VIA.  */
-  unsigned int ecx2 = 0;
-  __get_cpuid (0, &eax, &ebx, &ecx2, &edx);
-  if (ecx2 != signature_INTEL_ecx && ecx2 != signature_AMD_ecx)
-   FEAT1_REGISTER &= ~bit_AVX;
-}
+#ifdef __x86_64__
+  if ((FEAT1_REGISTER & (bit_AVX | bit_CMPXCHG16B))
+ == (bit_AVX | bit_CMPXCHG16B))
+   {
+ /* Intel SDM guarantees that 16-byte VMOVDQA on 16-byte aligned
+address is atomic, and AMD is going to do something similar soon.
+We don't have a guarantee from vendors of other CPUs with AVX,
+like Zhaoxin and VIA.  */
+ unsigned int ecx2;
+ __cpuid (0, eax, ebx, ecx2, edx);
+ if (ecx2 != signature_INTEL_ecx && ecx2 != signature_AMD_ecx)
+   FEAT1_REGISTER &= ~bit_AVX;
+   }
 #endif
+}
   /* See the load in load_feat1.  */
   __atomic_store_n (&__libat_feat1, FEAT1_REGISTER, __ATOMIC_RELAXED);
   return FEAT1_REGISTER;


[gcc r12-10623] alpha: Fix duplicate !tlsgd!62 assemble error [PR115526]

2024-07-18 Thread Uros Bizjak via Gcc-cvs
https://gcc.gnu.org/g:c5a26fc24b0af61498fae65ccad69d51d63d2a8b

commit r12-10623-gc5a26fc24b0af61498fae65ccad69d51d63d2a8b
Author: Uros Bizjak 
Date:   Wed Jul 17 18:11:26 2024 +0200

alpha: Fix duplicate !tlsgd!62 assemble error [PR115526]

Add missing "cannot_copy" attribute to instructions that have to
stay in 1-1 correspondence with another insn.

PR target/115526

gcc/ChangeLog:

* config/alpha/alpha.md (movdi_er_high_g): Add cannot_copy 
attribute.
(movdi_er_tlsgd): Ditto.
(movdi_er_tlsldm): Ditto.
(call_value_osf_<tls>): Ditto.

gcc/testsuite/ChangeLog:

* gcc.target/alpha/pr115526.c: New test.

(cherry picked from commit 0841fd4c42ab053be951b7418233f0478282d020)

Diff:
---
 gcc/config/alpha/alpha.md | 10 +--
 gcc/testsuite/gcc.target/alpha/pr115526.c | 46 +++
 2 files changed, 53 insertions(+), 3 deletions(-)

diff --git a/gcc/config/alpha/alpha.md b/gcc/config/alpha/alpha.md
index 442953fe50e1..b6795e1d2638 100644
--- a/gcc/config/alpha/alpha.md
+++ b/gcc/config/alpha/alpha.md
@@ -3933,7 +3933,8 @@
   else
 return "ldq %0,%2(%1)\t\t!literal!%3";
 }
-  [(set_attr "type" "ldsym")])
+  [(set_attr "type" "ldsym")
+   (set_attr "cannot_copy" "true")])
 
 (define_split
   [(set (match_operand:DI 0 "register_operand")
@@ -3957,7 +3958,8 @@
 return "lda %0,%2(%1)\t\t!tlsgd";
   else
 return "lda %0,%2(%1)\t\t!tlsgd!%3";
-})
+}
+  [(set_attr "cannot_copy" "true")])
 
 (define_insn "movdi_er_tlsldm"
   [(set (match_operand:DI 0 "register_operand" "=r")
@@ -3970,7 +3972,8 @@
 return "lda %0,%&(%1)\t\t!tlsldm";
   else
 return "lda %0,%&(%1)\t\t!tlsldm!%2";
-})
+}
+  [(set_attr "cannot_copy" "true")])
 
 (define_insn "*movdi_er_gotdtp"
   [(set (match_operand:DI 0 "register_operand" "=r")
@@ -5939,6 +5942,7 @@
   "HAVE_AS_TLS"
   "ldq $27,%1($29)\t\t!literal!%2\;jsr $26,($27),%1\t\t!lituse_!%2\;ldah 
$29,0($26)\t\t!gpdisp!%*\;lda $29,0($29)\t\t!gpdisp!%*"
   [(set_attr "type" "jsr")
+   (set_attr "cannot_copy" "true")
(set_attr "length" "16")])
 
 ;; We must use peep2 instead of a split because we need accurate life
diff --git a/gcc/testsuite/gcc.target/alpha/pr115526.c 
b/gcc/testsuite/gcc.target/alpha/pr115526.c
new file mode 100644
index ..2f57903fec34
--- /dev/null
+++ b/gcc/testsuite/gcc.target/alpha/pr115526.c
@@ -0,0 +1,46 @@
+/* PR target/115526 */
+/* { dg-do assemble } */
+/* { dg-options "-O2 -Wno-attributes -fvisibility=hidden -fPIC -mcpu=ev4" } */
+
+struct _ts {
+  struct _dtoa_state *interp;
+};
+struct Bigint {
+  int k;
+} *_Py_dg_strtod_bs;
+struct _dtoa_state {
+  struct Bigint p5s;
+  struct Bigint *freelist[];
+};
+extern _Thread_local struct _ts _Py_tss_tstate;
+typedef struct Bigint Bigint;
+int pow5mult_k;
+long _Py_dg_strtod_ndigits;
+void PyMem_Free();
+void Bfree(Bigint *v) {
+  if (v)
+{
+  if (v->k)
+   PyMem_Free();
+  else {
+   struct _dtoa_state *interp = _Py_tss_tstate.interp;
+   interp->freelist[v->k] = v;
+  }
+}
+}
+static Bigint *pow5mult(Bigint *b) {
+  for (;;) {
+if (pow5mult_k & 1) {
+  Bfree(b);
+  if (b == 0)
+return 0;
+}
+if (!(pow5mult_k >>= 1))
+  break;
+  }
+  return 0;
+}
+void _Py_dg_strtod() {
+  if (_Py_dg_strtod_ndigits)
+pow5mult(_Py_dg_strtod_bs);
+}


[gcc r13-8920] alpha: Fix duplicate !tlsgd!62 assemble error [PR115526]

2024-07-17 Thread Uros Bizjak via Gcc-cvs
https://gcc.gnu.org/g:37bd7d5c4e17c97d2b7d50f630b1cf8b347a31f4

commit r13-8920-g37bd7d5c4e17c97d2b7d50f630b1cf8b347a31f4
Author: Uros Bizjak 
Date:   Wed Jul 17 18:11:26 2024 +0200

alpha: Fix duplicate !tlsgd!62 assemble error [PR115526]

Add missing "cannot_copy" attribute to instructions that have to
stay in 1-1 correspondence with another insn.

PR target/115526

gcc/ChangeLog:

* config/alpha/alpha.md (movdi_er_high_g): Add cannot_copy 
attribute.
(movdi_er_tlsgd): Ditto.
(movdi_er_tlsldm): Ditto.
(call_value_osf_<tls>): Ditto.

gcc/testsuite/ChangeLog:

* gcc.target/alpha/pr115526.c: New test.

(cherry picked from commit 0841fd4c42ab053be951b7418233f0478282d020)

Diff:
---
 gcc/config/alpha/alpha.md | 10 +--
 gcc/testsuite/gcc.target/alpha/pr115526.c | 46 +++
 2 files changed, 53 insertions(+), 3 deletions(-)

diff --git a/gcc/config/alpha/alpha.md b/gcc/config/alpha/alpha.md
index 17dfc4a58689..0752c5a001ca 100644
--- a/gcc/config/alpha/alpha.md
+++ b/gcc/config/alpha/alpha.md
@@ -3933,7 +3933,8 @@
   else
 return "ldq %0,%2(%1)\t\t!literal!%3";
 }
-  [(set_attr "type" "ldsym")])
+  [(set_attr "type" "ldsym")
+   (set_attr "cannot_copy" "true")])
 
 (define_split
   [(set (match_operand:DI 0 "register_operand")
@@ -3957,7 +3958,8 @@
 return "lda %0,%2(%1)\t\t!tlsgd";
   else
 return "lda %0,%2(%1)\t\t!tlsgd!%3";
-})
+}
+  [(set_attr "cannot_copy" "true")])
 
 (define_insn "movdi_er_tlsldm"
   [(set (match_operand:DI 0 "register_operand" "=r")
@@ -3970,7 +3972,8 @@
 return "lda %0,%&(%1)\t\t!tlsldm";
   else
 return "lda %0,%&(%1)\t\t!tlsldm!%2";
-})
+}
+  [(set_attr "cannot_copy" "true")])
 
 (define_insn "*movdi_er_gotdtp"
   [(set (match_operand:DI 0 "register_operand" "=r")
@@ -5939,6 +5942,7 @@
   "HAVE_AS_TLS"
   "ldq $27,%1($29)\t\t!literal!%2\;jsr $26,($27),%1\t\t!lituse_!%2\;ldah 
$29,0($26)\t\t!gpdisp!%*\;lda $29,0($29)\t\t!gpdisp!%*"
   [(set_attr "type" "jsr")
+   (set_attr "cannot_copy" "true")
(set_attr "length" "16")])
 
 ;; We must use peep2 instead of a split because we need accurate life
diff --git a/gcc/testsuite/gcc.target/alpha/pr115526.c 
b/gcc/testsuite/gcc.target/alpha/pr115526.c
new file mode 100644
index ..2f57903fec34
--- /dev/null
+++ b/gcc/testsuite/gcc.target/alpha/pr115526.c
@@ -0,0 +1,46 @@
+/* PR target/115526 */
+/* { dg-do assemble } */
+/* { dg-options "-O2 -Wno-attributes -fvisibility=hidden -fPIC -mcpu=ev4" } */
+
+struct _ts {
+  struct _dtoa_state *interp;
+};
+struct Bigint {
+  int k;
+} *_Py_dg_strtod_bs;
+struct _dtoa_state {
+  struct Bigint p5s;
+  struct Bigint *freelist[];
+};
+extern _Thread_local struct _ts _Py_tss_tstate;
+typedef struct Bigint Bigint;
+int pow5mult_k;
+long _Py_dg_strtod_ndigits;
+void PyMem_Free();
+void Bfree(Bigint *v) {
+  if (v)
+{
+  if (v->k)
+   PyMem_Free();
+  else {
+   struct _dtoa_state *interp = _Py_tss_tstate.interp;
+   interp->freelist[v->k] = v;
+  }
+}
+}
+static Bigint *pow5mult(Bigint *b) {
+  for (;;) {
+if (pow5mult_k & 1) {
+  Bfree(b);
+  if (b == 0)
+return 0;
+}
+if (!(pow5mult_k >>= 1))
+  break;
+  }
+  return 0;
+}
+void _Py_dg_strtod() {
+  if (_Py_dg_strtod_ndigits)
+pow5mult(_Py_dg_strtod_bs);
+}


[gcc r14-10448] alpha: Fix duplicate !tlsgd!62 assemble error [PR115526]

2024-07-17 Thread Uros Bizjak via Gcc-cvs
https://gcc.gnu.org/g:3a963d441a68797956a5f67dcb351b2dbd4ac1d0

commit r14-10448-g3a963d441a68797956a5f67dcb351b2dbd4ac1d0
Author: Uros Bizjak 
Date:   Wed Jul 17 18:11:26 2024 +0200

alpha: Fix duplicate !tlsgd!62 assemble error [PR115526]

Add missing "cannot_copy" attribute to instructions that have to
stay in 1-1 correspondence with another insn.

PR target/115526

gcc/ChangeLog:

* config/alpha/alpha.md (movdi_er_high_g): Add cannot_copy 
attribute.
(movdi_er_tlsgd): Ditto.
(movdi_er_tlsldm): Ditto.
(call_value_osf_<tls>): Ditto.

gcc/testsuite/ChangeLog:

* gcc.target/alpha/pr115526.c: New test.

(cherry picked from commit 0841fd4c42ab053be951b7418233f0478282d020)

Diff:
---
 gcc/config/alpha/alpha.md | 10 +--
 gcc/testsuite/gcc.target/alpha/pr115526.c | 46 +++
 2 files changed, 53 insertions(+), 3 deletions(-)

diff --git a/gcc/config/alpha/alpha.md b/gcc/config/alpha/alpha.md
index 1e2de5a4d15b..bd92392878e2 100644
--- a/gcc/config/alpha/alpha.md
+++ b/gcc/config/alpha/alpha.md
@@ -3902,7 +3902,8 @@
   else
 return "ldq %0,%2(%1)\t\t!literal!%3";
 }
-  [(set_attr "type" "ldsym")])
+  [(set_attr "type" "ldsym")
+   (set_attr "cannot_copy" "true")])
 
 (define_split
   [(set (match_operand:DI 0 "register_operand")
@@ -3926,7 +3927,8 @@
 return "lda %0,%2(%1)\t\t!tlsgd";
   else
 return "lda %0,%2(%1)\t\t!tlsgd!%3";
-})
+}
+  [(set_attr "cannot_copy" "true")])
 
 (define_insn "movdi_er_tlsldm"
   [(set (match_operand:DI 0 "register_operand" "=r")
@@ -3939,7 +3941,8 @@
 return "lda %0,%&(%1)\t\t!tlsldm";
   else
 return "lda %0,%&(%1)\t\t!tlsldm!%2";
-})
+}
+  [(set_attr "cannot_copy" "true")])
 
 (define_insn "*movdi_er_gotdtp"
   [(set (match_operand:DI 0 "register_operand" "=r")
@@ -5908,6 +5911,7 @@
   "HAVE_AS_TLS"
   "ldq $27,%1($29)\t\t!literal!%2\;jsr $26,($27),%1\t\t!lituse_!%2\;ldah 
$29,0($26)\t\t!gpdisp!%*\;lda $29,0($29)\t\t!gpdisp!%*"
   [(set_attr "type" "jsr")
+   (set_attr "cannot_copy" "true")
(set_attr "length" "16")])
 
 ;; We must use peep2 instead of a split because we need accurate life
diff --git a/gcc/testsuite/gcc.target/alpha/pr115526.c 
b/gcc/testsuite/gcc.target/alpha/pr115526.c
new file mode 100644
index ..2f57903fec34
--- /dev/null
+++ b/gcc/testsuite/gcc.target/alpha/pr115526.c
@@ -0,0 +1,46 @@
+/* PR target/115526 */
+/* { dg-do assemble } */
+/* { dg-options "-O2 -Wno-attributes -fvisibility=hidden -fPIC -mcpu=ev4" } */
+
+struct _ts {
+  struct _dtoa_state *interp;
+};
+struct Bigint {
+  int k;
+} *_Py_dg_strtod_bs;
+struct _dtoa_state {
+  struct Bigint p5s;
+  struct Bigint *freelist[];
+};
+extern _Thread_local struct _ts _Py_tss_tstate;
+typedef struct Bigint Bigint;
+int pow5mult_k;
+long _Py_dg_strtod_ndigits;
+void PyMem_Free();
+void Bfree(Bigint *v) {
+  if (v)
+{
+  if (v->k)
+   PyMem_Free();
+  else {
+   struct _dtoa_state *interp = _Py_tss_tstate.interp;
+   interp->freelist[v->k] = v;
+  }
+}
+}
+static Bigint *pow5mult(Bigint *b) {
+  for (;;) {
+if (pow5mult_k & 1) {
+  Bfree(b);
+  if (b == 0)
+return 0;
+}
+if (!(pow5mult_k >>= 1))
+  break;
+  }
+  return 0;
+}
+void _Py_dg_strtod() {
+  if (_Py_dg_strtod_ndigits)
+pow5mult(_Py_dg_strtod_bs);
+}


[gcc r15-2104] alpha: Fix duplicate !tlsgd!62 assemble error [PR115526]

2024-07-17 Thread Uros Bizjak via Gcc-cvs
https://gcc.gnu.org/g:0841fd4c42ab053be951b7418233f0478282d020

commit r15-2104-g0841fd4c42ab053be951b7418233f0478282d020
Author: Uros Bizjak 
Date:   Wed Jul 17 18:11:26 2024 +0200

alpha: Fix duplicate !tlsgd!62 assemble error [PR115526]

Add missing "cannot_copy" attribute to instructions that have to
stay in 1-1 correspondence with another insn.

PR target/115526

gcc/ChangeLog:

* config/alpha/alpha.md (movdi_er_high_g): Add cannot_copy 
attribute.
(movdi_er_tlsgd): Ditto.
(movdi_er_tlsldm): Ditto.
(call_value_osf_<tls>): Ditto.

gcc/testsuite/ChangeLog:

* gcc.target/alpha/pr115526.c: New test.

Diff:
---
 gcc/config/alpha/alpha.md | 10 +--
 gcc/testsuite/gcc.target/alpha/pr115526.c | 46 +++
 2 files changed, 53 insertions(+), 3 deletions(-)

diff --git a/gcc/config/alpha/alpha.md b/gcc/config/alpha/alpha.md
index 1e2de5a4d15b..bd92392878e2 100644
--- a/gcc/config/alpha/alpha.md
+++ b/gcc/config/alpha/alpha.md
@@ -3902,7 +3902,8 @@
   else
 return "ldq %0,%2(%1)\t\t!literal!%3";
 }
-  [(set_attr "type" "ldsym")])
+  [(set_attr "type" "ldsym")
+   (set_attr "cannot_copy" "true")])
 
 (define_split
   [(set (match_operand:DI 0 "register_operand")
@@ -3926,7 +3927,8 @@
 return "lda %0,%2(%1)\t\t!tlsgd";
   else
 return "lda %0,%2(%1)\t\t!tlsgd!%3";
-})
+}
+  [(set_attr "cannot_copy" "true")])
 
 (define_insn "movdi_er_tlsldm"
   [(set (match_operand:DI 0 "register_operand" "=r")
@@ -3939,7 +3941,8 @@
 return "lda %0,%&(%1)\t\t!tlsldm";
   else
 return "lda %0,%&(%1)\t\t!tlsldm!%2";
-})
+}
+  [(set_attr "cannot_copy" "true")])
 
 (define_insn "*movdi_er_gotdtp"
   [(set (match_operand:DI 0 "register_operand" "=r")
@@ -5908,6 +5911,7 @@
   "HAVE_AS_TLS"
   "ldq $27,%1($29)\t\t!literal!%2\;jsr $26,($27),%1\t\t!lituse_!%2\;ldah 
$29,0($26)\t\t!gpdisp!%*\;lda $29,0($29)\t\t!gpdisp!%*"
   [(set_attr "type" "jsr")
+   (set_attr "cannot_copy" "true")
(set_attr "length" "16")])
 
 ;; We must use peep2 instead of a split because we need accurate life
diff --git a/gcc/testsuite/gcc.target/alpha/pr115526.c 
b/gcc/testsuite/gcc.target/alpha/pr115526.c
new file mode 100644
index ..2f57903fec34
--- /dev/null
+++ b/gcc/testsuite/gcc.target/alpha/pr115526.c
@@ -0,0 +1,46 @@
+/* PR target/115526 */
+/* { dg-do assemble } */
+/* { dg-options "-O2 -Wno-attributes -fvisibility=hidden -fPIC -mcpu=ev4" } */
+
+struct _ts {
+  struct _dtoa_state *interp;
+};
+struct Bigint {
+  int k;
+} *_Py_dg_strtod_bs;
+struct _dtoa_state {
+  struct Bigint p5s;
+  struct Bigint *freelist[];
+};
+extern _Thread_local struct _ts _Py_tss_tstate;
+typedef struct Bigint Bigint;
+int pow5mult_k;
+long _Py_dg_strtod_ndigits;
+void PyMem_Free();
+void Bfree(Bigint *v) {
+  if (v)
+{
+  if (v->k)
+   PyMem_Free();
+  else {
+   struct _dtoa_state *interp = _Py_tss_tstate.interp;
+   interp->freelist[v->k] = v;
+  }
+}
+}
+static Bigint *pow5mult(Bigint *b) {
+  for (;;) {
+if (pow5mult_k & 1) {
+  Bfree(b);
+  if (b == 0)
+return 0;
+}
+if (!(pow5mult_k >>= 1))
+  break;
+  }
+  return 0;
+}
+void _Py_dg_strtod() {
+  if (_Py_dg_strtod_ndigits)
+pow5mult(_Py_dg_strtod_bs);
+}


[gcc r15-1954] i386: Swap compare operands in ustrunc patterns

2024-07-10 Thread Uros Bizjak via Gcc-cvs
https://gcc.gnu.org/g:aae535f3a870659d1f002f82bd585de0bcec7905

commit r15-1954-gaae535f3a870659d1f002f82bd585de0bcec7905
Author: Uros Bizjak 
Date:   Wed Jul 10 23:00:00 2024 +0200

i386: Swap compare operands in ustrunc patterns

A last minute change led to a wrong operand order in the compare insn.

gcc/ChangeLog:

* config/i386/i386.md (ustruncdi<mode>2): Swap compare operands.
(ustruncsi<mode>2): Ditto.
(ustrunchiqi2): Ditto.

Diff:
---
 gcc/config/i386/i386.md | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md
index e2f30695d70e..de9f4ba04962 100644
--- a/gcc/config/i386/i386.md
+++ b/gcc/config/i386/i386.md
@@ -9990,7 +9990,7 @@
   rtx sat = force_reg (DImode, GEN_INT (GET_MODE_MASK (mode)));
   rtx dst;
 
-  emit_insn (gen_cmpdi_1 (op1, sat));
+  emit_insn (gen_cmpdi_1 (sat, op1));
 
   if (TARGET_CMOVE)
 {
@@ -10026,7 +10026,7 @@
   rtx sat = force_reg (SImode, GEN_INT (GET_MODE_MASK (mode)));
   rtx dst;
 
-  emit_insn (gen_cmpsi_1 (op1, sat));
+  emit_insn (gen_cmpsi_1 (sat, op1));
 
   if (TARGET_CMOVE)
 {
@@ -10062,7 +10062,7 @@
   rtx sat = force_reg (HImode, GEN_INT (GET_MODE_MASK (QImode)));
   rtx dst;
 
-  emit_insn (gen_cmphi_1 (op1, sat));
+  emit_insn (gen_cmphi_1 (sat, op1));
 
   if (TARGET_CMOVE)
 {


[gcc r11-11568] middle-end: Fix stalled swapped condition code value [PR115836]

2024-07-10 Thread Uros Bizjak via Gcc-cvs
https://gcc.gnu.org/g:d67566cefe7325998cc2471a28e9d3a3016455a0

commit r11-11568-gd67566cefe7325998cc2471a28e9d3a3016455a0
Author: Uros Bizjak 
Date:   Wed Jul 10 09:27:27 2024 +0200

middle-end: Fix stalled swapped condition code value [PR115836]

emit_store_flag_1 calculates scode (the swapped condition code) at the
beginning of the function from the value of the code variable.  However,
the code variable may change before the scode usage site, resulting in
an invalid, stale scode value.

Move the calculation of scode to just before its only usage site to
avoid the stale scode value.

PR middle-end/115836

gcc/ChangeLog:

* expmed.c (emit_store_flag_1): Move calculation of
scode just before its only usage site.

(cherry picked from commit 44933fdeb338e00c972e42224b9a83d3f8f6a757)

Diff:
---
 gcc/expmed.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/gcc/expmed.c b/gcc/expmed.c
index 3143f38e0570..2c916eab43b6 100644
--- a/gcc/expmed.c
+++ b/gcc/expmed.c
@@ -5589,11 +5589,9 @@ emit_store_flag_1 (rtx target, enum rtx_code code, rtx 
op0, rtx op1,
   enum insn_code icode;
   machine_mode compare_mode;
   enum mode_class mclass;
-  enum rtx_code scode;
 
   if (unsignedp)
 code = unsigned_condition (code);
-  scode = swap_condition (code);
 
   /* If one operand is constant, make it the second one.  Only do this
  if the other operand is not constant as well.  */
@@ -5761,6 +5759,8 @@ emit_store_flag_1 (rtx target, enum rtx_code code, rtx 
op0, rtx op1,
 
  if (GET_MODE_CLASS (mode) == MODE_FLOAT)
{
+ enum rtx_code scode = swap_condition (code);
+
  tem = emit_cstore (target, icode, scode, mode, compare_mode,
 unsignedp, op1, op0, normalizep, target_mode);
  if (tem)


[gcc r12-10610] middle-end: Fix stalled swapped condition code value [PR115836]

2024-07-10 Thread Uros Bizjak via Gcc-cvs
https://gcc.gnu.org/g:10904e051f1b970cd8e030dff7dec8374c946b12

commit r12-10610-g10904e051f1b970cd8e030dff7dec8374c946b12
Author: Uros Bizjak 
Date:   Wed Jul 10 09:27:27 2024 +0200

middle-end: Fix stalled swapped condition code value [PR115836]

emit_store_flag_1 calculates scode (the swapped condition code) at the
beginning of the function from the value of the code variable.  However,
the code variable may change before the scode usage site, resulting in
an invalid, stale scode value.

Move the calculation of scode to just before its only usage site to
avoid the stale scode value.

PR middle-end/115836

gcc/ChangeLog:

* expmed.cc (emit_store_flag_1): Move calculation of
scode just before its only usage site.

(cherry picked from commit 44933fdeb338e00c972e42224b9a83d3f8f6a757)

Diff:
---
 gcc/expmed.cc | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/gcc/expmed.cc b/gcc/expmed.cc
index 1bb4da8d094e..39e53faec70e 100644
--- a/gcc/expmed.cc
+++ b/gcc/expmed.cc
@@ -5601,11 +5601,9 @@ emit_store_flag_1 (rtx target, enum rtx_code code, rtx 
op0, rtx op1,
   enum insn_code icode;
   machine_mode compare_mode;
   enum mode_class mclass;
-  enum rtx_code scode;
 
   if (unsignedp)
 code = unsigned_condition (code);
-  scode = swap_condition (code);
 
   /* If one operand is constant, make it the second one.  Only do this
  if the other operand is not constant as well.  */
@@ -5773,6 +5771,8 @@ emit_store_flag_1 (rtx target, enum rtx_code code, rtx 
op0, rtx op1,
 
  if (GET_MODE_CLASS (mode) == MODE_FLOAT)
{
+ enum rtx_code scode = swap_condition (code);
+
  tem = emit_cstore (target, icode, scode, mode, compare_mode,
 unsignedp, op1, op0, normalizep, target_mode);
  if (tem)


[gcc r13-8903] middle-end: Fix stalled swapped condition code value [PR115836]

2024-07-10 Thread Uros Bizjak via Gcc-cvs
https://gcc.gnu.org/g:cc47ad09e571016f498710fbd8a19f302c9004de

commit r13-8903-gcc47ad09e571016f498710fbd8a19f302c9004de
Author: Uros Bizjak 
Date:   Wed Jul 10 09:27:27 2024 +0200

middle-end: Fix stalled swapped condition code value [PR115836]

emit_store_flag_1 calculates scode (the swapped condition code) at the
beginning of the function from the value of the code variable.  However,
the code variable may change before the scode usage site, resulting in
an invalid, stale scode value.

Move the calculation of scode to just before its only usage site to
avoid the stale scode value.

PR middle-end/115836

gcc/ChangeLog:

* expmed.cc (emit_store_flag_1): Move calculation of
scode just before its only usage site.

(cherry picked from commit 44933fdeb338e00c972e42224b9a83d3f8f6a757)

Diff:
---
 gcc/expmed.cc | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/gcc/expmed.cc b/gcc/expmed.cc
index 1553ea8e31eb..e06cdd47e9e6 100644
--- a/gcc/expmed.cc
+++ b/gcc/expmed.cc
@@ -5607,11 +5607,9 @@ emit_store_flag_1 (rtx target, enum rtx_code code, rtx 
op0, rtx op1,
   enum insn_code icode;
   machine_mode compare_mode;
   enum mode_class mclass;
-  enum rtx_code scode;
 
   if (unsignedp)
 code = unsigned_condition (code);
-  scode = swap_condition (code);
 
   /* If one operand is constant, make it the second one.  Only do this
  if the other operand is not constant as well.  */
@@ -5726,6 +5724,8 @@ emit_store_flag_1 (rtx target, enum rtx_code code, rtx 
op0, rtx op1,
 
  if (GET_MODE_CLASS (mode) == MODE_FLOAT)
{
+ enum rtx_code scode = swap_condition (code);
+
  tem = emit_cstore (target, icode, scode, mode, compare_mode,
 unsignedp, op1, op0, normalizep, target_mode);
  if (tem)


[gcc r14-10404] middle-end: Fix stalled swapped condition code value [PR115836]

2024-07-10 Thread Uros Bizjak via Gcc-cvs
https://gcc.gnu.org/g:47a8b464d2dd9a586a9e15242c9825e39e1ecd4c

commit r14-10404-g47a8b464d2dd9a586a9e15242c9825e39e1ecd4c
Author: Uros Bizjak 
Date:   Wed Jul 10 09:27:27 2024 +0200

middle-end: Fix stalled swapped condition code value [PR115836]

emit_store_flag_1 calculates scode (the swapped condition code) at the
beginning of the function from the value of the code variable.  However,
the code variable may change before the scode usage site, resulting in
an invalid, stale scode value.

Move the calculation of scode to just before its only usage site to
avoid the stale scode value.

PR middle-end/115836

gcc/ChangeLog:

* expmed.cc (emit_store_flag_1): Move calculation of
scode just before its only usage site.

(cherry picked from commit 44933fdeb338e00c972e42224b9a83d3f8f6a757)

Diff:
---
 gcc/expmed.cc | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/gcc/expmed.cc b/gcc/expmed.cc
index 4ec035e4843b..19765311b954 100644
--- a/gcc/expmed.cc
+++ b/gcc/expmed.cc
@@ -5617,11 +5617,9 @@ emit_store_flag_1 (rtx target, enum rtx_code code, rtx 
op0, rtx op1,
   enum insn_code icode;
   machine_mode compare_mode;
   enum mode_class mclass;
-  enum rtx_code scode;
 
   if (unsignedp)
 code = unsigned_condition (code);
-  scode = swap_condition (code);
 
   /* If one operand is constant, make it the second one.  Only do this
  if the other operand is not constant as well.  */
@@ -5736,6 +5734,8 @@ emit_store_flag_1 (rtx target, enum rtx_code code, rtx 
op0, rtx op1,
 
  if (GET_MODE_CLASS (mode) == MODE_FLOAT)
{
+ enum rtx_code scode = swap_condition (code);
+
  tem = emit_cstore (target, icode, scode, mode, compare_mode,
 unsignedp, op1, op0, normalizep, target_mode);
  if (tem)


[gcc r15-1939] middle-end: Fix stalled swapped condition code value [PR115836]

2024-07-10 Thread Uros Bizjak via Gcc-cvs
https://gcc.gnu.org/g:44933fdeb338e00c972e42224b9a83d3f8f6a757

commit r15-1939-g44933fdeb338e00c972e42224b9a83d3f8f6a757
Author: Uros Bizjak 
Date:   Wed Jul 10 09:27:27 2024 +0200

middle-end: Fix stalled swapped condition code value [PR115836]

emit_store_flag_1 calculates scode (the swapped condition code) at the
beginning of the function from the value of the code variable.  However,
the code variable may change before the scode usage site, resulting in
an invalid, stale scode value.

Move the calculation of scode to just before its only usage site to
avoid the stale scode value.

PR middle-end/115836

gcc/ChangeLog:

* expmed.cc (emit_store_flag_1): Move calculation of
scode just before its only usage site.

Diff:
---
 gcc/expmed.cc | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/gcc/expmed.cc b/gcc/expmed.cc
index 8bbbc94a98cb..154964bd0687 100644
--- a/gcc/expmed.cc
+++ b/gcc/expmed.cc
@@ -5632,11 +5632,9 @@ emit_store_flag_1 (rtx target, enum rtx_code code, rtx 
op0, rtx op1,
   enum insn_code icode;
   machine_mode compare_mode;
   enum mode_class mclass;
-  enum rtx_code scode;
 
   if (unsignedp)
 code = unsigned_condition (code);
-  scode = swap_condition (code);
 
   /* If one operand is constant, make it the second one.  Only do this
  if the other operand is not constant as well.  */
@@ -5751,6 +5749,8 @@ emit_store_flag_1 (rtx target, enum rtx_code code, rtx 
op0, rtx op1,
 
  if (GET_MODE_CLASS (mode) == MODE_FLOAT)
{
+ enum rtx_code scode = swap_condition (code);
+
  tem = emit_cstore (target, icode, scode, mode, compare_mode,
 unsignedp, op1, op0, normalizep, target_mode);
  if (tem)


[gcc r15-1914] i386: Implement .SAT_TRUNC for unsigned integers

2024-07-09 Thread Uros Bizjak via Gcc-cvs
https://gcc.gnu.org/g:d17889dbffd5dcdb2df22d42586ac0363704e1f1

commit r15-1914-gd17889dbffd5dcdb2df22d42586ac0363704e1f1
Author: Uros Bizjak 
Date:   Tue Jul 9 17:34:25 2024 +0200

i386: Implement .SAT_TRUNC for unsigned integers

The following testcase:

unsigned short foo (unsigned int x)
{
  _Bool overflow = x > (unsigned int)(unsigned short)(-1);
  return ((unsigned short)x | (unsigned short)-overflow);
}

currently compiles (-O2) to:

foo:
        xorl    %eax, %eax
        cmpl    $65535, %edi
        seta    %al
        negl    %eax
        orl     %edi, %eax
        ret

We can expand through ustrunc{m}{n}2 optab to use carry flag from the
comparison and generate code using SBB:

foo:
        cmpl    $65535, %edi
        sbbl    %eax, %eax
        orl     %edi, %eax
        ret

or CMOV instruction:

foo:
        movl    $65535, %eax
        cmpl    %eax, %edi
        cmovnc  %edi, %eax
        ret

gcc/ChangeLog:

* config/i386/i386.md (@cmp<mode>_1): Use SWI mode iterator.
(ustruncdi<mode>2): New expander.
(ustruncsi<mode>2): Ditto.
(ustrunchiqi2): Ditto.

gcc/testsuite/ChangeLog:

* gcc.target/i386/sattrunc-1.c: New test.

Diff:
---
 gcc/config/i386/i386.md| 112 -
 gcc/testsuite/gcc.target/i386/sattrunc-1.c |  24 +++
 2 files changed, 134 insertions(+), 2 deletions(-)

diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md
index 214cb2e239ae..e2f30695d70e 100644
--- a/gcc/config/i386/i386.md
+++ b/gcc/config/i386/i386.md
@@ -1533,8 +1533,8 @@
 
 (define_expand "@cmp_1"
   [(set (reg:CC FLAGS_REG)
-   (compare:CC (match_operand:SWI48 0 "nonimmediate_operand")
-   (match_operand:SWI48 1 "")))])
+   (compare:CC (match_operand:SWI 0 "nonimmediate_operand")
+   (match_operand:SWI 1 "")))])
 
 (define_mode_iterator SWI1248_AVX512BWDQ_64
   [(QI "TARGET_AVX512DQ") HI
@@ -9981,6 +9981,114 @@
   DONE;
 })
 
+(define_expand "ustruncdi2"
+  [(set (match_operand:SWI124 0 "register_operand")
+   (us_truncate:DI (match_operand:DI 1 "nonimmediate_operand")))]
+  "TARGET_64BIT"
+{
+  rtx op1 = force_reg (DImode, operands[1]);
+  rtx sat = force_reg (DImode, GEN_INT (GET_MODE_MASK (mode)));
+  rtx dst;
+
+  emit_insn (gen_cmpdi_1 (op1, sat));
+
+  if (TARGET_CMOVE)
+{
+  rtx cmp = gen_rtx_GEU (VOIDmode, gen_rtx_REG (CCCmode, FLAGS_REG),
+const0_rtx);
+
+  dst = force_reg (mode, operands[0]);
+  emit_insn (gen_movsicc (gen_lowpart (SImode, dst), cmp,
+ gen_lowpart (SImode, op1),
+ gen_lowpart (SImode, sat)));
+}
+  else
+{
+  rtx msk = gen_reg_rtx (mode);
+
+  emit_insn (gen_x86_movcc_0_m1_neg (msk));
+  dst = expand_simple_binop (mode, IOR,
+gen_lowpart (mode, op1), msk,
+operands[0], 1, OPTAB_WIDEN);
+}
+
+  if (!rtx_equal_p (dst, operands[0]))
+emit_move_insn (operands[0], dst);
+  DONE;
+})
+
+(define_expand "ustruncsi2"
+  [(set (match_operand:SWI12 0 "register_operand")
+   (us_truncate:SI (match_operand:SI 1 "nonimmediate_operand")))]
+  ""
+{
+  rtx op1 = force_reg (SImode, operands[1]);
+  rtx sat = force_reg (SImode, GEN_INT (GET_MODE_MASK (mode)));
+  rtx dst;
+
+  emit_insn (gen_cmpsi_1 (op1, sat));
+
+  if (TARGET_CMOVE)
+{
+  rtx cmp = gen_rtx_GEU (VOIDmode, gen_rtx_REG (CCCmode, FLAGS_REG),
+const0_rtx);
+
+  dst = force_reg (mode, operands[0]);
+  emit_insn (gen_movsicc (gen_lowpart (SImode, dst), cmp,
+ gen_lowpart (SImode, op1),
+ gen_lowpart (SImode, sat)));
+}
+  else
+{
+  rtx msk = gen_reg_rtx (mode);
+
+  emit_insn (gen_x86_movcc_0_m1_neg (msk));
+  dst = expand_simple_binop (mode, IOR,
+gen_lowpart (mode, op1), msk,
+operands[0], 1, OPTAB_WIDEN);
+}
+
+  if (!rtx_equal_p (dst, operands[0]))
+emit_move_insn (operands[0], dst);
+  DONE;
+})
+
+(define_expand "ustrunchiqi2"
+  [(set (match_operand:QI 0 "register_operand")
+   (us_truncate:HI (match_operand:HI 1 "nonimmediate_operand")))]
+  ""
+{
+  rtx op1 = force_reg (HImode, operands[1]);
+  rtx sat = force_reg (HImode, GEN_INT (GET_MODE_MASK (QImode)));
+  rtx dst;
+
+  emit_insn (gen_cmphi_1 (op1, sat));
+
+  if (TARGET_CMOVE)
+{
+  rtx cmp = gen_rtx_GEU (VOIDmode, gen_rtx_REG (CCCmode, FLAGS_REG),
+const0_rtx);
+
+  dst = force_reg (QImode, operands[0]);
+  emit_insn (gen_movsicc (gen_lowpart (SImode, dst), cmp,
+ gen_lowpart (SImode, op1),
+ ge

[gcc r15-1899] i386: Promote {QI, HI}mode x86_movcc_0_m1_neg to SImode

2024-07-08 Thread Uros Bizjak via Gcc-cvs
https://gcc.gnu.org/g:2b3027bea3f218599d36379d3d593841df7a1559

commit r15-1899-g2b3027bea3f218599d36379d3d593841df7a1559
Author: Uros Bizjak 
Date:   Mon Jul 8 20:47:52 2024 +0200

i386: Promote {QI,HI}mode x86_movcc_0_m1_neg to SImode

Promote HImode x86_movcc_0_m1_neg insn to SImode to avoid
redundant prefixes. Also promote QImode insn when TARGET_PROMOTE_QImode
is set. This is similar to promotable_binary_operator splitter, where we
promote the result to SImode.

Also correct insn condition for splitters to SImode of NEG and NOT
instructions. The sizes of QImode and SImode instructions are always
the same, so there is no need for optimize_insn_for_size bypass.

gcc/ChangeLog:

* config/i386/i386.md (x86_movcc_0_m1_neg splitter to SImode):
New splitter.
(NEG and NOT splitter to SImode): Remove optimize_insn_for_size_p
predicate from insn condition.

Diff:
---
 gcc/config/i386/i386.md | 25 +++--
 1 file changed, 19 insertions(+), 6 deletions(-)

diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md
index b24c4fe58750..214cb2e239ae 100644
--- a/gcc/config/i386/i386.md
+++ b/gcc/config/i386/i386.md
@@ -26576,9 +26576,7 @@
(clobber (reg:CC FLAGS_REG))]
   "! TARGET_PARTIAL_REG_STALL && reload_completed
&& (GET_MODE (operands[0]) == HImode
-   || (GET_MODE (operands[0]) == QImode
-  && (TARGET_PROMOTE_QImode
-  || optimize_insn_for_size_p ("
+   || (GET_MODE (operands[0]) == QImode && TARGET_PROMOTE_QImode))"
   [(parallel [(set (match_dup 0)
   (neg:SI (match_dup 1)))
  (clobber (reg:CC FLAGS_REG))])]
@@ -26593,15 +26591,30 @@
(not (match_operand 1 "general_reg_operand")))]
   "! TARGET_PARTIAL_REG_STALL && reload_completed
&& (GET_MODE (operands[0]) == HImode
-   || (GET_MODE (operands[0]) == QImode
-  && (TARGET_PROMOTE_QImode
-  || optimize_insn_for_size_p ("
+   || (GET_MODE (operands[0]) == QImode && TARGET_PROMOTE_QImode))"
   [(set (match_dup 0)
(not:SI (match_dup 1)))]
 {
   operands[0] = gen_lowpart (SImode, operands[0]);
   operands[1] = gen_lowpart (SImode, operands[1]);
 })
+
+(define_split
+  [(set (match_operand 0 "general_reg_operand")
+   (neg (match_operator 1 "ix86_carry_flag_operator"
+ [(reg FLAGS_REG) (const_int 0)])))
+   (clobber (reg:CC FLAGS_REG))]
+  "! TARGET_PARTIAL_REG_STALL && reload_completed
+   && (GET_MODE (operands[0]) == HImode
+   || (GET_MODE (operands[0]) == QImode && TARGET_PROMOTE_QImode))"
+  [(parallel [(set (match_dup 0)
+  (neg:SI (match_dup 1)))
+ (clobber (reg:CC FLAGS_REG))])]
+{
+  operands[0] = gen_lowpart (SImode, operands[0]);
+  operands[1] = shallow_copy_rtx (operands[1]);
+  PUT_MODE (operands[1], SImode);
+})
 
 ;; RTL Peephole optimizations, run before sched2.  These primarily look to
 ;; transform a complex memory operation into two memory to register operations.


[gcc r15-1711] i386: Cleanup tmp variable usage in ix86_expand_move

2024-06-28 Thread Uros Bizjak via Gcc-cvs
https://gcc.gnu.org/g:7419b4fe48b48e44b27e2dadc9ff870f5e049077

commit r15-1711-g7419b4fe48b48e44b27e2dadc9ff870f5e049077
Author: Uros Bizjak 
Date:   Fri Jun 28 17:49:43 2024 +0200

i386: Cleanup tmp variable usage in ix86_expand_move

Remove extra assignment, extra temp variable and variable shadowing.

No functional changes intended.

gcc/ChangeLog:

* config/i386/i386-expand.cc (ix86_expand_move): Remove extra
assignment to tmp variable, reuse tmp variable instead of
declaring new temporary variable and remove tmp variable shadowing.

Diff:
---
 gcc/config/i386/i386-expand.cc | 22 ++
 1 file changed, 10 insertions(+), 12 deletions(-)

diff --git a/gcc/config/i386/i386-expand.cc b/gcc/config/i386/i386-expand.cc
index a4434c19272..a773b45bf03 100644
--- a/gcc/config/i386/i386-expand.cc
+++ b/gcc/config/i386/i386-expand.cc
@@ -414,9 +414,6 @@ ix86_expand_move (machine_mode mode, rtx operands[])
{
 #if TARGET_PECOFF
  tmp = legitimize_pe_coff_symbol (op1, addend != NULL_RTX);
-#else
- tmp = NULL_RTX;
-#endif
 
  if (tmp)
{
@@ -425,6 +422,7 @@ ix86_expand_move (machine_mode mode, rtx operands[])
break;
}
  else
+#endif
{
  op1 = operands[1];
  break;
@@ -482,12 +480,12 @@ ix86_expand_move (machine_mode mode, rtx operands[])
  /* dynamic-no-pic */
  if (MACHOPIC_INDIRECT)
{
- rtx temp = (op0 && REG_P (op0) && mode == Pmode)
-? op0 : gen_reg_rtx (Pmode);
- op1 = machopic_indirect_data_reference (op1, temp);
+ tmp = (op0 && REG_P (op0) && mode == Pmode)
+   ? op0 : gen_reg_rtx (Pmode);
+ op1 = machopic_indirect_data_reference (op1, tmp);
  if (MACHOPIC_PURE)
op1 = machopic_legitimize_pic_address (op1, mode,
-  temp == op1 ? 0 : temp);
+  tmp == op1 ? 0 : tmp);
}
  if (op0 != op1 && GET_CODE (op0) != MEM)
{
@@ -542,9 +540,9 @@ ix86_expand_move (machine_mode mode, rtx operands[])
  op1 = validize_mem (force_const_mem (mode, op1));
  if (!register_operand (op0, mode))
{
- rtx temp = gen_reg_rtx (mode);
- emit_insn (gen_rtx_SET (temp, op1));
- emit_move_insn (op0, temp);
+ tmp = gen_reg_rtx (mode);
+ emit_insn (gen_rtx_SET (tmp, op1));
+ emit_move_insn (op0, tmp);
  return;
}
}
@@ -565,7 +563,7 @@ ix86_expand_move (machine_mode mode, rtx operands[])
   if (SUBREG_BYTE (op0) == 0)
{
  wide_int mask = wi::mask (64, true, 128);
- rtx tmp = immed_wide_int_const (mask, TImode);
+ tmp = immed_wide_int_const (mask, TImode);
  op0 = SUBREG_REG (op0);
  tmp = gen_rtx_AND (TImode, copy_rtx (op0), tmp);
  if (mode == DFmode)
@@ -577,7 +575,7 @@ ix86_expand_move (machine_mode mode, rtx operands[])
   else if (SUBREG_BYTE (op0) == 8)
{
  wide_int mask = wi::mask (64, false, 128);
- rtx tmp = immed_wide_int_const (mask, TImode);
+ tmp = immed_wide_int_const (mask, TImode);
  op0 = SUBREG_REG (op0);
  tmp = gen_rtx_AND (TImode, copy_rtx (op0), tmp);
  if (mode == DFmode)


[gcc r15-1454] i386: Zhaoxin shijidadao enablement

2024-06-19 Thread Uros Bizjak via Gcc-cvs
https://gcc.gnu.org/g:6f6ea27d17e9bbc917b94ffea1c933755e736bdc

commit r15-1454-g6f6ea27d17e9bbc917b94ffea1c933755e736bdc
Author: mayshao 
Date:   Wed Jun 19 16:03:25 2024 +0200

i386: Zhaoxin shijidadao enablement

This patch enables -march/-mtune=shijidadao, costs and tunings are set
according to the characteristics of the processor.

gcc/ChangeLog:

* common/config/i386/cpuinfo.h (get_zhaoxin_cpu): Recognize 
shijidadao.
* common/config/i386/i386-common.cc: Add shijidadao.
* common/config/i386/i386-cpuinfo.h (enum processor_subtypes):
Add ZHAOXIN_FAM7H_SHIJIDADAO.
* config.gcc: Add shijidadao.
* config/i386/driver-i386.cc (host_detect_local_cpu):
Let -march=native recognize shijidadao processors.
* config/i386/i386-c.cc (ix86_target_macros_internal): Add 
shijidadao.
* config/i386/i386-options.cc (m_ZHAOXIN): Add m_SHIJIDADAO.
(m_SHIJIDADAO): New definition.
* config/i386/i386.h (enum processor_type): Add 
PROCESSOR_SHIJIDADAO.
* config/i386/x86-tune-costs.h (struct processor_costs):
Add shijidadao_cost.
* config/i386/x86-tune-sched.cc (ix86_issue_rate): Add shijidadao.
(ix86_adjust_cost): Ditto.
* config/i386/x86-tune.def (X86_TUNE_USE_GATHER_2PARTS): Add 
m_SHIJIDADAO.
(X86_TUNE_USE_GATHER_4PARTS): Ditto.
(X86_TUNE_USE_GATHER_8PARTS): Ditto.
(X86_TUNE_AVOID_128FMA_CHAINS): Ditto.
* doc/extend.texi: Add details about shijidadao.
* doc/invoke.texi: Ditto.

gcc/testsuite/ChangeLog:

* g++.target/i386/mv32.C: Handle new -march.
* gcc.target/i386/funcspec-56.inc: Ditto.

Diff:
---
 gcc/common/config/i386/cpuinfo.h  |   8 +-
 gcc/common/config/i386/i386-common.cc |   8 +-
 gcc/common/config/i386/i386-cpuinfo.h |   1 +
 gcc/config.gcc|  14 +++-
 gcc/config/i386/driver-i386.cc|  11 ++-
 gcc/config/i386/i386-c.cc |   7 ++
 gcc/config/i386/i386-options.cc   |   4 +-
 gcc/config/i386/i386.h|   1 +
 gcc/config/i386/x86-tune-costs.h  | 116 ++
 gcc/config/i386/x86-tune-sched.cc |   2 +
 gcc/config/i386/x86-tune.def  |   8 +-
 gcc/doc/extend.texi   |   3 +
 gcc/doc/invoke.texi   |   6 ++
 gcc/testsuite/g++.target/i386/mv32.C  |   6 ++
 gcc/testsuite/gcc.target/i386/funcspec-56.inc |   2 +
 15 files changed, 183 insertions(+), 14 deletions(-)

diff --git a/gcc/common/config/i386/cpuinfo.h b/gcc/common/config/i386/cpuinfo.h
index 4610bf6d6a45..936039725ab6 100644
--- a/gcc/common/config/i386/cpuinfo.h
+++ b/gcc/common/config/i386/cpuinfo.h
@@ -667,12 +667,18 @@ get_zhaoxin_cpu (struct __processor_model *cpu_model,
  reset_cpu_feature (cpu_model, cpu_features2, FEATURE_F16C);
  cpu_model->__cpu_subtype = ZHAOXIN_FAM7H_LUJIAZUI;
}
- else if (model >= 0x5b)
+ else if (model == 0x5b)
{
  cpu = "yongfeng";
  CHECK___builtin_cpu_is ("yongfeng");
  cpu_model->__cpu_subtype = ZHAOXIN_FAM7H_YONGFENG;
}
+ else if (model >= 0x6b)
+   {
+ cpu = "shijidadao";
+ CHECK___builtin_cpu_is ("shijidadao");
+ cpu_model->__cpu_subtype = ZHAOXIN_FAM7H_SHIJIDADAO;
+   }
   break;
 default:
   break;
diff --git a/gcc/common/config/i386/i386-common.cc 
b/gcc/common/config/i386/i386-common.cc
index 5d9c188c9c7d..e38b1b22ffb1 100644
--- a/gcc/common/config/i386/i386-common.cc
+++ b/gcc/common/config/i386/i386-common.cc
@@ -2066,6 +2066,7 @@ const char *const processor_names[] =
   "intel",
   "lujiazui",
   "yongfeng",
+  "shijidadao",
   "geode",
   "k6",
   "athlon",
@@ -2271,10 +2272,13 @@ const pta processor_alias_table[] =
   | PTA_SSSE3 | PTA_SSE4_1 | PTA_FXSR, 0, P_NONE},
   {"lujiazui", PROCESSOR_LUJIAZUI, CPU_LUJIAZUI,
PTA_LUJIAZUI,
-   M_CPU_SUBTYPE (ZHAOXIN_FAM7H_LUJIAZUI), P_NONE},
+   M_CPU_SUBTYPE (ZHAOXIN_FAM7H_LUJIAZUI), P_PROC_BMI},
   {"yongfeng", PROCESSOR_YONGFENG, CPU_YONGFENG,
PTA_YONGFENG,
-   M_CPU_SUBTYPE (ZHAOXIN_FAM7H_YONGFENG), P_NONE},
+   M_CPU_SUBTYPE (ZHAOXIN_FAM7H_YONGFENG), P_PROC_AVX2},
+  {"shijidadao", PROCESSOR_SHIJIDADAO, CPU_YONGFENG,
+   PTA_YONGFENG,
+   M_CPU_SUBTYPE (ZHAOXIN_FAM7H_SHIJIDADAO), P_PROC_AVX2},
   {"k8", PROCESSOR_K8, CPU_K8,
 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
   | PTA_SSE2 | PTA_NO_SAHF | PTA_FXSR, 0, P_NONE},
diff --git a/gcc/common/config/i386/i386-cpuinfo.h 
b/gcc/common/config/i386/i386-cpuinfo.h
index 3ec9e005a6ad..ccc6deb63853 100644
--- a/gcc/common/config/i386/i386-cpuinfo.h
+++ b/gcc/common/config/i386/i386-cpuinfo.h
@@ -104,6 +104,7

[gcc r15-1183] i386: Use CMOV in .SAT_{ADD|SUB} expansion for TARGET_CMOV [PR112600]

2024-06-11 Thread Uros Bizjak via Gcc-cvs
https://gcc.gnu.org/g:05b95238be648c9cf8af2516930af6a7b637a2b8

commit r15-1183-g05b95238be648c9cf8af2516930af6a7b637a2b8
Author: Uros Bizjak 
Date:   Tue Jun 11 16:00:31 2024 +0200

i386: Use CMOV in .SAT_{ADD|SUB} expansion for TARGET_CMOV [PR112600]

For TARGET_CMOVE targets, emit an insn sequence involving a conditional move.

.SAT_ADD:

        addl    %esi, %edi
        movl    $-1, %eax
        cmovnc  %edi, %eax
        ret

.SAT_SUB:

        subl    %esi, %edi
        movl    $0, %eax
        cmovnc  %edi, %eax
        ret

PR target/112600

gcc/ChangeLog:

* config/i386/i386.md (usadd<mode>3): Emit insn sequence
involving conditional move for TARGET_CMOVE targets.
(ussub<mode>3): Ditto.

gcc/testsuite/ChangeLog:

* gcc.target/i386/pr112600-a.c: Also scan for cmov.
* gcc.target/i386/pr112600-b.c: Ditto.

Diff:
---
 gcc/config/i386/i386.md| 62 +-
 gcc/testsuite/gcc.target/i386/pr112600-a.c |  2 +-
 gcc/testsuite/gcc.target/i386/pr112600-b.c |  2 +-
 3 files changed, 55 insertions(+), 11 deletions(-)

diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md
index d69bc8d6e482..a64f2ad4f5f0 100644
--- a/gcc/config/i386/i386.md
+++ b/gcc/config/i386/i386.md
@@ -9885,13 +9885,35 @@
   ""
 {
   rtx res = gen_reg_rtx (mode);
-  rtx msk = gen_reg_rtx (mode);
   rtx dst;
 
   emit_insn (gen_add3_cc_overflow_1 (res, operands[1], operands[2]));
-  emit_insn (gen_x86_movcc_0_m1_neg (msk));
-  dst = expand_simple_binop (mode, IOR, res, msk,
-operands[0], 1, OPTAB_WIDEN);
+
+  if (TARGET_CMOVE)
+{
+  rtx cmp = gen_rtx_GEU (VOIDmode, gen_rtx_REG (CCCmode, FLAGS_REG),
+const0_rtx);
+
+  if ( < GET_MODE_SIZE (SImode))
+   {
+ dst = force_reg (mode, operands[0]);
+ emit_insn (gen_movsicc (gen_lowpart (SImode, dst), cmp,
+ gen_lowpart (SImode, res), constm1_rtx));
+   }
+   else
+   {
+ dst = operands[0];
+ emit_insn (gen_movcc (dst, cmp, res, constm1_rtx));
+   }
+}
+  else
+{
+  rtx msk = gen_reg_rtx (mode);
+
+  emit_insn (gen_x86_movcc_0_m1_neg (msk));
+  dst = expand_simple_binop (mode, IOR, res, msk,
+operands[0], 1, OPTAB_WIDEN);
+}
 
   if (!rtx_equal_p (dst, operands[0]))
 emit_move_insn (operands[0], dst);
@@ -9905,14 +9927,36 @@
   ""
 {
   rtx res = gen_reg_rtx (mode);
-  rtx msk = gen_reg_rtx (mode);
   rtx dst;
 
   emit_insn (gen_sub_3 (res, operands[1], operands[2]));
-  emit_insn (gen_x86_movcc_0_m1_neg (msk));
-  msk = expand_simple_unop (mode, NOT, msk, NULL, 1);
-  dst = expand_simple_binop (mode, AND, res, msk,
-operands[0], 1, OPTAB_WIDEN);
+
+  if (TARGET_CMOVE)
+{
+  rtx cmp = gen_rtx_GEU (VOIDmode, gen_rtx_REG (CCCmode, FLAGS_REG),
+const0_rtx);
+
+  if ( < GET_MODE_SIZE (SImode))
+   {
+ dst = force_reg (mode, operands[0]);
+ emit_insn (gen_movsicc (gen_lowpart (SImode, dst), cmp,
+ gen_lowpart (SImode, res), const0_rtx));
+   }
+   else
+   {
+ dst = operands[0];
+ emit_insn (gen_movcc (dst, cmp, res, const0_rtx));
+   }
+}
+  else
+{
+  rtx msk = gen_reg_rtx (mode);
+
+  emit_insn (gen_x86_movcc_0_m1_neg (msk));
+  msk = expand_simple_unop (mode, NOT, msk, NULL, 1);
+  dst = expand_simple_binop (mode, AND, res, msk,
+operands[0], 1, OPTAB_WIDEN);
+}
 
   if (!rtx_equal_p (dst, operands[0]))
 emit_move_insn (operands[0], dst);
diff --git a/gcc/testsuite/gcc.target/i386/pr112600-a.c 
b/gcc/testsuite/gcc.target/i386/pr112600-a.c
index fa122bc7a3fd..2b0848604512 100644
--- a/gcc/testsuite/gcc.target/i386/pr112600-a.c
+++ b/gcc/testsuite/gcc.target/i386/pr112600-a.c
@@ -1,7 +1,7 @@
 /* PR target/112600 */
 /* { dg-do compile } */
 /* { dg-options "-O2" } */
-/* { dg-final { scan-assembler-times "sbb" 4 } } */
+/* { dg-final { scan-assembler-times "sbb|cmov" 4 } } */
 
 unsigned char
 add_sat_char (unsigned char x, unsigned char y)
diff --git a/gcc/testsuite/gcc.target/i386/pr112600-b.c 
b/gcc/testsuite/gcc.target/i386/pr112600-b.c
index ea14bb9738b7..ac4e26423b6f 100644
--- a/gcc/testsuite/gcc.target/i386/pr112600-b.c
+++ b/gcc/testsuite/gcc.target/i386/pr112600-b.c
@@ -1,7 +1,7 @@
 /* PR target/112600 */
 /* { dg-do compile } */
 /* { dg-options "-O2" } */
-/* { dg-final { scan-assembler-times "sbb" 4 } } */
+/* { dg-final { scan-assembler-times "sbb|cmov" 4 } } */
 
 unsigned char
 sub_sat_char (unsigned char x, unsigned char y)


[gcc r15-1122] i386: Implement .SAT_SUB for unsigned scalar integers [PR112600]

2024-06-09 Thread Uros Bizjak via Gcc-cvs
https://gcc.gnu.org/g:8bb6b2f4ae19c3aab7d7a5e5c8f5965f89d90e01

commit r15-1122-g8bb6b2f4ae19c3aab7d7a5e5c8f5965f89d90e01
Author: Uros Bizjak 
Date:   Sun Jun 9 12:09:13 2024 +0200

i386: Implement .SAT_SUB for unsigned scalar integers [PR112600]

The following testcase:

unsigned
sub_sat (unsigned x, unsigned y)
{
  unsigned res;
  res = x - y;
  res &= -(x >= y);
  return res;
}

currently compiles (-O2) to:

sub_sat:
        movl    %edi, %edx
        xorl    %eax, %eax
        subl    %esi, %edx
        cmpl    %esi, %edi
        setnb   %al
        negl    %eax
        andl    %edx, %eax
        ret

We can expand through ussub{m}3 optab to use carry flag from the subtraction
and generate code using SBB instruction implementing:

unsigned res = x - y;
res &= ~(-(x < y));

sub_sat:
        subl    %esi, %edi
        sbbl    %eax, %eax
        notl    %eax
        andl    %edi, %eax
        ret

PR target/112600

gcc/ChangeLog:

* config/i386/i386.md (ussub<mode>3): New expander.
(sub<mode>_3): Ditto.

gcc/testsuite/ChangeLog:

* gcc.target/i386/pr112600-b.c: New test.

Diff:
---
 gcc/config/i386/i386.md| 31 ++-
 gcc/testsuite/gcc.target/i386/pr112600-b.c | 40 ++
 2 files changed, 70 insertions(+), 1 deletion(-)

diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md
index bc2ef819df6..d69bc8d6e48 100644
--- a/gcc/config/i386/i386.md
+++ b/gcc/config/i386/i386.md
@@ -8436,6 +8436,14 @@
   "ix86_fixup_binary_operands_no_copy (MINUS, mode, operands,
   TARGET_APX_NDD);")
 
+(define_expand "sub_3"
+  [(parallel [(set (reg:CC FLAGS_REG)
+  (compare:CC
+(match_operand:SWI 1 "nonimmediate_operand")
+(match_operand:SWI 2 "")))
+ (set (match_operand:SWI 0 "register_operand")
+  (minus:SWI (match_dup 1) (match_dup 2)))])])
+
 (define_insn "*sub_3"
   [(set (reg FLAGS_REG)
(compare (match_operand:SWI 1 "nonimmediate_operand" "0,0,rm,r")
@@ -9883,7 +9891,28 @@
   emit_insn (gen_add3_cc_overflow_1 (res, operands[1], operands[2]));
   emit_insn (gen_x86_movcc_0_m1_neg (msk));
   dst = expand_simple_binop (mode, IOR, res, msk,
-operands[0], 1, OPTAB_DIRECT);
+operands[0], 1, OPTAB_WIDEN);
+
+  if (!rtx_equal_p (dst, operands[0]))
+emit_move_insn (operands[0], dst);
+  DONE;
+})
+
+(define_expand "ussub3"
+  [(set (match_operand:SWI 0 "register_operand")
+   (us_minus:SWI (match_operand:SWI 1 "register_operand")
+ (match_operand:SWI 2 "")))]
+  ""
+{
+  rtx res = gen_reg_rtx (mode);
+  rtx msk = gen_reg_rtx (mode);
+  rtx dst;
+
+  emit_insn (gen_sub_3 (res, operands[1], operands[2]));
+  emit_insn (gen_x86_movcc_0_m1_neg (msk));
+  msk = expand_simple_unop (mode, NOT, msk, NULL, 1);
+  dst = expand_simple_binop (mode, AND, res, msk,
+operands[0], 1, OPTAB_WIDEN);
 
   if (!rtx_equal_p (dst, operands[0]))
 emit_move_insn (operands[0], dst);
diff --git a/gcc/testsuite/gcc.target/i386/pr112600-b.c 
b/gcc/testsuite/gcc.target/i386/pr112600-b.c
new file mode 100644
index 000..ea14bb9738b
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr112600-b.c
@@ -0,0 +1,40 @@
+/* PR target/112600 */
+/* { dg-do compile } */
+/* { dg-options "-O2" } */
+/* { dg-final { scan-assembler-times "sbb" 4 } } */
+
+unsigned char
+sub_sat_char (unsigned char x, unsigned char y)
+{
+  unsigned char res;
+  res = x - y;
+  res &= -(x >= y);
+  return res;
+}
+
+unsigned short
+sub_sat_short (unsigned short x, unsigned short y)
+{
+  unsigned short res;
+  res = x - y;
+  res &= -(x >= y);
+  return res;
+}
+
+unsigned int
+sub_sat_int (unsigned int x, unsigned int y)
+{
+  unsigned int res;
+  res = x - y;
+  res &= -(x >= y);
+  return res;
+}
+
+unsigned long
+sub_sat_long (unsigned long x, unsigned long y)
+{
+  unsigned long res;
+  res = x - y;
+  res &= -(x >= y);
+  return res;
+}


[gcc r15-1113] i386: Implement .SAT_ADD for unsigned scalar integers [PR112600]

2024-06-08 Thread Uros Bizjak via Gcc-cvs
https://gcc.gnu.org/g:de05e44b2ad9638d04173393b1eae3c38b2c3864

commit r15-1113-gde05e44b2ad9638d04173393b1eae3c38b2c3864
Author: Uros Bizjak 
Date:   Sat Jun 8 12:17:11 2024 +0200

i386: Implement .SAT_ADD for unsigned scalar integers [PR112600]

The following testcase:

unsigned
add_sat(unsigned x, unsigned y)
{
unsigned z;
return __builtin_add_overflow(x, y, &z) ? -1u : z;
}

currently compiles (-O2) to:

add_sat:
        addl    %esi, %edi
        jc      .L3
        movl    %edi, %eax
        ret
.L3:
        orl     $-1, %eax
        ret

We can expand through usadd{m}3 optab to use carry flag from the addition
and generate branchless code using SBB instruction implementing:

unsigned res = x + y;
res |= -(res < x);

add_sat:
        addl    %esi, %edi
        sbbl    %eax, %eax
        orl     %edi, %eax
        ret

PR target/112600

gcc/ChangeLog:

* config/i386/i386.md (usadd<mode>3): New expander.
(x86_mov<mode>cc_0_m1_neg): Use SWI mode iterator.

gcc/testsuite/ChangeLog:

* gcc.target/i386/pr112600-a.c: New test.

Diff:
---
 gcc/config/i386/i386.md| 24 --
 gcc/testsuite/gcc.target/i386/pr112600-a.c | 32 ++
 2 files changed, 54 insertions(+), 2 deletions(-)

diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md
index ffcf63e1cba..bc2ef819df6 100644
--- a/gcc/config/i386/i386.md
+++ b/gcc/config/i386/i386.md
@@ -9870,6 +9870,26 @@
 operands[1] = force_reg (mode, operands[1]);
 })
 
+(define_expand "usadd3"
+  [(set (match_operand:SWI 0 "register_operand")
+   (us_plus:SWI (match_operand:SWI 1 "register_operand")
+(match_operand:SWI 2 "")))]
+  ""
+{
+  rtx res = gen_reg_rtx (mode);
+  rtx msk = gen_reg_rtx (mode);
+  rtx dst;
+
+  emit_insn (gen_add3_cc_overflow_1 (res, operands[1], operands[2]));
+  emit_insn (gen_x86_movcc_0_m1_neg (msk));
+  dst = expand_simple_binop (mode, IOR, res, msk,
+operands[0], 1, OPTAB_DIRECT);
+
+  if (!rtx_equal_p (dst, operands[0]))
+emit_move_insn (operands[0], dst);
+  DONE;
+})
+
 ;; The patterns that match these are at the end of this file.
 
 (define_expand "xf3"
@@ -24945,8 +24965,8 @@
 
 (define_expand "x86_movcc_0_m1_neg"
   [(parallel
-[(set (match_operand:SWI48 0 "register_operand")
- (neg:SWI48 (ltu:SWI48 (reg:CCC FLAGS_REG) (const_int 0
+[(set (match_operand:SWI 0 "register_operand")
+ (neg:SWI (ltu:SWI (reg:CCC FLAGS_REG) (const_int 0
  (clobber (reg:CC FLAGS_REG))])])
 
 (define_split
diff --git a/gcc/testsuite/gcc.target/i386/pr112600-a.c 
b/gcc/testsuite/gcc.target/i386/pr112600-a.c
new file mode 100644
index 000..fa122bc7a3f
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr112600-a.c
@@ -0,0 +1,32 @@
+/* PR target/112600 */
+/* { dg-do compile } */
+/* { dg-options "-O2" } */
+/* { dg-final { scan-assembler-times "sbb" 4 } } */
+
+unsigned char
+add_sat_char (unsigned char x, unsigned char y)
+{
+  unsigned char z;
+  return __builtin_add_overflow(x, y, &z) ? -1u : z;
+}
+
+unsigned short
+add_sat_short (unsigned short x, unsigned short y)
+{
+  unsigned short z;
+  return __builtin_add_overflow(x, y, &z) ? -1u : z;
+}
+
+unsigned int
+add_sat_int (unsigned int x, unsigned int y)
+{
+  unsigned int z;
+  return __builtin_add_overflow(x, y, &z) ? -1u : z;
+}
+
+unsigned long
+add_sat_long (unsigned long x, unsigned long y)
+{
+  unsigned long z;
+  return __builtin_add_overflow(x, y, &z) ? -1ul : z;
+}


[gcc r15-1077] testsuite/i386: Add vector sat_sub testcases [PR112600]

2024-06-06 Thread Uros Bizjak via Gcc-cvs
https://gcc.gnu.org/g:366d45c8d4911dc7874d2e64cf2583c0133b8dd5

commit r15-1077-g366d45c8d4911dc7874d2e64cf2583c0133b8dd5
Author: Uros Bizjak 
Date:   Thu Jun 6 19:18:41 2024 +0200

testsuite/i386: Add vector sat_sub testcases [PR112600]

PR middle-end/112600

gcc/testsuite/ChangeLog:

* gcc.target/i386/pr112600-2a.c: New test.
* gcc.target/i386/pr112600-2b.c: New test.

Diff:
---
 gcc/testsuite/gcc.target/i386/pr112600-2a.c | 15 +++
 gcc/testsuite/gcc.target/i386/pr112600-2b.c | 15 +++
 2 files changed, 30 insertions(+)

diff --git a/gcc/testsuite/gcc.target/i386/pr112600-2a.c 
b/gcc/testsuite/gcc.target/i386/pr112600-2a.c
new file mode 100644
index 000..4df38e5a720
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr112600-2a.c
@@ -0,0 +1,15 @@
+/* PR middle-end/112600 */
+/* { dg-do compile } */
+/* { dg-options "-O2 -ftree-vectorize -msse2" } */
+
+typedef unsigned char T;
+
+void foo (T *out, T *x, T *y, int n)
+{
+  int i;
+
+  for (i = 0; i < n; i++)
+out[i] = (x[i] - y[i]) & (-(T)(x[i] >= y[i]));
+}
+
+/* { dg-final { scan-assembler "psubusb" } } */
diff --git a/gcc/testsuite/gcc.target/i386/pr112600-2b.c 
b/gcc/testsuite/gcc.target/i386/pr112600-2b.c
new file mode 100644
index 000..0f6345de704
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr112600-2b.c
@@ -0,0 +1,15 @@
+/* PR middle-end/112600 */
+/* { dg-do compile } */
+/* { dg-options "-O2 -ftree-vectorize -msse2" } */
+
+typedef unsigned short T;
+
+void foo (T *out, T *x, T *y, int n)
+{
+  int i;
+
+  for (i = 0; i < n; i++)
+out[i] = (x[i] - y[i]) & (-(T)(x[i] >= y[i]));
+}
+
+/* { dg-final { scan-assembler "psubusw" } } */


[gcc r11-11463] alpha: Fix invalid RTX in divmodsi insn patterns [PR115297]

2024-06-03 Thread Uros Bizjak via Gcc-cvs
https://gcc.gnu.org/g:835b913aff1b1a813df3b9d2bbef170ae7d8856d

commit r11-11463-g835b913aff1b1a813df3b9d2bbef170ae7d8856d
Author: Uros Bizjak 
Date:   Fri May 31 15:52:03 2024 +0200

alpha: Fix invalid RTX in divmodsi insn patterns [PR115297]

any_divmod instructions are modelled with invalid RTX:

  [(set (match_operand:DI 0 "register_operand" "=c")
(sign_extend:DI (match_operator:SI 3 "divmod_operator"
[(match_operand:DI 1 "register_operand" "a")
 (match_operand:DI 2 "register_operand" "b")])))
   (clobber (reg:DI 23))
   (clobber (reg:DI 28))]

where SImode divmod_operator (div,mod,udiv,umod) has DImode operands.

Wrap input operand with truncate:SI to make machine modes consistent.

PR target/115297

gcc/ChangeLog:

* config/alpha/alpha.md (<any_divmod:code>si3): Wrap DImode
operands 3 and 4 with truncate:SI RTX.
(*divmodsi_internal_er): Ditto for operands 1 and 2.
(*divmodsi_internal_er_1): Ditto.
(*divmodsi_internal): Ditto.
* config/alpha/constraints.md ("b"): Correct register
number in the description.

gcc/testsuite/ChangeLog:

* gcc.target/alpha/pr115297.c: New test.

(cherry picked from commit 0ac802064c2a018cf166c37841697e867de65a95)

Diff:
---
 gcc/config/alpha/alpha.md | 21 -
 gcc/config/alpha/constraints.md   |  2 +-
 gcc/testsuite/gcc.target/alpha/pr115297.c | 13 +
 3 files changed, 26 insertions(+), 10 deletions(-)

diff --git a/gcc/config/alpha/alpha.md b/gcc/config/alpha/alpha.md
index 98d09d43721..6ee8eb81df8 100644
--- a/gcc/config/alpha/alpha.md
+++ b/gcc/config/alpha/alpha.md
@@ -756,7 +756,8 @@
(sign_extend:DI (match_operand:SI 2 "nonimmediate_operand")))
(parallel [(set (match_dup 5)
   (sign_extend:DI
-   (any_divmod:SI (match_dup 3) (match_dup 4
+   (any_divmod:SI (truncate:SI (match_dup 3))
+  (truncate:SI (match_dup 4)
  (clobber (reg:DI 23))
  (clobber (reg:DI 28))])
(set (match_operand:SI 0 "nonimmediate_operand")
@@ -782,9 +783,10 @@
 
 (define_insn_and_split "*divmodsi_internal_er"
   [(set (match_operand:DI 0 "register_operand" "=c")
-   (sign_extend:DI (match_operator:SI 3 "divmod_operator"
-   [(match_operand:DI 1 "register_operand" "a")
-(match_operand:DI 2 "register_operand" "b")])))
+   (sign_extend:DI
+(match_operator:SI 3 "divmod_operator"
+ [(truncate:SI (match_operand:DI 1 "register_operand" "a"))
+  (truncate:SI (match_operand:DI 2 "register_operand" "b"))])))
(clobber (reg:DI 23))
(clobber (reg:DI 28))]
   "TARGET_EXPLICIT_RELOCS && TARGET_ABI_OSF"
@@ -826,8 +828,8 @@
 (define_insn "*divmodsi_internal_er_1"
   [(set (match_operand:DI 0 "register_operand" "=c")
(sign_extend:DI (match_operator:SI 3 "divmod_operator"
-[(match_operand:DI 1 "register_operand" "a")
- (match_operand:DI 2 "register_operand" "b")])))
+[(truncate:SI (match_operand:DI 1 "register_operand" "a"))
+ (truncate:SI (match_operand:DI 2 "register_operand" "b"))])))
(use (match_operand:DI 4 "register_operand" "c"))
(use (match_operand 5 "const_int_operand"))
(clobber (reg:DI 23))
@@ -839,9 +841,10 @@
 
 (define_insn "*divmodsi_internal"
   [(set (match_operand:DI 0 "register_operand" "=c")
-   (sign_extend:DI (match_operator:SI 3 "divmod_operator"
-   [(match_operand:DI 1 "register_operand" "a")
-(match_operand:DI 2 "register_operand" "b")])))
+   (sign_extend:DI
+(match_operator:SI 3 "divmod_operator"
+ [(truncate:SI (match_operand:DI 1 "register_operand" "a"))
+  (truncate:SI (match_operand:DI 2 "register_operand" "b"))])))
(clobber (reg:DI 23))
(clobber (reg:DI 28))]
   "TARGET_ABI_OSF"
diff --git a/gcc/config/alpha/constraints.md b/gcc/config/alpha/constraints.md
index e75a1489b4b..4b245233644 100644
--- a/gcc/config/alpha/constraints.md
+++ b/gcc/config/alpha/constraints.md
@@ -27,7 +27,7 @@
  "General register 24, input to division routine")
 
 (define_register_constraint "b" "R25_REG"
- "General register 24, input to division routine")
+ "General register 25, input to division routine")
 
 (define_register_constraint "c" "R27_REG"
  "General register 27, function call address")
diff --git a/gcc/testsuite/gcc.target/alpha/pr115297.c 
b/gcc/testsuite/gcc.target/alpha/pr115297.c
new file mode 100644
index 000..4d5890ec8d9
--- /dev/null
+++ b/gcc/testsuite/gcc.target/alpha/pr115297.c
@@ -0,0 +1,13 @@
+/* PR target/115297 */
+/* { dg-do compile } */
+/* { dg-options "-O1" } */
+
+enum { BPF_F_USER_BUILD_ID } __bpf_get_stack_s

[gcc r12-10486] alpha: Fix invalid RTX in divmodsi insn patterns [PR115297]

2024-06-03 Thread Uros Bizjak via Gcc-cvs
https://gcc.gnu.org/g:c6c2a6cebabc5f78cef3d81cedb4b3b578478b9f

commit r12-10486-gc6c2a6cebabc5f78cef3d81cedb4b3b578478b9f
Author: Uros Bizjak 
Date:   Fri May 31 15:52:03 2024 +0200

alpha: Fix invalid RTX in divmodsi insn patterns [PR115297]

any_divmod instructions are modelled with invalid RTX:

  [(set (match_operand:DI 0 "register_operand" "=c")
(sign_extend:DI (match_operator:SI 3 "divmod_operator"
[(match_operand:DI 1 "register_operand" "a")
 (match_operand:DI 2 "register_operand" "b")])))
   (clobber (reg:DI 23))
   (clobber (reg:DI 28))]

where SImode divmod_operator (div,mod,udiv,umod) has DImode operands.

Wrap input operand with truncate:SI to make machine modes consistent.

PR target/115297

gcc/ChangeLog:

* config/alpha/alpha.md (<any_divmod:code>si3): Wrap DImode
operands 3 and 4 with truncate:SI RTX.
(*divmodsi_internal_er): Ditto for operands 1 and 2.
(*divmodsi_internal_er_1): Ditto.
(*divmodsi_internal): Ditto.
* config/alpha/constraints.md ("b"): Correct register
number in the description.

gcc/testsuite/ChangeLog:

* gcc.target/alpha/pr115297.c: New test.

(cherry picked from commit 0ac802064c2a018cf166c37841697e867de65a95)

Diff:
---
 gcc/config/alpha/alpha.md | 21 -
 gcc/config/alpha/constraints.md   |  2 +-
 gcc/testsuite/gcc.target/alpha/pr115297.c | 13 +
 3 files changed, 26 insertions(+), 10 deletions(-)

diff --git a/gcc/config/alpha/alpha.md b/gcc/config/alpha/alpha.md
index 87514330c22..442953fe50e 100644
--- a/gcc/config/alpha/alpha.md
+++ b/gcc/config/alpha/alpha.md
@@ -756,7 +756,8 @@
(sign_extend:DI (match_operand:SI 2 "nonimmediate_operand")))
(parallel [(set (match_dup 5)
   (sign_extend:DI
-   (any_divmod:SI (match_dup 3) (match_dup 4
+   (any_divmod:SI (truncate:SI (match_dup 3))
+  (truncate:SI (match_dup 4)
  (clobber (reg:DI 23))
  (clobber (reg:DI 28))])
(set (match_operand:SI 0 "nonimmediate_operand")
@@ -782,9 +783,10 @@
 
 (define_insn_and_split "*divmodsi_internal_er"
   [(set (match_operand:DI 0 "register_operand" "=c")
-   (sign_extend:DI (match_operator:SI 3 "divmod_operator"
-   [(match_operand:DI 1 "register_operand" "a")
-(match_operand:DI 2 "register_operand" "b")])))
+   (sign_extend:DI
+(match_operator:SI 3 "divmod_operator"
+ [(truncate:SI (match_operand:DI 1 "register_operand" "a"))
+  (truncate:SI (match_operand:DI 2 "register_operand" "b"))])))
(clobber (reg:DI 23))
(clobber (reg:DI 28))]
   "TARGET_EXPLICIT_RELOCS && TARGET_ABI_OSF"
@@ -826,8 +828,8 @@
 (define_insn "*divmodsi_internal_er_1"
   [(set (match_operand:DI 0 "register_operand" "=c")
(sign_extend:DI (match_operator:SI 3 "divmod_operator"
-[(match_operand:DI 1 "register_operand" "a")
- (match_operand:DI 2 "register_operand" "b")])))
+[(truncate:SI (match_operand:DI 1 "register_operand" "a"))
+ (truncate:SI (match_operand:DI 2 "register_operand" "b"))])))
(use (match_operand:DI 4 "register_operand" "c"))
(use (match_operand 5 "const_int_operand"))
(clobber (reg:DI 23))
@@ -839,9 +841,10 @@
 
 (define_insn "*divmodsi_internal"
   [(set (match_operand:DI 0 "register_operand" "=c")
-   (sign_extend:DI (match_operator:SI 3 "divmod_operator"
-   [(match_operand:DI 1 "register_operand" "a")
-(match_operand:DI 2 "register_operand" "b")])))
+   (sign_extend:DI
+(match_operator:SI 3 "divmod_operator"
+ [(truncate:SI (match_operand:DI 1 "register_operand" "a"))
+  (truncate:SI (match_operand:DI 2 "register_operand" "b"))])))
(clobber (reg:DI 23))
(clobber (reg:DI 28))]
   "TARGET_ABI_OSF"
diff --git a/gcc/config/alpha/constraints.md b/gcc/config/alpha/constraints.md
index a41b6471b9c..fd93525e36c 100644
--- a/gcc/config/alpha/constraints.md
+++ b/gcc/config/alpha/constraints.md
@@ -27,7 +27,7 @@
  "General register 24, input to division routine")
 
 (define_register_constraint "b" "R25_REG"
- "General register 24, input to division routine")
+ "General register 25, input to division routine")
 
 (define_register_constraint "c" "R27_REG"
  "General register 27, function call address")
diff --git a/gcc/testsuite/gcc.target/alpha/pr115297.c 
b/gcc/testsuite/gcc.target/alpha/pr115297.c
new file mode 100644
index 000..4d5890ec8d9
--- /dev/null
+++ b/gcc/testsuite/gcc.target/alpha/pr115297.c
@@ -0,0 +1,13 @@
+/* PR target/115297 */
+/* { dg-do compile } */
+/* { dg-options "-O1" } */
+
+enum { BPF_F_USER_BUILD_ID } __bpf_get_stack_s

[gcc r13-8820] alpha: Fix invalid RTX in divmodsi insn patterns [PR115297]

2024-06-03 Thread Uros Bizjak via Gcc-cvs
https://gcc.gnu.org/g:ed06ca80bae174f1179222ff8e6b93464006e86a

commit r13-8820-ged06ca80bae174f1179222ff8e6b93464006e86a
Author: Uros Bizjak 
Date:   Fri May 31 15:52:03 2024 +0200

alpha: Fix invalid RTX in divmodsi insn patterns [PR115297]

any_divmod instructions are modelled with invalid RTX:

  [(set (match_operand:DI 0 "register_operand" "=c")
(sign_extend:DI (match_operator:SI 3 "divmod_operator"
[(match_operand:DI 1 "register_operand" "a")
 (match_operand:DI 2 "register_operand" "b")])))
   (clobber (reg:DI 23))
   (clobber (reg:DI 28))]

where SImode divmod_operator (div,mod,udiv,umod) has DImode operands.

Wrap input operand with truncate:SI to make machine modes consistent.

PR target/115297

gcc/ChangeLog:

* config/alpha/alpha.md (<any_divmod:code>si3): Wrap DImode
operands 3 and 4 with truncate:SI RTX.
(*divmodsi_internal_er): Ditto for operands 1 and 2.
(*divmodsi_internal_er_1): Ditto.
(*divmodsi_internal): Ditto.
* config/alpha/constraints.md ("b"): Correct register
number in the description.

gcc/testsuite/ChangeLog:

* gcc.target/alpha/pr115297.c: New test.

(cherry picked from commit 0ac802064c2a018cf166c37841697e867de65a95)

Diff:
---
 gcc/config/alpha/alpha.md | 21 -
 gcc/config/alpha/constraints.md   |  2 +-
 gcc/testsuite/gcc.target/alpha/pr115297.c | 13 +
 3 files changed, 26 insertions(+), 10 deletions(-)

diff --git a/gcc/config/alpha/alpha.md b/gcc/config/alpha/alpha.md
index d91742496d0..17dfc4a5868 100644
--- a/gcc/config/alpha/alpha.md
+++ b/gcc/config/alpha/alpha.md
@@ -756,7 +756,8 @@
(sign_extend:DI (match_operand:SI 2 "nonimmediate_operand")))
(parallel [(set (match_dup 5)
   (sign_extend:DI
-   (any_divmod:SI (match_dup 3) (match_dup 4
+   (any_divmod:SI (truncate:SI (match_dup 3))
+  (truncate:SI (match_dup 4)
  (clobber (reg:DI 23))
  (clobber (reg:DI 28))])
(set (match_operand:SI 0 "nonimmediate_operand")
@@ -782,9 +783,10 @@
 
 (define_insn_and_split "*divmodsi_internal_er"
   [(set (match_operand:DI 0 "register_operand" "=c")
-   (sign_extend:DI (match_operator:SI 3 "divmod_operator"
-   [(match_operand:DI 1 "register_operand" "a")
-(match_operand:DI 2 "register_operand" "b")])))
+   (sign_extend:DI
+(match_operator:SI 3 "divmod_operator"
+ [(truncate:SI (match_operand:DI 1 "register_operand" "a"))
+  (truncate:SI (match_operand:DI 2 "register_operand" "b"))])))
(clobber (reg:DI 23))
(clobber (reg:DI 28))]
   "TARGET_EXPLICIT_RELOCS && TARGET_ABI_OSF"
@@ -826,8 +828,8 @@
 (define_insn "*divmodsi_internal_er_1"
   [(set (match_operand:DI 0 "register_operand" "=c")
(sign_extend:DI (match_operator:SI 3 "divmod_operator"
-[(match_operand:DI 1 "register_operand" "a")
- (match_operand:DI 2 "register_operand" "b")])))
+[(truncate:SI (match_operand:DI 1 "register_operand" "a"))
+ (truncate:SI (match_operand:DI 2 "register_operand" "b"))])))
(use (match_operand:DI 4 "register_operand" "c"))
(use (match_operand 5 "const_int_operand"))
(clobber (reg:DI 23))
@@ -839,9 +841,10 @@
 
 (define_insn "*divmodsi_internal"
   [(set (match_operand:DI 0 "register_operand" "=c")
-   (sign_extend:DI (match_operator:SI 3 "divmod_operator"
-   [(match_operand:DI 1 "register_operand" "a")
-(match_operand:DI 2 "register_operand" "b")])))
+   (sign_extend:DI
+(match_operator:SI 3 "divmod_operator"
+ [(truncate:SI (match_operand:DI 1 "register_operand" "a"))
+  (truncate:SI (match_operand:DI 2 "register_operand" "b"))])))
(clobber (reg:DI 23))
(clobber (reg:DI 28))]
   "TARGET_ABI_OSF"
diff --git a/gcc/config/alpha/constraints.md b/gcc/config/alpha/constraints.md
index ac3a5293732..2c0c276d491 100644
--- a/gcc/config/alpha/constraints.md
+++ b/gcc/config/alpha/constraints.md
@@ -27,7 +27,7 @@
  "General register 24, input to division routine")
 
 (define_register_constraint "b" "R25_REG"
- "General register 24, input to division routine")
+ "General register 25, input to division routine")
 
 (define_register_constraint "c" "R27_REG"
  "General register 27, function call address")
diff --git a/gcc/testsuite/gcc.target/alpha/pr115297.c 
b/gcc/testsuite/gcc.target/alpha/pr115297.c
new file mode 100644
index 000..4d5890ec8d9
--- /dev/null
+++ b/gcc/testsuite/gcc.target/alpha/pr115297.c
@@ -0,0 +1,13 @@
+/* PR target/115297 */
+/* { dg-do compile } */
+/* { dg-options "-O1" } */
+
+enum { BPF_F_USER_BUILD_ID } __bpf_get_stack_si

[gcc r15-993] i386: Force operand 1 of bswapsi2 to a register for !TARGET_BSWAP [PR115321]

2024-06-03 Thread Uros Bizjak via Gcc-cvs
https://gcc.gnu.org/g:6ab5145825ca7e96fcbe3aa505d42e4ae8f81009

commit r15-993-g6ab5145825ca7e96fcbe3aa505d42e4ae8f81009
Author: Uros Bizjak 
Date:   Mon Jun 3 15:48:18 2024 +0200

i386: Force operand 1 of bswapsi2 to a register for !TARGET_BSWAP [PR115321]

PR target/115321

gcc/ChangeLog:

* config/i386/i386.md (bswapsi2): Force operand 1
to a register also for !TARGET_BSWAP.

gcc/testsuite/ChangeLog:

* gcc.target/i386/pr115321.c: New test.

Diff:
---
 gcc/config/i386/i386.md  | 21 +++--
 gcc/testsuite/gcc.target/i386/pr115321.c |  4 
 2 files changed, 15 insertions(+), 10 deletions(-)

diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md
index 2c95395b7be..ef83984d00e 100644
--- a/gcc/config/i386/i386.md
+++ b/gcc/config/i386/i386.md
@@ -21193,18 +21193,19 @@
(bswap:SI (match_operand:SI 1 "nonimmediate_operand")))]
   ""
 {
-  if (TARGET_MOVBE)
-;
-  else if (TARGET_BSWAP)
-operands[1] = force_reg (SImode, operands[1]);
-  else
+  if (!TARGET_MOVBE)
 {
-  rtx x = gen_reg_rtx (SImode);
+  operands[1] = force_reg (SImode, operands[1]);
 
-  emit_insn (gen_bswaphisi2_lowpart (x, operands[1]));
-  emit_insn (gen_rotlsi3 (x, x, GEN_INT (16)));
-  emit_insn (gen_bswaphisi2_lowpart (operands[0], x));
-  DONE;
+  if (!TARGET_BSWAP)
+   {
+ rtx x = gen_reg_rtx (SImode);
+
+ emit_insn (gen_bswaphisi2_lowpart (x, operands[1]));
+ emit_insn (gen_rotlsi3 (x, x, GEN_INT (16)));
+ emit_insn (gen_bswaphisi2_lowpart (operands[0], x));
+ DONE;
+   }
 }
 })
 
diff --git a/gcc/testsuite/gcc.target/i386/pr115321.c 
b/gcc/testsuite/gcc.target/i386/pr115321.c
new file mode 100644
index 000..0ddab9bd7a5
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr115321.c
@@ -0,0 +1,4 @@
+/* { dg-do compile { target ia32 } } */
+/* { dg-options "-march=i386" } */
+
+unsigned foo (unsigned x) { return __builtin_bswap32 (x); }


[gcc r14-10264] alpha: Fix invalid RTX in divmodsi insn patterns [PR115297]

2024-05-31 Thread Uros Bizjak via Gcc-cvs
https://gcc.gnu.org/g:ec92744de552303a1424085203e1311bd9146f21

commit r14-10264-gec92744de552303a1424085203e1311bd9146f21
Author: Uros Bizjak 
Date:   Fri May 31 15:52:03 2024 +0200

alpha: Fix invalid RTX in divmodsi insn patterns [PR115297]

any_divmod instructions are modelled with invalid RTX:

  [(set (match_operand:DI 0 "register_operand" "=c")
(sign_extend:DI (match_operator:SI 3 "divmod_operator"
[(match_operand:DI 1 "register_operand" "a")
 (match_operand:DI 2 "register_operand" "b")])))
   (clobber (reg:DI 23))
   (clobber (reg:DI 28))]

where SImode divmod_operator (div,mod,udiv,umod) has DImode operands.

Wrap input operand with truncate:SI to make machine modes consistent.

PR target/115297

gcc/ChangeLog:

* config/alpha/alpha.md (<any_divmod:code>si3): Wrap DImode
operands 3 and 4 with truncate:SI RTX.
(*divmodsi_internal_er): Ditto for operands 1 and 2.
(*divmodsi_internal_er_1): Ditto.
(*divmodsi_internal): Ditto.
* config/alpha/constraints.md ("b"): Correct register
number in the description.

gcc/testsuite/ChangeLog:

* gcc.target/alpha/pr115297.c: New test.

(cherry picked from commit 0ac802064c2a018cf166c37841697e867de65a95)

Diff:
---
 gcc/config/alpha/alpha.md | 21 -
 gcc/config/alpha/constraints.md   |  2 +-
 gcc/testsuite/gcc.target/alpha/pr115297.c | 13 +
 3 files changed, 26 insertions(+), 10 deletions(-)

diff --git a/gcc/config/alpha/alpha.md b/gcc/config/alpha/alpha.md
index 79f12c53c16..1e2de5a4d15 100644
--- a/gcc/config/alpha/alpha.md
+++ b/gcc/config/alpha/alpha.md
@@ -725,7 +725,8 @@
(sign_extend:DI (match_operand:SI 2 "nonimmediate_operand")))
(parallel [(set (match_dup 5)
   (sign_extend:DI
-   (any_divmod:SI (match_dup 3) (match_dup 4
+   (any_divmod:SI (truncate:SI (match_dup 3))
+  (truncate:SI (match_dup 4)
  (clobber (reg:DI 23))
  (clobber (reg:DI 28))])
(set (match_operand:SI 0 "nonimmediate_operand")
@@ -751,9 +752,10 @@
 
 (define_insn_and_split "*divmodsi_internal_er"
   [(set (match_operand:DI 0 "register_operand" "=c")
-   (sign_extend:DI (match_operator:SI 3 "divmod_operator"
-   [(match_operand:DI 1 "register_operand" "a")
-(match_operand:DI 2 "register_operand" "b")])))
+   (sign_extend:DI
+(match_operator:SI 3 "divmod_operator"
+ [(truncate:SI (match_operand:DI 1 "register_operand" "a"))
+  (truncate:SI (match_operand:DI 2 "register_operand" "b"))])))
(clobber (reg:DI 23))
(clobber (reg:DI 28))]
   "TARGET_EXPLICIT_RELOCS && TARGET_ABI_OSF"
@@ -795,8 +797,8 @@
 (define_insn "*divmodsi_internal_er_1"
   [(set (match_operand:DI 0 "register_operand" "=c")
(sign_extend:DI (match_operator:SI 3 "divmod_operator"
-[(match_operand:DI 1 "register_operand" "a")
- (match_operand:DI 2 "register_operand" "b")])))
+[(truncate:SI (match_operand:DI 1 "register_operand" "a"))
+ (truncate:SI (match_operand:DI 2 "register_operand" "b"))])))
(use (match_operand:DI 4 "register_operand" "c"))
(use (match_operand 5 "const_int_operand"))
(clobber (reg:DI 23))
@@ -808,9 +810,10 @@
 
 (define_insn "*divmodsi_internal"
   [(set (match_operand:DI 0 "register_operand" "=c")
-   (sign_extend:DI (match_operator:SI 3 "divmod_operator"
-   [(match_operand:DI 1 "register_operand" "a")
-(match_operand:DI 2 "register_operand" "b")])))
+   (sign_extend:DI
+(match_operator:SI 3 "divmod_operator"
+ [(truncate:SI (match_operand:DI 1 "register_operand" "a"))
+  (truncate:SI (match_operand:DI 2 "register_operand" "b"))])))
(clobber (reg:DI 23))
(clobber (reg:DI 28))]
   "TARGET_ABI_OSF"
diff --git a/gcc/config/alpha/constraints.md b/gcc/config/alpha/constraints.md
index 0d001ba26f1..4383f1fa895 100644
--- a/gcc/config/alpha/constraints.md
+++ b/gcc/config/alpha/constraints.md
@@ -27,7 +27,7 @@
  "General register 24, input to division routine")
 
 (define_register_constraint "b" "R25_REG"
- "General register 24, input to division routine")
+ "General register 25, input to division routine")
 
 (define_register_constraint "c" "R27_REG"
  "General register 27, function call address")
diff --git a/gcc/testsuite/gcc.target/alpha/pr115297.c 
b/gcc/testsuite/gcc.target/alpha/pr115297.c
new file mode 100644
index 000..4d5890ec8d9
--- /dev/null
+++ b/gcc/testsuite/gcc.target/alpha/pr115297.c
@@ -0,0 +1,13 @@
+/* PR target/115297 */
+/* { dg-do compile } */
+/* { dg-options "-O1" } */
+
+enum { BPF_F_USER_BUILD_ID } __bpf_get_stack_s

[gcc r15-943] alpha: Fix invalid RTX in divmodsi insn patterns [PR115297]

2024-05-31 Thread Uros Bizjak via Gcc-cvs
https://gcc.gnu.org/g:0ac802064c2a018cf166c37841697e867de65a95

commit r15-943-g0ac802064c2a018cf166c37841697e867de65a95
Author: Uros Bizjak 
Date:   Fri May 31 15:52:03 2024 +0200

alpha: Fix invalid RTX in divmodsi insn patterns [PR115297]

any_divmod instructions are modelled with invalid RTX:

  [(set (match_operand:DI 0 "register_operand" "=c")
(sign_extend:DI (match_operator:SI 3 "divmod_operator"
[(match_operand:DI 1 "register_operand" "a")
 (match_operand:DI 2 "register_operand" "b")])))
   (clobber (reg:DI 23))
   (clobber (reg:DI 28))]

where SImode divmod_operator (div,mod,udiv,umod) has DImode operands.

Wrap input operand with truncate:SI to make machine modes consistent.

PR target/115297

gcc/ChangeLog:

* config/alpha/alpha.md (<any_divmod:code>si3): Wrap DImode
operands 3 and 4 with truncate:SI RTX.
(*divmodsi_internal_er): Ditto for operands 1 and 2.
(*divmodsi_internal_er_1): Ditto.
(*divmodsi_internal): Ditto.
* config/alpha/constraints.md ("b"): Correct register
number in the description.

gcc/testsuite/ChangeLog:

* gcc.target/alpha/pr115297.c: New test.

Diff:
---
 gcc/config/alpha/alpha.md | 21 -
 gcc/config/alpha/constraints.md   |  2 +-
 gcc/testsuite/gcc.target/alpha/pr115297.c | 13 +
 3 files changed, 26 insertions(+), 10 deletions(-)

diff --git a/gcc/config/alpha/alpha.md b/gcc/config/alpha/alpha.md
index 79f12c53c16..1e2de5a4d15 100644
--- a/gcc/config/alpha/alpha.md
+++ b/gcc/config/alpha/alpha.md
@@ -725,7 +725,8 @@
(sign_extend:DI (match_operand:SI 2 "nonimmediate_operand")))
(parallel [(set (match_dup 5)
   (sign_extend:DI
-   (any_divmod:SI (match_dup 3) (match_dup 4
+   (any_divmod:SI (truncate:SI (match_dup 3))
+  (truncate:SI (match_dup 4)
  (clobber (reg:DI 23))
  (clobber (reg:DI 28))])
(set (match_operand:SI 0 "nonimmediate_operand")
@@ -751,9 +752,10 @@
 
 (define_insn_and_split "*divmodsi_internal_er"
   [(set (match_operand:DI 0 "register_operand" "=c")
-   (sign_extend:DI (match_operator:SI 3 "divmod_operator"
-   [(match_operand:DI 1 "register_operand" "a")
-(match_operand:DI 2 "register_operand" "b")])))
+   (sign_extend:DI
+(match_operator:SI 3 "divmod_operator"
+ [(truncate:SI (match_operand:DI 1 "register_operand" "a"))
+  (truncate:SI (match_operand:DI 2 "register_operand" "b"))])))
(clobber (reg:DI 23))
(clobber (reg:DI 28))]
   "TARGET_EXPLICIT_RELOCS && TARGET_ABI_OSF"
@@ -795,8 +797,8 @@
 (define_insn "*divmodsi_internal_er_1"
   [(set (match_operand:DI 0 "register_operand" "=c")
(sign_extend:DI (match_operator:SI 3 "divmod_operator"
-[(match_operand:DI 1 "register_operand" "a")
- (match_operand:DI 2 "register_operand" "b")])))
+[(truncate:SI (match_operand:DI 1 "register_operand" "a"))
+ (truncate:SI (match_operand:DI 2 "register_operand" "b"))])))
(use (match_operand:DI 4 "register_operand" "c"))
(use (match_operand 5 "const_int_operand"))
(clobber (reg:DI 23))
@@ -808,9 +810,10 @@
 
 (define_insn "*divmodsi_internal"
   [(set (match_operand:DI 0 "register_operand" "=c")
-   (sign_extend:DI (match_operator:SI 3 "divmod_operator"
-   [(match_operand:DI 1 "register_operand" "a")
-(match_operand:DI 2 "register_operand" "b")])))
+   (sign_extend:DI
+(match_operator:SI 3 "divmod_operator"
+ [(truncate:SI (match_operand:DI 1 "register_operand" "a"))
+  (truncate:SI (match_operand:DI 2 "register_operand" "b"))])))
(clobber (reg:DI 23))
(clobber (reg:DI 28))]
   "TARGET_ABI_OSF"
diff --git a/gcc/config/alpha/constraints.md b/gcc/config/alpha/constraints.md
index 0d001ba26f1..4383f1fa895 100644
--- a/gcc/config/alpha/constraints.md
+++ b/gcc/config/alpha/constraints.md
@@ -27,7 +27,7 @@
  "General register 24, input to division routine")
 
 (define_register_constraint "b" "R25_REG"
- "General register 24, input to division routine")
+ "General register 25, input to division routine")
 
 (define_register_constraint "c" "R27_REG"
  "General register 27, function call address")
diff --git a/gcc/testsuite/gcc.target/alpha/pr115297.c 
b/gcc/testsuite/gcc.target/alpha/pr115297.c
new file mode 100644
index 00000000000..4d5890ec8d9
--- /dev/null
+++ b/gcc/testsuite/gcc.target/alpha/pr115297.c
@@ -0,0 +1,13 @@
+/* PR target/115297 */
+/* { dg-do compile } */
+/* { dg-options "-O1" } */
+
+enum { BPF_F_USER_BUILD_ID } __bpf_get_stack_size;
+long __bpf_get_stack_flags, bpf_get_stack___trans_tmp_2;
+
+void bpf_get_s

[gcc r15-930] i386: Rewrite bswaphi2 handling [PR115102]

2024-05-30 Thread Uros Bizjak via Gcc-cvs
https://gcc.gnu.org/g:e715204f203d318524ae86f3f2a1e8d5d7cb08dc

commit r15-930-ge715204f203d318524ae86f3f2a1e8d5d7cb08dc
Author: Uros Bizjak 
Date:   Thu May 30 21:27:42 2024 +0200

i386: Rewrite bswaphi2 handling [PR115102]

Introduce *bswaphi2 instruction pattern and enable bswaphi2 expander
also for non-movbe targets.  The testcase:

unsigned short bswap8 (unsigned short val)
{
  return ((val & 0xff00) >> 8) | ((val & 0xff) << 8);
}

now expands through bswaphi2 named expander.

Rewrite bswaphi_lowpart insn pattern as bswaphisi2_lowpart in the RTX form
that combine pass can use to simplify:

Trying 6, 9, 8 -> 10:
6: r99:SI=bswap(r103:SI)
9: {r107:SI=r103:SI&0xffffffffffff0000;clobber flags:CC;}
  REG_DEAD r103:SI
  REG_UNUSED flags:CC
8: {r106:SI=r99:SI 0>>0x10;clobber flags:CC;}
  REG_DEAD r99:SI
  REG_UNUSED flags:CC
   10: {r104:SI=r106:SI|r107:SI;clobber flags:CC;}
  REG_DEAD r107:SI
  REG_DEAD r106:SI
  REG_UNUSED flags:CC

Successfully matched this instruction:
(set (reg:SI 104 [ _8 ])
(ior:SI (and:SI (reg/v:SI 103 [ val ])
(const_int -65536 [0xffffffffffff0000]))
(lshiftrt:SI (bswap:SI (reg/v:SI 103 [ val ]))
(const_int 16 [0x10]))))
allowing combination of insns 6, 8, 9 and 10

when compiling the following testcase:

unsigned int bswap8 (unsigned int val)
{
  return (val & 0xffff0000) | ((val & 0xff00) >> 8) | ((val & 0xff) << 8);
}

to produce:

movl    %edi, %eax
xchgb   %ah, %al
ret

The expansion now always goes through a clobberless form of the bswaphi
instruction.  The instruction is conditionally converted to a rotate at
peephole2 pass.  This significantly simplifies bswaphisi2_lowpart
insn pattern attributes.

PR target/115102

gcc/ChangeLog:

* config/i386/i386.md (bswaphi2): Also enable for !TARGET_MOVBE.
(*bswaphi2): New insn pattern.
(bswaphisi2_lowpart): Rename from bswaphi_lowpart.  Rewrite
insn RTX to match the expected form of the combine pass.
Remove rol{w} alternative and corresponding attributes.
(bswaphisi2_lowpart peephole2): New peephole2 pattern to
conditionally convert bswaphisi2_lowpart to rotlhi3_1_slp.
(bswapsi2): Update expander for rename.
(rotlhi3_1_slp splitter): Conditionally split to bswaphi2.

gcc/testsuite/ChangeLog:

* gcc.target/i386/pr115102.c: New test.

Diff:
---
 gcc/config/i386/i386.md  | 77 +---
 gcc/testsuite/gcc.target/i386/pr115102.c | 10 +
 2 files changed, 60 insertions(+), 27 deletions(-)

diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md
index c162cd42386..375654cf74e 100644
--- a/gcc/config/i386/i386.md
+++ b/gcc/config/i386/i386.md
@@ -17210,9 +17210,7 @@
   (clobber (reg:CC FLAGS_REG))]
  "reload_completed
   && (TARGET_USE_XCHGB || optimize_function_for_size_p (cfun))"
- [(parallel [(set (strict_low_part (match_dup 0))
- (bswap:HI (match_dup 0)))
-(clobber (reg:CC FLAGS_REG))])])
+ [(set (match_dup 0) (bswap:HI (match_dup 0)))])
 
 ;; Rotations through carry flag
 (define_insn "rcrsi2"
@@ -20730,12 +20728,11 @@
 operands[1] = force_reg (SImode, operands[1]);
   else
 {
-  rtx x = operands[0];
+  rtx x = gen_reg_rtx (SImode);
 
-  emit_move_insn (x, operands[1]);
-  emit_insn (gen_bswaphi_lowpart (gen_lowpart (HImode, x)));
+  emit_insn (gen_bswaphisi2_lowpart (x, operands[1]));
   emit_insn (gen_rotlsi3 (x, x, GEN_INT (16)));
-  emit_insn (gen_bswaphi_lowpart (gen_lowpart (HImode, x)));
+  emit_insn (gen_bswaphisi2_lowpart (operands[0], x));
   DONE;
 }
 })
@@ -20767,7 +20764,11 @@
 (define_expand "bswaphi2"
   [(set (match_operand:HI 0 "register_operand")
(bswap:HI (match_operand:HI 1 "nonimmediate_operand")))]
-  "TARGET_MOVBE")
+  ""
+{
+  if (!TARGET_MOVBE)
+operands[1] = force_reg (HImode, operands[1]);
+})
 
 (define_insn "*bswaphi2_movbe"
   [(set (match_operand:HI 0 "nonimmediate_operand" "=Q,r,m")
@@ -20788,33 +20789,55 @@
(set_attr "bdver1_decode" "double,*,*")
(set_attr "mode" "QI,HI,HI")])
 
+(define_insn "*bswaphi2"
+  [(set (match_operand:HI 0 "register_operand" "=Q")
+   (bswap:HI (match_operand:HI 1 "register_operand" "0")))]
+  "!TARGET_MOVBE"
+  "xchg{b}\t{%h0, %b0|%b0, %h0}"
+  [(set_attr "type" "imov")
+   (set_attr "pent_pair" "np")
+   (set_attr "athlon_decode" "vector")
+   (set_attr "amdfam10_decode" "double")
+   (set_attr "bdver1_decode" "double")
+   (set_attr "mode" "QI")])
+
 (define_peephole2
   [(set (match_operand:HI 0 "general_reg_operand")
(bswap:HI (match_dup 0)))]
-  "TARGET_MO

[gcc r15-876] i386: Improve access to _Atomic DImode location via XMM regs for SSE4.1 x86_32 targets

2024-05-28 Thread Uros Bizjak via Gcc-cvs
https://gcc.gnu.org/g:91d79053f2b416cb9e97d9c0c3fb5b73075289e6

commit r15-876-g91d79053f2b416cb9e97d9c0c3fb5b73075289e6
Author: Uros Bizjak 
Date:   Tue May 28 20:25:14 2024 +0200

i386: Improve access to _Atomic DImode location via XMM regs for SSE4.1 
x86_32 targets

Use MOVD/PEXTRD and MOVD/PINSRD insn sequences to move DImode value
between XMM and GPR register sets for SSE4.1 x86_32 targets in order
to avoid spilling the value to stack.

The load from _Atomic location a improves from:

movq    a, %xmm0
movq    %xmm0, (%esp)
movl    (%esp), %eax
movl    4(%esp), %edx

to:
movq    a, %xmm0
movd    %xmm0, %eax
pextrd  $1, %xmm0, %edx

The store to _Atomic location b improves from:

movl    %eax, (%esp)
movl    %edx, 4(%esp)
movq    (%esp), %xmm0
movq    %xmm0, b

to:
movd    %eax, %xmm0
pinsrd  $1, %edx, %xmm0
movq    %xmm0, b

gcc/ChangeLog:

* config/i386/sync.md (atomic_loaddi_fpu): Use movd/pextrd
to move DImode value from XMM to GPR for TARGET_SSE4_1.
(atomic_storedi_fpu): Use movd/pinsrd to move DImode value
from GPR to XMM for TARGET_SSE4_1.

Diff:
---
 gcc/config/i386/sync.md | 36 ++++++++++++++++++++++++++++--------
 1 file changed, 28 insertions(+), 8 deletions(-)

diff --git a/gcc/config/i386/sync.md b/gcc/config/i386/sync.md
index 8317581ebe2..f2b3ba0aa7a 100644
--- a/gcc/config/i386/sync.md
+++ b/gcc/config/i386/sync.md
@@ -215,8 +215,18 @@
}
   else
{
+ rtx tmpdi = gen_lowpart (DImode, tmp);
+
  emit_insn (gen_loaddi_via_sse (tmp, src));
- emit_insn (gen_storedi_via_sse (mem, tmp));
+
+ if (GENERAL_REG_P (dst)
+ && TARGET_SSE4_1 && TARGET_INTER_UNIT_MOVES_FROM_VEC)
+   {
+ emit_move_insn (dst, tmpdi);
+ DONE;
+   }
+ else
+   emit_move_insn (mem, tmpdi);
}
 
   if (mem != dst)
@@ -294,20 +304,30 @@
 emit_move_insn (dst, src);
   else
 {
-  if (REG_P (src))
-   {
- emit_move_insn (mem, src);
- src = mem;
-   }
-
   if (STACK_REG_P (tmp))
{
+ if (GENERAL_REG_P (src))
+   {
+ emit_move_insn (mem, src);
+ src = mem;
+   }
+
  emit_insn (gen_loaddi_via_fpu (tmp, src));
  emit_insn (gen_storedi_via_fpu (dst, tmp));
}
   else
{
- emit_insn (gen_loaddi_via_sse (tmp, src));
+ rtx tmpdi = gen_lowpart (DImode, tmp);
+
+ if (GENERAL_REG_P (src)
+ && !(TARGET_SSE4_1 && TARGET_INTER_UNIT_MOVES_TO_VEC))
+   {
+ emit_move_insn (mem, src);
+ src = mem;
+   }
+
+ emit_move_insn (tmpdi, src);
+
  emit_insn (gen_storedi_via_sse (dst, tmp));
}
 }


[gcc r11-11454] ubsan: Use right address space for MEM_REF created for bool/enum sanitization [PR115172]

2024-05-28 Thread Uros Bizjak via Gcc-cvs
https://gcc.gnu.org/g:d8985ea10c911c994e00dbd6a08dcae907ebc1f7

commit r11-11454-gd8985ea10c911c994e00dbd6a08dcae907ebc1f7
Author: Jakub Jelinek 
Date:   Wed May 22 09:12:28 2024 +0200

ubsan: Use right address space for MEM_REF created for bool/enum 
sanitization [PR115172]

The following testcase is miscompiled, because -fsanitize=bool,enum
creates a MEM_REF without propagating the address space qualifiers to it,
so a load that should normally use, say, a %gs:/%fs: segment prefix
doesn't.  Together with asan it then causes that load to be sanitized.

2024-05-22  Jakub Jelinek  

PR sanitizer/115172
* ubsan.c (instrument_bool_enum_load): If rhs is not in generic
address space, use qualified version of utype with the right
address space.  Formatting fix.

* gcc.dg/asan/pr115172.c: New test.

(cherry picked from commit d3c506eff54fcbac389a529c2e98da108a410b7f)

Diff:
---
 gcc/testsuite/gcc.dg/asan/pr115172.c | 20 
 gcc/ubsan.c  |  6 +-
 2 files changed, 25 insertions(+), 1 deletion(-)

diff --git a/gcc/testsuite/gcc.dg/asan/pr115172.c 
b/gcc/testsuite/gcc.dg/asan/pr115172.c
new file mode 100644
index 00000000000..8707e615733
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/asan/pr115172.c
@@ -0,0 +1,20 @@
+/* PR sanitizer/115172 */
+/* { dg-do compile { target i?86-*-* x86_64-*-* } } */
+/* { dg-options "-O2 -fsanitize=address,bool -ffat-lto-objects 
-fdump-tree-asan1" } */
+/* { dg-final { scan-tree-dump-not "\.ASAN_CHECK " "asan1" } } */
+
+#ifdef __x86_64__
+#define SEG __seg_gs
+#else
+#define SEG __seg_fs
+#endif
+
+extern struct S { _Bool b; } s;
+void bar (void);
+
+void
+foo (void)
+{
+  if (*(volatile _Bool SEG *) (__UINTPTR_TYPE__) &s.b)
+bar ();
+}
diff --git a/gcc/ubsan.c b/gcc/ubsan.c
index 2b12651b440..f77dee5fddd 100644
--- a/gcc/ubsan.c
+++ b/gcc/ubsan.c
@@ -1703,13 +1703,17 @@ instrument_bool_enum_load (gimple_stmt_iterator *gsi)
   || TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME)
 return;
 
+  addr_space_t as = TYPE_ADDR_SPACE (TREE_TYPE (rhs));
+  if (as != TYPE_ADDR_SPACE (utype))
+utype = build_qualified_type (utype, TYPE_QUALS (utype)
+| ENCODE_QUAL_ADDR_SPACE (as));
   bool ends_bb = stmt_ends_bb_p (stmt);
   location_t loc = gimple_location (stmt);
   tree lhs = gimple_assign_lhs (stmt);
   tree ptype = build_pointer_type (TREE_TYPE (rhs));
   tree atype = reference_alias_ptr_type (rhs);
   gimple *g = gimple_build_assign (make_ssa_name (ptype),
- build_fold_addr_expr (rhs));
+  build_fold_addr_expr (rhs));
   gimple_set_location (g, loc);
   gsi_insert_before (gsi, g, GSI_SAME_STMT);
   tree mem = build2 (MEM_REF, utype, gimple_assign_lhs (g),


[gcc r12-10477] ubsan: Use right address space for MEM_REF created for bool/enum sanitization [PR115172]

2024-05-28 Thread Uros Bizjak via Gcc-cvs
https://gcc.gnu.org/g:da9b7a507ef38287cc16bc88e808293019f9f531

commit r12-10477-gda9b7a507ef38287cc16bc88e808293019f9f531
Author: Jakub Jelinek 
Date:   Wed May 22 09:12:28 2024 +0200

ubsan: Use right address space for MEM_REF created for bool/enum 
sanitization [PR115172]

The following testcase is miscompiled, because -fsanitize=bool,enum
creates a MEM_REF without propagating the address space qualifiers to it,
so a load that should normally use, say, a %gs:/%fs: segment prefix
doesn't.  Together with asan it then causes that load to be sanitized.

2024-05-22  Jakub Jelinek  

PR sanitizer/115172
* ubsan.cc (instrument_bool_enum_load): If rhs is not in generic
address space, use qualified version of utype with the right
address space.  Formatting fix.

* gcc.dg/asan/pr115172.c: New test.

(cherry picked from commit d3c506eff54fcbac389a529c2e98da108a410b7f)

Diff:
---
 gcc/testsuite/gcc.dg/asan/pr115172.c | 20 
 gcc/ubsan.cc |  6 +-
 2 files changed, 25 insertions(+), 1 deletion(-)

diff --git a/gcc/testsuite/gcc.dg/asan/pr115172.c 
b/gcc/testsuite/gcc.dg/asan/pr115172.c
new file mode 100644
index 00000000000..8707e615733
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/asan/pr115172.c
@@ -0,0 +1,20 @@
+/* PR sanitizer/115172 */
+/* { dg-do compile { target i?86-*-* x86_64-*-* } } */
+/* { dg-options "-O2 -fsanitize=address,bool -ffat-lto-objects 
-fdump-tree-asan1" } */
+/* { dg-final { scan-tree-dump-not "\.ASAN_CHECK " "asan1" } } */
+
+#ifdef __x86_64__
+#define SEG __seg_gs
+#else
+#define SEG __seg_fs
+#endif
+
+extern struct S { _Bool b; } s;
+void bar (void);
+
+void
+foo (void)
+{
+  if (*(volatile _Bool SEG *) (__UINTPTR_TYPE__) &s.b)
+bar ();
+}
diff --git a/gcc/ubsan.cc b/gcc/ubsan.cc
index 4d8e7cd86c5..70a5ef66bd9 100644
--- a/gcc/ubsan.cc
+++ b/gcc/ubsan.cc
@@ -1703,13 +1703,17 @@ instrument_bool_enum_load (gimple_stmt_iterator *gsi)
   || TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME)
 return;
 
+  addr_space_t as = TYPE_ADDR_SPACE (TREE_TYPE (rhs));
+  if (as != TYPE_ADDR_SPACE (utype))
+utype = build_qualified_type (utype, TYPE_QUALS (utype)
+| ENCODE_QUAL_ADDR_SPACE (as));
   bool ends_bb = stmt_ends_bb_p (stmt);
   location_t loc = gimple_location (stmt);
   tree lhs = gimple_assign_lhs (stmt);
   tree ptype = build_pointer_type (TREE_TYPE (rhs));
   tree atype = reference_alias_ptr_type (rhs);
   gimple *g = gimple_build_assign (make_ssa_name (ptype),
- build_fold_addr_expr (rhs));
+  build_fold_addr_expr (rhs));
   gimple_set_location (g, loc);
   gsi_insert_before (gsi, g, GSI_SAME_STMT);
   tree mem = build2 (MEM_REF, utype, gimple_assign_lhs (g),


[gcc r15-634] i386: Rename sat_plusminus expanders to standard names [PR112600]

2024-05-17 Thread Uros Bizjak via Gcc-cvs
https://gcc.gnu.org/g:b59de4113262f2bee14147eb17eb3592f03d9556

commit r15-634-gb59de4113262f2bee14147eb17eb3592f03d9556
Author: Uros Bizjak 
Date:   Fri May 17 09:55:49 2024 +0200

i386: Rename sat_plusminus expanders to standard names [PR112600]

Rename <sse2_avx2>_<insn><mode>3<mask_name> expander to a standard ssadd,
usadd, sssub and ussub name to enable corresponding optab expansion.

Also add named expander for MMX modes.

PR middle-end/112600

gcc/ChangeLog:

* config/i386/mmx.md (<insn><mode>3): New expander.
* config/i386/sse.md
(<sse2_avx2>_<insn><mode>3<mask_name>):
Rename expander to <insn><mode>3<mask_name>.
(3): Update for rename.
* config/i386/i386-builtin.def: Update for rename.

gcc/testsuite/ChangeLog:

* gcc.target/i386/pr112600-1a.c: New test.
* gcc.target/i386/pr112600-1b.c: New test.

Diff:
---
 gcc/config/i386/i386-builtin.def| 80 ++---
 gcc/config/i386/mmx.md  |  7 +++
 gcc/config/i386/sse.md  |  4 +-
 gcc/testsuite/gcc.target/i386/pr112600-1a.c | 15 ++
 gcc/testsuite/gcc.target/i386/pr112600-1b.c | 15 ++
 5 files changed, 79 insertions(+), 42 deletions(-)

diff --git a/gcc/config/i386/i386-builtin.def b/gcc/config/i386/i386-builtin.def
index ab73e20121aa..927a79bb825b 100644
--- a/gcc/config/i386/i386-builtin.def
+++ b/gcc/config/i386/i386-builtin.def
@@ -800,14 +800,14 @@ BDESC (OPTION_MASK_ISA_SSE2, 0, CODE_FOR_subv8hi3, 
"__builtin_ia32_psubw128", IX
 BDESC (OPTION_MASK_ISA_SSE2, 0, CODE_FOR_subv4si3, "__builtin_ia32_psubd128", 
IX86_BUILTIN_PSUBD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI)
 BDESC (OPTION_MASK_ISA_SSE2, 0, CODE_FOR_subv2di3, "__builtin_ia32_psubq128", 
IX86_BUILTIN_PSUBQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI)
 
-BDESC (OPTION_MASK_ISA_SSE2, 0, CODE_FOR_sse2_ssaddv16qi3, 
"__builtin_ia32_paddsb128", IX86_BUILTIN_PADDSB128, UNKNOWN, (int) 
V16QI_FTYPE_V16QI_V16QI)
-BDESC (OPTION_MASK_ISA_SSE2, 0, CODE_FOR_sse2_ssaddv8hi3, 
"__builtin_ia32_paddsw128", IX86_BUILTIN_PADDSW128, UNKNOWN, (int) 
V8HI_FTYPE_V8HI_V8HI)
-BDESC (OPTION_MASK_ISA_SSE2, 0, CODE_FOR_sse2_sssubv16qi3, 
"__builtin_ia32_psubsb128", IX86_BUILTIN_PSUBSB128, UNKNOWN, (int) 
V16QI_FTYPE_V16QI_V16QI)
-BDESC (OPTION_MASK_ISA_SSE2, 0, CODE_FOR_sse2_sssubv8hi3, 
"__builtin_ia32_psubsw128", IX86_BUILTIN_PSUBSW128, UNKNOWN, (int) 
V8HI_FTYPE_V8HI_V8HI)
-BDESC (OPTION_MASK_ISA_SSE2, 0, CODE_FOR_sse2_usaddv16qi3, 
"__builtin_ia32_paddusb128", IX86_BUILTIN_PADDUSB128, UNKNOWN, (int) 
V16QI_FTYPE_V16QI_V16QI)
-BDESC (OPTION_MASK_ISA_SSE2, 0, CODE_FOR_sse2_usaddv8hi3, 
"__builtin_ia32_paddusw128", IX86_BUILTIN_PADDUSW128, UNKNOWN, (int) 
V8HI_FTYPE_V8HI_V8HI)
-BDESC (OPTION_MASK_ISA_SSE2, 0, CODE_FOR_sse2_ussubv16qi3, 
"__builtin_ia32_psubusb128", IX86_BUILTIN_PSUBUSB128, UNKNOWN, (int) 
V16QI_FTYPE_V16QI_V16QI)
-BDESC (OPTION_MASK_ISA_SSE2, 0, CODE_FOR_sse2_ussubv8hi3, 
"__builtin_ia32_psubusw128", IX86_BUILTIN_PSUBUSW128, UNKNOWN, (int) 
V8HI_FTYPE_V8HI_V8HI)
+BDESC (OPTION_MASK_ISA_SSE2, 0, CODE_FOR_ssaddv16qi3, 
"__builtin_ia32_paddsb128", IX86_BUILTIN_PADDSB128, UNKNOWN, (int) 
V16QI_FTYPE_V16QI_V16QI)
+BDESC (OPTION_MASK_ISA_SSE2, 0, CODE_FOR_ssaddv8hi3, 
"__builtin_ia32_paddsw128", IX86_BUILTIN_PADDSW128, UNKNOWN, (int) 
V8HI_FTYPE_V8HI_V8HI)
+BDESC (OPTION_MASK_ISA_SSE2, 0, CODE_FOR_sssubv16qi3, 
"__builtin_ia32_psubsb128", IX86_BUILTIN_PSUBSB128, UNKNOWN, (int) 
V16QI_FTYPE_V16QI_V16QI)
+BDESC (OPTION_MASK_ISA_SSE2, 0, CODE_FOR_sssubv8hi3, 
"__builtin_ia32_psubsw128", IX86_BUILTIN_PSUBSW128, UNKNOWN, (int) 
V8HI_FTYPE_V8HI_V8HI)
+BDESC (OPTION_MASK_ISA_SSE2, 0, CODE_FOR_usaddv16qi3, 
"__builtin_ia32_paddusb128", IX86_BUILTIN_PADDUSB128, UNKNOWN, (int) 
V16QI_FTYPE_V16QI_V16QI)
+BDESC (OPTION_MASK_ISA_SSE2, 0, CODE_FOR_usaddv8hi3, 
"__builtin_ia32_paddusw128", IX86_BUILTIN_PADDUSW128, UNKNOWN, (int) 
V8HI_FTYPE_V8HI_V8HI)
+BDESC (OPTION_MASK_ISA_SSE2, 0, CODE_FOR_ussubv16qi3, 
"__builtin_ia32_psubusb128", IX86_BUILTIN_PSUBUSB128, UNKNOWN, (int) 
V16QI_FTYPE_V16QI_V16QI)
+BDESC (OPTION_MASK_ISA_SSE2, 0, CODE_FOR_ussubv8hi3, 
"__builtin_ia32_psubusw128", IX86_BUILTIN_PSUBUSW128, UNKNOWN, (int) 
V8HI_FTYPE_V8HI_V8HI)
 
 BDESC (OPTION_MASK_ISA_SSE2, 0, CODE_FOR_mulv8hi3, "__builtin_ia32_pmullw128", 
IX86_BUILTIN_PMULLW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI)
 BDESC (OPTION_MASK_ISA_SSE2, 0, CODE_FOR_smulv8hi3_highpart, 
"__builtin_ia32_pmulhw128", IX86_BUILTIN_PMULHW128, UNKNOWN,(int) 
V8HI_FTYPE_V8HI_V8HI)
@@ -1193,10 +1193,10 @@ BDESC (OPTION_MASK_ISA_AVX2, 0, CODE_FOR_addv32qi3, 
"__builtin_ia32_paddb256", I
 BDESC (OPTION_MASK_ISA_AVX2, 0, CODE_FOR_addv16hi3, "__builtin_ia32_paddw256", 
IX86_BUILTIN_PADDW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI)
 BDESC (OPTION_MASK_ISA_AVX2, 0, CODE_FOR_addv8si3, "__builtin_ia32_paddd256", 
IX86_BUILTIN_PADDD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI)
 BDESC (OPTION_MASK_ISA_AVX2, 0, CODE_FOR_addv4di3, "__builtin_ia32_paddq256", 
I

[gcc r11-11351] ubsan: Don't -fsanitize=null instrument __seg_fs/gs pointers [PR111736]

2024-04-24 Thread Uros Bizjak via Gcc-cvs
https://gcc.gnu.org/g:624c3bb9ff762f196852dc77233610d1cdf7d7be

commit r11-11351-g624c3bb9ff762f196852dc77233610d1cdf7d7be
Author: Jakub Jelinek 
Date:   Fri Mar 22 09:23:44 2024 +0100

ubsan: Don't -fsanitize=null instrument __seg_fs/gs pointers [PR111736]

On x86 and avr some address spaces allow 0 pointers (on avr actually
even generic as, but libsanitizer isn't ported to it and
I'm not convinced we should completely kill -fsanitize=null in that
case).
The following patch makes sure those aren't diagnosed for -fsanitize=null,
though they are still sanitized for -fsanitize=alignment.

2024-03-22  Jakub Jelinek  

gcc/ChangeLog:

PR sanitizer/111736
* ubsan.c (ubsan_expand_null_ifn, instrument_mem_ref): Avoid
SANITIZE_NULL instrumentation for non-generic address spaces
for which targetm.addr_space.zero_address_valid (as) is true.

gcc/testsuite/ChangeLog:

* gcc.dg/ubsan/pr111736.c: New test.

(cherry picked from commit ddd4a3ca87410886b039cc225907b4f6e650082e)

Diff:
---
 gcc/testsuite/gcc.dg/ubsan/pr111736.c | 23 +++
 gcc/ubsan.c   | 19 +--
 2 files changed, 40 insertions(+), 2 deletions(-)

diff --git a/gcc/testsuite/gcc.dg/ubsan/pr111736.c 
b/gcc/testsuite/gcc.dg/ubsan/pr111736.c
new file mode 100644
index 00000000000..359b31828f0
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/ubsan/pr111736.c
@@ -0,0 +1,23 @@
+/* PR sanitizer/111736 */
+/* { dg-do compile { target i?86-*-* x86_64-*-* } } */
+/* { dg-options "-fsanitize=null,alignment -fdump-tree-optimized 
-ffat-lto-objects" } */
+/* { dg-final { scan-tree-dump-times "__ubsan_handle_type_mismatch" 1 
"optimized" } } */
+/* { dg-final { scan-tree-dump-not "p_\[0-9]*.D. \[=!]= 0" "optimized" } } */
+
+#ifdef __x86_64__
+#define SEG __seg_fs
+#else
+#define SEG __seg_gs
+#endif
+
+int
+foo (int SEG *p, int *q)
+{
+  return *p;
+}
+
+__attribute__((no_sanitize("alignment"))) int
+bar (int SEG *p, int *q)
+{
+  return *p;
+}
diff --git a/gcc/ubsan.c b/gcc/ubsan.c
index 04e8c1552a7..2b12651b440 100644
--- a/gcc/ubsan.c
+++ b/gcc/ubsan.c
@@ -49,6 +49,7 @@ along with GCC; see the file COPYING3.  If not see
 #include "tree-cfg.h"
 #include "gimple-fold.h"
 #include "varasm.h"
+#include "target.h"
 
 /* Map from a tree to a VAR_DECL tree.  */
 
@@ -784,6 +785,13 @@ ubsan_expand_null_ifn (gimple_stmt_iterator *gsip)
}
 }
   check_null = sanitize_flags_p (SANITIZE_NULL);
+  if (check_null && POINTER_TYPE_P (TREE_TYPE (ptr)))
+{
+  addr_space_t as = TYPE_ADDR_SPACE (TREE_TYPE (TREE_TYPE (ptr)));
+  if (!ADDR_SPACE_GENERIC_P (as)
+ && targetm.addr_space.zero_address_valid (as))
+   check_null = false;
+}
 
   if (check_align == NULL_TREE && !check_null)
 {
@@ -1375,8 +1383,15 @@ instrument_mem_ref (tree mem, tree base, 
gimple_stmt_iterator *iter,
   if (align <= 1)
align = 0;
 }
-  if (align == 0 && !sanitize_flags_p (SANITIZE_NULL))
-return;
+  if (align == 0)
+{
+  if (!sanitize_flags_p (SANITIZE_NULL))
+   return;
+  addr_space_t as = TYPE_ADDR_SPACE (TREE_TYPE (base));
+  if (!ADDR_SPACE_GENERIC_P (as)
+ && targetm.addr_space.zero_address_valid (as))
+   return;
+}
   tree t = TREE_OPERAND (base, 0);
   if (!POINTER_TYPE_P (TREE_TYPE (t)))
 return;


[gcc r11-11352] tsan: Don't instrument non-generic AS accesses [PR111736]

2024-04-24 Thread Uros Bizjak via Gcc-cvs
https://gcc.gnu.org/g:09910b6753427eeb3f6dded4fae3578851da7422

commit r11-11352-g09910b6753427eeb3f6dded4fae3578851da7422
Author: Jakub Jelinek 
Date:   Tue Mar 26 11:06:15 2024 +0100

tsan: Don't instrument non-generic AS accesses [PR111736]

Similar to the asan and ubsan changes, we shouldn't instrument non-generic
address space accesses with tsan, because we just have library functions
which take address of the objects as generic address space pointers, so they
can't handle anything else.

2024-03-26  Jakub Jelinek  

gcc/ChangeLog:

PR sanitizer/111736
* tsan.c (instrument_expr): Punt on non-generic address space
accesses.

gcc/testsuite/ChangeLog:

* gcc.dg/tsan/pr111736.c: New test.

(cherry picked from commit 471967ab8b4c49338ba77defbe24b06cc51c0093)

Diff:
---
 gcc/testsuite/gcc.dg/tsan/pr111736.c | 17 +
 gcc/tsan.c   |  3 +++
 2 files changed, 20 insertions(+)

diff --git a/gcc/testsuite/gcc.dg/tsan/pr111736.c 
b/gcc/testsuite/gcc.dg/tsan/pr111736.c
new file mode 100644
index 00000000000..34ab88b3d4f
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/tsan/pr111736.c
@@ -0,0 +1,17 @@
+/* PR sanitizer/111736 */
+/* { dg-do compile { target i?86-*-* x86_64-*-* } } */
+/* { dg-options "-fsanitize=thread -fdump-tree-optimized -ffat-lto-objects" } 
*/
+/* { dg-final { scan-tree-dump-not "__tsan_read" "optimized" } } */
+/* { dg-final { scan-tree-dump-not "__tsan_write" "optimized" } } */
+
+#ifdef __x86_64__
+#define SEG __seg_fs
+#else
+#define SEG __seg_gs
+#endif
+
+void
+foo (int SEG *p, int SEG *q)
+{
+  *q = *p;
+}
diff --git a/gcc/tsan.c b/gcc/tsan.c
index 7db157e21e5..e426f647cb0 100644
--- a/gcc/tsan.c
+++ b/gcc/tsan.c
@@ -139,6 +139,9 @@ instrument_expr (gimple_stmt_iterator gsi, tree expr, bool 
is_write)
   if (TREE_READONLY (base) || (VAR_P (base) && DECL_HARD_REGISTER (base)))
 return false;
 
+  if (!ADDR_SPACE_GENERIC_P (TYPE_ADDR_SPACE (TREE_TYPE (base))))
+return false;
+
   stmt = gsi_stmt (gsi);
   loc = gimple_location (stmt);
   rhs = is_vptr_store (stmt, expr, is_write);


[gcc r11-11350] tree-optimization/111736 - avoid address sanitizing of __seg_gs

2024-04-24 Thread Uros Bizjak via Gcc-cvs
https://gcc.gnu.org/g:b4e1aee01a2fa617cf74ab04cf0ab574761aaaea

commit r11-11350-gb4e1aee01a2fa617cf74ab04cf0ab574761aaaea
Author: Richard Biener 
Date:   Thu Mar 21 08:30:39 2024 +0100

tree-optimization/111736 - avoid address sanitizing of __seg_gs

The following more thoroughly avoids address sanitizing accesses
to non-generic address-spaces.

gcc/ChangeLog:

PR tree-optimization/111736
* asan.c (instrument_derefs): Do not instrument accesses
to non-generic address-spaces.

gcc/testsuite/ChangeLog:

* gcc.target/i386/pr111736.c: New testcase.

(cherry picked from commit 134ef2a8cac1a5cc718739bd7d3b3472947c80d6)

Diff:
---
 gcc/asan.c   |  4 
 gcc/testsuite/gcc.target/i386/pr111736.c | 23 +++
 2 files changed, 27 insertions(+)

diff --git a/gcc/asan.c b/gcc/asan.c
index b92c6007c97..fe24f531a04 100644
--- a/gcc/asan.c
+++ b/gcc/asan.c
@@ -2699,6 +2699,10 @@ instrument_derefs (gimple_stmt_iterator *iter, tree t,
   if (VAR_P (inner) && DECL_HARD_REGISTER (inner))
 return;
 
+  /* Accesses to non-generic address-spaces should not be instrumented.  */
+  if (!ADDR_SPACE_GENERIC_P (TYPE_ADDR_SPACE (TREE_TYPE (inner))))
+return;
+
   poly_int64 decl_size;
   if ((VAR_P (inner) || TREE_CODE (inner) == RESULT_DECL)
   && offset == NULL_TREE
diff --git a/gcc/testsuite/gcc.target/i386/pr111736.c 
b/gcc/testsuite/gcc.target/i386/pr111736.c
new file mode 100644
index 00000000000..231fdd07e80
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr111736.c
@@ -0,0 +1,23 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -fsanitize=address" } */
+
+int __seg_gs m;
+
+int foo (void)
+{
+  return m;
+}
+
+extern int  __seg_gs n;
+
+int bar (void)
+{
+  return n;
+}
+
+int baz (int __seg_gs *o)
+{
+  return *o;
+}
+
+/* { dg-final { scan-assembler-not "asan_report_load" } } */


[gcc r11-11349] sanitizer/111736 - skip ASAN for globals in alternate address-space

2024-04-24 Thread Uros Bizjak via Gcc-cvs
https://gcc.gnu.org/g:b86b523fb53f5ffb0e3f3236fc526a587944d9ea

commit r11-11349-gb86b523fb53f5ffb0e3f3236fc526a587944d9ea
Author: Richard Biener 
Date:   Tue Dec 5 14:00:43 2023 +0100

sanitizer/111736 - skip ASAN for globals in alternate address-space

gcc/ChangeLog:

PR sanitizer/111736
* asan.c (asan_protect_global): Do not protect globals
in non-generic address-space.

(cherry picked from commit 7e40497805c0831596334fe474112f991276e11b)

Diff:
---
 gcc/asan.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/gcc/asan.c b/gcc/asan.c
index 2aa2be13bf6..b92c6007c97 100644
--- a/gcc/asan.c
+++ b/gcc/asan.c
@@ -2238,6 +2238,8 @@ asan_protect_global (tree decl, bool 
ignore_decl_rtl_set_p)
   || (DECL_SECTION_NAME (decl) != NULL
  && !symtab_node::get (decl)->implicit_section
  && !section_sanitized_p (DECL_SECTION_NAME (decl)))
+  /* Don't protect variables in non-generic address-space.  */
+  || !ADDR_SPACE_GENERIC_P (TYPE_ADDR_SPACE (TREE_TYPE (decl)))
   || DECL_SIZE (decl) == 0
   || ASAN_RED_ZONE_SIZE * BITS_PER_UNIT > MAX_OFILE_ALIGNMENT
   || TREE_CODE (DECL_SIZE_UNIT (decl)) != INTEGER_CST


[gcc r12-10390] tsan: Don't instrument non-generic AS accesses [PR111736]

2024-04-23 Thread Uros Bizjak via Gcc-cvs
https://gcc.gnu.org/g:48fd1c5791b47717dcd4fa5615bc07cf54e964a7

commit r12-10390-g48fd1c5791b47717dcd4fa5615bc07cf54e964a7
Author: Jakub Jelinek 
Date:   Tue Mar 26 11:06:15 2024 +0100

tsan: Don't instrument non-generic AS accesses [PR111736]

Similar to the asan and ubsan changes, we shouldn't instrument non-generic
address space accesses with tsan, because we just have library functions
which take address of the objects as generic address space pointers, so they
can't handle anything else.

2024-03-26  Jakub Jelinek  

PR sanitizer/111736
* tsan.cc (instrument_expr): Punt on non-generic address space
accesses.

* gcc.dg/tsan/pr111736.c: New test.

(cherry picked from commit 471967ab8b4c49338ba77defbe24b06cc51c0093)

Diff:
---
 gcc/testsuite/gcc.dg/tsan/pr111736.c | 17 +
 gcc/tsan.cc  |  3 +++
 2 files changed, 20 insertions(+)

diff --git a/gcc/testsuite/gcc.dg/tsan/pr111736.c 
b/gcc/testsuite/gcc.dg/tsan/pr111736.c
new file mode 100644
index 00000000000..34ab88b3d4f
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/tsan/pr111736.c
@@ -0,0 +1,17 @@
+/* PR sanitizer/111736 */
+/* { dg-do compile { target i?86-*-* x86_64-*-* } } */
+/* { dg-options "-fsanitize=thread -fdump-tree-optimized -ffat-lto-objects" } 
*/
+/* { dg-final { scan-tree-dump-not "__tsan_read" "optimized" } } */
+/* { dg-final { scan-tree-dump-not "__tsan_write" "optimized" } } */
+
+#ifdef __x86_64__
+#define SEG __seg_fs
+#else
+#define SEG __seg_gs
+#endif
+
+void
+foo (int SEG *p, int SEG *q)
+{
+  *q = *p;
+}
diff --git a/gcc/tsan.cc b/gcc/tsan.cc
index 02e8ac7cf79..2cdda561dbf 100644
--- a/gcc/tsan.cc
+++ b/gcc/tsan.cc
@@ -139,6 +139,9 @@ instrument_expr (gimple_stmt_iterator gsi, tree expr, bool 
is_write)
   if (TREE_READONLY (base) || (VAR_P (base) && DECL_HARD_REGISTER (base)))
 return false;
 
+  if (!ADDR_SPACE_GENERIC_P (TYPE_ADDR_SPACE (TREE_TYPE (base))))
+return false;
+
   stmt = gsi_stmt (gsi);
   loc = gimple_location (stmt);
   rhs = is_vptr_store (stmt, expr, is_write);


[gcc r12-10388] tree-optimization/111736 - avoid address sanitizing of __seg_gs

2024-04-23 Thread Uros Bizjak via Gcc-cvs
https://gcc.gnu.org/g:e89b5ed62a5a06fb8918ffa1616f0f37c8d359c3

commit r12-10388-ge89b5ed62a5a06fb8918ffa1616f0f37c8d359c3
Author: Richard Biener 
Date:   Thu Mar 21 08:30:39 2024 +0100

tree-optimization/111736 - avoid address sanitizing of __seg_gs

The following more thoroughly avoids address sanitizing accesses
to non-generic address-spaces.

PR tree-optimization/111736
* asan.cc (instrument_derefs): Do not instrument accesses
to non-generic address-spaces.

* gcc.target/i386/pr111736.c: New testcase.

(cherry picked from commit 134ef2a8cac1a5cc718739bd7d3b3472947c80d6)

Diff:
---
 gcc/asan.cc  |  4 
 gcc/testsuite/gcc.target/i386/pr111736.c | 23 +++
 2 files changed, 27 insertions(+)

diff --git a/gcc/asan.cc b/gcc/asan.cc
index 0c5afa36cb8..24cf2b8376b 100644
--- a/gcc/asan.cc
+++ b/gcc/asan.cc
@@ -2712,6 +2712,10 @@ instrument_derefs (gimple_stmt_iterator *iter, tree t,
   if (VAR_P (inner) && DECL_HARD_REGISTER (inner))
 return;
 
+  /* Accesses to non-generic address-spaces should not be instrumented.  */
+  if (!ADDR_SPACE_GENERIC_P (TYPE_ADDR_SPACE (TREE_TYPE (inner))))
+return;
+
   poly_int64 decl_size;
   if ((VAR_P (inner) || TREE_CODE (inner) == RESULT_DECL)
   && offset == NULL_TREE
diff --git a/gcc/testsuite/gcc.target/i386/pr111736.c 
b/gcc/testsuite/gcc.target/i386/pr111736.c
new file mode 100644
index 000..231fdd07e80
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr111736.c
@@ -0,0 +1,23 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -fsanitize=address" } */
+
+int __seg_gs m;
+
+int foo (void)
+{
+  return m;
+}
+
+extern int  __seg_gs n;
+
+int bar (void)
+{
+  return n;
+}
+
+int baz (int __seg_gs *o)
+{
+  return *o;
+}
+
+/* { dg-final { scan-assembler-not "asan_report_load" } } */


[gcc r12-10389] ubsan: Don't -fsanitize=null instrument __seg_fs/gs pointers [PR111736]

2024-04-23 Thread Uros Bizjak via Gcc-cvs
https://gcc.gnu.org/g:d6c62e4fb9a6d395599b7c78c831bace4bc7ff8f

commit r12-10389-gd6c62e4fb9a6d395599b7c78c831bace4bc7ff8f
Author: Jakub Jelinek 
Date:   Fri Mar 22 09:23:44 2024 +0100

ubsan: Don't -fsanitize=null instrument __seg_fs/gs pointers [PR111736]

On x86 and avr some address spaces allow 0 pointers (on avr actually
even generic as, but libsanitizer isn't ported to it and
I'm not convinced we should completely kill -fsanitize=null in that
case).
The following patch makes sure those aren't diagnosed for -fsanitize=null,
though they are still sanitized for -fsanitize=alignment.

2024-03-22  Jakub Jelinek  

PR sanitizer/111736
* ubsan.cc (ubsan_expand_null_ifn, instrument_mem_ref): Avoid
SANITIZE_NULL instrumentation for non-generic address spaces
for which targetm.addr_space.zero_address_valid (as) is true.

* gcc.dg/ubsan/pr111736.c: New test.

(cherry picked from commit ddd4a3ca87410886b039cc225907b4f6e650082e)

Diff:
---
 gcc/testsuite/gcc.dg/ubsan/pr111736.c | 23 +++
 gcc/ubsan.cc  | 19 +--
 2 files changed, 40 insertions(+), 2 deletions(-)

diff --git a/gcc/testsuite/gcc.dg/ubsan/pr111736.c 
b/gcc/testsuite/gcc.dg/ubsan/pr111736.c
new file mode 100644
index 00000000000..359b31828f0
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/ubsan/pr111736.c
@@ -0,0 +1,23 @@
+/* PR sanitizer/111736 */
+/* { dg-do compile { target i?86-*-* x86_64-*-* } } */
+/* { dg-options "-fsanitize=null,alignment -fdump-tree-optimized 
-ffat-lto-objects" } */
+/* { dg-final { scan-tree-dump-times "__ubsan_handle_type_mismatch" 1 
"optimized" } } */
+/* { dg-final { scan-tree-dump-not "p_\[0-9]*.D. \[=!]= 0" "optimized" } } */
+
+#ifdef __x86_64__
+#define SEG __seg_fs
+#else
+#define SEG __seg_gs
+#endif
+
+int
+foo (int SEG *p, int *q)
+{
+  return *p;
+}
+
+__attribute__((no_sanitize("alignment"))) int
+bar (int SEG *p, int *q)
+{
+  return *p;
+}
diff --git a/gcc/ubsan.cc b/gcc/ubsan.cc
index 0f5b372b195..4d8e7cd86c5 100644
--- a/gcc/ubsan.cc
+++ b/gcc/ubsan.cc
@@ -49,6 +49,7 @@ along with GCC; see the file COPYING3.  If not see
 #include "tree-cfg.h"
 #include "gimple-fold.h"
 #include "varasm.h"
+#include "target.h"
 
 /* Map from a tree to a VAR_DECL tree.  */
 
@@ -784,6 +785,13 @@ ubsan_expand_null_ifn (gimple_stmt_iterator *gsip)
}
 }
   check_null = sanitize_flags_p (SANITIZE_NULL);
+  if (check_null && POINTER_TYPE_P (TREE_TYPE (ptr)))
+{
+  addr_space_t as = TYPE_ADDR_SPACE (TREE_TYPE (TREE_TYPE (ptr)));
+  if (!ADDR_SPACE_GENERIC_P (as)
+ && targetm.addr_space.zero_address_valid (as))
+   check_null = false;
+}
 
   if (check_align == NULL_TREE && !check_null)
 {
@@ -1375,8 +1383,15 @@ instrument_mem_ref (tree mem, tree base, 
gimple_stmt_iterator *iter,
   if (align <= 1)
align = 0;
 }
-  if (align == 0 && !sanitize_flags_p (SANITIZE_NULL))
-return;
+  if (align == 0)
+{
+  if (!sanitize_flags_p (SANITIZE_NULL))
+   return;
+  addr_space_t as = TYPE_ADDR_SPACE (TREE_TYPE (base));
+  if (!ADDR_SPACE_GENERIC_P (as)
+ && targetm.addr_space.zero_address_valid (as))
+   return;
+}
   tree t = TREE_OPERAND (base, 0);
   if (!POINTER_TYPE_P (TREE_TYPE (t)))
 return;


[gcc r12-10387] sanitizer/111736 - skip ASAN for globals in alternate address-space

2024-04-23 Thread Uros Bizjak via Gcc-cvs
https://gcc.gnu.org/g:61d1962e7c3c32da6962d9cb20f6fd996501f3f2

commit r12-10387-g61d1962e7c3c32da6962d9cb20f6fd996501f3f2
Author: Richard Biener 
Date:   Tue Dec 5 14:00:43 2023 +0100

sanitizer/111736 - skip ASAN for globals in alternate address-space

PR sanitizer/111736
* asan.cc (asan_protect_global): Do not protect globals
in non-generic address-space.

(cherry picked from commit 7e40497805c0831596334fe474112f991276e11b)

Diff:
---
 gcc/asan.cc | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/gcc/asan.cc b/gcc/asan.cc
index 72d1ef28be8..0c5afa36cb8 100644
--- a/gcc/asan.cc
+++ b/gcc/asan.cc
@@ -2251,6 +2251,8 @@ asan_protect_global (tree decl, bool 
ignore_decl_rtl_set_p)
   || (DECL_SECTION_NAME (decl) != NULL
  && !symtab_node::get (decl)->implicit_section
  && !section_sanitized_p (DECL_SECTION_NAME (decl)))
+  /* Don't protect variables in non-generic address-space.  */
+  || !ADDR_SPACE_GENERIC_P (TYPE_ADDR_SPACE (TREE_TYPE (decl)))
   || DECL_SIZE (decl) == 0
   || ASAN_RED_ZONE_SIZE * BITS_PER_UNIT > MAX_OFILE_ALIGNMENT
   || TREE_CODE (DECL_SIZE_UNIT (decl)) != INTEGER_CST


[gcc r14-9847] combine: Fix ICE in try_combine on pr112494.c [PR112560]

2024-04-08 Thread Uros Bizjak via Gcc-cvs
https://gcc.gnu.org/g:eaccdba315b86d374a4e72b9dd8fefb0fc3cc5ee

commit r14-9847-geaccdba315b86d374a4e72b9dd8fefb0fc3cc5ee
Author: Uros Bizjak 
Date:   Mon Apr 8 20:54:30 2024 +0200

combine: Fix ICE in try_combine on pr112494.c [PR112560]

The compiler, configured with --enable-checking=yes,rtl,extra ICEs with:

internal compiler error: RTL check: expected elt 0 type 'e' or 'u', have 'E' (rtx unspec) in try_combine, at combine.cc:3237

This is

3236  /* Just replace the CC reg with a new mode.  */
3237  SUBST (XEXP (*cc_use_loc, 0), newpat_dest);
3238  undobuf.other_insn = cc_use_insn;

in combine.cc, where *cc_use_loc is

(unspec:DI [
(reg:CC 17 flags)
] UNSPEC_PUSHFL)

combine assumes CC must be used inside of a comparison and uses XEXP (..., 0)
without checking on the RTX type of the argument.

Replace cc_use_loc with the entire new RTX only in case cc_use_loc satisfies
COMPARISON_P predicate.  Otherwise scan the entire cc_use_loc RTX for CC reg
to be updated with a new mode.

PR rtl-optimization/112560

gcc/ChangeLog:

* combine.cc (try_combine): Replace cc_use_loc with the entire
new RTX only in case cc_use_loc satisfies COMPARISON_P predicate.
Otherwise scan the entire cc_use_loc RTX for CC reg to be updated
with a new mode.
* config/i386/i386.md (@pushfl<mode>2): Allow all CC modes for
operand 1.

Diff:
---
 gcc/combine.cc  | 16 +---
 gcc/config/i386/i386.md |  4 ++--
 2 files changed, 15 insertions(+), 5 deletions(-)

diff --git a/gcc/combine.cc b/gcc/combine.cc
index 745391016d0..71c9abc145c 100644
--- a/gcc/combine.cc
+++ b/gcc/combine.cc
@@ -3222,8 +3222,7 @@ try_combine (rtx_insn *i3, rtx_insn *i2, rtx_insn *i1, 
rtx_insn *i0,
 #endif
  /* Cases for modifying the CC-using comparison.  */
  if (compare_code != orig_compare_code
- /* ??? Do we need to verify the zero rtx?  */
- && XEXP (*cc_use_loc, 1) == const0_rtx)
+ && COMPARISON_P (*cc_use_loc))
{
  /* Replace cc_use_loc with entire new RTX.  */
  SUBST (*cc_use_loc,
@@ -3233,8 +3232,19 @@ try_combine (rtx_insn *i3, rtx_insn *i2, rtx_insn *i1, 
rtx_insn *i0,
}
  else if (compare_mode != orig_compare_mode)
{
+ subrtx_ptr_iterator::array_type array;
+
  /* Just replace the CC reg with a new mode.  */
- SUBST (XEXP (*cc_use_loc, 0), newpat_dest);
+ FOR_EACH_SUBRTX_PTR (iter, array, cc_use_loc, NONCONST)
+   {
+ rtx *loc = *iter;
+ if (REG_P (*loc)
+ && REGNO (*loc) == REGNO (newpat_dest))
+   {
+ SUBST (*loc, newpat_dest);
+ iter.skip_subrtxes ();
+   }
+   }
  undobuf.other_insn = cc_use_insn;
}
}
diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md
index bb2c72f3473..10ae3113ae8 100644
--- a/gcc/config/i386/i386.md
+++ b/gcc/config/i386/i386.md
@@ -2219,9 +2219,9 @@
 
 (define_insn "@pushfl2"
   [(set (match_operand:W 0 "push_operand" "=<")
-   (unspec:W [(match_operand:CC 1 "flags_reg_operand")]
+   (unspec:W [(match_operand 1 "flags_reg_operand")]
  UNSPEC_PUSHFL))]
-  ""
+  "GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_CC"
   "pushf{}"
   [(set_attr "type" "push")
(set_attr "mode" "")])


[gcc r12-10284] testsuite/i386: Correct pr111822.C dg-do options [PR111822]

2024-03-19 Thread Uros Bizjak via Gcc-cvs
https://gcc.gnu.org/g:f6ed0466d40de496b14225fae44acf618dac1fd2

commit r12-10284-gf6ed0466d40de496b14225fae44acf618dac1fd2
Author: Uros Bizjak 
Date:   Tue Mar 19 16:57:50 2024 +0100

testsuite/i386: Correct pr111822.C dg-do options [PR111822]

PR target/111822

gcc/testsuite/ChangeLog:

* g++.target/i386/pr111822.C (dg-do): Compile only for ia32 targets.
(dg-options): Add -march=x86-64.

Diff:
---
 gcc/testsuite/g++.target/i386/pr111822.C | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/gcc/testsuite/g++.target/i386/pr111822.C 
b/gcc/testsuite/g++.target/i386/pr111822.C
index d405387b23c..0829c91f270 100644
--- a/gcc/testsuite/g++.target/i386/pr111822.C
+++ b/gcc/testsuite/g++.target/i386/pr111822.C
@@ -1,6 +1,6 @@
 /* PR target/111822 */
-/* { dg-do compile } */
-/* { dg-options "-O2 -flive-range-shrinkage -fno-dce -fnon-call-exceptions" } 
*/
+/* { dg-do compile { target ia32 } } */
+/* { dg-options "-O2 -flive-range-shrinkage -fno-dce -fnon-call-exceptions 
-march=x86-64" } */
 
 typedef union {
   int *pNativeClosure;


[gcc r13-8466] i386: Unify {general, timode}_scalar_chain::convert_op [PR111822]

2024-03-19 Thread Uros Bizjak via Gcc-cvs
https://gcc.gnu.org/g:1a6d04fce7d78b9e5201333be0c0877390f81bc3

commit r13-8466-g1a6d04fce7d78b9e5201333be0c0877390f81bc3
Author: Uros Bizjak 
Date:   Tue Mar 19 16:56:11 2024 +0100

i386: Unify {general,timode}_scalar_chain::convert_op [PR111822]

Recent PR111822 fix implemented REG_EH_REGION note copying to an STV-converted
preload instruction in general_scalar_chain::convert_op.  However, the same
issue remains in timode_scalar_chain::convert_op.  Instead of copying the
newly introduced code to timode_scalar_chain::convert_op, the patch unifies
both functions to a common function.

PR target/111822

gcc/ChangeLog:

* config/i386/i386-features.cc (smode_convert_cst): New function
to handle SImode, DImode and TImode immediates.
(scalar_chain::convert_op): Unify from
general_scalar_chain::convert_op and 
timode_scalar_chain::convert_op.
(general_scalar_chain::convert_op): Remove.
(timode_scalar_chain::convert_op): Remove.
* config/i386/i386-features.h (class scalar_chain):
Redeclare convert_op as protected class member.
(class general_scalar_chain): Remove convert_op.
(class timode_scalar_chain): Ditto.

gcc/testsuite/ChangeLog:

* g++.target/i386/pr111822.C (dg-do): Compile only for ia32 targets.
(dg-options): Add -march=x86-64.

Diff:
---
 gcc/config/i386/i386-features.cc | 119 ++-
 gcc/config/i386/i386-features.h  |   4 +-
 gcc/testsuite/g++.target/i386/pr111822.C |   4 +-
 3 files changed, 39 insertions(+), 88 deletions(-)

diff --git a/gcc/config/i386/i386-features.cc b/gcc/config/i386/i386-features.cc
index ed3055b43f8..34c320d7eae 100644
--- a/gcc/config/i386/i386-features.cc
+++ b/gcc/config/i386/i386-features.cc
@@ -896,14 +896,35 @@ scalar_chain::convert_reg (rtx_insn *insn, rtx dst, rtx 
src)
 REGNO (src), REGNO (dst), INSN_UID (insn));
 }
 
+/* Helper function to convert immediate constant X to vmode.  */
+static rtx
+smode_convert_cst (rtx x, enum machine_mode vmode)
+{
+  /* Prefer all ones vector in case of -1.  */
+  if (constm1_operand (x, GET_MODE (x)))
+return CONSTM1_RTX (vmode);
+
+  unsigned n = GET_MODE_NUNITS (vmode);
+  rtx *v = XALLOCAVEC (rtx, n);
+  v[0] = x;
+  for (unsigned i = 1; i < n; ++i)
+v[i] = const0_rtx;
+  return gen_rtx_CONST_VECTOR (vmode, gen_rtvec_v (n, v));
+}
+
 /* Convert operand OP in INSN.  We should handle
memory operands and uninitialized registers.
All other register uses are converted during
registers conversion.  */
 
 void
-general_scalar_chain::convert_op (rtx *op, rtx_insn *insn)
+scalar_chain::convert_op (rtx *op, rtx_insn *insn)
 {
+  rtx tmp;
+
+  if (GET_MODE (*op) == V1TImode)
+return;
+
   *op = copy_rtx_if_shared (*op);
 
   if (GET_CODE (*op) == NOT)
@@ -913,20 +934,21 @@ general_scalar_chain::convert_op (rtx *op, rtx_insn *insn)
 }
   else if (MEM_P (*op))
 {
-  rtx_insn* eh_insn, *movabs = NULL;
-  rtx tmp = gen_reg_rtx (GET_MODE (*op));
+  rtx_insn *movabs = NULL;
 
   /* Emit MOVABS to load from a 64-bit absolute address to a GPR.  */
   if (!memory_operand (*op, GET_MODE (*op)))
{
- rtx tmp2 = gen_reg_rtx (GET_MODE (*op));
- movabs = emit_insn_before (gen_rtx_SET (tmp2, *op), insn);
+ tmp = gen_reg_rtx (GET_MODE (*op));
+ movabs = emit_insn_before (gen_rtx_SET (tmp, *op), insn);
 
- *op = tmp2;
+ *op = tmp;
}
 
-  eh_insn
-   = emit_insn_before (gen_rtx_SET (gen_rtx_SUBREG (vmode, tmp, 0),
+  tmp = gen_rtx_SUBREG (vmode, gen_reg_rtx (GET_MODE (*op)), 0);
+
+  rtx_insn *eh_insn
+   = emit_insn_before (gen_rtx_SET (copy_rtx (tmp),
 gen_gpr_to_xmm_move_src (vmode, *op)),
insn);
 
@@ -943,33 +965,17 @@ general_scalar_chain::convert_op (rtx *op, rtx_insn *insn)
}
}
 
-  *op = gen_rtx_SUBREG (vmode, tmp, 0);
+  *op = tmp;
 
   if (dump_file)
fprintf (dump_file, "  Preloading operand for insn %d into r%d\n",
 INSN_UID (insn), REGNO (tmp));
 }
   else if (REG_P (*op))
+*op = gen_rtx_SUBREG (vmode, *op, 0);
+  else if (CONST_SCALAR_INT_P (*op))
 {
-  *op = gen_rtx_SUBREG (vmode, *op, 0);
-}
-  else if (CONST_INT_P (*op))
-{
-  rtx vec_cst;
-  rtx tmp = gen_rtx_SUBREG (vmode, gen_reg_rtx (smode), 0);
-
-  /* Prefer all ones vector in case of -1.  */
-  if (constm1_operand (*op, GET_MODE (*op)))
-   vec_cst = CONSTM1_RTX (vmode);
-  else
-   {
- unsigned n = GET_MODE_NUNITS (vmode);
- rtx *v = XALLOCAVEC (rtx, n);
- v[0] = *op;
- for (unsigned i = 1; i < n; ++i)
-   v[i] = const0_rtx;
- vec_cst = gen_rtx_CONST_VECTOR (vmode, gen_rtvec

[gcc r14-9523] i386: Unify {general, timode}_scalar_chain::convert_op [PR111822]

2024-03-18 Thread Uros Bizjak via Gcc-cvs
https://gcc.gnu.org/g:b96c5436880d7926299314a33c953171082ab59e

commit r14-9523-gb96c5436880d7926299314a33c953171082ab59e
Author: Uros Bizjak 
Date:   Mon Mar 18 20:40:29 2024 +0100

i386: Unify {general,timode}_scalar_chain::convert_op [PR111822]

Recent PR111822 fix implemented REG_EH_REGION note copying to an STV-converted
preload instruction in general_scalar_chain::convert_op.  However, the same
issue remains in timode_scalar_chain::convert_op.  Instead of copying the
newly introduced code to timode_scalar_chain::convert_op, the patch unifies
both functions to a common function.

PR target/111822

gcc/ChangeLog:

* config/i386/i386-features.cc (smode_convert_cst): New function
to handle SImode, DImode and TImode immediates, generalized from
timode_convert_cst.
(timode_convert_cst): Remove.
(scalar_chain::convert_op): Unify from
general_scalar_chain::convert_op and 
timode_scalar_chain::convert_op.
(general_scalar_chain::convert_op): Remove.
(timode_scalar_chain::convert_op): Remove.
(timode_scalar_chain::convert_insn): Update the call to
renamed timode_convert_cst.
* config/i386/i386-features.h (class scalar_chain):
Redeclare convert_op as protected class member.
(class general_scalar_chain): Remove convert_op.
(class timode_scalar_chain): Ditto.

gcc/testsuite/ChangeLog:

* g++.target/i386/pr111822.C (dg-do): Compile only for ia32 targets.
(dg-options): Add -march=x86-64.

Diff:
---
 gcc/config/i386/i386-features.cc | 127 +--
 gcc/config/i386/i386-features.h  |   4 +-
 gcc/testsuite/g++.target/i386/pr111822.C |   4 +-
 3 files changed, 40 insertions(+), 95 deletions(-)

diff --git a/gcc/config/i386/i386-features.cc b/gcc/config/i386/i386-features.cc
index c7d7a965901..e3e004d5526 100644
--- a/gcc/config/i386/i386-features.cc
+++ b/gcc/config/i386/i386-features.cc
@@ -980,14 +980,35 @@ scalar_chain::convert_reg (rtx_insn *insn, rtx dst, rtx 
src)
 REGNO (src), REGNO (dst), INSN_UID (insn));
 }
 
+/* Helper function to convert immediate constant X to vmode.  */
+static rtx
+smode_convert_cst (rtx x, enum machine_mode vmode)
+{
+  /* Prefer all ones vector in case of -1.  */
+  if (constm1_operand (x, GET_MODE (x)))
+return CONSTM1_RTX (vmode);
+
+  unsigned n = GET_MODE_NUNITS (vmode);
+  rtx *v = XALLOCAVEC (rtx, n);
+  v[0] = x;
+  for (unsigned i = 1; i < n; ++i)
+v[i] = const0_rtx;
+  return gen_rtx_CONST_VECTOR (vmode, gen_rtvec_v (n, v));
+}
+
 /* Convert operand OP in INSN.  We should handle
memory operands and uninitialized registers.
All other register uses are converted during
registers conversion.  */
 
 void
-general_scalar_chain::convert_op (rtx *op, rtx_insn *insn)
+scalar_chain::convert_op (rtx *op, rtx_insn *insn)
 {
+  rtx tmp;
+
+  if (GET_MODE (*op) == V1TImode)
+return;
+
   *op = copy_rtx_if_shared (*op);
 
   if (GET_CODE (*op) == NOT
@@ -998,20 +1019,21 @@ general_scalar_chain::convert_op (rtx *op, rtx_insn 
*insn)
 }
   else if (MEM_P (*op))
 {
-  rtx_insn* eh_insn, *movabs = NULL;
-  rtx tmp = gen_reg_rtx (GET_MODE (*op));
+  rtx_insn *movabs = NULL;
 
   /* Emit MOVABS to load from a 64-bit absolute address to a GPR.  */
   if (!memory_operand (*op, GET_MODE (*op)))
{
- rtx tmp2 = gen_reg_rtx (GET_MODE (*op));
- movabs = emit_insn_before (gen_rtx_SET (tmp2, *op), insn);
+ tmp = gen_reg_rtx (GET_MODE (*op));
+ movabs = emit_insn_before (gen_rtx_SET (tmp, *op), insn);
 
- *op = tmp2;
+ *op = tmp;
}
 
-  eh_insn
-   = emit_insn_before (gen_rtx_SET (gen_rtx_SUBREG (vmode, tmp, 0),
+  tmp = gen_rtx_SUBREG (vmode, gen_reg_rtx (GET_MODE (*op)), 0);
+
+  rtx_insn *eh_insn
+   = emit_insn_before (gen_rtx_SET (copy_rtx (tmp),
 gen_gpr_to_xmm_move_src (vmode, *op)),
insn);
 
@@ -1028,33 +1050,17 @@ general_scalar_chain::convert_op (rtx *op, rtx_insn 
*insn)
}
}
 
-  *op = gen_rtx_SUBREG (vmode, tmp, 0);
+  *op = tmp;
 
   if (dump_file)
fprintf (dump_file, "  Preloading operand for insn %d into r%d\n",
 INSN_UID (insn), REGNO (tmp));
 }
   else if (REG_P (*op))
+*op = gen_rtx_SUBREG (vmode, *op, 0);
+  else if (CONST_SCALAR_INT_P (*op))
 {
-  *op = gen_rtx_SUBREG (vmode, *op, 0);
-}
-  else if (CONST_INT_P (*op))
-{
-  rtx vec_cst;
-  rtx tmp = gen_rtx_SUBREG (vmode, gen_reg_rtx (smode), 0);
-
-  /* Prefer all ones vector in case of -1.  */
-  if (constm1_operand (*op, GET_MODE (*op)))
-   vec_cst = CONSTM1_RTX (vmode);
-  else
-   {
- unsigned n = GET_MODE_NUNITS 

[gcc r14-9346] i386: Fix and improve insn constraint for V2QI arithmetic/shift insns

2024-03-06 Thread Uros Bizjak via Gcc-cvs
https://gcc.gnu.org/g:74e8cc28eda9b1d75588fcd4017a735911b9d2b4

commit r14-9346-g74e8cc28eda9b1d75588fcd4017a735911b9d2b4
Author: Uros Bizjak 
Date:   Wed Mar 6 20:53:50 2024 +0100

i386: Fix and improve insn constraint for V2QI arithmetic/shift insns

optimize_function_for_size_p predicate is not stable during optab selection,
because it also depends on node->count/node->frequency of the current function,
which are updated during IPA, so they may change between early opts and
late opts.  Use optimize_size instead - optimize_size implies
optimize_function_for_size_p (cfun), so if a named pattern uses
"&& optimize_size" and the insn it splits into uses
optimize_function_for_size_p (cfun), it shouldn't fail.

PR target/114232

gcc/ChangeLog:

* config/i386/mmx.md (negv2qi2): Enable for optimize_size instead
of optimize_function_for_size_p.  Explicitly enable for TARGET_SSE2.
(negv2qi SSE reg splitter): Enable for TARGET_SSE2 only.
(<plusminus:insn>v2qi3): Enable for optimize_size instead
of optimize_function_for_size_p.  Explicitly enable for TARGET_SSE2.
(<plusminus:insn>v2qi SSE reg splitter): Enable for TARGET_SSE2
only.
(<any_shift:insn>v2qi3): Enable for optimize_size instead
of optimize_function_for_size_p.
Diff:
---
 gcc/config/i386/mmx.md | 33 +++--
 1 file changed, 23 insertions(+), 10 deletions(-)

diff --git a/gcc/config/i386/mmx.md b/gcc/config/i386/mmx.md
index 2856ae6ffef..9a8d6030d8b 100644
--- a/gcc/config/i386/mmx.md
+++ b/gcc/config/i386/mmx.md
@@ -2874,11 +2874,18 @@
 (neg:V2QI
  (match_operand:V2QI 1 "register_operand" "0,Yw")))
(clobber (reg:CC FLAGS_REG))]
-  "!TARGET_PARTIAL_REG_STALL || optimize_function_for_size_p (cfun)"
+  "!TARGET_PARTIAL_REG_STALL || optimize_size || TARGET_SSE2"
   "#"
   [(set_attr "isa" "*,sse2")
(set_attr "type" "multi")
-   (set_attr "mode" "QI,TI")])
+   (set_attr "mode" "QI,TI")
+   (set (attr "enabled")
+   (cond [(and (eq_attr "alternative" "0")
+   (and (match_test "TARGET_PARTIAL_REG_STALL")
+(not (match_test "optimize_function_for_size_p 
(cfun)"
+   (symbol_ref "false")
+ ]
+ (const_string "*")))])
 
 (define_split
   [(set (match_operand:V2QI 0 "general_reg_operand")
@@ -2912,8 +2919,7 @@
 (neg:V2QI
  (match_operand:V2QI 1 "sse_reg_operand")))
(clobber (reg:CC FLAGS_REG))]
-  "(!TARGET_PARTIAL_REG_STALL || optimize_function_for_size_p (cfun))
-   && TARGET_SSE2 && reload_completed"
+  "TARGET_SSE2 && reload_completed"
   [(set (match_dup 0) (match_dup 2))
(set (match_dup 0)
(minus:V16QI (match_dup 0) (match_dup 1)))]
@@ -2975,11 +2981,18 @@
  (match_operand:V2QI 1 "register_operand" "0,0,Yw")
  (match_operand:V2QI 2 "register_operand" "Q,x,Yw")))
(clobber (reg:CC FLAGS_REG))]
-  "!TARGET_PARTIAL_REG_STALL || optimize_function_for_size_p (cfun)"
+  "!TARGET_PARTIAL_REG_STALL || optimize_size || TARGET_SSE2"
   "#"
   [(set_attr "isa" "*,sse2_noavx,avx")
(set_attr "type" "multi,sseadd,sseadd")
-   (set_attr "mode" "QI,TI,TI")])
+   (set_attr "mode" "QI,TI,TI")
+   (set (attr "enabled")
+   (cond [(and (eq_attr "alternative" "0")
+   (and (match_test "TARGET_PARTIAL_REG_STALL")
+(not (match_test "optimize_function_for_size_p 
(cfun)"
+   (symbol_ref "false")
+ ]
+ (const_string "*")))])
 
 (define_split
   [(set (match_operand:V2QI 0 "general_reg_operand")
@@ -3021,8 +3034,7 @@
  (match_operand:V2QI 1 "sse_reg_operand")
  (match_operand:V2QI 2 "sse_reg_operand")))
(clobber (reg:CC FLAGS_REG))]
-  "(!TARGET_PARTIAL_REG_STALL || optimize_function_for_size_p (cfun))
-   && TARGET_SSE2 && reload_completed"
+  "TARGET_SSE2 && reload_completed"
   [(set (match_dup 0)
 (plusminus:V16QI (match_dup 1) (match_dup 2)))]
 {
@@ -3684,9 +3696,10 @@
  (match_operand:V2QI 1 "register_operand" "0")
  (match_operand:QI 2 "nonmemory_operand" "cI")))
(clobber (reg:CC FLAGS_REG))]
-  "!TARGET_PARTIAL_REG_STALL || optimize_function_for_size_p (cfun)"
+  "!TARGET_PARTIAL_REG_STALL || optimize_size"
   "#"
-  "&& reload_completed"
+  "(!TARGET_PARTIAL_REG_STALL || optimize_function_for_size_p (cfun))
+   && reload_completed"
   [(parallel
  [(set (zero_extract:HI (match_dup 3) (const_int 8) (const_int 8))
   (subreg:HI


[gcc r14-9338] i386: Eliminate common code from x86_32 TARGET_MACHO part in ix86_expand_move

2024-03-06 Thread Uros Bizjak via Gcc-cvs
https://gcc.gnu.org/g:e772c0c05c36d0b0539effb4256be67bbedd77fb

commit r14-9338-ge772c0c05c36d0b0539effb4256be67bbedd77fb
Author: Uros Bizjak 
Date:   Wed Mar 6 17:08:25 2024 +0100

i386: Eliminate common code from x86_32 TARGET_MACHO part in ix86_expand_move

Eliminate common code from x86_32 TARGET_MACHO part in ix86_expand_move and
use generic code instead.

No functional changes.

gcc/ChangeLog:

* config/i386/i386-expand.cc (ix86_expand_move) [TARGET_MACHO]:
Eliminate common code and use generic code instead.

Diff:
---
 gcc/config/i386/i386-expand.cc | 37 +++--
 1 file changed, 11 insertions(+), 26 deletions(-)

diff --git a/gcc/config/i386/i386-expand.cc b/gcc/config/i386/i386-expand.cc
index 3b1685ae448..2210e6f7cc8 100644
--- a/gcc/config/i386/i386-expand.cc
+++ b/gcc/config/i386/i386-expand.cc
@@ -471,9 +471,9 @@ ix86_expand_move (machine_mode mode, rtx operands[])
   if ((flag_pic || MACHOPIC_INDIRECT)
   && symbolic_operand (op1, mode))
 {
+#if TARGET_MACHO
   if (TARGET_MACHO && !TARGET_64BIT)
{
-#if TARGET_MACHO
  /* dynamic-no-pic */
  if (MACHOPIC_INDIRECT)
{
@@ -490,33 +490,18 @@ ix86_expand_move (machine_mode mode, rtx operands[])
  emit_insn (insn);
  return;
}
- if (GET_CODE (op0) == MEM)
-   op1 = force_reg (Pmode, op1);
- else
-   {
- rtx temp = op0;
- if (GET_CODE (temp) != REG)
-   temp = gen_reg_rtx (Pmode);
- temp = legitimize_pic_address (op1, temp);
- if (temp == op0)
-   return;
- op1 = temp;
-   }
-  /* dynamic-no-pic */
-#endif
}
-  else
+#endif
+
+  if (MEM_P (op0))
+   op1 = force_reg (mode, op1);
+  else if (!(TARGET_64BIT && x86_64_movabs_operand (op1, DImode)))
{
- if (MEM_P (op0))
-   op1 = force_reg (mode, op1);
- else if (!(TARGET_64BIT && x86_64_movabs_operand (op1, DImode)))
-   {
- rtx reg = can_create_pseudo_p () ? NULL_RTX : op0;
- op1 = legitimize_pic_address (op1, reg);
- if (op0 == op1)
-   return;
- op1 = convert_to_mode (mode, op1, 1);
-   }
+ rtx reg = can_create_pseudo_p () ? NULL_RTX : op0;
+ op1 = legitimize_pic_address (op1, reg);
+ if (op0 == op1)
+   return;
+ op1 = convert_to_mode (mode, op1, 1);
}
 }
   else