Hi,

For a statement like:

  INT = FLOAT > FLOAT ? INT : INT

the vcond implementation in AArch64 is broken: we try to force the
INT operands into a register of the FLOAT mode, which causes an ICE.

This patch fixes this.

Regression tested on aarch64-none-elf with no regressions. More
cases have been added to the testsuite to ensure this is caught
in future.

Thanks,
James Greenhalgh

---
gcc/

        * config/aarch64/aarch64-simd.md
        (aarch64_vcond_internal<mode>): Rename to...
        (aarch64_vcond_internal<mode><mode>): ...This, for integer modes.
        (aarch64_vcond_internal<VDQF_COND:mode><VDQF:mode>): ...This for
        float modes. Clarify all iterator modes.
        (vcond<mode><mode>): Use new name for vcond expanders.
        (vcond<v_cmp_result><mode>): Likewise.
        (vcondu<mode><mode>): Likewise.
        * config/aarch64/iterators.md (VDQF_COND): New.

gcc/testsuite/

        * gcc.target/aarch64/vect-fcm.x: Add cases testing
        FLOAT cmp FLOAT ? INT : INT.
        * gcc.target/aarch64/vect-fcm-eq-d.c: Define ITYPE.
        * gcc.target/aarch64/vect-fcm-eq-f.c: Likewise.
        * gcc.target/aarch64/vect-fcm-ge-d.c: Likewise.
        * gcc.target/aarch64/vect-fcm-ge-f.c: Likewise.
        * gcc.target/aarch64/vect-fcm-gt-d.c: Likewise.
        * gcc.target/aarch64/vect-fcm-gt-f.c: Likewise.
diff --git a/gcc/config/aarch64/aarch64-simd.md b/gcc/config/aarch64/aarch64-simd.md
index 5626b55..6bc7dd7 100644
--- a/gcc/config/aarch64/aarch64-simd.md
+++ b/gcc/config/aarch64/aarch64-simd.md
@@ -1725,7 +1725,7 @@
   DONE;
 })
 
-(define_expand "aarch64_vcond_internal<mode>"
+(define_expand "aarch64_vcond_internal<mode><mode>"
   [(set (match_operand:VDQ 0 "register_operand")
 	(if_then_else:VDQ
 	  (match_operator 3 "comparison_operator"
@@ -1820,14 +1820,14 @@
   DONE;
 })
 
-(define_expand "aarch64_vcond_internal<mode>"
-  [(set (match_operand:VDQF 0 "register_operand")
+(define_expand "aarch64_vcond_internal<VDQF_COND:mode><VDQF:mode>"
+  [(set (match_operand:VDQF_COND 0 "register_operand")
 	(if_then_else:VDQF
 	  (match_operator 3 "comparison_operator"
 	    [(match_operand:VDQF 4 "register_operand")
 	     (match_operand:VDQF 5 "nonmemory_operand")])
-	  (match_operand:VDQF 1 "nonmemory_operand")
-	  (match_operand:VDQF 2 "nonmemory_operand")))]
+	  (match_operand:VDQF_COND 1 "nonmemory_operand")
+	  (match_operand:VDQF_COND 2 "nonmemory_operand")))]
   "TARGET_SIMD"
 {
   int inverse = 0;
@@ -1835,8 +1835,8 @@
   int swap_bsl_operands = 0;
   rtx op1 = operands[1];
   rtx op2 = operands[2];
-  rtx mask = gen_reg_rtx (<V_cmp_result>mode);
-  rtx tmp = gen_reg_rtx (<V_cmp_result>mode);
+  rtx mask = gen_reg_rtx (<VDQF_COND:V_cmp_result>mode);
+  rtx tmp = gen_reg_rtx (<VDQF_COND:V_cmp_result>mode);
 
   rtx (*base_comparison) (rtx, rtx, rtx);
   rtx (*complimentary_comparison) (rtx, rtx, rtx);
@@ -1856,7 +1856,7 @@
       /* Fall through.  */
     default:
       if (!REG_P (operands[5]))
-	operands[5] = force_reg (<MODE>mode, operands[5]);
+	operands[5] = force_reg (<VDQF:MODE>mode, operands[5]);
     }
 
   switch (GET_CODE (operands[3]))
@@ -1869,8 +1869,8 @@
     case UNGE:
     case ORDERED:
     case UNORDERED:
-      base_comparison = gen_aarch64_cmge<mode>;
-      complimentary_comparison = gen_aarch64_cmgt<mode>;
+      base_comparison = gen_aarch64_cmge<VDQF:mode>;
+      complimentary_comparison = gen_aarch64_cmgt<VDQF:mode>;
       break;
     case LE:
     case UNLE:
@@ -1878,14 +1878,14 @@
       /* Fall through.  */
     case GT:
     case UNGT:
-      base_comparison = gen_aarch64_cmgt<mode>;
-      complimentary_comparison = gen_aarch64_cmge<mode>;
+      base_comparison = gen_aarch64_cmgt<VDQF:mode>;
+      complimentary_comparison = gen_aarch64_cmge<VDQF:mode>;
       break;
     case EQ:
     case NE:
     case UNEQ:
-      base_comparison = gen_aarch64_cmeq<mode>;
-      complimentary_comparison = gen_aarch64_cmeq<mode>;
+      base_comparison = gen_aarch64_cmeq<VDQF:mode>;
+      complimentary_comparison = gen_aarch64_cmeq<VDQF:mode>;
       break;
     default:
       gcc_unreachable ();
@@ -1913,10 +1913,10 @@
 	  switch (GET_CODE (operands[3]))
 	    {
 	    case LT:
-	      base_comparison = gen_aarch64_cmlt<mode>;
+	      base_comparison = gen_aarch64_cmlt<VDQF:mode>;
 	      break;
 	    case LE:
-	      base_comparison = gen_aarch64_cmle<mode>;
+	      base_comparison = gen_aarch64_cmle<VDQF:mode>;
 	      break;
 	    default:
 	      /* Do nothing, other zero form cases already have the correct
@@ -1959,9 +1959,9 @@
 	 true iff !(a != b && a ORDERED b), swapping the operands to BSL
 	 will then give us (a == b ||  a UNORDERED b) as intended.  */
 
-      emit_insn (gen_aarch64_cmgt<mode> (mask, operands[4], operands[5]));
-      emit_insn (gen_aarch64_cmgt<mode> (tmp, operands[5], operands[4]));
-      emit_insn (gen_ior<v_cmp_result>3 (mask, mask, tmp));
+      emit_insn (gen_aarch64_cmgt<VDQF:mode> (mask, operands[4], operands[5]));
+      emit_insn (gen_aarch64_cmgt<VDQF:mode> (tmp, operands[5], operands[4]));
+      emit_insn (gen_ior<VDQF_COND:v_cmp_result>3 (mask, mask, tmp));
       swap_bsl_operands = 1;
       break;
     case UNORDERED:
@@ -1970,9 +1970,9 @@
      swap_bsl_operands = 1;
      /* Fall through.  */
     case ORDERED:
-      emit_insn (gen_aarch64_cmgt<mode> (tmp, operands[4], operands[5]));
-      emit_insn (gen_aarch64_cmge<mode> (mask, operands[5], operands[4]));
-      emit_insn (gen_ior<v_cmp_result>3 (mask, mask, tmp));
+      emit_insn (gen_aarch64_cmgt<VDQF:mode> (tmp, operands[4], operands[5]));
+      emit_insn (gen_aarch64_cmge<VDQF:mode> (mask, operands[5], operands[4]));
+      emit_insn (gen_ior<VDQF_COND:v_cmp_result>3 (mask, mask, tmp));
       break;
     default:
       gcc_unreachable ();
@@ -1987,16 +1987,16 @@
     /* If we have (a = (b CMP c) ? -1 : 0);
        Then we can simply move the generated mask.  */
 
-    if (op1 == CONSTM1_RTX (<V_cmp_result>mode)
-	&& op2 == CONST0_RTX (<V_cmp_result>mode))
+    if (op1 == CONSTM1_RTX (<VDQF_COND:V_cmp_result>mode)
+	&& op2 == CONST0_RTX (<VDQF_COND:V_cmp_result>mode))
       emit_move_insn (operands[0], mask);
     else
       {
 	if (!REG_P (op1))
-	  op1 = force_reg (<MODE>mode, op1);
+	  op1 = force_reg (<VDQF_COND:MODE>mode, op1);
 	if (!REG_P (op2))
-	  op2 = force_reg (<MODE>mode, op2);
-	emit_insn (gen_aarch64_simd_bsl<mode> (operands[0], mask,
+	  op2 = force_reg (<VDQF_COND:MODE>mode, op2);
+	emit_insn (gen_aarch64_simd_bsl<VDQF_COND:mode> (operands[0], mask,
 					       op1, op2));
       }
 
@@ -2013,7 +2013,7 @@
 	  (match_operand:VALL 2 "nonmemory_operand")))]
   "TARGET_SIMD"
 {
-  emit_insn (gen_aarch64_vcond_internal<mode> (operands[0], operands[1],
+  emit_insn (gen_aarch64_vcond_internal<mode><mode> (operands[0], operands[1],
 					       operands[2], operands[3],
 					       operands[4], operands[5]));
   DONE;
@@ -2029,7 +2029,7 @@
 	  (match_operand:<V_cmp_result> 2 "nonmemory_operand")))]
   "TARGET_SIMD"
 {
-  emit_insn (gen_aarch64_vcond_internal<v_cmp_result> (
+  emit_insn (gen_aarch64_vcond_internal<v_cmp_result><mode> (
 						operands[0], operands[1],
 						operands[2], operands[3],
 						operands[4], operands[5]));
@@ -2046,7 +2046,7 @@
 	  (match_operand:VDQ 2 "nonmemory_operand")))]
   "TARGET_SIMD"
 {
-  emit_insn (gen_aarch64_vcond_internal<mode> (operands[0], operands[1],
+  emit_insn (gen_aarch64_vcond_internal<mode><mode> (operands[0], operands[1],
 					       operands[2], operands[3],
 					       operands[4], operands[5]));
   DONE;
diff --git a/gcc/config/aarch64/iterators.md b/gcc/config/aarch64/iterators.md
index 5945d23..860d4d9 100644
--- a/gcc/config/aarch64/iterators.md
+++ b/gcc/config/aarch64/iterators.md
@@ -83,6 +83,9 @@
 ;; Vector Float modes.
 (define_mode_iterator VDQF [V2SF V4SF V2DF])
 
+;; Modes suitable to use as the return type of a vcond expression.
+(define_mode_iterator VDQF_COND [V2SF V2SI V4SF V4SI V2DF V2DI])
+
 ;; All Float modes.
 (define_mode_iterator VALLF [V2SF V4SF V2DF SF DF])
 
diff --git a/gcc/testsuite/gcc.target/aarch64/vect-fcm-eq-d.c b/gcc/testsuite/gcc.target/aarch64/vect-fcm-eq-d.c
index 19ecd63..6c2e2c8 100644
--- a/gcc/testsuite/gcc.target/aarch64/vect-fcm-eq-d.c
+++ b/gcc/testsuite/gcc.target/aarch64/vect-fcm-eq-d.c
@@ -2,12 +2,13 @@
 /* { dg-options "-O2 -ftree-vectorize -fdump-tree-vect-all -fno-unroll-loops --save-temps -fno-inline" } */
 
 #define FTYPE double
+#define ITYPE long
 #define OP ==
 #define INV_OP !=
 
 #include "vect-fcm.x"
 
-/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 4 "vect" } } */
+/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 8 "vect" } } */
 /* { dg-final { scan-assembler "fcmeq\\tv\[0-9\]+\.2d, v\[0-9\]+\.2d, v\[0-9\]+\.2d" } } */
 /* { dg-final { scan-assembler "fcmeq\\tv\[0-9\]+\.2d, v\[0-9\]+\.2d, 0" } } */
 /* { dg-final { cleanup-tree-dump "vect" } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/vect-fcm-eq-f.c b/gcc/testsuite/gcc.target/aarch64/vect-fcm-eq-f.c
index 30be5ad..5a2109c 100644
--- a/gcc/testsuite/gcc.target/aarch64/vect-fcm-eq-f.c
+++ b/gcc/testsuite/gcc.target/aarch64/vect-fcm-eq-f.c
@@ -2,12 +2,13 @@
 /* { dg-options "-O2 -ftree-vectorize -fdump-tree-vect-all -fno-unroll-loops --save-temps -fno-inline" } */
 
 #define FTYPE float
+#define ITYPE int
 #define OP ==
 #define INV_OP !=
 
 #include "vect-fcm.x"
 
-/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 4 "vect" } } */
+/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 8 "vect" } } */
 /* { dg-final { scan-assembler "fcmeq\\tv\[0-9\]+\.\[24\]s, v\[0-9\]+\.\[24\]s, v\[0-9\]+\.\[24\]s" } } */
 /* { dg-final { scan-assembler "fcmeq\\tv\[0-9\]+\.\[24\]s, v\[0-9\]+\.\[24\]s, 0" } } */
 /* { dg-final { cleanup-tree-dump "vect" } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/vect-fcm-ge-d.c b/gcc/testsuite/gcc.target/aarch64/vect-fcm-ge-d.c
index b922833..8fad799 100644
--- a/gcc/testsuite/gcc.target/aarch64/vect-fcm-ge-d.c
+++ b/gcc/testsuite/gcc.target/aarch64/vect-fcm-ge-d.c
@@ -2,12 +2,13 @@
 /* { dg-options "-O2 -ftree-vectorize -fdump-tree-vect-all -fno-unroll-loops --save-temps -fno-inline" } */
 
 #define FTYPE double
+#define ITYPE long
 #define OP >=
 #define INV_OP <
 
 #include "vect-fcm.x"
 
-/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 4 "vect" } } */
+/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 8 "vect" } } */
 /* { dg-final { scan-assembler "fcmge\\tv\[0-9\]+\.2d, v\[0-9\]+\.2d, v\[0-9\]+\.2d" } } */
 /* { dg-final { scan-assembler "fcmge\\tv\[0-9\]+\.2d, v\[0-9\]+\.2d, 0" } } */
 /* { dg-final { scan-assembler "fcmlt\\tv\[0-9\]+\.2d, v\[0-9\]+\.2d, 0" } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/vect-fcm-ge-f.c b/gcc/testsuite/gcc.target/aarch64/vect-fcm-ge-f.c
index 04d3533..7aab9e6 100644
--- a/gcc/testsuite/gcc.target/aarch64/vect-fcm-ge-f.c
+++ b/gcc/testsuite/gcc.target/aarch64/vect-fcm-ge-f.c
@@ -2,12 +2,13 @@
 /* { dg-options "-O2 -ftree-vectorize -fdump-tree-vect-all -fno-unroll-loops --save-temps -fno-inline" } */
 
 #define FTYPE float
+#define ITYPE int
 #define OP >=
 #define INV_OP <
 
 #include "vect-fcm.x"
 
-/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 4 "vect" } } */
+/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 8 "vect" } } */
 /* { dg-final { scan-assembler "fcmge\\tv\[0-9\]+\.\[24\]s, v\[0-9\]+\.\[24\]s, v\[0-9\]+\.\[24\]s" } } */
 /* { dg-final { scan-assembler "fcmge\\tv\[0-9\]+\.\[24\]s, v\[0-9\]+\.\[24\]s, 0" } } */
 /* { dg-final { scan-assembler "fcmlt\\tv\[0-9\]+\.\[24\]s, v\[0-9\]+\.\[24\]s, 0" } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/vect-fcm-gt-d.c b/gcc/testsuite/gcc.target/aarch64/vect-fcm-gt-d.c
index 421a04a..d26acaa 100644
--- a/gcc/testsuite/gcc.target/aarch64/vect-fcm-gt-d.c
+++ b/gcc/testsuite/gcc.target/aarch64/vect-fcm-gt-d.c
@@ -2,12 +2,13 @@
 /* { dg-options "-O2 -ftree-vectorize -fdump-tree-vect-all -fno-unroll-loops --save-temps -fno-inline" } */
 
 #define FTYPE double
+#define ITYPE long
 #define OP >
 #define INV_OP <=
 
 #include "vect-fcm.x"
 
-/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 4 "vect" } } */
+/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 8 "vect" } } */
 /* { dg-final { scan-assembler "fcmgt\\tv\[0-9\]+\.2d, v\[0-9\]+\.2d, v\[0-9\]+\.2d" } } */
 /* { dg-final { scan-assembler "fcmgt\\tv\[0-9\]+\.2d, v\[0-9\]+\.2d, 0" } } */
 /* { dg-final { scan-assembler "fcmle\\tv\[0-9\]+\.2d, v\[0-9\]+\.2d, 0" } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/vect-fcm-gt-f.c b/gcc/testsuite/gcc.target/aarch64/vect-fcm-gt-f.c
index cdeab14..2797fd1 100644
--- a/gcc/testsuite/gcc.target/aarch64/vect-fcm-gt-f.c
+++ b/gcc/testsuite/gcc.target/aarch64/vect-fcm-gt-f.c
@@ -2,12 +2,13 @@
 /* { dg-options "-O2 -ftree-vectorize -fdump-tree-vect-all -fno-unroll-loops --save-temps -fno-inline" } */
 
 #define FTYPE float
+#define ITYPE int
 #define OP >
 #define INV_OP <=
 
 #include "vect-fcm.x"
 
-/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 4 "vect" } } */
+/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 8 "vect" } } */
 /* { dg-final { scan-assembler "fcmgt\\tv\[0-9\]+\.\[24\]s, v\[0-9\]+\.\[24\]s, v\[0-9\]+\.\[24\]s" } } */
 /* { dg-final { scan-assembler "fcmgt\\tv\[0-9\]+\.\[24\]s, v\[0-9\]+\.\[24\]s, 0" } } */
 /* { dg-final { scan-assembler "fcmle\\tv\[0-9\]+\.\[24\]s, v\[0-9\]+\.\[24\]s, 0" } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/vect-fcm.x b/gcc/testsuite/gcc.target/aarch64/vect-fcm.x
index 803861b..614f0de 100644
--- a/gcc/testsuite/gcc.target/aarch64/vect-fcm.x
+++ b/gcc/testsuite/gcc.target/aarch64/vect-fcm.x
@@ -13,6 +13,8 @@ FTYPE input2[N] =
  2.0, -4.0, 8.0, -16.0,
  -2.125, 4.25, -8.5, 17.0};
 
+/* Float comparisons, float results.  */
+
 void
 foo (FTYPE *in1, FTYPE *in2, FTYPE *output)
 {
@@ -49,11 +51,52 @@ foobarbar (FTYPE *in1, FTYPE *in2, FTYPE *output)
     output[i] = (in1[i] INV_OP 0.0) ? 4.0 : 2.0;
 }
 
+/* Float comparisons, int results.  */
+
+void
+foo_int (FTYPE *in1, FTYPE *in2, ITYPE *output)
+{
+  int i = 0;
+  /* Vectorizable.  */
+  for (i = 0; i < N; i++)
+    output[i] = (in1[i] OP in2[i]) ? 2 : 4;
+}
+
+void
+bar_int (FTYPE *in1, FTYPE *in2, ITYPE *output)
+{
+  int i = 0;
+  /* Vectorizable.  */
+  for (i = 0; i < N; i++)
+    output[i] = (in1[i] INV_OP in2[i]) ? 4 : 2;
+}
+
+void
+foobar_int (FTYPE *in1, FTYPE *in2, ITYPE *output)
+{
+  int i = 0;
+  /* Vectorizable.  */
+  for (i = 0; i < N; i++)
+    output[i] = (in1[i] OP 0.0) ? 4 : 2;
+}
+
+void
+foobarbar_int (FTYPE *in1, FTYPE *in2, ITYPE *output)
+{
+  int i = 0;
+  /* Vectorizable.  */
+  for (i = 0; i < N; i++)
+    output[i] = (in1[i] INV_OP 0.0) ? 4 : 2;
+}
+
 int
 main (int argc, char **argv)
 {
   FTYPE out1[N];
   FTYPE out2[N];
+  ITYPE outi1[N];
+  ITYPE outi2[N];
+
   int i = 0;
   foo (input1, input2, out1);
   bar (input1, input2, out2);
@@ -65,6 +108,17 @@ main (int argc, char **argv)
   for (i = 0; i < N; i++)
     if (out1[i] == out2[i])
       abort ();
+
+  foo_int (input1, input2, outi1);
+  bar_int (input1, input2, outi2);
+  for (i = 0; i < N; i++)
+    if (outi1[i] != outi2[i])
+      abort ();
+  foobar_int (input1, input2, outi1);
+  foobarbar_int (input1, input2, outi2);
+  for (i = 0; i < N; i++)
+    if (outi1[i] == outi2[i])
+      abort ();
   return 0;
 }
 

Reply via email to