Hi,

For a statement like:

  INT = FLOAT > FLOAT ? INT : INT;

the vcond implementation in the AArch64 back end is broken: we will try
to force the INT values into a FLOAT register and ICE. This patch fixes
this.

Regression suite run for aarch64-none-elf with no regressions, and more
cases added to the testsuite to ensure this is caught in future.

Thanks,
James Greenhalgh

---
gcc/

	* config/aarch64/aarch64-simd.md (aarch64_vcond_internal<mode>):
	Rename to...
	(aarch64_vcond_internal<mode><mode>): ...This, for integer modes.
	(aarch64_vcond_internal<VDQF_COND:mode><VDQF:mode>): ...This for
	float modes.  Clarify all iterator modes.
	(vcond<mode><mode>): Use new name for vcond expanders.
	(vcond<v_cmp_result><mode>): Likewise.
	(vcondu<mode><mode>): Likewise.
	* config/aarch64/iterators.md (VDQF_COND): New.

gcc/testsuite/

	* gcc.target/aarch64/vect-fcm.x: Add cases testing
	FLOAT cmp FLOAT ? INT : INT.
	* gcc.target/aarch64/vect-fcm-eq-d.c: Define ITYPE.
	* gcc.target/aarch64/vect-fcm-eq-f.c: Likewise.
	* gcc.target/aarch64/vect-fcm-ge-d.c: Likewise.
	* gcc.target/aarch64/vect-fcm-ge-f.c: Likewise.
	* gcc.target/aarch64/vect-fcm-gt-d.c: Likewise.
	* gcc.target/aarch64/vect-fcm-gt-f.c: Likewise.
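For reference, a minimal sketch of the kind of loop that hits the bug
(the function name here is illustrative; it mirrors the foo_int case
added to vect-fcm.x below). Built at -O2 -ftree-vectorize for AArch64,
the vectorizer emits a vcond whose comparison operands are float while
the selected values are integer:

  /* Illustrative reproducer: FLOAT cmp FLOAT selecting INT : INT.
     Previously the vcond expander used a single mode throughout, so it
     would try to force the integer operands into float registers and
     ICE.  */
  #define N 16

  void
  select_by_fcmp (float *in1, float *in2, int *output)
  {
    int i;
    for (i = 0; i < N; i++)
      output[i] = (in1[i] > in2[i]) ? 2 : 4;
  }
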
diff --git a/gcc/config/aarch64/aarch64-simd.md b/gcc/config/aarch64/aarch64-simd.md
index 5626b55..6bc7dd7 100644
--- a/gcc/config/aarch64/aarch64-simd.md
+++ b/gcc/config/aarch64/aarch64-simd.md
@@ -1725,7 +1725,7 @@
   DONE;
 })
 
-(define_expand "aarch64_vcond_internal<mode>"
+(define_expand "aarch64_vcond_internal<mode><mode>"
   [(set (match_operand:VDQ 0 "register_operand")
 	(if_then_else:VDQ
 	  (match_operator 3 "comparison_operator"
@@ -1820,14 +1820,14 @@
   DONE;
 })
 
-(define_expand "aarch64_vcond_internal<mode>"
-  [(set (match_operand:VDQF 0 "register_operand")
+(define_expand "aarch64_vcond_internal<VDQF_COND:mode><VDQF:mode>"
+  [(set (match_operand:VDQF_COND 0 "register_operand")
 	(if_then_else:VDQF
 	  (match_operator 3 "comparison_operator"
 	    [(match_operand:VDQF 4 "register_operand")
 	     (match_operand:VDQF 5 "nonmemory_operand")])
-	  (match_operand:VDQF 1 "nonmemory_operand")
-	  (match_operand:VDQF 2 "nonmemory_operand")))]
+	  (match_operand:VDQF_COND 1 "nonmemory_operand")
+	  (match_operand:VDQF_COND 2 "nonmemory_operand")))]
   "TARGET_SIMD"
 {
   int inverse = 0;
@@ -1835,8 +1835,8 @@
   int swap_bsl_operands = 0;
   rtx op1 = operands[1];
   rtx op2 = operands[2];
-  rtx mask = gen_reg_rtx (<V_cmp_result>mode);
-  rtx tmp = gen_reg_rtx (<V_cmp_result>mode);
+  rtx mask = gen_reg_rtx (<VDQF_COND:V_cmp_result>mode);
+  rtx tmp = gen_reg_rtx (<VDQF_COND:V_cmp_result>mode);
 
   rtx (*base_comparison) (rtx, rtx, rtx);
   rtx (*complimentary_comparison) (rtx, rtx, rtx);
@@ -1856,7 +1856,7 @@
       /* Fall through.  */
     default:
       if (!REG_P (operands[5]))
-	operands[5] = force_reg (<MODE>mode, operands[5]);
+	operands[5] = force_reg (<VDQF:MODE>mode, operands[5]);
     }
 
   switch (GET_CODE (operands[3]))
@@ -1869,8 +1869,8 @@
     case UNGE:
     case ORDERED:
     case UNORDERED:
-      base_comparison = gen_aarch64_cmge<mode>;
-      complimentary_comparison = gen_aarch64_cmgt<mode>;
+      base_comparison = gen_aarch64_cmge<VDQF:mode>;
+      complimentary_comparison = gen_aarch64_cmgt<VDQF:mode>;
       break;
     case LE:
     case UNLE:
@@ -1878,14 +1878,14 @@
       /* Fall through.  */
     case GT:
     case UNGT:
-      base_comparison = gen_aarch64_cmgt<mode>;
-      complimentary_comparison = gen_aarch64_cmge<mode>;
+      base_comparison = gen_aarch64_cmgt<VDQF:mode>;
+      complimentary_comparison = gen_aarch64_cmge<VDQF:mode>;
       break;
     case EQ:
     case NE:
     case UNEQ:
-      base_comparison = gen_aarch64_cmeq<mode>;
-      complimentary_comparison = gen_aarch64_cmeq<mode>;
+      base_comparison = gen_aarch64_cmeq<VDQF:mode>;
+      complimentary_comparison = gen_aarch64_cmeq<VDQF:mode>;
       break;
     default:
       gcc_unreachable ();
@@ -1913,10 +1913,10 @@
 	  switch (GET_CODE (operands[3]))
 	    {
 	    case LT:
-	      base_comparison = gen_aarch64_cmlt<mode>;
+	      base_comparison = gen_aarch64_cmlt<VDQF:mode>;
 	      break;
 	    case LE:
-	      base_comparison = gen_aarch64_cmle<mode>;
+	      base_comparison = gen_aarch64_cmle<VDQF:mode>;
 	      break;
 	    default:
 	      /* Do nothing, other zero form cases already have the correct
@@ -1959,9 +1959,9 @@
 	 true iff !(a != b && a ORDERED b), swapping the operands to BSL
 	 will then give us (a == b || a UNORDERED b) as intended.  */
 
-      emit_insn (gen_aarch64_cmgt<mode> (mask, operands[4], operands[5]));
-      emit_insn (gen_aarch64_cmgt<mode> (tmp, operands[5], operands[4]));
-      emit_insn (gen_ior<v_cmp_result>3 (mask, mask, tmp));
+      emit_insn (gen_aarch64_cmgt<VDQF:mode> (mask, operands[4], operands[5]));
+      emit_insn (gen_aarch64_cmgt<VDQF:mode> (tmp, operands[5], operands[4]));
+      emit_insn (gen_ior<VDQF_COND:v_cmp_result>3 (mask, mask, tmp));
       swap_bsl_operands = 1;
       break;
     case UNORDERED:
@@ -1970,9 +1970,9 @@
       swap_bsl_operands = 1;
       /* Fall through.  */
     case ORDERED:
-      emit_insn (gen_aarch64_cmgt<mode> (tmp, operands[4], operands[5]));
-      emit_insn (gen_aarch64_cmge<mode> (mask, operands[5], operands[4]));
-      emit_insn (gen_ior<v_cmp_result>3 (mask, mask, tmp));
+      emit_insn (gen_aarch64_cmgt<VDQF:mode> (tmp, operands[4], operands[5]));
+      emit_insn (gen_aarch64_cmge<VDQF:mode> (mask, operands[5], operands[4]));
+      emit_insn (gen_ior<VDQF_COND:v_cmp_result>3 (mask, mask, tmp));
       break;
     default:
       gcc_unreachable ();
@@ -1987,16 +1987,16 @@
   /* If we have (a = (b CMP c) ? -1 : 0);
      Then we can simply move the generated mask.  */
 
-  if (op1 == CONSTM1_RTX (<V_cmp_result>mode)
-      && op2 == CONST0_RTX (<V_cmp_result>mode))
+  if (op1 == CONSTM1_RTX (<VDQF_COND:V_cmp_result>mode)
+      && op2 == CONST0_RTX (<VDQF_COND:V_cmp_result>mode))
     emit_move_insn (operands[0], mask);
   else
     {
       if (!REG_P (op1))
-	op1 = force_reg (<MODE>mode, op1);
+	op1 = force_reg (<VDQF_COND:MODE>mode, op1);
       if (!REG_P (op2))
-	op2 = force_reg (<MODE>mode, op2);
-      emit_insn (gen_aarch64_simd_bsl<mode> (operands[0], mask,
+	op2 = force_reg (<VDQF_COND:MODE>mode, op2);
+      emit_insn (gen_aarch64_simd_bsl<VDQF_COND:mode> (operands[0], mask,
 						op1, op2));
     }
 
@@ -2013,7 +2013,7 @@
 	  (match_operand:VALL 2 "nonmemory_operand")))]
   "TARGET_SIMD"
 {
-  emit_insn (gen_aarch64_vcond_internal<mode> (operands[0], operands[1],
+  emit_insn (gen_aarch64_vcond_internal<mode><mode> (operands[0], operands[1],
 					       operands[2], operands[3],
 					       operands[4], operands[5]));
   DONE;
@@ -2029,7 +2029,7 @@
 	  (match_operand:<V_cmp_result> 2 "nonmemory_operand")))]
   "TARGET_SIMD"
 {
-  emit_insn (gen_aarch64_vcond_internal<v_cmp_result> (
+  emit_insn (gen_aarch64_vcond_internal<v_cmp_result><mode> (
 						operands[0], operands[1],
 						operands[2], operands[3],
 						operands[4], operands[5]));
@@ -2046,7 +2046,7 @@
 	  (match_operand:VDQ 2 "nonmemory_operand")))]
   "TARGET_SIMD"
 {
-  emit_insn (gen_aarch64_vcond_internal<mode> (operands[0], operands[1],
+  emit_insn (gen_aarch64_vcond_internal<mode><mode> (operands[0], operands[1],
 					       operands[2], operands[3],
 					       operands[4], operands[5]));
   DONE;
diff --git a/gcc/config/aarch64/iterators.md b/gcc/config/aarch64/iterators.md
index 5945d23..860d4d9 100644
--- a/gcc/config/aarch64/iterators.md
+++ b/gcc/config/aarch64/iterators.md
@@ -83,6 +83,9 @@
 ;; Vector Float modes.
 (define_mode_iterator VDQF [V2SF V4SF V2DF])
 
+;; Modes suitable to use as the return type of a vcond expression.
+(define_mode_iterator VDQF_COND [V2SF V2SI V4SF V4SI V2DF V2DI])
+
 ;; All Float modes.
 (define_mode_iterator VALLF [V2SF V4SF V2DF SF DF])
diff --git a/gcc/testsuite/gcc.target/aarch64/vect-fcm-eq-d.c b/gcc/testsuite/gcc.target/aarch64/vect-fcm-eq-d.c
index 19ecd63..6c2e2c8 100644
--- a/gcc/testsuite/gcc.target/aarch64/vect-fcm-eq-d.c
+++ b/gcc/testsuite/gcc.target/aarch64/vect-fcm-eq-d.c
@@ -2,12 +2,13 @@
 /* { dg-options "-O2 -ftree-vectorize -fdump-tree-vect-all -fno-unroll-loops --save-temps -fno-inline" } */
 
 #define FTYPE double
+#define ITYPE long
 #define OP ==
 #define INV_OP !=
 
 #include "vect-fcm.x"
 
-/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 4 "vect" } } */
+/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 8 "vect" } } */
 /* { dg-final { scan-assembler "fcmeq\\tv\[0-9\]+\.2d, v\[0-9\]+\.2d, v\[0-9\]+\.2d" } } */
 /* { dg-final { scan-assembler "fcmeq\\tv\[0-9\]+\.2d, v\[0-9\]+\.2d, 0" } } */
 /* { dg-final { cleanup-tree-dump "vect" } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/vect-fcm-eq-f.c b/gcc/testsuite/gcc.target/aarch64/vect-fcm-eq-f.c
index 30be5ad..5a2109c 100644
--- a/gcc/testsuite/gcc.target/aarch64/vect-fcm-eq-f.c
+++ b/gcc/testsuite/gcc.target/aarch64/vect-fcm-eq-f.c
@@ -2,12 +2,13 @@
 /* { dg-options "-O2 -ftree-vectorize -fdump-tree-vect-all -fno-unroll-loops --save-temps -fno-inline" } */
 
 #define FTYPE float
+#define ITYPE int
 #define OP ==
 #define INV_OP !=
 
 #include "vect-fcm.x"
 
-/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 4 "vect" } } */
+/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 8 "vect" } } */
 /* { dg-final { scan-assembler "fcmeq\\tv\[0-9\]+\.\[24\]s, v\[0-9\]+\.\[24\]s, v\[0-9\]+\.\[24\]s" } } */
 /* { dg-final { scan-assembler "fcmeq\\tv\[0-9\]+\.\[24\]s, v\[0-9\]+\.\[24\]s, 0" } } */
 /* { dg-final { cleanup-tree-dump "vect" } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/vect-fcm-ge-d.c b/gcc/testsuite/gcc.target/aarch64/vect-fcm-ge-d.c
index b922833..8fad799 100644
--- a/gcc/testsuite/gcc.target/aarch64/vect-fcm-ge-d.c
+++ b/gcc/testsuite/gcc.target/aarch64/vect-fcm-ge-d.c
@@ -2,12 +2,13 @@
 /* { dg-options "-O2 -ftree-vectorize -fdump-tree-vect-all -fno-unroll-loops --save-temps -fno-inline" } */
 
 #define FTYPE double
+#define ITYPE long
 #define OP >=
 #define INV_OP <
 
 #include "vect-fcm.x"
 
-/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 4 "vect" } } */
+/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 8 "vect" } } */
 /* { dg-final { scan-assembler "fcmge\\tv\[0-9\]+\.2d, v\[0-9\]+\.2d, v\[0-9\]+\.2d" } } */
 /* { dg-final { scan-assembler "fcmge\\tv\[0-9\]+\.2d, v\[0-9\]+\.2d, 0" } } */
 /* { dg-final { scan-assembler "fcmlt\\tv\[0-9\]+\.2d, v\[0-9\]+\.2d, 0" } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/vect-fcm-ge-f.c b/gcc/testsuite/gcc.target/aarch64/vect-fcm-ge-f.c
index 04d3533..7aab9e6 100644
--- a/gcc/testsuite/gcc.target/aarch64/vect-fcm-ge-f.c
+++ b/gcc/testsuite/gcc.target/aarch64/vect-fcm-ge-f.c
@@ -2,12 +2,13 @@
 /* { dg-options "-O2 -ftree-vectorize -fdump-tree-vect-all -fno-unroll-loops --save-temps -fno-inline" } */
 
 #define FTYPE float
+#define ITYPE int
 #define OP >=
 #define INV_OP <
 
 #include "vect-fcm.x"
 
-/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 4 "vect" } } */
+/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 8 "vect" } } */
 /* { dg-final { scan-assembler "fcmge\\tv\[0-9\]+\.\[24\]s, v\[0-9\]+\.\[24\]s, v\[0-9\]+\.\[24\]s" } } */
 /* { dg-final { scan-assembler "fcmge\\tv\[0-9\]+\.\[24\]s, v\[0-9\]+\.\[24\]s, 0" } } */
 /* { dg-final { scan-assembler "fcmlt\\tv\[0-9\]+\.\[24\]s, v\[0-9\]+\.\[24\]s, 0" } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/vect-fcm-gt-d.c b/gcc/testsuite/gcc.target/aarch64/vect-fcm-gt-d.c
index 421a04a..d26acaa 100644
--- a/gcc/testsuite/gcc.target/aarch64/vect-fcm-gt-d.c
+++ b/gcc/testsuite/gcc.target/aarch64/vect-fcm-gt-d.c
@@ -2,12 +2,13 @@
 /* { dg-options "-O2 -ftree-vectorize -fdump-tree-vect-all -fno-unroll-loops --save-temps -fno-inline" } */
 
 #define FTYPE double
+#define ITYPE long
 #define OP >
 #define INV_OP <=
 
 #include "vect-fcm.x"
 
-/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 4 "vect" } } */
+/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 8 "vect" } } */
 /* { dg-final { scan-assembler "fcmgt\\tv\[0-9\]+\.2d, v\[0-9\]+\.2d, v\[0-9\]+\.2d" } } */
 /* { dg-final { scan-assembler "fcmgt\\tv\[0-9\]+\.2d, v\[0-9\]+\.2d, 0" } } */
 /* { dg-final { scan-assembler "fcmle\\tv\[0-9\]+\.2d, v\[0-9\]+\.2d, 0" } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/vect-fcm-gt-f.c b/gcc/testsuite/gcc.target/aarch64/vect-fcm-gt-f.c
index cdeab14..2797fd1 100644
--- a/gcc/testsuite/gcc.target/aarch64/vect-fcm-gt-f.c
+++ b/gcc/testsuite/gcc.target/aarch64/vect-fcm-gt-f.c
@@ -2,12 +2,13 @@
 /* { dg-options "-O2 -ftree-vectorize -fdump-tree-vect-all -fno-unroll-loops --save-temps -fno-inline" } */
 
 #define FTYPE float
+#define ITYPE int
 #define OP >
 #define INV_OP <=
 
 #include "vect-fcm.x"
 
-/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 4 "vect" } } */
+/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 8 "vect" } } */
 /* { dg-final { scan-assembler "fcmgt\\tv\[0-9\]+\.\[24\]s, v\[0-9\]+\.\[24\]s, v\[0-9\]+\.\[24\]s" } } */
 /* { dg-final { scan-assembler "fcmgt\\tv\[0-9\]+\.\[24\]s, v\[0-9\]+\.\[24\]s, 0" } } */
 /* { dg-final { scan-assembler "fcmle\\tv\[0-9\]+\.\[24\]s, v\[0-9\]+\.\[24\]s, 0" } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/vect-fcm.x b/gcc/testsuite/gcc.target/aarch64/vect-fcm.x
index 803861b..614f0de 100644
--- a/gcc/testsuite/gcc.target/aarch64/vect-fcm.x
+++ b/gcc/testsuite/gcc.target/aarch64/vect-fcm.x
@@ -13,6 +13,8 @@ FTYPE input2[N] =
 	  2.0, -4.0, 8.0, -16.0,
 	  -2.125, 4.25, -8.5, 17.0};
 
+/* Float comparisons, float results.  */
+
 void
 foo (FTYPE *in1, FTYPE *in2, FTYPE *output)
 {
@@ -49,11 +51,52 @@ foobarbar (FTYPE *in1, FTYPE *in2, FTYPE *output)
     output[i] = (in1[i] INV_OP 0.0) ? 4.0 : 2.0;
 }
 
+/* Float comparisons, int results.  */
+
+void
+foo_int (FTYPE *in1, FTYPE *in2, ITYPE *output)
+{
+  int i = 0;
+  /* Vectorizable.  */
+  for (i = 0; i < N; i++)
+    output[i] = (in1[i] OP in2[i]) ? 2 : 4;
+}
+
+void
+bar_int (FTYPE *in1, FTYPE *in2, ITYPE *output)
+{
+  int i = 0;
+  /* Vectorizable.  */
+  for (i = 0; i < N; i++)
+    output[i] = (in1[i] INV_OP in2[i]) ? 4 : 2;
+}
+
+void
+foobar_int (FTYPE *in1, FTYPE *in2, ITYPE *output)
+{
+  int i = 0;
+  /* Vectorizable.  */
+  for (i = 0; i < N; i++)
+    output[i] = (in1[i] OP 0.0) ? 4 : 2;
+}
+
+void
+foobarbar_int (FTYPE *in1, FTYPE *in2, ITYPE *output)
+{
+  int i = 0;
+  /* Vectorizable.  */
+  for (i = 0; i < N; i++)
+    output[i] = (in1[i] INV_OP 0.0) ? 4 : 2;
+}
+
 int
 main (int argc, char **argv)
 {
   FTYPE out1[N];
   FTYPE out2[N];
+  ITYPE outi1[N];
+  ITYPE outi2[N];
+
   int i = 0;
   foo (input1, input2, out1);
   bar (input1, input2, out2);
@@ -65,6 +108,17 @@ main (int argc, char **argv)
   for (i = 0; i < N; i++)
     if (out1[i] == out2[i])
       abort ();
+
+  foo_int (input1, input2, outi1);
+  bar_int (input1, input2, outi2);
+  for (i = 0; i < N; i++)
+    if (outi1[i] != outi2[i])
+      abort ();
+  foobar_int (input1, input2, outi1);
+  foobarbar_int (input1, input2, outi2);
+  for (i = 0; i < N; i++)
+    if (outi1[i] == outi2[i])
+      abort ();
 
   return 0;
 }