Hi all,
Given code:
/* Reduced reproducer: a float maximum expressed as a conditional, which
   is compiled with -ftree-vectorize in the report above.  */
#define MAX(a, b) (a > b ? a : b)
/* Store MAX (0.0f, w2[i]) into w[i] for every i in [0, ilast).  */
void foo (int ilast, float* w, float* w2)
{
  int i;
  for (i = 0; i < ilast; ++i)
    {
      w[i] = MAX (0.0f, w2[i]);
    }
}
compiled with
-O1 -funsafe-math-optimizations -ftree-vectorize -mfpu=neon -mfloat-abi=hard
on
arm-none-eabi will cause an ICE when trying to expand the vcond pattern.
Looking at the vcond pattern in neon.md, the predicate for the
comparison operator (arm_comparison_operator) uses
maybe_get_arm_condition_code
which is not needed for vcond since we don't care about the ARM condition
code
(we can handle all the comparison cases ourselves in the expander).
Changing the predicate to comparison_operator allows the expander to proceed
but it ICEs again because the pattern doesn't handle the floating point
unordered cases! (i.e. UNGT, UNORDERED, UNLE etc).
Adding support for the unordered cases is very similar to the aarch64 port
added
here:
http://gcc.gnu.org/ml/gcc-patches/2013-01/msg00957.html
This patch adapts that code to the arm port.
Added the testcase that exposed the ICE initially and also the UNORDERED and
LTGT
variations of it.
No regressions on arm-none-eabi.
Ok for trunk?
Thanks,
Kyrill
gcc/ChangeLog
2013-03-18 Kyrylo Tkachov kyrylo.tkac...@arm.com
* config/arm/iterators.md (v_cmp_result): New mode attribute.
* config/arm/neon.md (vcond<mode><mode>): Handle unordered cases.
gcc/testsuite/ChangeLog
2013-03-18 Kyrylo Tkachov kyrylo.tkac...@arm.com
* gcc.target/arm/neon-vcond-gt.c: New test.
* gcc.target/arm/neon-vcond-ltgt.c: Likewise.
* gcc.target/arm/neon-vcond-unordered.c: Likewise.

diff --git a/gcc/config/arm/iterators.md b/gcc/config/arm/iterators.md
index 252f18b..b3ad42b 100644
--- a/gcc/config/arm/iterators.md
+++ b/gcc/config/arm/iterators.md
@@ -314,6 +314,12 @@
(V2SF V2SI) (V4SF V4SI)
(DI DI) (V2DI V2DI)])
+(define_mode_attr v_cmp_result [(V8QI "v8qi") (V16QI "v16qi")
+ (V4HI "v4hi") (V8HI "v8hi")
+ (V2SI "v2si") (V4SI "v4si")
+ (DI "di") (V2DI "v2di")
+ (V2SF "v2si") (V4SF "v4si")])
+
;; Get element type from double-width mode, for operations where we
;; don't care about signedness.
(define_mode_attr V_if_elem [(V8QI i8) (V16QI i8)
diff --git a/gcc/config/arm/neon.md b/gcc/config/arm/neon.md
index 79b3f66..99fb5e8 100644
--- a/gcc/config/arm/neon.md
+++ b/gcc/config/arm/neon.md
@@ -1721,80 +1721,144 @@
(define_expand vcondmodemode
[(set (match_operand:VDQW 0 s_register_operand )
(if_then_else:VDQW
- (match_operator 3 arm_comparison_operator
+ (match_operator 3 comparison_operator
[(match_operand:VDQW 4 s_register_operand )
(match_operand:VDQW 5 nonmemory_operand )])
(match_operand:VDQW 1 s_register_operand )
(match_operand:VDQW 2 s_register_operand )))]
TARGET_NEON (!Is_float_mode || flag_unsafe_math_optimizations)
{
- rtx mask;
- int inverse = 0, immediate_zero = 0;
- /* See the description of magic bits in the 'T' case of
- arm_print_operand. */
HOST_WIDE_INT magic_word = (MODEmode == V2SFmode || MODEmode == V4SFmode)
? 3 : 1;
rtx magic_rtx = GEN_INT (magic_word);
-
- mask = gen_reg_rtx (V_cmp_resultmode);
-
- if (operands[5] == CONST0_RTX (MODEmode))
-immediate_zero = 1;
- else if (!REG_P (operands[5]))
-operands[5] = force_reg (MODEmode, operands[5]);
-
+ int inverse = 0;
+ int swap_bsl_operands = 0;
+ rtx mask = gen_reg_rtx (V_cmp_resultmode);
+ rtx tmp = gen_reg_rtx (V_cmp_resultmode);
+
+ rtx (*base_comparison) (rtx, rtx, rtx, rtx);
+ rtx (*complimentary_comparison) (rtx, rtx, rtx, rtx);
+
switch (GET_CODE (operands[3]))
{
case GE:
- emit_insn (gen_neon_vcgemode (mask, operands[4], operands[5],
- magic_rtx));
+case LE:
+case EQ:
+ if (!REG_P (operands[5])
+ (operands[5] != CONST0_RTX (MODEmode)))
+ operands[5] = force_reg (MODEmode, operands[5]);
break;
-
+default:
+ if (!REG_P (operands[5]))
+ operands[5] = force_reg (MODEmode, operands[5]);
+}
+
+ switch (GET_CODE (operands[3]))
+{
+case LT:
+case UNLT:
+ inverse = 1;
+ /* Fall through. */
+case GE:
+case UNGE:
+case ORDERED:
+case UNORDERED:
+ base_comparison = gen_neon_vcgemode;
+ complimentary_comparison = gen_neon_vcgtmode;
+ break;
+case LE:
+case UNLE:
+ inverse = 1;
+ /* Fall through. */
case GT:
- emit_insn (gen_neon_vcgtmode (mask, operands[4], operands[5],
- magic_rtx));
+case UNGT:
+ base_comparison = gen_neon_vcgtmode;
+ complimentary_comparison = gen_neon_vcgemode;
break;
-
case EQ:
- emit_insn