Hi,
In previous big-endian (BE) patches, the way lane indices are calculated
was changed. To accommodate that change, this patch updates the AArch64
NEON reduction intrinsics in arm_neon.h: the __LANE0 workaround macro is
no longer needed, so its uses are replaced with a plain lane 0.
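For illustration only (not part of the patch), here is a minimal,
hypothetical example of what the cleanup means for one of the affected
intrinsics; the helper name sum_lanes is made up for this sketch. With
the new lane numbering, the reduction result is read from lane 0 on both
little- and big-endian, so no __LANE0 adjustment is needed:

/* Illustration only, not part of the patch.  With the new lane-numbering
   scheme the reduction builtins leave their scalar result in lane 0 on
   both little- and big-endian, so the intrinsics can index lane 0
   directly instead of going through __LANE0.  */
#include <arm_neon.h>

int8_t
sum_lanes (int8x8_t __a)
{
  /* vaddv_s8 sums all eight lanes, e.g. {1,2,3,4,5,6,7,8} -> 36.  */
  return vaddv_s8 (__a);
}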
Is it okay?
gcc/
2014-01-16  James Greenhalgh
	    Alex Velenko
* config/aarch64/arm_neon.h (vaddv_s8): __LANE0 cleanup.
(vaddv_s16): Likewise.
(vaddv_s32): Likewise.
(vaddv_u8): Likewise.
(vaddv_u16): Likewise.
(vaddv_u32): Likewise.
(vaddvq_s8): Likewise.
(vaddvq_s16): Likewise.
(vaddvq_s32): Likewise.
(vaddvq_s64): Likewise.
(vaddvq_u8): Likewise.
(vaddvq_u16): Likewise.
(vaddvq_u32): Likewise.
(vaddvq_u64): Likewise.
(vaddv_f32): Likewise.
(vaddvq_f32): Likewise.
(vaddvq_f64): Likewise.
(vmaxv_f32): Likewise.
(vmaxv_s8): Likewise.
(vmaxv_s16): Likewise.
(vmaxv_s32): Likewise.
(vmaxv_u8): Likewise.
(vmaxv_u16): Likewise.
(vmaxv_u32): Likewise.
(vmaxvq_f32): Likewise.
(vmaxvq_f64): Likewise.
(vmaxvq_s8): Likewise.
(vmaxvq_s16): Likewise.
(vmaxvq_s32): Likewise.
(vmaxvq_u8): Likewise.
(vmaxvq_u16): Likewise.
(vmaxvq_u32): Likewise.
(vmaxnmv_f32): Likewise.
(vmaxnmvq_f32): Likewise.
(vmaxnmvq_f64): Likewise.
(vminv_f32): Likewise.
(vminv_s8): Likewise.
(vminv_s16): Likewise.
(vminv_s32): Likewise.
(vminv_u8): Likewise.
(vminv_u16): Likewise.
(vminv_u32): Likewise.
(vminvq_f32): Likewise.
(vminvq_f64): Likewise.
(vminvq_s8): Likewise.
(vminvq_s16): Likewise.
(vminvq_s32): Likewise.
(vminvq_u8): Likewise.
(vminvq_u16): Likewise.
(vminvq_u32): Likewise.
(vminnmv_f32): Likewise.
(vminnmvq_f32): Likewise.
(vminnmvq_f64): Likewise.
diff --git a/gcc/config/aarch64/arm_neon.h b/gcc/config/aarch64/arm_neon.h
index 33816d4381c8cf271fc4a85db6cc668f6c031dd8..568ade61653d213da5c1826c970ee350e1fdee97 100644
--- a/gcc/config/aarch64/arm_neon.h
+++ b/gcc/config/aarch64/arm_neon.h
@@ -15307,30 +15307,24 @@ vaddd_u64 (uint64x1_t __a, uint64x1_t __b)
return __a + __b;
}
-#if __AARCH64EB__
-#define __LANE0(__t) ((__t) - 1)
-#else
-#define __LANE0(__t) 0
-#endif
-
/* vaddv */
__extension__ static __inline int8_t __attribute__ ((__always_inline__))
vaddv_s8 (int8x8_t __a)
{
- return vget_lane_s8 (__builtin_aarch64_reduc_splus_v8qi (__a), __LANE0 (8));
+ return vget_lane_s8 (__builtin_aarch64_reduc_splus_v8qi (__a), 0);
}
__extension__ static __inline int16_t __attribute__ ((__always_inline__))
vaddv_s16 (int16x4_t __a)
{
- return vget_lane_s16 (__builtin_aarch64_reduc_splus_v4hi (__a), __LANE0 (4));
+ return vget_lane_s16 (__builtin_aarch64_reduc_splus_v4hi (__a), 0);
}
__extension__ static __inline int32_t __attribute__ ((__always_inline__))
vaddv_s32 (int32x2_t __a)
{
- return vget_lane_s32 (__builtin_aarch64_reduc_splus_v2si (__a), __LANE0 (2));
+ return vget_lane_s32 (__builtin_aarch64_reduc_splus_v2si (__a), 0);
}
__extension__ static __inline uint8_t __attribute__ ((__always_inline__))
@@ -15338,7 +15332,7 @@ vaddv_u8 (uint8x8_t __a)
{
return vget_lane_u8 ((uint8x8_t)
__builtin_aarch64_reduc_uplus_v8qi ((int8x8_t) __a),
- __LANE0 (8));
+ 0);
}
__extension__ static __inline uint16_t __attribute__ ((__always_inline__))
@@ -15346,7 +15340,7 @@ vaddv_u16 (uint16x4_t __a)
{
return vget_lane_u16 ((uint16x4_t)
__builtin_aarch64_reduc_uplus_v4hi ((int16x4_t) __a),
- __LANE0 (4));
+ 0);
}
__extension__ static __inline uint32_t __attribute__ ((__always_inline__))
@@ -15354,32 +15348,32 @@ vaddv_u32 (uint32x2_t __a)
{
return vget_lane_u32 ((uint32x2_t)
__builtin_aarch64_reduc_uplus_v2si ((int32x2_t) __a),
- __LANE0 (2));
+ 0);
}
__extension__ static __inline int8_t __attribute__ ((__always_inline__))
vaddvq_s8 (int8x16_t __a)
{
return vgetq_lane_s8 (__builtin_aarch64_reduc_splus_v16qi (__a),
- __LANE0 (16));
+ 0);
}
__extension__ static __inline int16_t __attribute__ ((__always_inline__))
vaddvq_s16 (int16x8_t __a)
{
- return vgetq_lane_s16 (__builtin_aarch64_reduc_splus_v8hi (__a), __LANE0 (8));
+ return vgetq_lane_s16 (__builtin_aarch64_reduc_splus_v8hi (__a), 0);
}
__extension__ static __inline int32_t __attribute__ ((__always_inline__))
vaddvq_s32 (int32x4_t __a)
{
- return vgetq_lane_s32 (__builtin_aarch64_reduc_splus_v4si (__a), __LANE0 (4));
+ return vgetq_lane_s32 (__builtin_aarch64_reduc_splus_v4si (__a), 0);
}
__extension__ static __inline int64_t __attribute__ ((__always_inline__))
vaddvq_s64 (int64x2_t __a)
{
- return vgetq_lane_s64 (__builtin_aarch64_reduc_splus_v2di (__a), __LANE0 (2));
+ return vgetq_lane_s64 (__builtin_aarch64_reduc_splus_v2di (__a), 0);
}
__extension__ static __inline uint8_t __attribute__ ((__always_inlin