Hi,

It was recently pointed out that GCC 4.8 can generate instruction
aliases which are no longer documented in the AArch64 portion of the
ARMv8 Architecture Reference Manual.

It turns out that a series of patches to GCC 4.9 corrected this
behaviour, though at the time the aliases were still available, so
there was no need to backport the fix.
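
To illustrate (this example is not itself part of the patch): with the
old UNSPEC-based patterns, a vector comparison such as

  #include <arm_neon.h>

  uint32x2_t
  le (int32x2_t a, int32x2_t b)
  {
    return vcle_s32 (a, b);
  }

could be emitted as the three-register form "cmle v0.2s, v0.2s, v1.2s",
an alias the reference manual no longer documents.  The reworked
patterns instead swap the source operands and emit the documented
"cmge v0.2s, v1.2s, v0.2s".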

Now that there is a need, I have backported parts of the following
three patches:

[AArch64] Improve description of <F>CM instructions in RTL

  http://gcc.gnu.org/ml/gcc-patches/2013-04/msg01789.html
  Committed as r198490.

{Partial} [AArch64] Remap neon vcmp functions to C/TREE

  http://gcc.gnu.org/ml/gcc-patches/2013-04/msg01791.html
  Committed as r198491.

[AArch64 Testsuite] Fix fallout from FCM changes.

  http://gcc.gnu.org/ml/gcc-patches/2013-04/msg01793.html
  Committed as r198493.

I can split these out if anyone would prefer, but they make more sense
as a single coherent patch.

Tested on aarch64-none-elf.

OK for 4.8-branch?

Thanks,
James

---
gcc/

2013-12-18  James Greenhalgh  <james.greenha...@arm.com>

        Backport from mainline.
        2013-05-01  James Greenhalgh  <james.greenha...@arm.com>

        * config/aarch64/aarch64-simd-builtins.def (cmhs): Rename to...
        (cmgeu): ...This.
        (cmhi): Rename to...
        (cmgtu): ...This.
        * config/aarch64/aarch64-simd.md
        (simd_mode): Add SF.
        (aarch64_vcond_internal): Use new names for unsigned comparison insns.
        (aarch64_cm<optab><mode>): Rewrite to not use UNSPECs.
        * config/aarch64/aarch64.md (*cstore<mode>_neg): Rename to...
        (cstore<mode>_neg): ...This.
        * config/aarch64/iterators.md
        (VALLF): New.
        (unspec): Remove UNSPEC_CM<EQ, LE, LT, GE, GT, HS, HI, TST>.
        (COMPARISONS): New.
        (UCOMPARISONS): Likewise.
        (optab): Add missing comparisons.
        (n_optab): New.
        (cmp_1): Likewise.
        (cmp_2): Likewise.
        (CMP): Likewise.
        (cmp): Remove.
        (VCMP_S): Likewise.
        (VCMP_U): Likewise.
        (V_cmp_result): Add DF, SF modes.
        (v_cmp_result): Likewise.
        (v): Likewise.
        (vmtype): Likewise.
        * config/aarch64/predicates.md (aarch64_reg_or_fp_zero): New.

        Partial backport from mainline.
        2013-05-01  James Greenhalgh  <james.greenha...@arm.com>

        * config/aarch64/arm_neon.h
        (vc<eq, lt, le, gt, ge, tst><qsd>_<u><8,16,32,64>): Remap
        to builtins or C as appropriate.

gcc/testsuite/

2013-12-18  James Greenhalgh  <james.greenha...@arm.com>

        Backport from mainline.
        2013-05-01  James Greenhalgh  <james.greenha...@arm.com>

        * gcc.target/aarch64/scalar_intrinsics.c (force_simd): New.
        (test_vceqd_s64): Force arguments to SIMD registers.
        (test_vceqzd_s64): Likewise.
        (test_vcged_s64): Likewise.
        (test_vcled_s64): Likewise.
        (test_vcgezd_s64): Likewise.
        (test_vcged_u64): Likewise.
        (test_vcgtd_s64): Likewise.
        (test_vcltd_s64): Likewise.
        (test_vcgtzd_s64): Likewise.
        (test_vcgtd_u64): Likewise.
        (test_vclezd_s64): Likewise.
        (test_vcltzd_s64): Likewise.
        (test_vtst_s64): Likewise.
        (test_vtst_u64): Likewise.
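
For context, a purely illustrative example (not from the patch) of why
the testsuite changes are needed: the new define_insn_and_split
patterns allow a DImode comparison to be resolved entirely in general
registers, so a scalar intrinsic whose operands arrive in GP registers
may now compile to a cmp/csetm sequence rather than a SIMD cm*
instruction.

  #include <arm_neon.h>

  uint64x1_t
  ge (int64x1_t a, int64x1_t b)
  {
    /* Without force_simd, this may now become
	 cmp	x0, x1
	 csetm	x0, ge
       rather than using the SIMD cmge form.  */
    return vcged_s64 (a, b);
  }

Hence the tests move their operands into SIMD registers with the
force_simd macro before scanning for the cm* instructions.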
diff --git a/gcc/config/aarch64/aarch64-simd-builtins.def b/gcc/config/aarch64/aarch64-simd-builtins.def
index 955da26..ed73c15 100644
--- a/gcc/config/aarch64/aarch64-simd-builtins.def
+++ b/gcc/config/aarch64/aarch64-simd-builtins.def
@@ -217,8 +217,8 @@
   BUILTIN_VSDQ_I_DI (BINOP, cmle)
   BUILTIN_VSDQ_I_DI (BINOP, cmlt)
   /* Implemented by aarch64_cm<cmp><mode>.  */
-  BUILTIN_VSDQ_I_DI (BINOP, cmhs)
-  BUILTIN_VSDQ_I_DI (BINOP, cmhi)
+  BUILTIN_VSDQ_I_DI (BINOP, cmgeu)
+  BUILTIN_VSDQ_I_DI (BINOP, cmgtu)
   BUILTIN_VSDQ_I_DI (BINOP, cmtst)
 
   /* Implemented by aarch64_<fmaxmin><mode>.  */
diff --git a/gcc/config/aarch64/aarch64-simd.md b/gcc/config/aarch64/aarch64-simd.md
index 00f3c3121f0330713565dfc45247a866e5c7763c..481222cf5287cef42f45d723faa4b28d176171c8 100644
--- a/gcc/config/aarch64/aarch64-simd.md
+++ b/gcc/config/aarch64/aarch64-simd.md
@@ -21,7 +21,7 @@
 
 ; Main data types used by the insntructions
 
-(define_attr "simd_mode" "unknown,none,V8QI,V16QI,V4HI,V8HI,V2SI,V4SI,V2DI,V2SF,V4SF,V2DF,OI,CI,XI,DI,DF,SI,HI,QI"
+(define_attr "simd_mode" "unknown,none,V8QI,V16QI,V4HI,V8HI,V2SI,V4SI,V2DI,V2SF,V4SF,V2DF,OI,CI,XI,DI,DF,SI,SF,HI,QI"
   (const_string "unknown"))
 
 
@@ -1548,12 +1548,12 @@ (define_expand "aarch64_vcond_internal<m
 
     case LTU:
     case GEU:
-      emit_insn (gen_aarch64_cmhs<mode> (mask, operands[4], operands[5]));
+      emit_insn (gen_aarch64_cmgeu<mode> (mask, operands[4], operands[5]));
       break;
 
     case LEU:
     case GTU:
-      emit_insn (gen_aarch64_cmhi<mode> (mask, operands[4], operands[5]));
+      emit_insn (gen_aarch64_cmgtu<mode> (mask, operands[4], operands[5]));
       break;
 
     case NE:
@@ -3034,48 +3034,181 @@ (define_insn "aarch64_<sur>q<r>shr<u>n_n
 )
 
 
-;; cm(eq|ge|le|lt|gt)
+;; cm(eq|ge|gt|lt|le)
+;; Note, we have constraints for Dz and Z as different expanders
+;; have different ideas of what should be passed to this pattern.
 
-(define_insn "aarch64_cm<cmp><mode>"
+(define_insn "aarch64_cm<optab><mode>"
   [(set (match_operand:<V_cmp_result> 0 "register_operand" "=w,w")
-        (unspec:<V_cmp_result>
-	  [(match_operand:VSDQ_I_DI 1 "register_operand" "w,w")
-	   (match_operand:VSDQ_I_DI 2 "aarch64_simd_reg_or_zero" "w,Z")]
-          VCMP_S))]
+	(neg:<V_cmp_result>
+	  (COMPARISONS:<V_cmp_result>
+	    (match_operand:VDQ 1 "register_operand" "w,w")
+	    (match_operand:VDQ 2 "aarch64_simd_reg_or_zero" "w,ZDz")
+	  )))]
   "TARGET_SIMD"
   "@
-  cm<cmp>\t%<v>0<Vmtype>, %<v>1<Vmtype>, %<v>2<Vmtype>
-  cm<cmp>\t%<v>0<Vmtype>, %<v>1<Vmtype>, #0"
+  cm<n_optab>\t%<v>0<Vmtype>, %<v><cmp_1><Vmtype>, %<v><cmp_2><Vmtype>
+  cm<optab>\t%<v>0<Vmtype>, %<v>1<Vmtype>, #0"
   [(set_attr "simd_type" "simd_cmp")
    (set_attr "simd_mode" "<MODE>")]
 )
 
-;; cm(hs|hi|tst)
+(define_insn_and_split "aarch64_cm<optab>di"
+  [(set (match_operand:DI 0 "register_operand" "=w,w,r")
+	(neg:DI
+	  (COMPARISONS:DI
+	    (match_operand:DI 1 "register_operand" "w,w,r")
+	    (match_operand:DI 2 "aarch64_simd_reg_or_zero" "w,ZDz,r")
+	  )))]
+  "TARGET_SIMD"
+  "@
+  cm<n_optab>\t%d0, %d<cmp_1>, %d<cmp_2>
+  cm<optab>\t%d0, %d1, #0
+  #"
+  "reload_completed
+   /* We need to prevent the split from
+      happening in the 'w' constraint cases.  */
+   && GP_REGNUM_P (REGNO (operands[0]))
+   && GP_REGNUM_P (REGNO (operands[1]))"
+  [(set (reg:CC CC_REGNUM)
+    (compare:CC
+      (match_dup 1)
+      (match_dup 2)))
+  (set (match_dup 0)
+    (neg:DI
+      (COMPARISONS:DI
+	(match_operand 3 "cc_register" "")
+	(const_int 0))))]
+  {
+    enum machine_mode mode = SELECT_CC_MODE (<CMP>, operands[1], operands[2]);
+    rtx cc_reg = aarch64_gen_compare_reg (<CMP>, operands[1], operands[2]);
+    rtx comparison = gen_rtx_<CMP> (mode, operands[1], operands[2]);
+    emit_insn (gen_cstoredi_neg (operands[0], comparison, cc_reg));
+    DONE;
+  }
+  [(set_attr "simd_type" "simd_cmp")
+   (set_attr "simd_mode" "DI")]
+)
+
+;; cm(hs|hi)
 
-(define_insn "aarch64_cm<cmp><mode>"
+(define_insn "aarch64_cm<optab><mode>"
   [(set (match_operand:<V_cmp_result> 0 "register_operand" "=w")
-        (unspec:<V_cmp_result>
-	  [(match_operand:VSDQ_I_DI 1 "register_operand" "w")
-	   (match_operand:VSDQ_I_DI 2 "register_operand" "w")]
-          VCMP_U))]
+	(neg:<V_cmp_result>
+	  (UCOMPARISONS:<V_cmp_result>
+	    (match_operand:VDQ 1 "register_operand" "w")
+	    (match_operand:VDQ 2 "register_operand" "w")
+	  )))]
   "TARGET_SIMD"
-  "cm<cmp>\t%<v>0<Vmtype>, %<v>1<Vmtype>, %<v>2<Vmtype>"
+  "cm<n_optab>\t%<v>0<Vmtype>, %<v><cmp_1><Vmtype>, %<v><cmp_2><Vmtype>"
   [(set_attr "simd_type" "simd_cmp")
    (set_attr "simd_mode" "<MODE>")]
 )
 
-;; fcm(eq|ge|le|lt|gt)
+(define_insn_and_split "aarch64_cm<optab>di"
+  [(set (match_operand:DI 0 "register_operand" "=w,r")
+	(neg:DI
+	  (UCOMPARISONS:DI
+	    (match_operand:DI 1 "register_operand" "w,r")
+	    (match_operand:DI 2 "aarch64_simd_reg_or_zero" "w,r")
+	  )))]
+  "TARGET_SIMD"
+  "@
+  cm<n_optab>\t%d0, %d<cmp_1>, %d<cmp_2>
+  #"
+  "reload_completed
+   /* We need to prevent the split from
+      happening in the 'w' constraint cases.  */
+   && GP_REGNUM_P (REGNO (operands[0]))
+   && GP_REGNUM_P (REGNO (operands[1]))"
+  [(set (reg:CC CC_REGNUM)
+    (compare:CC
+      (match_dup 1)
+      (match_dup 2)))
+  (set (match_dup 0)
+    (neg:DI
+      (UCOMPARISONS:DI
+	(match_operand 3 "cc_register" "")
+	(const_int 0))))]
+  {
+    enum machine_mode mode = SELECT_CC_MODE (<CMP>, operands[1], operands[2]);
+    rtx cc_reg = aarch64_gen_compare_reg (<CMP>, operands[1], operands[2]);
+    rtx comparison = gen_rtx_<CMP> (mode, operands[1], operands[2]);
+    emit_insn (gen_cstoredi_neg (operands[0], comparison, cc_reg));
+    DONE;
+  }
+  [(set_attr "simd_type" "simd_cmp")
+   (set_attr "simd_mode" "DI")]
+)
+
+;; cmtst
+
+(define_insn "aarch64_cmtst<mode>"
+  [(set (match_operand:<V_cmp_result> 0 "register_operand" "=w")
+	(neg:<V_cmp_result>
+	  (ne:<V_cmp_result>
+	    (and:VDQ
+	      (match_operand:VDQ 1 "register_operand" "w")
+	      (match_operand:VDQ 2 "register_operand" "w"))
+	    (vec_duplicate:<V_cmp_result> (const_int 0)))))]
+  "TARGET_SIMD"
+  "cmtst\t%<v>0<Vmtype>, %<v>1<Vmtype>, %<v>2<Vmtype>"
+  [(set_attr "simd_type" "simd_cmp")
+   (set_attr "simd_mode" "<MODE>")]
+)
+
+(define_insn_and_split "aarch64_cmtstdi"
+  [(set (match_operand:DI 0 "register_operand" "=w,r")
+	(neg:DI
+	  (ne:DI
+	    (and:DI
+	      (match_operand:DI 1 "register_operand" "w,r")
+	      (match_operand:DI 2 "register_operand" "w,r"))
+	    (const_int 0))))]
+  "TARGET_SIMD"
+  "@
+  cmtst\t%d0, %d1, %d2
+  #"
+  "reload_completed
+   /* We need to prevent the split from
+      happening in the 'w' constraint cases.  */
+   && GP_REGNUM_P (REGNO (operands[0]))
+   && GP_REGNUM_P (REGNO (operands[1]))"
+   [(set (reg:CC_NZ CC_REGNUM)
+	(compare:CC_NZ
+	 (and:DI (match_dup 1)
+		  (match_dup 2))
+	 (const_int 0)))
+  (set (match_dup 0)
+    (neg:DI
+      (ne:DI
+	(match_operand 3 "cc_register" "")
+	(const_int 0))))]
+  {
+    rtx and_tree = gen_rtx_AND (DImode, operands[1], operands[2]);
+    enum machine_mode mode = SELECT_CC_MODE (NE, and_tree, const0_rtx);
+    rtx cc_reg = aarch64_gen_compare_reg (NE, and_tree, const0_rtx);
+    rtx comparison = gen_rtx_NE (mode, and_tree, const0_rtx);
+    emit_insn (gen_cstoredi_neg (operands[0], comparison, cc_reg));
+    DONE;
+  }
+  [(set_attr "simd_type" "simd_cmp")
+   (set_attr "simd_mode" "DI")]
+)
+
+;; fcm(eq|ge|gt|le|lt)
 
-(define_insn "aarch64_cm<cmp><mode>"
+(define_insn "aarch64_cm<optab><mode>"
   [(set (match_operand:<V_cmp_result> 0 "register_operand" "=w,w")
-	(unspec:<V_cmp_result>
-	  [(match_operand:VDQF 1 "register_operand" "w,w")
-	   (match_operand:VDQF 2 "aarch64_simd_reg_or_zero" "w,Dz")]
-	   VCMP_S))]
+	(neg:<V_cmp_result>
+	  (COMPARISONS:<V_cmp_result>
+	    (match_operand:VALLF 1 "register_operand" "w,w")
+	    (match_operand:VALLF 2 "aarch64_simd_reg_or_zero" "w,YDz")
+	  )))]
   "TARGET_SIMD"
   "@
-  fcm<cmp>\t%<v>0<Vmtype>, %<v>1<Vmtype>, %<v>2<Vmtype>
-  fcm<cmp>\t%<v>0<Vmtype>, %<v>1<Vmtype>, 0"
+  fcm<n_optab>\t%<v>0<Vmtype>, %<v><cmp_1><Vmtype>, %<v><cmp_2><Vmtype>
+  fcm<optab>\t%<v>0<Vmtype>, %<v>1<Vmtype>, 0"
   [(set_attr "simd_type" "simd_fcmp")
    (set_attr "simd_mode" "<MODE>")]
 )
diff --git a/gcc/config/aarch64/aarch64.md b/gcc/config/aarch64/aarch64.md
index 04a5e01f9cf0dd3629053c43db28aca52392f1e8..c5cfedb8fa5c639df4959e1d4384c8a02a285bd5 100644
--- a/gcc/config/aarch64/aarch64.md
+++ b/gcc/config/aarch64/aarch64.md
@@ -2211,7 +2211,7 @@ (define_insn "*cstoresi_insn_uxtw"
    (set_attr "mode" "SI")]
 )
 
-(define_insn "*cstore<mode>_neg"
+(define_insn "cstore<mode>_neg"
   [(set (match_operand:ALLI 0 "register_operand" "=r")
 	(neg:ALLI (match_operator:ALLI 1 "aarch64_comparison_operator"
 		  [(match_operand 2 "cc_register" "") (const_int 0)])))]
diff --git a/gcc/config/aarch64/arm_neon.h b/gcc/config/aarch64/arm_neon.h
index 2bb42af..d16711d 100644
--- a/gcc/config/aarch64/arm_neon.h
+++ b/gcc/config/aarch64/arm_neon.h
@@ -19551,28 +19551,28 @@ vcge_s64 (int64x1_t __a, int64x1_t __b)
 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
 vcge_u8 (uint8x8_t __a, uint8x8_t __b)
 {
-  return (uint8x8_t) __builtin_aarch64_cmhsv8qi ((int8x8_t) __a,
+  return (uint8x8_t) __builtin_aarch64_cmgeuv8qi ((int8x8_t) __a,
 						 (int8x8_t) __b);
 }
 
 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
 vcge_u16 (uint16x4_t __a, uint16x4_t __b)
 {
-  return (uint16x4_t) __builtin_aarch64_cmhsv4hi ((int16x4_t) __a,
+  return (uint16x4_t) __builtin_aarch64_cmgeuv4hi ((int16x4_t) __a,
 						  (int16x4_t) __b);
 }
 
 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
 vcge_u32 (uint32x2_t __a, uint32x2_t __b)
 {
-  return (uint32x2_t) __builtin_aarch64_cmhsv2si ((int32x2_t) __a,
+  return (uint32x2_t) __builtin_aarch64_cmgeuv2si ((int32x2_t) __a,
 						  (int32x2_t) __b);
 }
 
 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
 vcge_u64 (uint64x1_t __a, uint64x1_t __b)
 {
-  return (uint64x1_t) __builtin_aarch64_cmhsdi ((int64x1_t) __a,
+  return (uint64x1_t) __builtin_aarch64_cmgeudi ((int64x1_t) __a,
 						(int64x1_t) __b);
 }
 
@@ -19603,28 +19603,28 @@ vcgeq_s64 (int64x2_t __a, int64x2_t __b)
 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
 vcgeq_u8 (uint8x16_t __a, uint8x16_t __b)
 {
-  return (uint8x16_t) __builtin_aarch64_cmhsv16qi ((int8x16_t) __a,
+  return (uint8x16_t) __builtin_aarch64_cmgeuv16qi ((int8x16_t) __a,
 						   (int8x16_t) __b);
 }
 
 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
 vcgeq_u16 (uint16x8_t __a, uint16x8_t __b)
 {
-  return (uint16x8_t) __builtin_aarch64_cmhsv8hi ((int16x8_t) __a,
+  return (uint16x8_t) __builtin_aarch64_cmgeuv8hi ((int16x8_t) __a,
 						  (int16x8_t) __b);
 }
 
 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
 vcgeq_u32 (uint32x4_t __a, uint32x4_t __b)
 {
-  return (uint32x4_t) __builtin_aarch64_cmhsv4si ((int32x4_t) __a,
+  return (uint32x4_t) __builtin_aarch64_cmgeuv4si ((int32x4_t) __a,
 						  (int32x4_t) __b);
 }
 
 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
 vcgeq_u64 (uint64x2_t __a, uint64x2_t __b)
 {
-  return (uint64x2_t) __builtin_aarch64_cmhsv2di ((int64x2_t) __a,
+  return (uint64x2_t) __builtin_aarch64_cmgeuv2di ((int64x2_t) __a,
 						  (int64x2_t) __b);
 }
 
@@ -19637,7 +19637,7 @@ vcged_s64 (int64x1_t __a, int64x1_t __b)
 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
 vcged_u64 (uint64x1_t __a, uint64x1_t __b)
 {
-  return (uint64x1_t) __builtin_aarch64_cmhsdi ((int64x1_t) __a,
+  return (uint64x1_t) __builtin_aarch64_cmgeudi ((int64x1_t) __a,
 						(int64x1_t) __b);
 }
 
@@ -19676,28 +19676,28 @@ vcgt_s64 (int64x1_t __a, int64x1_t __b)
 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
 vcgt_u8 (uint8x8_t __a, uint8x8_t __b)
 {
-  return (uint8x8_t) __builtin_aarch64_cmhiv8qi ((int8x8_t) __a,
+  return (uint8x8_t) __builtin_aarch64_cmgtuv8qi ((int8x8_t) __a,
 						 (int8x8_t) __b);
 }
 
 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
 vcgt_u16 (uint16x4_t __a, uint16x4_t __b)
 {
-  return (uint16x4_t) __builtin_aarch64_cmhiv4hi ((int16x4_t) __a,
+  return (uint16x4_t) __builtin_aarch64_cmgtuv4hi ((int16x4_t) __a,
 						  (int16x4_t) __b);
 }
 
 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
 vcgt_u32 (uint32x2_t __a, uint32x2_t __b)
 {
-  return (uint32x2_t) __builtin_aarch64_cmhiv2si ((int32x2_t) __a,
+  return (uint32x2_t) __builtin_aarch64_cmgtuv2si ((int32x2_t) __a,
 						  (int32x2_t) __b);
 }
 
 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
 vcgt_u64 (uint64x1_t __a, uint64x1_t __b)
 {
-  return (uint64x1_t) __builtin_aarch64_cmhidi ((int64x1_t) __a,
+  return (uint64x1_t) __builtin_aarch64_cmgtudi ((int64x1_t) __a,
 						(int64x1_t) __b);
 }
 
@@ -19728,28 +19728,28 @@ vcgtq_s64 (int64x2_t __a, int64x2_t __b)
 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
 vcgtq_u8 (uint8x16_t __a, uint8x16_t __b)
 {
-  return (uint8x16_t) __builtin_aarch64_cmhiv16qi ((int8x16_t) __a,
+  return (uint8x16_t) __builtin_aarch64_cmgtuv16qi ((int8x16_t) __a,
 						   (int8x16_t) __b);
 }
 
 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
 vcgtq_u16 (uint16x8_t __a, uint16x8_t __b)
 {
-  return (uint16x8_t) __builtin_aarch64_cmhiv8hi ((int16x8_t) __a,
+  return (uint16x8_t) __builtin_aarch64_cmgtuv8hi ((int16x8_t) __a,
 						  (int16x8_t) __b);
 }
 
 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
 vcgtq_u32 (uint32x4_t __a, uint32x4_t __b)
 {
-  return (uint32x4_t) __builtin_aarch64_cmhiv4si ((int32x4_t) __a,
+  return (uint32x4_t) __builtin_aarch64_cmgtuv4si ((int32x4_t) __a,
 						  (int32x4_t) __b);
 }
 
 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
 vcgtq_u64 (uint64x2_t __a, uint64x2_t __b)
 {
-  return (uint64x2_t) __builtin_aarch64_cmhiv2di ((int64x2_t) __a,
+  return (uint64x2_t) __builtin_aarch64_cmgtuv2di ((int64x2_t) __a,
 						  (int64x2_t) __b);
 }
 
@@ -19762,7 +19762,7 @@ vcgtd_s64 (int64x1_t __a, int64x1_t __b)
 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
 vcgtd_u64 (uint64x1_t __a, uint64x1_t __b)
 {
-  return (uint64x1_t) __builtin_aarch64_cmhidi ((int64x1_t) __a,
+  return (uint64x1_t) __builtin_aarch64_cmgtudi ((int64x1_t) __a,
 						(int64x1_t) __b);
 }
 
@@ -19801,28 +19801,28 @@ vcle_s64 (int64x1_t __a, int64x1_t __b)
 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
 vcle_u8 (uint8x8_t __a, uint8x8_t __b)
 {
-  return (uint8x8_t) __builtin_aarch64_cmhsv8qi ((int8x8_t) __b,
+  return (uint8x8_t) __builtin_aarch64_cmgeuv8qi ((int8x8_t) __b,
 						 (int8x8_t) __a);
 }
 
 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
 vcle_u16 (uint16x4_t __a, uint16x4_t __b)
 {
-  return (uint16x4_t) __builtin_aarch64_cmhsv4hi ((int16x4_t) __b,
+  return (uint16x4_t) __builtin_aarch64_cmgeuv4hi ((int16x4_t) __b,
 						  (int16x4_t) __a);
 }
 
 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
 vcle_u32 (uint32x2_t __a, uint32x2_t __b)
 {
-  return (uint32x2_t) __builtin_aarch64_cmhsv2si ((int32x2_t) __b,
+  return (uint32x2_t) __builtin_aarch64_cmgeuv2si ((int32x2_t) __b,
 						  (int32x2_t) __a);
 }
 
 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
 vcle_u64 (uint64x1_t __a, uint64x1_t __b)
 {
-  return (uint64x1_t) __builtin_aarch64_cmhsdi ((int64x1_t) __b,
+  return (uint64x1_t) __builtin_aarch64_cmgeudi ((int64x1_t) __b,
 						(int64x1_t) __a);
 }
 
@@ -19853,28 +19853,28 @@ vcleq_s64 (int64x2_t __a, int64x2_t __b)
 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
 vcleq_u8 (uint8x16_t __a, uint8x16_t __b)
 {
-  return (uint8x16_t) __builtin_aarch64_cmhsv16qi ((int8x16_t) __b,
+  return (uint8x16_t) __builtin_aarch64_cmgeuv16qi ((int8x16_t) __b,
 						   (int8x16_t) __a);
 }
 
 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
 vcleq_u16 (uint16x8_t __a, uint16x8_t __b)
 {
-  return (uint16x8_t) __builtin_aarch64_cmhsv8hi ((int16x8_t) __b,
+  return (uint16x8_t) __builtin_aarch64_cmgeuv8hi ((int16x8_t) __b,
 						  (int16x8_t) __a);
 }
 
 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
 vcleq_u32 (uint32x4_t __a, uint32x4_t __b)
 {
-  return (uint32x4_t) __builtin_aarch64_cmhsv4si ((int32x4_t) __b,
+  return (uint32x4_t) __builtin_aarch64_cmgeuv4si ((int32x4_t) __b,
 						  (int32x4_t) __a);
 }
 
 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
 vcleq_u64 (uint64x2_t __a, uint64x2_t __b)
 {
-  return (uint64x2_t) __builtin_aarch64_cmhsv2di ((int64x2_t) __b,
+  return (uint64x2_t) __builtin_aarch64_cmgeuv2di ((int64x2_t) __b,
 						  (int64x2_t) __a);
 }
 
@@ -19919,28 +19919,28 @@ vclt_s64 (int64x1_t __a, int64x1_t __b)
 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
 vclt_u8 (uint8x8_t __a, uint8x8_t __b)
 {
-  return (uint8x8_t) __builtin_aarch64_cmhiv8qi ((int8x8_t) __b,
+  return (uint8x8_t) __builtin_aarch64_cmgtuv8qi ((int8x8_t) __b,
 						 (int8x8_t) __a);
 }
 
 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
 vclt_u16 (uint16x4_t __a, uint16x4_t __b)
 {
-  return (uint16x4_t) __builtin_aarch64_cmhiv4hi ((int16x4_t) __b,
+  return (uint16x4_t) __builtin_aarch64_cmgtuv4hi ((int16x4_t) __b,
 						  (int16x4_t) __a);
 }
 
 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
 vclt_u32 (uint32x2_t __a, uint32x2_t __b)
 {
-  return (uint32x2_t) __builtin_aarch64_cmhiv2si ((int32x2_t) __b,
+  return (uint32x2_t) __builtin_aarch64_cmgtuv2si ((int32x2_t) __b,
 						  (int32x2_t) __a);
 }
 
 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
 vclt_u64 (uint64x1_t __a, uint64x1_t __b)
 {
-  return (uint64x1_t) __builtin_aarch64_cmhidi ((int64x1_t) __b,
+  return (uint64x1_t) __builtin_aarch64_cmgtudi ((int64x1_t) __b,
 						(int64x1_t) __a);
 }
 
@@ -19971,28 +19971,28 @@ vcltq_s64 (int64x2_t __a, int64x2_t __b)
 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
 vcltq_u8 (uint8x16_t __a, uint8x16_t __b)
 {
-  return (uint8x16_t) __builtin_aarch64_cmhiv16qi ((int8x16_t) __b,
+  return (uint8x16_t) __builtin_aarch64_cmgtuv16qi ((int8x16_t) __b,
 						   (int8x16_t) __a);
 }
 
 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
 vcltq_u16 (uint16x8_t __a, uint16x8_t __b)
 {
-  return (uint16x8_t) __builtin_aarch64_cmhiv8hi ((int16x8_t) __b,
+  return (uint16x8_t) __builtin_aarch64_cmgtuv8hi ((int16x8_t) __b,
 						  (int16x8_t) __a);
 }
 
 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
 vcltq_u32 (uint32x4_t __a, uint32x4_t __b)
 {
-  return (uint32x4_t) __builtin_aarch64_cmhiv4si ((int32x4_t) __b,
+  return (uint32x4_t) __builtin_aarch64_cmgtuv4si ((int32x4_t) __b,
 						  (int32x4_t) __a);
 }
 
 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
 vcltq_u64 (uint64x2_t __a, uint64x2_t __b)
 {
-  return (uint64x2_t) __builtin_aarch64_cmhiv2di ((int64x2_t) __b,
+  return (uint64x2_t) __builtin_aarch64_cmgtuv2di ((int64x2_t) __b,
 						  (int64x2_t) __a);
 }
 
diff --git a/gcc/config/aarch64/iterators.md b/gcc/config/aarch64/iterators.md
index ce81ac5ce875dca597caaec16720a47fa7f169d9..d19b26a6484dbd43c209466f471ccf1f57eb3354 100644
--- a/gcc/config/aarch64/iterators.md
+++ b/gcc/config/aarch64/iterators.md
@@ -83,6 +83,9 @@ (define_mode_iterator PTR [(SI "Pmode ==
 ;; Vector Float modes.
 (define_mode_iterator VDQF [V2SF V4SF V2DF])
 
+;; All Float modes.
+(define_mode_iterator VALLF [V2SF V4SF V2DF SF DF])
+
 ;; Vector Float modes with 2 elements.
 (define_mode_iterator V2F [V2SF V2DF])
 
@@ -213,13 +216,6 @@ (define_c_enum "unspec"
     UNSPEC_URSHL	; Used in aarch64-simd.md.
     UNSPEC_SQRSHL	; Used in aarch64-simd.md.
     UNSPEC_UQRSHL	; Used in aarch64-simd.md.
-    UNSPEC_CMEQ		; Used in aarch64-simd.md.
-    UNSPEC_CMLE		; Used in aarch64-simd.md.
-    UNSPEC_CMLT		; Used in aarch64-simd.md.
-    UNSPEC_CMGE		; Used in aarch64-simd.md.
-    UNSPEC_CMGT		; Used in aarch64-simd.md.
-    UNSPEC_CMHS		; Used in aarch64-simd.md.
-    UNSPEC_CMHI		; Used in aarch64-simd.md.
     UNSPEC_SSLI		; Used in aarch64-simd.md.
     UNSPEC_USLI		; Used in aarch64-simd.md.
     UNSPEC_SSRI		; Used in aarch64-simd.md.
@@ -227,7 +223,6 @@ (define_c_enum "unspec"
     UNSPEC_SSHLL	; Used in aarch64-simd.md.
     UNSPEC_USHLL	; Used in aarch64-simd.md.
     UNSPEC_ADDP		; Used in aarch64-simd.md.
-    UNSPEC_CMTST	; Used in aarch64-simd.md.
     UNSPEC_FMAX		; Used in aarch64-simd.md.
     UNSPEC_FMIN		; Used in aarch64-simd.md.
     UNSPEC_BSL		; Used in aarch64-simd.md.
@@ -251,6 +246,7 @@ (define_mode_attr w [(QI "w") (HI "w") (
 
 ;; For scalar usage of vector/FP registers
 (define_mode_attr v [(QI "b") (HI "h") (SI "s") (DI "d")
+		    (SF "s") (DF "d")
 		    (V8QI "") (V16QI "")
 		    (V4HI "") (V8HI "")
 		    (V2SI "") (V4SI  "")
@@ -305,7 +301,8 @@ (define_mode_attr Vmtype [(V8QI ".8b") (
 			 (V4SF ".4s") (V2DF ".2d")
 			 (DI   "")    (SI   "")
 			 (HI   "")    (QI   "")
-			 (TI   "")])
+			 (TI   "")    (SF   "")
+			 (DF   "")])
 
 ;; Register suffix narrowed modes for VQN.
 (define_mode_attr Vmntype [(V8HI ".8b") (V4SI ".4h")
@@ -444,7 +441,8 @@ (define_mode_attr V_cmp_result [(V8QI "V
 				(V2SI "V2SI") (V4SI  "V4SI")
 				(DI   "DI")   (V2DI  "V2DI")
 				(V2SF "V2SI") (V4SF  "V4SI")
-				(V2DF "V2DI")])
+				(V2DF "V2DI") (DF    "DI")
+				(SF   "SI")])
 
 ;; Lower case mode of results of comparison operations.
 (define_mode_attr v_cmp_result [(V8QI "v8qi") (V16QI "v16qi")
@@ -452,7 +450,8 @@ (define_mode_attr v_cmp_result [(V8QI "v
 				(V2SI "v2si") (V4SI  "v4si")
 				(DI   "di")   (V2DI  "v2di")
 				(V2SF "v2si") (V4SF  "v4si")
-				(V2DF "v2di")])
+				(V2DF "v2di") (DF    "di")
+				(SF   "si")])
 
 ;; Vm for lane instructions is restricted to FP_LO_REGS.
 (define_mode_attr vwx [(V4HI "x") (V8HI "x") (HI "x")
@@ -543,6 +542,12 @@ (define_code_iterator UNQOPS [ss_neg ss_
 ;; Code iterator for signed variants of vector saturating binary ops.
 (define_code_iterator SBINQOPS [ss_plus ss_minus])
 
+;; Comparison operators for <F>CM.
+(define_code_iterator COMPARISONS [lt le eq ge gt])
+
+;; Unsigned comparison operators.
+(define_code_iterator UCOMPARISONS [ltu leu geu gtu])
+
 ;; -------------------------------------------------------------------
 ;; Code Attributes
 ;; -------------------------------------------------------------------
@@ -571,7 +576,28 @@ (define_code_attr optab [(ashift "ashl")
 			 (eq "eq")
 			 (ne "ne")
 			 (lt "lt")
-			 (ge "ge")])
+			 (ge "ge")
+			 (le "le")
+			 (gt "gt")
+			 (ltu "ltu")
+			 (leu "leu")
+			 (geu "geu")
+			 (gtu "gtu")])
+
+;; For comparison operators we use the FCM* and CM* instructions.
+;; As there are no CMLE or CMLT instructions which act on 3 vector
+;; operands, we must use CMGE or CMGT and swap the order of the
+;; source operands.
+
+(define_code_attr n_optab [(lt "gt") (le "ge") (eq "eq") (ge "ge") (gt "gt")
+			   (ltu "hi") (leu "hs") (geu "hs") (gtu "hi")])
+(define_code_attr cmp_1   [(lt "2") (le "2") (eq "1") (ge "1") (gt "1")
+			   (ltu "2") (leu "2") (geu "1") (gtu "1")])
+(define_code_attr cmp_2   [(lt "1") (le "1") (eq "2") (ge "2") (gt "2")
+			   (ltu "1") (leu "1") (geu "2") (gtu "2")])
+
+(define_code_attr CMP [(lt "LT") (le "LE") (eq "EQ") (ge "GE") (gt "GT")
+			   (ltu "LTU") (leu "LEU") (geu "GEU") (gtu "GTU")])
 
 ;; Optab prefix for sign/zero-extending operations
 (define_code_attr su_optab [(sign_extend "") (zero_extend "u")
@@ -680,11 +706,6 @@ (define_int_iterator VQSHRN_N [UNSPEC_SQ
                                UNSPEC_SQSHRN UNSPEC_UQSHRN
                                UNSPEC_SQRSHRN UNSPEC_UQRSHRN])
 
-(define_int_iterator VCMP_S [UNSPEC_CMEQ UNSPEC_CMGE UNSPEC_CMGT
-			     UNSPEC_CMLE UNSPEC_CMLT])
-
-(define_int_iterator VCMP_U [UNSPEC_CMHS UNSPEC_CMHI UNSPEC_CMTST])
-
 (define_int_iterator PERMUTE [UNSPEC_ZIP1 UNSPEC_ZIP2
 			      UNSPEC_TRN1 UNSPEC_TRN2
 			      UNSPEC_UZP1 UNSPEC_UZP2])
@@ -768,12 +789,6 @@ (define_int_attr addsub [(UNSPEC_SHADD "
 			 (UNSPEC_RADDHN2 "add")
 			 (UNSPEC_RSUBHN2 "sub")])
 
-(define_int_attr cmp [(UNSPEC_CMGE "ge") (UNSPEC_CMGT "gt")
-		      (UNSPEC_CMLE "le") (UNSPEC_CMLT "lt")
-                      (UNSPEC_CMEQ "eq")
-		      (UNSPEC_CMHS "hs") (UNSPEC_CMHI "hi")
-		      (UNSPEC_CMTST "tst")])
-
 (define_int_attr offsetlr [(UNSPEC_SSLI	"1") (UNSPEC_USLI "1")
 			   (UNSPEC_SSRI	"0") (UNSPEC_USRI "0")])
 
diff --git a/gcc/config/aarch64/predicates.md b/gcc/config/aarch64/predicates.md
index 8f80b20281137c97827d089c6cacf42a0c58c5f7..8514e8f8fbdda73ecec5d79a336b54a2bd7be4f3 100644
--- a/gcc/config/aarch64/predicates.md
+++ b/gcc/config/aarch64/predicates.md
@@ -31,6 +31,11 @@ (define_predicate "aarch64_reg_or_zero"
        (ior (match_operand 0 "register_operand")
 	    (match_test "op == const0_rtx"))))
 
+(define_predicate "aarch64_reg_or_fp_zero"
+  (and (match_code "reg,subreg,const_double")
+       (ior (match_operand 0 "register_operand")
+	    (match_test "aarch64_float_const_zero_rtx_p (op)"))))
+
 (define_predicate "aarch64_reg_zero_or_m1_or_1"
   (and (match_code "reg,subreg,const_int")
        (ior (match_operand 0 "register_operand")
diff --git a/gcc/testsuite/gcc.target/aarch64/scalar_intrinsics.c b/gcc/testsuite/gcc.target/aarch64/scalar_intrinsics.c
index 1b85308..e984b8e 100644
--- a/gcc/testsuite/gcc.target/aarch64/scalar_intrinsics.c
+++ b/gcc/testsuite/gcc.target/aarch64/scalar_intrinsics.c
@@ -1,7 +1,13 @@
 /* { dg-do compile } */
 /* { dg-options "-O2" } */
 
-#include "../../../config/aarch64/arm_neon.h"
+#include <arm_neon.h>
+
+/* Used to force a variable to a SIMD register.  */
+#define force_simd(V1)   asm volatile ("mov %d0, %d1"		\
+	   : "=w"(V1)						\
+	   : "w"(V1)						\
+	   : /* No clobbers */);
 
 /* { dg-final { scan-assembler-times "\\tadd\\tx\[0-9\]+" 2 } } */
 
@@ -31,7 +37,12 @@ test_vaddd_s64_2 (int64x1_t a, int64x1_t b, int64x1_t c, int64x1_t d)
 uint64x1_t
 test_vceqd_s64 (int64x1_t a, int64x1_t b)
 {
-  return vceqd_s64 (a, b);
+  uint64x1_t res;
+  force_simd (a);
+  force_simd (b);
+  res = vceqd_s64 (a, b);
+  force_simd (res);
+  return res;
 }
 
 /* { dg-final { scan-assembler-times "\\tcmeq\\td\[0-9\]+, d\[0-9\]+, #?0" 1 } } */
@@ -39,7 +50,11 @@ test_vceqd_s64 (int64x1_t a, int64x1_t b)
 uint64x1_t
 test_vceqzd_s64 (int64x1_t a)
 {
-  return vceqzd_s64 (a);
+  uint64x1_t res;
+  force_simd (a);
+  res = vceqzd_s64 (a);
+  force_simd (res);
+  return res;
 }
 
 /* { dg-final { scan-assembler-times "\\tcmge\\td\[0-9\]+, d\[0-9\]+, d\[0-9\]+" 2 } } */
@@ -47,21 +62,36 @@ test_vceqzd_s64 (int64x1_t a)
 uint64x1_t
 test_vcged_s64 (int64x1_t a, int64x1_t b)
 {
-  return vcged_s64 (a, b);
+  uint64x1_t res;
+  force_simd (a);
+  force_simd (b);
+  res = vcged_s64 (a, b);
+  force_simd (res);
+  return res;
 }
 
 uint64x1_t
 test_vcled_s64 (int64x1_t a, int64x1_t b)
 {
-  return vcled_s64 (a, b);
+  uint64x1_t res;
+  force_simd (a);
+  force_simd (b);
+  res = vcled_s64 (a, b);
+  force_simd (res);
+  return res;
 }
 
-/* { dg-final { scan-assembler-times "\\tcmge\\td\[0-9\]+, d\[0-9\]+, #?0" 1 } } */
+/* Idiom recognition will cause this testcase not to generate
+   the expected cmge instruction, so do not check for it.  */
 
 uint64x1_t
 test_vcgezd_s64 (int64x1_t a)
 {
-  return vcgezd_s64 (a);
+  uint64x1_t res;
+  force_simd (a);
+  res = vcgezd_s64 (a);
+  force_simd (res);
+  return res;
 }
 
 /* { dg-final { scan-assembler-times "\\tcmhs\\td\[0-9\]+, d\[0-9\]+, d\[0-9\]+" 1 } } */
@@ -69,7 +99,12 @@ test_vcgezd_s64 (int64x1_t a)
 uint64x1_t
 test_vcged_u64 (uint64x1_t a, uint64x1_t b)
 {
-  return vcged_u64 (a, b);
+  uint64x1_t res;
+  force_simd (a);
+  force_simd (b);
+  res = vcged_u64 (a, b);
+  force_simd (res);
+  return res;
 }
 
 /* { dg-final { scan-assembler-times "\\tcmgt\\td\[0-9\]+, d\[0-9\]+, d\[0-9\]+" 2 } } */
@@ -77,13 +112,23 @@ test_vcged_u64 (uint64x1_t a, uint64x1_t b)
 uint64x1_t
 test_vcgtd_s64 (int64x1_t a, int64x1_t b)
 {
-  return vcgtd_s64 (a, b);
+  uint64x1_t res;
+  force_simd (a);
+  force_simd (b);
+  res = vcgtd_s64 (a, b);
+  force_simd (res);
+  return res;
 }
 
 uint64x1_t
 test_vcltd_s64 (int64x1_t a, int64x1_t b)
 {
-  return vcltd_s64 (a, b);
+  uint64x1_t res;
+  force_simd (a);
+  force_simd (b);
+  res = vcltd_s64 (a, b);
+  force_simd (res);
+  return res;
 }
 
 /* { dg-final { scan-assembler-times "\\tcmgt\\td\[0-9\]+, d\[0-9\]+, #?0" 1 } } */
@@ -91,7 +136,11 @@ test_vcltd_s64 (int64x1_t a, int64x1_t b)
 uint64x1_t
 test_vcgtzd_s64 (int64x1_t a)
 {
-  return vcgtzd_s64 (a);
+  uint64x1_t res;
+  force_simd (a);
+  res = vcgtzd_s64 (a);
+  force_simd (res);
+  return res;
 }
 
 /* { dg-final { scan-assembler-times "\\tcmhi\\td\[0-9\]+, d\[0-9\]+, d\[0-9\]+" 1 } } */
@@ -99,7 +148,12 @@ test_vcgtzd_s64 (int64x1_t a)
 uint64x1_t
 test_vcgtd_u64 (uint64x1_t a, uint64x1_t b)
 {
-  return vcgtd_u64 (a, b);
+  uint64x1_t res;
+  force_simd (a);
+  force_simd (b);
+  res = vcgtd_u64 (a, b);
+  force_simd (res);
+  return res;
 }
 
 /* { dg-final { scan-assembler-times "\\tcmle\\td\[0-9\]+, d\[0-9\]+, #?0" 1 } } */
@@ -107,15 +161,24 @@ test_vcgtd_u64 (uint64x1_t a, uint64x1_t b)
 uint64x1_t
 test_vclezd_s64 (int64x1_t a)
 {
-  return vclezd_s64 (a);
+  uint64x1_t res;
+  force_simd (a);
+  res = vclezd_s64 (a);
+  force_simd (res);
+  return res;
 }
 
-/* { dg-final { scan-assembler-times "\\tcmlt\\td\[0-9\]+, d\[0-9\]+, #?0" 1 } } */
+/* Idiom recognition will cause this testcase not to generate
+   the expected cmlt instruction, so do not check for it.  */
 
 uint64x1_t
 test_vcltzd_s64 (int64x1_t a)
 {
-  return vcltzd_s64 (a);
+  uint64x1_t res;
+  force_simd (a);
+  res = vcltzd_s64 (a);
+  force_simd (res);
+  return res;
 }
 
 /* { dg-final { scan-assembler-times "\\tdup\\tb\[0-9\]+, v\[0-9\]+\.b" 2 } } */
@@ -179,13 +242,23 @@ test_vdupd_lane_u64 (uint64x2_t a)
 int64x1_t
 test_vtst_s64 (int64x1_t a, int64x1_t b)
 {
-  return vtstd_s64 (a, b);
+  uint64x1_t res;
+  force_simd (a);
+  force_simd (b);
+  res = vtstd_s64 (a, b);
+  force_simd (res);
+  return res;
 }
 
 uint64x1_t
 test_vtst_u64 (uint64x1_t a, uint64x1_t b)
 {
-  return vtstd_u64 (a, b);
+  uint64x1_t res;
+  force_simd (a);
+  force_simd (b);
+  res = vtstd_u64 (a, b);
+  force_simd (res);
+  return res;
 }
 
 /* { dg-final { scan-assembler-times "\\taddp\\td\[0-9\]+, v\[0-9\]+\.2d" 1 } } */
@@ -722,7 +795,10 @@ test_vrshld_u64 (uint64x1_t a, uint64x1_t b)
   return vrshld_u64 (a, b);
 }
 
-/* { dg-final { scan-assembler-times "\\tasr\\tx\[0-9\]+" 1 } } */
+/* Other intrinsics can generate an asr instruction (vcltzd, vcgezd),
+   so we cannot check scan-assembler-times.  */
+
+/* { dg-final { scan-assembler "\\tasr\\tx\[0-9\]+" } } */
 
 int64x1_t
 test_vshrd_n_s64 (int64x1_t a)
