Re: [PATCH 1/2][GCC][AArch64] Update Armv8.4-a's FP16 FML intrinsics

James Greenhalgh Thu, 21 Feb 2019 14:48:49 -0800

On Wed, Feb 20, 2019 at 08:00:13AM -0600, Tamar Christina wrote:
> Hi All,
> 
> This patch updates the suffixes of the Armv8.4-a FP16 FML intrinsics from u32
> to f16, to be more consistent with the naming convention for intrinsics.
> 
> The specifications for these intrinsics have not been published yet so we do
> not need to maintain the old names.
> 
> The patch was created with the following script:
> 
> grep -lIE "(vfml[as].+)_u32" -r gcc/ | grep -iEv ".+Changelog.*" \
>   | xargs sed -i -E -e "s/(vfml[as].+)_u32/\1_f16/g"


Big bonus points for including this!

> Bootstrapped and regtested on aarch64-none-linux-gnu with no issues.
> 
> OK for trunk, and for an eventual backport to GCC 8?

Woops.

Yes, OK for trunk and backport it please.

Thanks,
James

> gcc/ChangeLog:
> 
> 2019-02-20  Tamar Christina  <tamar.christ...@arm.com>
> 
>       * config/aarch64/arm_neon.h (vfmlal_low_u32, vfmlsl_low_u32,
>       vfmlalq_low_u32, vfmlslq_low_u32, vfmlal_high_u32, vfmlsl_high_u32,
>       vfmlalq_high_u32, vfmlslq_high_u32, vfmlal_lane_low_u32,
>       vfmlsl_lane_low_u32, vfmlal_laneq_low_u32, vfmlsl_laneq_low_u32,
>       vfmlalq_lane_low_u32, vfmlslq_lane_low_u32, vfmlalq_laneq_low_u32,
>       vfmlslq_laneq_low_u32, vfmlal_lane_high_u32, vfmlsl_lane_high_u32,
>       vfmlal_laneq_high_u32, vfmlsl_laneq_high_u32, vfmlalq_lane_high_u32,
>       vfmlslq_lane_high_u32, vfmlalq_laneq_high_u32, vfmlslq_laneq_high_u32):
>       Rename ...
>       (vfmlal_low_f16, vfmlsl_low_f16, vfmlalq_low_f16, vfmlslq_low_f16,
>       vfmlal_high_f16, vfmlsl_high_f16, vfmlalq_high_f16, vfmlslq_high_f16,
>       vfmlal_lane_low_f16, vfmlsl_lane_low_f16, vfmlal_laneq_low_f16,
>       vfmlsl_laneq_low_f16, vfmlalq_lane_low_f16, vfmlslq_lane_low_f16,
>       vfmlalq_laneq_low_f16, vfmlslq_laneq_low_f16, vfmlal_lane_high_f16,
>       vfmlsl_lane_high_f16, vfmlal_laneq_high_f16, vfmlsl_laneq_high_f16,
>       vfmlalq_lane_high_f16, vfmlslq_lane_high_f16, vfmlalq_laneq_high_f16,
>       vfmlslq_laneq_high_f16): ... To this.
> 
> gcc/testsuite/ChangeLog:
> 
> 2019-02-20  Tamar Christina  <tamar.christ...@arm.com>
> 
>       * gcc.target/aarch64/fp16_fmul_high.h (test_vfmlal_high_u32,
>       test_vfmlalq_high_u32, test_vfmlsl_high_u32, test_vfmlslq_high_u32):
>       Rename ...
>       (test_vfmlal_high_f16, test_vfmlalq_high_f16, test_vfmlsl_high_f16,
>       test_vfmlslq_high_f16): ... To this.
>       * gcc.target/aarch64/fp16_fmul_lane_high.h (test_vfmlal_lane_high_u32,
>       tets_vfmlsl_lane_high_u32, test_vfmlal_laneq_high_u32,
>       test_vfmlsl_laneq_high_u32, test_vfmlalq_lane_high_u32,
>       test_vfmlslq_lane_high_u32, test_vfmlalq_laneq_high_u32,
>       test_vfmlslq_laneq_high_u32): Rename ...
>       (test_vfmlal_lane_high_f16, tets_vfmlsl_lane_high_f16,
>       test_vfmlal_laneq_high_f16, test_vfmlsl_laneq_high_f16,
>       test_vfmlalq_lane_high_f16, test_vfmlslq_lane_high_f16,
>       test_vfmlalq_laneq_high_f16, test_vfmlslq_laneq_high_f16): ... To this.
>       * gcc.target/aarch64/fp16_fmul_lane_low.h (test_vfmlal_lane_low_u32,
>       test_vfmlsl_lane_low_u32, test_vfmlal_laneq_low_u32,
>       test_vfmlsl_laneq_low_u32, test_vfmlalq_lane_low_u32,
>       test_vfmlslq_lane_low_u32, test_vfmlalq_laneq_low_u32,
>       test_vfmlslq_laneq_low_u32): Rename ...
>       (test_vfmlal_lane_low_f16, test_vfmlsl_lane_low_f16,
>       test_vfmlal_laneq_low_f16, test_vfmlsl_laneq_low_f16,
>       test_vfmlalq_lane_low_f16, test_vfmlslq_lane_low_f16,
>       test_vfmlalq_laneq_low_f16, test_vfmlslq_laneq_low_f16): ... To this.
>       * gcc.target/aarch64/fp16_fmul_low.h (test_vfmlal_low_u32,
>       test_vfmlalq_low_u32, test_vfmlsl_low_u32, test_vfmlslq_low_u32):
>       Rename ...
>       (test_vfmlal_low_f16, test_vfmlalq_low_f16, test_vfmlsl_low_f16,
>       test_vfmlslq_low_f16): ... To this.
>       * lib/target-supports.exp
>       (check_effective_target_arm_fp16fml_neon_ok_nocache): Update test.
> 
> -- 

> diff --git a/gcc/config/aarch64/arm_neon.h b/gcc/config/aarch64/arm_neon.h
> index f405a325cf5f3f8970e5f4b78322335c280fa7a4..314ef30187d1ba1882eaf5c610770d380344e920 100644
> --- a/gcc/config/aarch64/arm_neon.h
> +++ b/gcc/config/aarch64/arm_neon.h
> @@ -33777,63 +33777,63 @@ vcmlaq_rot270_laneq_f32 (float32x4_t __r, float32x4_t __a, float32x4_t __b,
>  
>  __extension__ extern __inline float32x2_t
>  __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> -vfmlal_low_u32 (float32x2_t __r, float16x4_t __a, float16x4_t __b)
> +vfmlal_low_f16 (float32x2_t __r, float16x4_t __a, float16x4_t __b)
>  {
>    return __builtin_aarch64_fmlal_lowv2sf (__r, __a, __b);
>  }
>  
>  __extension__ extern __inline float32x2_t
>  __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> -vfmlsl_low_u32 (float32x2_t __r, float16x4_t __a, float16x4_t __b)
> +vfmlsl_low_f16 (float32x2_t __r, float16x4_t __a, float16x4_t __b)
>  {
>    return __builtin_aarch64_fmlsl_lowv2sf (__r, __a, __b);
>  }
>  
>  __extension__ extern __inline float32x4_t
>  __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> -vfmlalq_low_u32 (float32x4_t __r, float16x8_t __a, float16x8_t __b)
> +vfmlalq_low_f16 (float32x4_t __r, float16x8_t __a, float16x8_t __b)
>  {
>    return __builtin_aarch64_fmlalq_lowv4sf (__r, __a, __b);
>  }
>  
>  __extension__ extern __inline float32x4_t
>  __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> -vfmlslq_low_u32 (float32x4_t __r, float16x8_t __a, float16x8_t __b)
> +vfmlslq_low_f16 (float32x4_t __r, float16x8_t __a, float16x8_t __b)
>  {
>    return __builtin_aarch64_fmlslq_lowv4sf (__r, __a, __b);
>  }
>  
>  __extension__ extern __inline float32x2_t
>  __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> -vfmlal_high_u32 (float32x2_t __r, float16x4_t __a, float16x4_t __b)
> +vfmlal_high_f16 (float32x2_t __r, float16x4_t __a, float16x4_t __b)
>  {
>    return __builtin_aarch64_fmlal_highv2sf (__r, __a, __b);
>  }
>  
>  __extension__ extern __inline float32x2_t
>  __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> -vfmlsl_high_u32 (float32x2_t __r, float16x4_t __a, float16x4_t __b)
> +vfmlsl_high_f16 (float32x2_t __r, float16x4_t __a, float16x4_t __b)
>  {
>    return __builtin_aarch64_fmlsl_highv2sf (__r, __a, __b);
>  }
>  
>  __extension__ extern __inline float32x4_t
>  __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> -vfmlalq_high_u32 (float32x4_t __r, float16x8_t __a, float16x8_t __b)
> +vfmlalq_high_f16 (float32x4_t __r, float16x8_t __a, float16x8_t __b)
>  {
>    return __builtin_aarch64_fmlalq_highv4sf (__r, __a, __b);
>  }
>  
>  __extension__ extern __inline float32x4_t
>  __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> -vfmlslq_high_u32 (float32x4_t __r, float16x8_t __a, float16x8_t __b)
> +vfmlslq_high_f16 (float32x4_t __r, float16x8_t __a, float16x8_t __b)
>  {
>    return __builtin_aarch64_fmlslq_highv4sf (__r, __a, __b);
>  }
>  
>  __extension__ extern __inline float32x2_t
>  __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> -vfmlal_lane_low_u32 (float32x2_t __r, float16x4_t __a, float16x4_t __b,
> +vfmlal_lane_low_f16 (float32x2_t __r, float16x4_t __a, float16x4_t __b,
>                    const int __lane)
>  {
>    return __builtin_aarch64_fmlal_lane_lowv2sf (__r, __a, __b, __lane);
> @@ -33841,7 +33841,7 @@ vfmlal_lane_low_u32 (float32x2_t __r, float16x4_t __a, float16x4_t __b,
>  
>  __extension__ extern __inline float32x2_t
>  __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> -vfmlsl_lane_low_u32 (float32x2_t __r, float16x4_t __a, float16x4_t __b,
> +vfmlsl_lane_low_f16 (float32x2_t __r, float16x4_t __a, float16x4_t __b,
>                    const int __lane)
>  {
>    return __builtin_aarch64_fmlsl_lane_lowv2sf (__r, __a, __b, __lane);
> @@ -33849,7 +33849,7 @@ vfmlsl_lane_low_u32 (float32x2_t __r, float16x4_t __a, float16x4_t __b,
>  
>  __extension__ extern __inline float32x2_t
>  __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> -vfmlal_laneq_low_u32 (float32x2_t __r, float16x4_t __a, float16x8_t __b,
> +vfmlal_laneq_low_f16 (float32x2_t __r, float16x4_t __a, float16x8_t __b,
>                     const int __lane)
>  {
>    return __builtin_aarch64_fmlal_laneq_lowv2sf (__r, __a, __b, __lane);
> @@ -33857,7 +33857,7 @@ vfmlal_laneq_low_u32 (float32x2_t __r, float16x4_t __a, float16x8_t __b,
>  
>  __extension__ extern __inline float32x2_t
>  __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> -vfmlsl_laneq_low_u32 (float32x2_t __r, float16x4_t __a, float16x8_t __b,
> +vfmlsl_laneq_low_f16 (float32x2_t __r, float16x4_t __a, float16x8_t __b,
>                     const int __lane)
>  {
>    return __builtin_aarch64_fmlsl_laneq_lowv2sf (__r, __a, __b, __lane);
> @@ -33865,7 +33865,7 @@ vfmlsl_laneq_low_u32 (float32x2_t __r, float16x4_t __a, float16x8_t __b,
>  
>  __extension__ extern __inline float32x4_t
>  __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> -vfmlalq_lane_low_u32 (float32x4_t __r, float16x8_t __a, float16x4_t __b,
> +vfmlalq_lane_low_f16 (float32x4_t __r, float16x8_t __a, float16x4_t __b,
>                     const int __lane)
>  {
>    return __builtin_aarch64_fmlalq_lane_lowv4sf (__r, __a, __b, __lane);
> @@ -33873,7 +33873,7 @@ vfmlalq_lane_low_u32 (float32x4_t __r, float16x8_t __a, float16x4_t __b,
>  
>  __extension__ extern __inline float32x4_t
>  __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> -vfmlslq_lane_low_u32 (float32x4_t __r, float16x8_t __a, float16x4_t __b,
> +vfmlslq_lane_low_f16 (float32x4_t __r, float16x8_t __a, float16x4_t __b,
>                     const int __lane)
>  {
>    return __builtin_aarch64_fmlslq_lane_lowv4sf (__r, __a, __b, __lane);
> @@ -33881,7 +33881,7 @@ vfmlslq_lane_low_u32 (float32x4_t __r, float16x8_t __a, float16x4_t __b,
>  
>  __extension__ extern __inline float32x4_t
>  __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> -vfmlalq_laneq_low_u32 (float32x4_t __r, float16x8_t __a, float16x8_t __b,
> +vfmlalq_laneq_low_f16 (float32x4_t __r, float16x8_t __a, float16x8_t __b,
>                      const int __lane)
>  {
>    return __builtin_aarch64_fmlalq_laneq_lowv4sf (__r, __a, __b, __lane);
> @@ -33889,7 +33889,7 @@ vfmlalq_laneq_low_u32 (float32x4_t __r, float16x8_t __a, float16x8_t __b,
>  
>  __extension__ extern __inline float32x4_t
>  __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> -vfmlslq_laneq_low_u32 (float32x4_t __r, float16x8_t __a, float16x8_t __b,
> +vfmlslq_laneq_low_f16 (float32x4_t __r, float16x8_t __a, float16x8_t __b,
>                     const int __lane)
>  {
>    return __builtin_aarch64_fmlslq_laneq_lowv4sf (__r, __a, __b, __lane);
> @@ -33897,7 +33897,7 @@ vfmlslq_laneq_low_u32 (float32x4_t __r, float16x8_t __a, float16x8_t __b,
>  
>  __extension__ extern __inline float32x2_t
>  __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> -vfmlal_lane_high_u32 (float32x2_t __r, float16x4_t __a, float16x4_t __b,
> +vfmlal_lane_high_f16 (float32x2_t __r, float16x4_t __a, float16x4_t __b,
>                    const int __lane)
>  {
>    return __builtin_aarch64_fmlal_lane_highv2sf (__r, __a, __b, __lane);
> @@ -33905,7 +33905,7 @@ vfmlal_lane_high_u32 (float32x2_t __r, float16x4_t __a, float16x4_t __b,
>  
>  __extension__ extern __inline float32x2_t
>  __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> -vfmlsl_lane_high_u32 (float32x2_t __r, float16x4_t __a, float16x4_t __b,
> +vfmlsl_lane_high_f16 (float32x2_t __r, float16x4_t __a, float16x4_t __b,
>                    const int __lane)
>  {
>    return __builtin_aarch64_fmlsl_lane_highv2sf (__r, __a, __b, __lane);
> @@ -33913,7 +33913,7 @@ vfmlsl_lane_high_u32 (float32x2_t __r, float16x4_t __a, float16x4_t __b,
>  
>  __extension__ extern __inline float32x2_t
>  __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> -vfmlal_laneq_high_u32 (float32x2_t __r, float16x4_t __a, float16x8_t __b,
> +vfmlal_laneq_high_f16 (float32x2_t __r, float16x4_t __a, float16x8_t __b,
>                     const int __lane)
>  {
>    return __builtin_aarch64_fmlal_laneq_highv2sf (__r, __a, __b, __lane);
> @@ -33921,7 +33921,7 @@ vfmlal_laneq_high_u32 (float32x2_t __r, float16x4_t __a, float16x8_t __b,
>  
>  __extension__ extern __inline float32x2_t
>  __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> -vfmlsl_laneq_high_u32 (float32x2_t __r, float16x4_t __a, float16x8_t __b,
> +vfmlsl_laneq_high_f16 (float32x2_t __r, float16x4_t __a, float16x8_t __b,
>                     const int __lane)
>  {
>    return __builtin_aarch64_fmlsl_laneq_highv2sf (__r, __a, __b, __lane);
> @@ -33929,7 +33929,7 @@ vfmlsl_laneq_high_u32 (float32x2_t __r, float16x4_t __a, float16x8_t __b,
>  
>  __extension__ extern __inline float32x4_t
>  __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> -vfmlalq_lane_high_u32 (float32x4_t __r, float16x8_t __a, float16x4_t __b,
> +vfmlalq_lane_high_f16 (float32x4_t __r, float16x8_t __a, float16x4_t __b,
>                     const int __lane)
>  {
>    return __builtin_aarch64_fmlalq_lane_highv4sf (__r, __a, __b, __lane);
> @@ -33937,7 +33937,7 @@ vfmlalq_lane_high_u32 (float32x4_t __r, float16x8_t __a, float16x4_t __b,
>  
>  __extension__ extern __inline float32x4_t
>  __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> -vfmlslq_lane_high_u32 (float32x4_t __r, float16x8_t __a, float16x4_t __b,
> +vfmlslq_lane_high_f16 (float32x4_t __r, float16x8_t __a, float16x4_t __b,
>                     const int __lane)
>  {
>    return __builtin_aarch64_fmlslq_lane_highv4sf (__r, __a, __b, __lane);
> @@ -33945,7 +33945,7 @@ vfmlslq_lane_high_u32 (float32x4_t __r, float16x8_t __a, float16x4_t __b,
>  
>  __extension__ extern __inline float32x4_t
>  __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> -vfmlalq_laneq_high_u32 (float32x4_t __r, float16x8_t __a, float16x8_t __b,
> +vfmlalq_laneq_high_f16 (float32x4_t __r, float16x8_t __a, float16x8_t __b,
>                      const int __lane)
>  {
>    return __builtin_aarch64_fmlalq_laneq_highv4sf (__r, __a, __b, __lane);
> @@ -33953,7 +33953,7 @@ vfmlalq_laneq_high_u32 (float32x4_t __r, float16x8_t __a, float16x8_t __b,
>  
>  __extension__ extern __inline float32x4_t
>  __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> -vfmlslq_laneq_high_u32 (float32x4_t __r, float16x8_t __a, float16x8_t __b,
> +vfmlslq_laneq_high_f16 (float32x4_t __r, float16x8_t __a, float16x8_t __b,
>                     const int __lane)
>  {
>    return __builtin_aarch64_fmlslq_laneq_highv4sf (__r, __a, __b, __lane);
> diff --git a/gcc/testsuite/gcc.target/aarch64/fp16_fmul_high.h b/gcc/testsuite/gcc.target/aarch64/fp16_fmul_high.h
> index 9c86bd19153cc0888f7b28f36d141b9fe08f535e..def85038a7208725ecb1db0888a1cc651aaa4934 100644
> --- a/gcc/testsuite/gcc.target/aarch64/fp16_fmul_high.h
> +++ b/gcc/testsuite/gcc.target/aarch64/fp16_fmul_high.h
> @@ -1,25 +1,25 @@
>  #include "arm_neon.h"
>  
>  float32x2_t
> -test_vfmlal_high_u32 (float32x2_t r, float16x4_t a, float16x4_t b)
> +test_vfmlal_high_f16 (float32x2_t r, float16x4_t a, float16x4_t b)
>  {
> -  return vfmlal_high_u32 (r, a, b);
> +  return vfmlal_high_f16 (r, a, b);
>  }
>  
>  float32x4_t
> -test_vfmlalq_high_u32 (float32x4_t r, float16x8_t a, float16x8_t b)
> +test_vfmlalq_high_f16 (float32x4_t r, float16x8_t a, float16x8_t b)
>  {
> -  return vfmlalq_high_u32 (r, a, b);
> +  return vfmlalq_high_f16 (r, a, b);
>  }
>  
>  float32x2_t
> -test_vfmlsl_high_u32 (float32x2_t r, float16x4_t a, float16x4_t b)
> +test_vfmlsl_high_f16 (float32x2_t r, float16x4_t a, float16x4_t b)
>  {
> -  return vfmlsl_high_u32 (r, a, b);
> +  return vfmlsl_high_f16 (r, a, b);
>  }
>  
>  float32x4_t
> -test_vfmlslq_high_u32 (float32x4_t r, float16x8_t a, float16x8_t b)
> +test_vfmlslq_high_f16 (float32x4_t r, float16x8_t a, float16x8_t b)
>  {
> -  return vfmlslq_high_u32 (r, a, b);
> +  return vfmlslq_high_f16 (r, a, b);
>  }
> diff --git a/gcc/testsuite/gcc.target/aarch64/fp16_fmul_lane_high.h b/gcc/testsuite/gcc.target/aarch64/fp16_fmul_lane_high.h
> index 1039347865e0bc79dfe351fd52f36964e7c41188..a0b95f8b81e4799a6075b0f0fca6834f73de0dc8 100644
> --- a/gcc/testsuite/gcc.target/aarch64/fp16_fmul_lane_high.h
> +++ b/gcc/testsuite/gcc.target/aarch64/fp16_fmul_lane_high.h
> @@ -1,49 +1,49 @@
>  #include "arm_neon.h"
>  
>  float32x2_t
> -test_vfmlal_lane_high_u32 (float32x2_t r, float16x4_t a, float16x4_t b)
> +test_vfmlal_lane_high_f16 (float32x2_t r, float16x4_t a, float16x4_t b)
>  {
> -  return vfmlal_lane_high_u32 (r, a, b, 0);
> +  return vfmlal_lane_high_f16 (r, a, b, 0);
>  }
>  
>  float32x2_t
> -tets_vfmlsl_lane_high_u32  (float32x2_t r, float16x4_t a, float16x4_t b)
> +tets_vfmlsl_lane_high_f16  (float32x2_t r, float16x4_t a, float16x4_t b)
>  {
> -  return vfmlsl_lane_high_u32 (r, a, b, 0);
> +  return vfmlsl_lane_high_f16 (r, a, b, 0);
>  }
>  
>  float32x2_t
> -test_vfmlal_laneq_high_u32 (float32x2_t r, float16x4_t a, float16x8_t b)
> +test_vfmlal_laneq_high_f16 (float32x2_t r, float16x4_t a, float16x8_t b)
>  {
> -  return vfmlal_laneq_high_u32 (r, a, b, 6);
> +  return vfmlal_laneq_high_f16 (r, a, b, 6);
>  }
>  
>  float32x2_t
> -test_vfmlsl_laneq_high_u32 (float32x2_t r, float16x4_t a, float16x8_t b)
> +test_vfmlsl_laneq_high_f16 (float32x2_t r, float16x4_t a, float16x8_t b)
>  {
> -  return vfmlsl_laneq_high_u32 (r, a, b, 6);
> +  return vfmlsl_laneq_high_f16 (r, a, b, 6);
>  }
>  
>  float32x4_t
> -test_vfmlalq_lane_high_u32 (float32x4_t r, float16x8_t a, float16x4_t b)
> +test_vfmlalq_lane_high_f16 (float32x4_t r, float16x8_t a, float16x4_t b)
>  {
> -  return vfmlalq_lane_high_u32 (r, a, b, 1);
> +  return vfmlalq_lane_high_f16 (r, a, b, 1);
>  }
>  
>  float32x4_t
> -test_vfmlslq_lane_high_u32 (float32x4_t r, float16x8_t a, float16x4_t b)
> +test_vfmlslq_lane_high_f16 (float32x4_t r, float16x8_t a, float16x4_t b)
>  {
> -  return vfmlslq_lane_high_u32 (r, a, b, 1);
> +  return vfmlslq_lane_high_f16 (r, a, b, 1);
>  }
>  
>  float32x4_t
> -test_vfmlalq_laneq_high_u32  (float32x4_t r, float16x8_t a, float16x8_t b)
> +test_vfmlalq_laneq_high_f16  (float32x4_t r, float16x8_t a, float16x8_t b)
>  {
> -  return vfmlalq_laneq_high_u32 (r, a, b, 7);
> +  return vfmlalq_laneq_high_f16 (r, a, b, 7);
>  }
>  
>  float32x4_t
> -test_vfmlslq_laneq_high_u32 (float32x4_t r, float16x8_t a, float16x8_t b)
> +test_vfmlslq_laneq_high_f16 (float32x4_t r, float16x8_t a, float16x8_t b)
>  {
> -  return vfmlslq_laneq_high_u32 (r, a, b, 7);
> +  return vfmlslq_laneq_high_f16 (r, a, b, 7);
>  }
> diff --git a/gcc/testsuite/gcc.target/aarch64/fp16_fmul_lane_low.h b/gcc/testsuite/gcc.target/aarch64/fp16_fmul_lane_low.h
> index b689741bdb006e89f14f29b803ba6d38a62b387e..bf49829c4bec941970eaf4e32cabf65719be9eaa 100644
> --- a/gcc/testsuite/gcc.target/aarch64/fp16_fmul_lane_low.h
> +++ b/gcc/testsuite/gcc.target/aarch64/fp16_fmul_lane_low.h
> @@ -1,49 +1,49 @@
>  #include "arm_neon.h"
>  
>  float32x2_t
> -test_vfmlal_lane_low_u32 (float32x2_t r, float16x4_t a, float16x4_t b)
> +test_vfmlal_lane_low_f16 (float32x2_t r, float16x4_t a, float16x4_t b)
>  {
> -  return vfmlal_lane_low_u32 (r, a, b, 0);
> +  return vfmlal_lane_low_f16 (r, a, b, 0);
>  }
>  
>  float32x2_t
> -test_vfmlsl_lane_low_u32 (float32x2_t r, float16x4_t a, float16x4_t b)
> +test_vfmlsl_lane_low_f16 (float32x2_t r, float16x4_t a, float16x4_t b)
>  {
> -  return vfmlsl_lane_low_u32 (r, a, b, 0);
> +  return vfmlsl_lane_low_f16 (r, a, b, 0);
>  }
>  
>  float32x2_t
> -test_vfmlal_laneq_low_u32 (float32x2_t r, float16x4_t a, float16x8_t b)
> +test_vfmlal_laneq_low_f16 (float32x2_t r, float16x4_t a, float16x8_t b)
>  {
> -  return vfmlal_laneq_low_u32 (r, a, b, 6);
> +  return vfmlal_laneq_low_f16 (r, a, b, 6);
>  }
>  
>  float32x2_t
> -test_vfmlsl_laneq_low_u32 (float32x2_t r, float16x4_t a, float16x8_t b)
> +test_vfmlsl_laneq_low_f16 (float32x2_t r, float16x4_t a, float16x8_t b)
>  {
> -  return vfmlsl_laneq_low_u32 (r, a, b, 6);
> +  return vfmlsl_laneq_low_f16 (r, a, b, 6);
>  }
>  
>  float32x4_t
> -test_vfmlalq_lane_low_u32 (float32x4_t r, float16x8_t a, float16x4_t b)
> +test_vfmlalq_lane_low_f16 (float32x4_t r, float16x8_t a, float16x4_t b)
>  {
> -  return vfmlalq_lane_low_u32 (r, a, b, 1);
> +  return vfmlalq_lane_low_f16 (r, a, b, 1);
>  }
>  
>  float32x4_t
> -test_vfmlslq_lane_low_u32 (float32x4_t r, float16x8_t a, float16x4_t b)
> +test_vfmlslq_lane_low_f16 (float32x4_t r, float16x8_t a, float16x4_t b)
>  {
> -  return vfmlslq_lane_low_u32 (r, a, b, 1);
> +  return vfmlslq_lane_low_f16 (r, a, b, 1);
>  }
>  
>  float32x4_t
> -test_vfmlalq_laneq_low_u32 (float32x4_t r, float16x8_t a, float16x8_t b)
> +test_vfmlalq_laneq_low_f16 (float32x4_t r, float16x8_t a, float16x8_t b)
>  {
> -  return vfmlalq_laneq_low_u32 (r, a, b, 7);
> +  return vfmlalq_laneq_low_f16 (r, a, b, 7);
>  }
>  
>  float32x4_t
> -test_vfmlslq_laneq_low_u32 (float32x4_t r, float16x8_t a, float16x8_t b)
> +test_vfmlslq_laneq_low_f16 (float32x4_t r, float16x8_t a, float16x8_t b)
>  {
> -  return vfmlslq_laneq_low_u32 (r, a, b, 7);
> +  return vfmlslq_laneq_low_f16 (r, a, b, 7);
>  }
> diff --git a/gcc/testsuite/gcc.target/aarch64/fp16_fmul_low.h b/gcc/testsuite/gcc.target/aarch64/fp16_fmul_low.h
> index 778ca1c245c7343b38272e586a54927c7cd50bee..b039b548b5809f92a6ef0f91f6ab475b2b03866c 100644
> --- a/gcc/testsuite/gcc.target/aarch64/fp16_fmul_low.h
> +++ b/gcc/testsuite/gcc.target/aarch64/fp16_fmul_low.h
> @@ -1,25 +1,25 @@
>  #include "arm_neon.h"
>  
>  float32x2_t
> -test_vfmlal_low_u32 (float32x2_t r, float16x4_t a, float16x4_t b)
> +test_vfmlal_low_f16 (float32x2_t r, float16x4_t a, float16x4_t b)
>  {
> -  return vfmlal_low_u32 (r, a, b);
> +  return vfmlal_low_f16 (r, a, b);
>  }
>  
>  float32x4_t
> -test_vfmlalq_low_u32 (float32x4_t r, float16x8_t a, float16x8_t b)
> +test_vfmlalq_low_f16 (float32x4_t r, float16x8_t a, float16x8_t b)
>  {
> -  return vfmlalq_low_u32 (r, a, b);
> +  return vfmlalq_low_f16 (r, a, b);
>  }
>  
>  float32x2_t
> -test_vfmlsl_low_u32 (float32x2_t r, float16x4_t a, float16x4_t b)
> +test_vfmlsl_low_f16 (float32x2_t r, float16x4_t a, float16x4_t b)
>  {
> -  return vfmlsl_low_u32 (r, a, b);
> +  return vfmlsl_low_f16 (r, a, b);
>  }
>  
>  float32x4_t
> -test_vfmlslq_low_u32 (float32x4_t r, float16x8_t a, float16x8_t b)
> +test_vfmlslq_low_f16 (float32x4_t r, float16x8_t a, float16x8_t b)
>  {
> -  return vfmlslq_low_u32 (r, a, b);
> +  return vfmlslq_low_f16 (r, a, b);
>  }
> diff --git a/gcc/testsuite/lib/target-supports.exp b/gcc/testsuite/lib/target-supports.exp
> index c0df467e0175cd92c688cedebb97fd4ae87e985e..21ac2ee3b4c9591ac9efad6a1567e35fc8e3291b 100644
> --- a/gcc/testsuite/lib/target-supports.exp
> +++ b/gcc/testsuite/lib/target-supports.exp
> @@ -4522,7 +4522,7 @@ proc check_effective_target_arm_fp16fml_neon_ok_nocache { } {
>               float32x2_t
>               foo (float32x2_t r, float16x4_t a, float16x4_t b)
>               {
> -               return vfmlal_high_u32 (r, a, b);
> +               return vfmlal_high_f16 (r, a, b);
>               }
>          } "$flags -march=armv8.2-a+fp16fml"] } {
>              set et_arm_fp16fml_neon_flags "$flags -march=armv8.2-a+fp16fml"
>

Re: [PATCH 1/2][GCC][AArch64] Update Armv8.4-a's FP16 FML intrinsics

Reply via email to