On Wed, Feb 20, 2019 at 08:00:13AM -0600, Tamar Christina wrote: > Hi All, > > This patch updates the Armv8.4-a FP16 FML intrinsics' suffixes from u32 to > f16 > to be more consistent with the naming convention for intrinsics. > > The specifications for these intrinsics have not been published yet, so we do > not need to maintain the old names. > > The patch was created with the following script: > > grep -lIE "(vfml[as].+)_u32" -r gcc/ | grep -iEv ".+Changelog.*" \ > | xargs sed -i -E -e "s/(vfml[as].+)_u32/\1_f16/g"
Big bonus points for including this! > Bootstrapped and regtested on aarch64-none-linux-gnu with no issues. > > Ok for trunk, and an eventual backport to GCC 8? Whoops. Yes, OK for trunk, and please backport it. Thanks, James > gcc/ChangeLog: > > 2019-02-20 Tamar Christina <tamar.christ...@arm.com> > > * config/aarch64/arm_neon.h (vfmlal_low_u32, vfmlsl_low_u32, > vfmlalq_low_u32, vfmlslq_low_u32, vfmlal_high_u32, vfmlsl_high_u32, > vfmlalq_high_u32, vfmlslq_high_u32, vfmlal_lane_low_u32, > vfmlsl_lane_low_u32, vfmlal_laneq_low_u32, vfmlsl_laneq_low_u32, > vfmlalq_lane_low_u32, vfmlslq_lane_low_u32, vfmlalq_laneq_low_u32, > vfmlslq_laneq_low_u32, vfmlal_lane_high_u32, vfmlsl_lane_high_u32, > vfmlal_laneq_high_u32, vfmlsl_laneq_high_u32, vfmlalq_lane_high_u32, > vfmlslq_lane_high_u32, vfmlalq_laneq_high_u32, vfmlslq_laneq_high_u32): > Rename ... > (vfmlal_low_f16, vfmlsl_low_f16, vfmlalq_low_f16, vfmlslq_low_f16, > vfmlal_high_f16, vfmlsl_high_f16, vfmlalq_high_f16, vfmlslq_high_f16, > vfmlal_lane_low_f16, vfmlsl_lane_low_f16, vfmlal_laneq_low_f16, > vfmlsl_laneq_low_f16, vfmlalq_lane_low_f16, vfmlslq_lane_low_f16, > vfmlalq_laneq_low_f16, vfmlslq_laneq_low_f16, vfmlal_lane_high_f16, > vfmlsl_lane_high_f16, vfmlal_laneq_high_f16, vfmlsl_laneq_high_f16, > vfmlalq_lane_high_f16, vfmlslq_lane_high_f16, vfmlalq_laneq_high_f16, > vfmlslq_laneq_high_f16): ... To this. > > gcc/testsuite/ChangeLog: > > 2019-02-20 Tamar Christina <tamar.christ...@arm.com> > > * gcc.target/aarch64/fp16_fmul_high.h (test_vfmlal_high_u32, > test_vfmlalq_high_u32, test_vfmlsl_high_u32, test_vfmlslq_high_u32): > Rename ... > (test_vfmlal_high_f16, test_vfmlalq_high_f16, test_vfmlsl_high_f16, > test_vfmlslq_high_f16): ... To this. 
> * gcc.target/aarch64/fp16_fmul_lane_high.h (test_vfmlal_lane_high_u32, > tets_vfmlsl_lane_high_u32, test_vfmlal_laneq_high_u32, > test_vfmlsl_laneq_high_u32, test_vfmlalq_lane_high_u32, > test_vfmlslq_lane_high_u32, test_vfmlalq_laneq_high_u32, > test_vfmlslq_laneq_high_u32): Rename ... > (test_vfmlal_lane_high_f16, tets_vfmlsl_lane_high_f16, > test_vfmlal_laneq_high_f16, test_vfmlsl_laneq_high_f16, > test_vfmlalq_lane_high_f16, test_vfmlslq_lane_high_f16, > test_vfmlalq_laneq_high_f16, test_vfmlslq_laneq_high_f16): ... To this. > * gcc.target/aarch64/fp16_fmul_lane_low.h (test_vfmlal_lane_low_u32, > test_vfmlsl_lane_low_u32, test_vfmlal_laneq_low_u32, > test_vfmlsl_laneq_low_u32, test_vfmlalq_lane_low_u32, > test_vfmlslq_lane_low_u32, test_vfmlalq_laneq_low_u32, > test_vfmlslq_laneq_low_u32): Rename ... > (test_vfmlal_lane_low_f16, test_vfmlsl_lane_low_f16, > test_vfmlal_laneq_low_f16, test_vfmlsl_laneq_low_f16, > test_vfmlalq_lane_low_f16, test_vfmlslq_lane_low_f16, > test_vfmlalq_laneq_low_f16, test_vfmlslq_laneq_low_f16): ... To this. > * gcc.target/aarch64/fp16_fmul_low.h (test_vfmlal_low_u32, > test_vfmlalq_low_u32, test_vfmlsl_low_u32, test_vfmlslq_low_u32): > Rename ... > (test_vfmlal_low_f16, test_vfmlalq_low_f16, test_vfmlsl_low_f16, > test_vfmlslq_low_f16): ... To this. > * lib/target-supports.exp > (check_effective_target_arm_fp16fml_neon_ok_nocache): Update test. 
> > -- > diff --git a/gcc/config/aarch64/arm_neon.h b/gcc/config/aarch64/arm_neon.h > index > f405a325cf5f3f8970e5f4b78322335c280fa7a4..314ef30187d1ba1882eaf5c610770d380344e920 > 100644 > --- a/gcc/config/aarch64/arm_neon.h > +++ b/gcc/config/aarch64/arm_neon.h > @@ -33777,63 +33777,63 @@ vcmlaq_rot270_laneq_f32 (float32x4_t __r, > float32x4_t __a, float32x4_t __b, > > __extension__ extern __inline float32x2_t > __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) > -vfmlal_low_u32 (float32x2_t __r, float16x4_t __a, float16x4_t __b) > +vfmlal_low_f16 (float32x2_t __r, float16x4_t __a, float16x4_t __b) > { > return __builtin_aarch64_fmlal_lowv2sf (__r, __a, __b); > } > > __extension__ extern __inline float32x2_t > __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) > -vfmlsl_low_u32 (float32x2_t __r, float16x4_t __a, float16x4_t __b) > +vfmlsl_low_f16 (float32x2_t __r, float16x4_t __a, float16x4_t __b) > { > return __builtin_aarch64_fmlsl_lowv2sf (__r, __a, __b); > } > > __extension__ extern __inline float32x4_t > __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) > -vfmlalq_low_u32 (float32x4_t __r, float16x8_t __a, float16x8_t __b) > +vfmlalq_low_f16 (float32x4_t __r, float16x8_t __a, float16x8_t __b) > { > return __builtin_aarch64_fmlalq_lowv4sf (__r, __a, __b); > } > > __extension__ extern __inline float32x4_t > __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) > -vfmlslq_low_u32 (float32x4_t __r, float16x8_t __a, float16x8_t __b) > +vfmlslq_low_f16 (float32x4_t __r, float16x8_t __a, float16x8_t __b) > { > return __builtin_aarch64_fmlslq_lowv4sf (__r, __a, __b); > } > > __extension__ extern __inline float32x2_t > __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) > -vfmlal_high_u32 (float32x2_t __r, float16x4_t __a, float16x4_t __b) > +vfmlal_high_f16 (float32x2_t __r, float16x4_t __a, float16x4_t __b) > { > return __builtin_aarch64_fmlal_highv2sf (__r, __a, __b); > } > > 
__extension__ extern __inline float32x2_t > __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) > -vfmlsl_high_u32 (float32x2_t __r, float16x4_t __a, float16x4_t __b) > +vfmlsl_high_f16 (float32x2_t __r, float16x4_t __a, float16x4_t __b) > { > return __builtin_aarch64_fmlsl_highv2sf (__r, __a, __b); > } > > __extension__ extern __inline float32x4_t > __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) > -vfmlalq_high_u32 (float32x4_t __r, float16x8_t __a, float16x8_t __b) > +vfmlalq_high_f16 (float32x4_t __r, float16x8_t __a, float16x8_t __b) > { > return __builtin_aarch64_fmlalq_highv4sf (__r, __a, __b); > } > > __extension__ extern __inline float32x4_t > __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) > -vfmlslq_high_u32 (float32x4_t __r, float16x8_t __a, float16x8_t __b) > +vfmlslq_high_f16 (float32x4_t __r, float16x8_t __a, float16x8_t __b) > { > return __builtin_aarch64_fmlslq_highv4sf (__r, __a, __b); > } > > __extension__ extern __inline float32x2_t > __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) > -vfmlal_lane_low_u32 (float32x2_t __r, float16x4_t __a, float16x4_t __b, > +vfmlal_lane_low_f16 (float32x2_t __r, float16x4_t __a, float16x4_t __b, > const int __lane) > { > return __builtin_aarch64_fmlal_lane_lowv2sf (__r, __a, __b, __lane); > @@ -33841,7 +33841,7 @@ vfmlal_lane_low_u32 (float32x2_t __r, float16x4_t > __a, float16x4_t __b, > > __extension__ extern __inline float32x2_t > __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) > -vfmlsl_lane_low_u32 (float32x2_t __r, float16x4_t __a, float16x4_t __b, > +vfmlsl_lane_low_f16 (float32x2_t __r, float16x4_t __a, float16x4_t __b, > const int __lane) > { > return __builtin_aarch64_fmlsl_lane_lowv2sf (__r, __a, __b, __lane); > @@ -33849,7 +33849,7 @@ vfmlsl_lane_low_u32 (float32x2_t __r, float16x4_t > __a, float16x4_t __b, > > __extension__ extern __inline float32x2_t > __attribute__ ((__always_inline__, __gnu_inline__, 
__artificial__)) > -vfmlal_laneq_low_u32 (float32x2_t __r, float16x4_t __a, float16x8_t __b, > +vfmlal_laneq_low_f16 (float32x2_t __r, float16x4_t __a, float16x8_t __b, > const int __lane) > { > return __builtin_aarch64_fmlal_laneq_lowv2sf (__r, __a, __b, __lane); > @@ -33857,7 +33857,7 @@ vfmlal_laneq_low_u32 (float32x2_t __r, float16x4_t > __a, float16x8_t __b, > > __extension__ extern __inline float32x2_t > __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) > -vfmlsl_laneq_low_u32 (float32x2_t __r, float16x4_t __a, float16x8_t __b, > +vfmlsl_laneq_low_f16 (float32x2_t __r, float16x4_t __a, float16x8_t __b, > const int __lane) > { > return __builtin_aarch64_fmlsl_laneq_lowv2sf (__r, __a, __b, __lane); > @@ -33865,7 +33865,7 @@ vfmlsl_laneq_low_u32 (float32x2_t __r, float16x4_t > __a, float16x8_t __b, > > __extension__ extern __inline float32x4_t > __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) > -vfmlalq_lane_low_u32 (float32x4_t __r, float16x8_t __a, float16x4_t __b, > +vfmlalq_lane_low_f16 (float32x4_t __r, float16x8_t __a, float16x4_t __b, > const int __lane) > { > return __builtin_aarch64_fmlalq_lane_lowv4sf (__r, __a, __b, __lane); > @@ -33873,7 +33873,7 @@ vfmlalq_lane_low_u32 (float32x4_t __r, float16x8_t > __a, float16x4_t __b, > > __extension__ extern __inline float32x4_t > __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) > -vfmlslq_lane_low_u32 (float32x4_t __r, float16x8_t __a, float16x4_t __b, > +vfmlslq_lane_low_f16 (float32x4_t __r, float16x8_t __a, float16x4_t __b, > const int __lane) > { > return __builtin_aarch64_fmlslq_lane_lowv4sf (__r, __a, __b, __lane); > @@ -33881,7 +33881,7 @@ vfmlslq_lane_low_u32 (float32x4_t __r, float16x8_t > __a, float16x4_t __b, > > __extension__ extern __inline float32x4_t > __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) > -vfmlalq_laneq_low_u32 (float32x4_t __r, float16x8_t __a, float16x8_t __b, > +vfmlalq_laneq_low_f16 (float32x4_t __r, 
float16x8_t __a, float16x8_t __b, > const int __lane) > { > return __builtin_aarch64_fmlalq_laneq_lowv4sf (__r, __a, __b, __lane); > @@ -33889,7 +33889,7 @@ vfmlalq_laneq_low_u32 (float32x4_t __r, float16x8_t > __a, float16x8_t __b, > > __extension__ extern __inline float32x4_t > __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) > -vfmlslq_laneq_low_u32 (float32x4_t __r, float16x8_t __a, float16x8_t __b, > +vfmlslq_laneq_low_f16 (float32x4_t __r, float16x8_t __a, float16x8_t __b, > const int __lane) > { > return __builtin_aarch64_fmlslq_laneq_lowv4sf (__r, __a, __b, __lane); > @@ -33897,7 +33897,7 @@ vfmlslq_laneq_low_u32 (float32x4_t __r, float16x8_t > __a, float16x8_t __b, > > __extension__ extern __inline float32x2_t > __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) > -vfmlal_lane_high_u32 (float32x2_t __r, float16x4_t __a, float16x4_t __b, > +vfmlal_lane_high_f16 (float32x2_t __r, float16x4_t __a, float16x4_t __b, > const int __lane) > { > return __builtin_aarch64_fmlal_lane_highv2sf (__r, __a, __b, __lane); > @@ -33905,7 +33905,7 @@ vfmlal_lane_high_u32 (float32x2_t __r, float16x4_t > __a, float16x4_t __b, > > __extension__ extern __inline float32x2_t > __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) > -vfmlsl_lane_high_u32 (float32x2_t __r, float16x4_t __a, float16x4_t __b, > +vfmlsl_lane_high_f16 (float32x2_t __r, float16x4_t __a, float16x4_t __b, > const int __lane) > { > return __builtin_aarch64_fmlsl_lane_highv2sf (__r, __a, __b, __lane); > @@ -33913,7 +33913,7 @@ vfmlsl_lane_high_u32 (float32x2_t __r, float16x4_t > __a, float16x4_t __b, > > __extension__ extern __inline float32x2_t > __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) > -vfmlal_laneq_high_u32 (float32x2_t __r, float16x4_t __a, float16x8_t __b, > +vfmlal_laneq_high_f16 (float32x2_t __r, float16x4_t __a, float16x8_t __b, > const int __lane) > { > return __builtin_aarch64_fmlal_laneq_highv2sf (__r, __a, __b, __lane); 
> @@ -33921,7 +33921,7 @@ vfmlal_laneq_high_u32 (float32x2_t __r, float16x4_t > __a, float16x8_t __b, > > __extension__ extern __inline float32x2_t > __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) > -vfmlsl_laneq_high_u32 (float32x2_t __r, float16x4_t __a, float16x8_t __b, > +vfmlsl_laneq_high_f16 (float32x2_t __r, float16x4_t __a, float16x8_t __b, > const int __lane) > { > return __builtin_aarch64_fmlsl_laneq_highv2sf (__r, __a, __b, __lane); > @@ -33929,7 +33929,7 @@ vfmlsl_laneq_high_u32 (float32x2_t __r, float16x4_t > __a, float16x8_t __b, > > __extension__ extern __inline float32x4_t > __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) > -vfmlalq_lane_high_u32 (float32x4_t __r, float16x8_t __a, float16x4_t __b, > +vfmlalq_lane_high_f16 (float32x4_t __r, float16x8_t __a, float16x4_t __b, > const int __lane) > { > return __builtin_aarch64_fmlalq_lane_highv4sf (__r, __a, __b, __lane); > @@ -33937,7 +33937,7 @@ vfmlalq_lane_high_u32 (float32x4_t __r, float16x8_t > __a, float16x4_t __b, > > __extension__ extern __inline float32x4_t > __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) > -vfmlslq_lane_high_u32 (float32x4_t __r, float16x8_t __a, float16x4_t __b, > +vfmlslq_lane_high_f16 (float32x4_t __r, float16x8_t __a, float16x4_t __b, > const int __lane) > { > return __builtin_aarch64_fmlslq_lane_highv4sf (__r, __a, __b, __lane); > @@ -33945,7 +33945,7 @@ vfmlslq_lane_high_u32 (float32x4_t __r, float16x8_t > __a, float16x4_t __b, > > __extension__ extern __inline float32x4_t > __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) > -vfmlalq_laneq_high_u32 (float32x4_t __r, float16x8_t __a, float16x8_t __b, > +vfmlalq_laneq_high_f16 (float32x4_t __r, float16x8_t __a, float16x8_t __b, > const int __lane) > { > return __builtin_aarch64_fmlalq_laneq_highv4sf (__r, __a, __b, __lane); > @@ -33953,7 +33953,7 @@ vfmlalq_laneq_high_u32 (float32x4_t __r, float16x8_t > __a, float16x8_t __b, > > __extension__ 
extern __inline float32x4_t > __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) > -vfmlslq_laneq_high_u32 (float32x4_t __r, float16x8_t __a, float16x8_t __b, > +vfmlslq_laneq_high_f16 (float32x4_t __r, float16x8_t __a, float16x8_t __b, > const int __lane) > { > return __builtin_aarch64_fmlslq_laneq_highv4sf (__r, __a, __b, __lane); > diff --git a/gcc/testsuite/gcc.target/aarch64/fp16_fmul_high.h > b/gcc/testsuite/gcc.target/aarch64/fp16_fmul_high.h > index > 9c86bd19153cc0888f7b28f36d141b9fe08f535e..def85038a7208725ecb1db0888a1cc651aaa4934 > 100644 > --- a/gcc/testsuite/gcc.target/aarch64/fp16_fmul_high.h > +++ b/gcc/testsuite/gcc.target/aarch64/fp16_fmul_high.h > @@ -1,25 +1,25 @@ > #include "arm_neon.h" > > float32x2_t > -test_vfmlal_high_u32 (float32x2_t r, float16x4_t a, float16x4_t b) > +test_vfmlal_high_f16 (float32x2_t r, float16x4_t a, float16x4_t b) > { > - return vfmlal_high_u32 (r, a, b); > + return vfmlal_high_f16 (r, a, b); > } > > float32x4_t > -test_vfmlalq_high_u32 (float32x4_t r, float16x8_t a, float16x8_t b) > +test_vfmlalq_high_f16 (float32x4_t r, float16x8_t a, float16x8_t b) > { > - return vfmlalq_high_u32 (r, a, b); > + return vfmlalq_high_f16 (r, a, b); > } > > float32x2_t > -test_vfmlsl_high_u32 (float32x2_t r, float16x4_t a, float16x4_t b) > +test_vfmlsl_high_f16 (float32x2_t r, float16x4_t a, float16x4_t b) > { > - return vfmlsl_high_u32 (r, a, b); > + return vfmlsl_high_f16 (r, a, b); > } > > float32x4_t > -test_vfmlslq_high_u32 (float32x4_t r, float16x8_t a, float16x8_t b) > +test_vfmlslq_high_f16 (float32x4_t r, float16x8_t a, float16x8_t b) > { > - return vfmlslq_high_u32 (r, a, b); > + return vfmlslq_high_f16 (r, a, b); > } > diff --git a/gcc/testsuite/gcc.target/aarch64/fp16_fmul_lane_high.h > b/gcc/testsuite/gcc.target/aarch64/fp16_fmul_lane_high.h > index > 1039347865e0bc79dfe351fd52f36964e7c41188..a0b95f8b81e4799a6075b0f0fca6834f73de0dc8 > 100644 > --- a/gcc/testsuite/gcc.target/aarch64/fp16_fmul_lane_high.h > 
+++ b/gcc/testsuite/gcc.target/aarch64/fp16_fmul_lane_high.h > @@ -1,49 +1,49 @@ > #include "arm_neon.h" > > float32x2_t > -test_vfmlal_lane_high_u32 (float32x2_t r, float16x4_t a, float16x4_t b) > +test_vfmlal_lane_high_f16 (float32x2_t r, float16x4_t a, float16x4_t b) > { > - return vfmlal_lane_high_u32 (r, a, b, 0); > + return vfmlal_lane_high_f16 (r, a, b, 0); > } > > float32x2_t > -tets_vfmlsl_lane_high_u32 (float32x2_t r, float16x4_t a, float16x4_t b) > +tets_vfmlsl_lane_high_f16 (float32x2_t r, float16x4_t a, float16x4_t b) > { > - return vfmlsl_lane_high_u32 (r, a, b, 0); > + return vfmlsl_lane_high_f16 (r, a, b, 0); > } > > float32x2_t > -test_vfmlal_laneq_high_u32 (float32x2_t r, float16x4_t a, float16x8_t b) > +test_vfmlal_laneq_high_f16 (float32x2_t r, float16x4_t a, float16x8_t b) > { > - return vfmlal_laneq_high_u32 (r, a, b, 6); > + return vfmlal_laneq_high_f16 (r, a, b, 6); > } > > float32x2_t > -test_vfmlsl_laneq_high_u32 (float32x2_t r, float16x4_t a, float16x8_t b) > +test_vfmlsl_laneq_high_f16 (float32x2_t r, float16x4_t a, float16x8_t b) > { > - return vfmlsl_laneq_high_u32 (r, a, b, 6); > + return vfmlsl_laneq_high_f16 (r, a, b, 6); > } > > float32x4_t > -test_vfmlalq_lane_high_u32 (float32x4_t r, float16x8_t a, float16x4_t b) > +test_vfmlalq_lane_high_f16 (float32x4_t r, float16x8_t a, float16x4_t b) > { > - return vfmlalq_lane_high_u32 (r, a, b, 1); > + return vfmlalq_lane_high_f16 (r, a, b, 1); > } > > float32x4_t > -test_vfmlslq_lane_high_u32 (float32x4_t r, float16x8_t a, float16x4_t b) > +test_vfmlslq_lane_high_f16 (float32x4_t r, float16x8_t a, float16x4_t b) > { > - return vfmlslq_lane_high_u32 (r, a, b, 1); > + return vfmlslq_lane_high_f16 (r, a, b, 1); > } > > float32x4_t > -test_vfmlalq_laneq_high_u32 (float32x4_t r, float16x8_t a, float16x8_t b) > +test_vfmlalq_laneq_high_f16 (float32x4_t r, float16x8_t a, float16x8_t b) > { > - return vfmlalq_laneq_high_u32 (r, a, b, 7); > + return vfmlalq_laneq_high_f16 (r, a, b, 7); > } > > 
float32x4_t > -test_vfmlslq_laneq_high_u32 (float32x4_t r, float16x8_t a, float16x8_t b) > +test_vfmlslq_laneq_high_f16 (float32x4_t r, float16x8_t a, float16x8_t b) > { > - return vfmlslq_laneq_high_u32 (r, a, b, 7); > + return vfmlslq_laneq_high_f16 (r, a, b, 7); > } > diff --git a/gcc/testsuite/gcc.target/aarch64/fp16_fmul_lane_low.h > b/gcc/testsuite/gcc.target/aarch64/fp16_fmul_lane_low.h > index > b689741bdb006e89f14f29b803ba6d38a62b387e..bf49829c4bec941970eaf4e32cabf65719be9eaa > 100644 > --- a/gcc/testsuite/gcc.target/aarch64/fp16_fmul_lane_low.h > +++ b/gcc/testsuite/gcc.target/aarch64/fp16_fmul_lane_low.h > @@ -1,49 +1,49 @@ > #include "arm_neon.h" > > float32x2_t > -test_vfmlal_lane_low_u32 (float32x2_t r, float16x4_t a, float16x4_t b) > +test_vfmlal_lane_low_f16 (float32x2_t r, float16x4_t a, float16x4_t b) > { > - return vfmlal_lane_low_u32 (r, a, b, 0); > + return vfmlal_lane_low_f16 (r, a, b, 0); > } > > float32x2_t > -test_vfmlsl_lane_low_u32 (float32x2_t r, float16x4_t a, float16x4_t b) > +test_vfmlsl_lane_low_f16 (float32x2_t r, float16x4_t a, float16x4_t b) > { > - return vfmlsl_lane_low_u32 (r, a, b, 0); > + return vfmlsl_lane_low_f16 (r, a, b, 0); > } > > float32x2_t > -test_vfmlal_laneq_low_u32 (float32x2_t r, float16x4_t a, float16x8_t b) > +test_vfmlal_laneq_low_f16 (float32x2_t r, float16x4_t a, float16x8_t b) > { > - return vfmlal_laneq_low_u32 (r, a, b, 6); > + return vfmlal_laneq_low_f16 (r, a, b, 6); > } > > float32x2_t > -test_vfmlsl_laneq_low_u32 (float32x2_t r, float16x4_t a, float16x8_t b) > +test_vfmlsl_laneq_low_f16 (float32x2_t r, float16x4_t a, float16x8_t b) > { > - return vfmlsl_laneq_low_u32 (r, a, b, 6); > + return vfmlsl_laneq_low_f16 (r, a, b, 6); > } > > float32x4_t > -test_vfmlalq_lane_low_u32 (float32x4_t r, float16x8_t a, float16x4_t b) > +test_vfmlalq_lane_low_f16 (float32x4_t r, float16x8_t a, float16x4_t b) > { > - return vfmlalq_lane_low_u32 (r, a, b, 1); > + return vfmlalq_lane_low_f16 (r, a, b, 1); > } > > 
float32x4_t > -test_vfmlslq_lane_low_u32 (float32x4_t r, float16x8_t a, float16x4_t b) > +test_vfmlslq_lane_low_f16 (float32x4_t r, float16x8_t a, float16x4_t b) > { > - return vfmlslq_lane_low_u32 (r, a, b, 1); > + return vfmlslq_lane_low_f16 (r, a, b, 1); > } > > float32x4_t > -test_vfmlalq_laneq_low_u32 (float32x4_t r, float16x8_t a, float16x8_t b) > +test_vfmlalq_laneq_low_f16 (float32x4_t r, float16x8_t a, float16x8_t b) > { > - return vfmlalq_laneq_low_u32 (r, a, b, 7); > + return vfmlalq_laneq_low_f16 (r, a, b, 7); > } > > float32x4_t > -test_vfmlslq_laneq_low_u32 (float32x4_t r, float16x8_t a, float16x8_t b) > +test_vfmlslq_laneq_low_f16 (float32x4_t r, float16x8_t a, float16x8_t b) > { > - return vfmlslq_laneq_low_u32 (r, a, b, 7); > + return vfmlslq_laneq_low_f16 (r, a, b, 7); > } > diff --git a/gcc/testsuite/gcc.target/aarch64/fp16_fmul_low.h > b/gcc/testsuite/gcc.target/aarch64/fp16_fmul_low.h > index > 778ca1c245c7343b38272e586a54927c7cd50bee..b039b548b5809f92a6ef0f91f6ab475b2b03866c > 100644 > --- a/gcc/testsuite/gcc.target/aarch64/fp16_fmul_low.h > +++ b/gcc/testsuite/gcc.target/aarch64/fp16_fmul_low.h > @@ -1,25 +1,25 @@ > #include "arm_neon.h" > > float32x2_t > -test_vfmlal_low_u32 (float32x2_t r, float16x4_t a, float16x4_t b) > +test_vfmlal_low_f16 (float32x2_t r, float16x4_t a, float16x4_t b) > { > - return vfmlal_low_u32 (r, a, b); > + return vfmlal_low_f16 (r, a, b); > } > > float32x4_t > -test_vfmlalq_low_u32 (float32x4_t r, float16x8_t a, float16x8_t b) > +test_vfmlalq_low_f16 (float32x4_t r, float16x8_t a, float16x8_t b) > { > - return vfmlalq_low_u32 (r, a, b); > + return vfmlalq_low_f16 (r, a, b); > } > > float32x2_t > -test_vfmlsl_low_u32 (float32x2_t r, float16x4_t a, float16x4_t b) > +test_vfmlsl_low_f16 (float32x2_t r, float16x4_t a, float16x4_t b) > { > - return vfmlsl_low_u32 (r, a, b); > + return vfmlsl_low_f16 (r, a, b); > } > > float32x4_t > -test_vfmlslq_low_u32 (float32x4_t r, float16x8_t a, float16x8_t b) > 
+test_vfmlslq_low_f16 (float32x4_t r, float16x8_t a, float16x8_t b) > { > - return vfmlslq_low_u32 (r, a, b); > + return vfmlslq_low_f16 (r, a, b); > } > diff --git a/gcc/testsuite/lib/target-supports.exp > b/gcc/testsuite/lib/target-supports.exp > index > c0df467e0175cd92c688cedebb97fd4ae87e985e..21ac2ee3b4c9591ac9efad6a1567e35fc8e3291b > 100644 > --- a/gcc/testsuite/lib/target-supports.exp > +++ b/gcc/testsuite/lib/target-supports.exp > @@ -4522,7 +4522,7 @@ proc check_effective_target_arm_fp16fml_neon_ok_nocache > { } { > float32x2_t > foo (float32x2_t r, float16x4_t a, float16x4_t b) > { > - return vfmlal_high_u32 (r, a, b); > + return vfmlal_high_f16 (r, a, b); > } > } "$flags -march=armv8.2-a+fp16fml"] } { > set et_arm_fp16fml_neon_flags "$flags -march=armv8.2-a+fp16fml" >