Re: [GCC][PATCH][AArch64] Add bfloat16 vldn/vstn intrinsics

Richard Sandiford Tue, 18 Feb 2020 09:07:02 -0800

Thanks.  When trying a bootstrap locally I get:

include/arm_neon.h:34709:38: error: cannot convert ‘const __bf16*’ to ‘const __fp16*’
34709 |   __o = __builtin_aarch64_ld1x2v4hf ((const __builtin_aarch64_simd_bf *) __a);
      |                                      ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
      |                                      |
      |                                      const __bf16*


I think the affected lines are:

Mihail Ionescu <mihail.ione...@foss.arm.com> writes:
> +__extension__ extern __inline bfloat16x4x2_t
> +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> +vld1_bf16_x2 (const bfloat16_t *__a)
> +{
> +  bfloat16x4x2_t ret;
> +  __builtin_aarch64_simd_oi __o;
> +  __o = __builtin_aarch64_ld1x2v4hf ((const __builtin_aarch64_simd_bf *) 
> __a);

This should call __builtin_aarch64_ld1x2v4bf rather than
__builtin_aarch64_ld1x2v4hf (this is the cause of the error above).

> +  ret.val[0] = (bfloat16x4_t) __builtin_aarch64_get_dregoiv4bf (__o, 0);
> +  ret.val[1] = (bfloat16x4_t) __builtin_aarch64_get_dregoiv4bf (__o, 1);
> +  return ret;
> +}
> [...]
> +__extension__ extern __inline bfloat16x4x2_t
> +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> +vld2_dup_bf16 (const bfloat16_t * __a)
> +{
> +  bfloat16x4x2_t ret;
> +  __builtin_aarch64_simd_oi __o;
> +  __o = __builtin_aarch64_ld2rv4bf ((const __builtin_aarch64_simd_bf *) __a);
> +  ret.val[0] = (bfloat16x4_t) __builtin_aarch64_get_dregoiv4hf (__o, 0);

Here too: __builtin_aarch64_get_dregoiv4bf rather than
__builtin_aarch64_get_dregoiv4hf (although the choice is cosmetic).

> +  ret.val[1] = (bfloat16x4_t) __builtin_aarch64_get_dregoiv4bf (__o, 1);
> +  return ret;
> +}
> +
> +__extension__ extern __inline bfloat16x8x2_t
> +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> +vld2q_dup_bf16 (const bfloat16_t * __a)
> +{
> +  bfloat16x8x2_t ret;
> +  __builtin_aarch64_simd_oi __o;
> +  __o = __builtin_aarch64_ld2rv8bf ((const __builtin_aarch64_simd_bf *) __a);
> +  ret.val[0] = (bfloat16x8_t) __builtin_aarch64_get_qregoiv8hf (__o, 0);

Same here: __builtin_aarch64_get_qregoiv8bf rather than
__builtin_aarch64_get_qregoiv8hf (again cosmetic).

> +  ret.val[1] = (bfloat16x8_t) __builtin_aarch64_get_qregoiv8bf (__o, 1);
> +  return ret;
> +}
> [...]
> +__extension__ extern __inline void
> +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> +vst1_bf16_x3 (bfloat16_t * __a, bfloat16x4x3_t __val)
> +{
> +  __builtin_aarch64_simd_ci __o;
> +  bfloat16x8x3_t __temp;
> +  __temp.val[0] = vcombine_bf16 (__val.val[0], vcreate_bf16 
> (__AARCH64_UINT64_C (0)));
> +  __temp.val[1] = vcombine_bf16 (__val.val[1], vcreate_bf16 
> (__AARCH64_UINT64_C (0)));
> +  __temp.val[2] = vcombine_bf16 (__val.val[2], vcreate_bf16 
> (__AARCH64_UINT64_C (0)));
> +  __o = __builtin_aarch64_set_qregciv8bf (__o, (bfloat16x8_t) __temp.val[0], 
> 0);
> +  __o = __builtin_aarch64_set_qregciv8bf (__o, (bfloat16x8_t) __temp.val[1], 
> 1);
> +  __o = __builtin_aarch64_set_qregciv8bf (__o, (bfloat16x8_t) __temp.val[2], 
> 2);
> +  __builtin_aarch64_st1x3v4hf ((__builtin_aarch64_simd_bf *) __a, __o);

Here too (__builtin_aarch64_st1x3v4bf rather than
__builtin_aarch64_st1x3v4hf), to avoid:

include/arm_neon.h:35000:32: error: cannot convert ‘__bf16*’ to ‘__fp16*’
35000 |   __builtin_aarch64_st1x3v4hf ((__builtin_aarch64_simd_bf *) __a, __o);
      |                                ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
      |                                |
      |                                __bf16*

Looks good otherwise.  I guess this shows we should be running the
intrinsics tests for C++ as well as C (in general, not just for this patch).

Richard

Re: [GCC][PATCH][AArch64] Add bfloat16 vldn/vstn intrinsics

Reply via email to