Re: [PATCH 13/17][ARM] Add VFP FP16 instrinsics.

Matthew Wahab Mon, 04 Jul 2016 07:15:04 -0700

On 17/05/16 15:44, Matthew Wahab wrote:
> The ARMv8.2-A architecture introduces an optional FP16 extension adding
> half-precision floating point data processing instructions to the
> existing scalar (floating point) support. A future version of the ACLE
> will add support for these instructions and this patch implements that
> support.


Updated to use the standard arithmetic operations for vnegh_f16,
vaddh_f16, vsubh_f16, vmulh_f16 and vdivh_f16.

Tested the series for arm-none-linux-gnueabihf with native bootstrap and
make check and for arm-none-eabi and armeb-none-eabi with make check on
an ARMv8.2-A emulator.

Ok for trunk?
Matthew

2016-07-04  Matthew Wahab  <matthew.wa...@arm.com>

        * config.gcc (extra_headers): Add arm_fp16.h
        * config/arm/arm_fp16.h: New.
        * config/arm/arm_neon.h: Include "arm_fp16.h".

>From a9042ae0e0ea4a61436663a1afea81ccf699e9f9 Mon Sep 17 00:00:00 2001
From: Matthew Wahab <matthew.wa...@arm.com>
Date: Thu, 7 Apr 2016 15:36:23 +0100
Subject: [PATCH 13/17] [PATCH 13/17][ARM] Add VFP FP16 instrinsics.

2016-07-04  Matthew Wahab  <matthew.wa...@arm.com>

	* config.gcc (extra_headers): Add arm_fp16.h
	* config/arm/arm_fp16.h: New.
	* config/arm/arm_neon.h: Include "arm_fp16.h".
---
 gcc/config.gcc            |   2 +-
 gcc/config/arm/arm_fp16.h | 255 ++++++++++++++++++++++++++++++++++++++++++++++
 gcc/config/arm/arm_neon.h |   1 +
 3 files changed, 257 insertions(+), 1 deletion(-)
 create mode 100644 gcc/config/arm/arm_fp16.h

diff --git a/gcc/config.gcc b/gcc/config.gcc
index 1f75f17..4333bc9 100644
--- a/gcc/config.gcc
+++ b/gcc/config.gcc
@@ -320,7 +320,7 @@ arc*-*-*)
 arm*-*-*)
 	cpu_type=arm
 	extra_objs="arm-builtins.o aarch-common.o"
-	extra_headers="mmintrin.h arm_neon.h arm_acle.h"
+	extra_headers="mmintrin.h arm_neon.h arm_acle.h arm_fp16.h"
 	target_type_format_char='%'
 	c_target_objs="arm-c.o"
 	cxx_target_objs="arm-c.o"
diff --git a/gcc/config/arm/arm_fp16.h b/gcc/config/arm/arm_fp16.h
new file mode 100644
index 0000000..c72d8c4
--- /dev/null
+++ b/gcc/config/arm/arm_fp16.h
@@ -0,0 +1,255 @@
+/* ARM FP16 intrinsics include file.
+
+   Copyright (C) 2016 Free Software Foundation, Inc.
+   Contributed by ARM Ltd.
+
+   This file is part of GCC.
+
+   GCC is free software; you can redistribute it and/or modify it
+   under the terms of the GNU General Public License as published
+   by the Free Software Foundation; either version 3, or (at your
+   option) any later version.
+
+   GCC is distributed in the hope that it will be useful, but WITHOUT
+   ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+   or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public
+   License for more details.
+
+   Under Section 7 of GPL version 3, you are granted additional
+   permissions described in the GCC Runtime Library Exception, version
+   3.1, as published by the Free Software Foundation.
+
+   You should have received a copy of the GNU General Public License and
+   a copy of the GCC Runtime Library Exception along with this program;
+   see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
+   <http://www.gnu.org/licenses/>.  */
+
+#ifndef _GCC_ARM_FP16_H
+#define _GCC_ARM_FP16_H 1
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#include <stdint.h>
+
+/* Intrinsics for FP16 instructions.  */
+#pragma GCC push_options
+#pragma GCC target ("fpu=fp-armv8")
+
+#if defined (__ARM_FEATURE_FP16_SCALAR_ARITHMETIC)
+
+typedef __fp16 float16_t;
+
+__extension__ static __inline float16_t __attribute__ ((__always_inline__))
+vabsh_f16 (float16_t __a)
+{
+  return __builtin_neon_vabshf (__a);
+}
+
+__extension__ static __inline float16_t __attribute__ ((__always_inline__))
+vaddh_f16 (float16_t __a, float16_t __b)
+{
+  return __a + __b;
+}
+
+__extension__ static __inline int32_t __attribute__ ((__always_inline__))
+vcvtah_s32_f16 (float16_t __a)
+{
+  return __builtin_neon_vcvtahssi (__a);
+}
+
+__extension__ static __inline uint32_t __attribute__ ((__always_inline__))
+vcvtah_u32_f16 (float16_t __a)
+{
+  return __builtin_neon_vcvtahusi (__a);
+}
+
+__extension__ static __inline float16_t __attribute__ ((__always_inline__))
+vcvth_f16_s32 (int32_t __a)
+{
+  return __builtin_neon_vcvthshf (__a);
+}
+
+__extension__ static __inline float16_t __attribute__ ((__always_inline__))
+vcvth_f16_u32 (uint32_t __a)
+{
+  return __builtin_neon_vcvthuhf (__a);
+}
+
+__extension__ static __inline float16_t __attribute__ ((__always_inline__))
+vcvth_n_f16_s32 (int32_t __a, const int __b)
+{
+  return __builtin_neon_vcvths_nhf (__a, __b);
+}
+
+__extension__ static __inline float16_t __attribute__ ((__always_inline__))
+vcvth_n_f16_u32 (uint32_t __a, const int __b)
+{
+  return __builtin_neon_vcvthu_nhf ((int32_t)__a, __b);
+}
+
+__extension__ static __inline int32_t __attribute__ ((__always_inline__))
+vcvth_n_s32_f16 (float16_t __a, const int __b)
+{
+  return __builtin_neon_vcvths_nsi (__a, __b);
+}
+
+__extension__ static __inline uint32_t __attribute__ ((__always_inline__))
+vcvth_n_u32_f16 (float16_t __a, const int __b)
+{
+  return (uint32_t)__builtin_neon_vcvthu_nsi (__a, __b);
+}
+
+__extension__ static __inline int32_t __attribute__ ((__always_inline__))
+vcvth_s32_f16 (float16_t __a)
+{
+  return __builtin_neon_vcvthssi (__a);
+}
+
+__extension__ static __inline uint32_t __attribute__ ((__always_inline__))
+vcvth_u32_f16 (float16_t __a)
+{
+  return __builtin_neon_vcvthusi (__a);
+}
+
+__extension__ static __inline int32_t __attribute__ ((__always_inline__))
+vcvtmh_s32_f16 (float16_t __a)
+{
+  return __builtin_neon_vcvtmhssi (__a);
+}
+
+__extension__ static __inline uint32_t __attribute__ ((__always_inline__))
+vcvtmh_u32_f16 (float16_t __a)
+{
+  return __builtin_neon_vcvtmhusi (__a);
+}
+
+__extension__ static __inline int32_t __attribute__ ((__always_inline__))
+vcvtnh_s32_f16 (float16_t __a)
+{
+  return __builtin_neon_vcvtnhssi (__a);
+}
+
+__extension__ static __inline uint32_t __attribute__ ((__always_inline__))
+vcvtnh_u32_f16 (float16_t __a)
+{
+  return __builtin_neon_vcvtnhusi (__a);
+}
+
+__extension__ static __inline int32_t __attribute__ ((__always_inline__))
+vcvtph_s32_f16 (float16_t __a)
+{
+  return __builtin_neon_vcvtphssi (__a);
+}
+
+__extension__ static __inline uint32_t __attribute__ ((__always_inline__))
+vcvtph_u32_f16 (float16_t __a)
+{
+  return __builtin_neon_vcvtphusi (__a);
+}
+
+__extension__ static __inline float16_t __attribute__ ((__always_inline__))
+vdivh_f16 (float16_t __a, float16_t __b)
+{
+  return __a / __b;
+}
+
+__extension__ static __inline float16_t __attribute__ ((__always_inline__))
+vfmah_f16 (float16_t __a, float16_t __b, float16_t __c)
+{
+  return __builtin_neon_vfmahf (__a, __b, __c);
+}
+
+__extension__ static __inline float16_t __attribute__ ((__always_inline__))
+vfmsh_f16 (float16_t __a, float16_t __b, float16_t __c)
+{
+  return __builtin_neon_vfmshf (__a, __b, __c);
+}
+
+__extension__ static __inline float16_t __attribute__ ((__always_inline__))
+vmaxnmh_f16 (float16_t __a, float16_t __b)
+{
+  return __builtin_neon_vmaxnmhf (__a, __b);
+}
+
+__extension__ static __inline float16_t __attribute__ ((__always_inline__))
+vminnmh_f16 (float16_t __a, float16_t __b)
+{
+  return __builtin_neon_vminnmhf (__a, __b);
+}
+
+__extension__ static __inline float16_t __attribute__ ((__always_inline__))
+vmulh_f16 (float16_t __a, float16_t __b)
+{
+  return __a * __b;
+}
+
+__extension__ static __inline float16_t __attribute__ ((__always_inline__))
+vnegh_f16 (float16_t __a)
+{
+  return  - __a;
+}
+
+__extension__ static __inline float16_t __attribute__ ((__always_inline__))
+vrndah_f16 (float16_t __a)
+{
+  return __builtin_neon_vrndahf (__a);
+}
+
+__extension__ static __inline float16_t __attribute__ ((__always_inline__))
+vrndh_f16 (float16_t __a)
+{
+  return __builtin_neon_vrndhf (__a);
+}
+
+__extension__ static __inline float16_t __attribute__ ((__always_inline__))
+vrndih_f16 (float16_t __a)
+{
+  return __builtin_neon_vrndihf (__a);
+}
+
+__extension__ static __inline float16_t __attribute__ ((__always_inline__))
+vrndmh_f16 (float16_t __a)
+{
+  return __builtin_neon_vrndmhf (__a);
+}
+
+__extension__ static __inline float16_t __attribute__ ((__always_inline__))
+vrndnh_f16 (float16_t __a)
+{
+  return __builtin_neon_vrndnhf (__a);
+}
+
+__extension__ static __inline float16_t __attribute__ ((__always_inline__))
+vrndph_f16 (float16_t __a)
+{
+  return __builtin_neon_vrndphf (__a);
+}
+
+__extension__ static __inline float16_t __attribute__ ((__always_inline__))
+vrndxh_f16 (float16_t __a)
+{
+  return __builtin_neon_vrndxhf (__a);
+}
+
+__extension__ static __inline float16_t __attribute__ ((__always_inline__))
+vsqrth_f16 (float16_t __a)
+{
+  return __builtin_neon_vsqrthf (__a);
+}
+
+__extension__ static __inline float16_t __attribute__ ((__always_inline__))
+vsubh_f16 (float16_t __a, float16_t __b)
+{
+  return __a - __b;
+}
+
+#endif /* __ARM_FEATURE_FP16_SCALAR_ARITHMETIC  */
+#pragma GCC pop_options
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif
diff --git a/gcc/config/arm/arm_neon.h b/gcc/config/arm/arm_neon.h
index 3bd9517..8ed5aa8 100644
--- a/gcc/config/arm/arm_neon.h
+++ b/gcc/config/arm/arm_neon.h
@@ -38,6 +38,7 @@
 extern "C" {
 #endif
 
+#include <arm_fp16.h>
 #include <stdint.h>
 
 typedef __simd64_int8_t int8x8_t;
-- 
2.1.4

Re: [PATCH 13/17][ARM] Add VFP FP16 instrinsics.

Reply via email to