On 07/07/16 17:17, Jiong Wang wrote:
This patch adds ARMv8.2-A FP16 two-operand scalar intrinsics.
The updated patch resolves the conflict with
https://gcc.gnu.org/ml/gcc-patches/2016-06/msg00309.html
The change is to let aarch64_emit_approx_div return false for HFmode.
gcc/
2016-07-20 Jiong Wang
* config/aarch64/aarch64-simd-builtins.def: Register new builtins.
* config/aarch64/aarch64.md (hf3): New.
(hf3): Likewise.
(add3): Likewise.
(sub3): Likewise.
(mul3): Likewise.
(div3): Likewise.
(*div3): Likewise.
(3): Extend to HF.
* config/aarch64/aarch64.c (aarch64_emit_approx_div): Return
false for HFmode.
* config/aarch64/aarch64-simd.md (aarch64_rsqrts): Likewise.
(fabd3): Likewise.
(3): Likewise.
(3): Likewise.
(aarch64_fmulx): Likewise.
(aarch64_fac): Likewise.
(aarch64_frecps): Likewise.
(hfhi3): New.
(hihf3): Likewise.
* config/aarch64/iterators.md (VHSDF_SDF): Delete.
(VSDQ_HSDI): Support HI.
(fcvt_target, FCVT_TARGET): Likewise.
* config/aarch64/arm_fp16.h (vaddh_f16): New.
(vsubh_f16): Likewise.
(vabdh_f16): Likewise.
(vcageh_f16): Likewise.
(vcagth_f16): Likewise.
(vcaleh_f16): Likewise.
(vcalth_f16): Likewise.
(vcleh_f16): Likewise.
(vclth_f16): Likewise.
(vcvth_n_f16_s16): Likewise.
(vcvth_n_f16_s32): Likewise.
(vcvth_n_f16_s64): Likewise.
(vcvth_n_f16_u16): Likewise.
(vcvth_n_f16_u32): Likewise.
(vcvth_n_f16_u64): Likewise.
(vcvth_n_s16_f16): Likewise.
(vcvth_n_s32_f16): Likewise.
(vcvth_n_s64_f16): Likewise.
(vcvth_n_u16_f16): Likewise.
(vcvth_n_u32_f16): Likewise.
(vcvth_n_u64_f16): Likewise.
(vdivh_f16): Likewise.
(vmaxh_f16): Likewise.
(vmaxnmh_f16): Likewise.
(vminh_f16): Likewise.
(vminnmh_f16): Likewise.
(vmulh_f16): Likewise.
(vmulxh_f16): Likewise.
(vrecpsh_f16): Likewise.
(vrsqrtsh_f16): Likewise.
diff --git a/gcc/config/aarch64/aarch64-simd-builtins.def b/gcc/config/aarch64/aarch64-simd-builtins.def
index 6f50d8405d3ee8c4823037bb2022a4f2f08b72fe..31abc077859254e3696adacb3f8f2b9b2da0647f 100644
--- a/gcc/config/aarch64/aarch64-simd-builtins.def
+++ b/gcc/config/aarch64/aarch64-simd-builtins.def
@@ -41,7 +41,7 @@
BUILTIN_VDC (COMBINE, combine, 0)
BUILTIN_VB (BINOP, pmul, 0)
- BUILTIN_VHSDF_SDF (BINOP, fmulx, 0)
+ BUILTIN_VHSDF_HSDF (BINOP, fmulx, 0)
BUILTIN_VHSDF_DF (UNOP, sqrt, 2)
BUILTIN_VD_BHSI (BINOP, addp, 0)
VAR1 (UNOP, addp, 0, di)
@@ -393,13 +393,12 @@
/* Implemented by
aarch64_frecp. */
BUILTIN_GPF_F16 (UNOP, frecpe, 0)
- BUILTIN_GPF (BINOP, frecps, 0)
BUILTIN_GPF_F16 (UNOP, frecpx, 0)
BUILTIN_VDQ_SI (UNOP, urecpe, 0)
BUILTIN_VHSDF (UNOP, frecpe, 0)
- BUILTIN_VHSDF (BINOP, frecps, 0)
+ BUILTIN_VHSDF_HSDF (BINOP, frecps, 0)
/* Implemented by a mixture of abs2 patterns. Note the DImode builtin is
only ever used for the int64x1_t intrinsic, there is no scalar version. */
@@ -496,17 +495,23 @@
/* Implemented by <*><*>3. */
BUILTIN_VSDQ_HSDI (SHIFTIMM, scvtf, 3)
BUILTIN_VSDQ_HSDI (FCVTIMM_SUS, ucvtf, 3)
- BUILTIN_VHSDF_SDF (SHIFTIMM, fcvtzs, 3)
- BUILTIN_VHSDF_SDF (SHIFTIMM_USS, fcvtzu, 3)
+ BUILTIN_VHSDF_HSDF (SHIFTIMM, fcvtzs, 3)
+ BUILTIN_VHSDF_HSDF (SHIFTIMM_USS, fcvtzu, 3)
+ VAR1 (SHIFTIMM, scvtfsi, 3, hf)
+ VAR1 (SHIFTIMM, scvtfdi, 3, hf)
+ VAR1 (FCVTIMM_SUS, ucvtfsi, 3, hf)
+ VAR1 (FCVTIMM_SUS, ucvtfdi, 3, hf)
+ BUILTIN_GPI (SHIFTIMM, fcvtzshf, 3)
+ BUILTIN_GPI (SHIFTIMM_USS, fcvtzuhf, 3)
/* Implemented by aarch64_rsqrte. */
BUILTIN_VHSDF_HSDF (UNOP, rsqrte, 0)
/* Implemented by aarch64_rsqrts. */
- BUILTIN_VHSDF_SDF (BINOP, rsqrts, 0)
+ BUILTIN_VHSDF_HSDF (BINOP, rsqrts, 0)
/* Implemented by fabd3. */
- BUILTIN_VHSDF_SDF (BINOP, fabd, 3)
+ BUILTIN_VHSDF_HSDF (BINOP, fabd, 3)
/* Implemented by aarch64_faddp. */
BUILTIN_VHSDF (BINOP, faddp, 0)
@@ -522,10 +527,10 @@
BUILTIN_VHSDF_HSDF (UNOP, neg, 2)
/* Implemented by aarch64_fac. */
- BUILTIN_VHSDF_SDF (BINOP_USS, faclt, 0)
- BUILTIN_VHSDF_SDF (BINOP_USS, facle, 0)
- BUILTIN_VHSDF_SDF (BINOP_USS, facgt, 0)
- BUILTIN_VHSDF_SDF (BINOP_USS, facge, 0)
+ BUILTIN_VHSDF_HSDF (BINOP_USS, faclt, 0)
+ BUILTIN_VHSDF_HSDF (BINOP_USS, facle, 0)
+ BUILTIN_VHSDF_HSDF (BINOP_USS, facgt, 0)
+ BUILTIN_VHSDF_HSDF (BINOP_USS, facge, 0)
/* Implemented by sqrt2. */
VAR1 (UNOP, sqrt, 2, hf)
@@ -543,3 +548,7 @@
BUILTIN_GPI_I16 (UNOPUS, fixuns_trunchf, 2)
BUILTIN_GPI (UNOPUS, fixuns_truncsf, 2)
BUILTIN_GPI (UNOPUS, fixuns_truncdf, 2)
+
+ /* Implemented by 3. */
+ VAR1 (BINOP, fmax, 3, hf)
+ VAR1 (BINOP, fmin, 3, hf)
diff --git a/gcc/config/aarch64/aarch64-simd.md b/gcc/conf