Hi,
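This patch adds Clang NEON intrinsics for the ARMv8 NEON instructions I added
earlier: vmaxnm/vminnm, the vcvta/vcvtn/vcvtp/vcvtm float-to-integer
conversions, and the vrnd/vrnda/vrndm/vrndn/vrndp/vrndx rounding intrinsics.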
I also re-ran the NEON test case generator.
Is this OK to commit?
http://llvm-reviews.chandlerc.com/D1574
Files:
include/clang/Basic/arm_neon.td
lib/CodeGen/CGBuiltin.cpp
test/CodeGen/arm_neon_intrinsics.c
Index: include/clang/Basic/arm_neon.td
===================================================================
--- include/clang/Basic/arm_neon.td
+++ include/clang/Basic/arm_neon.td
@@ -224,6 +224,8 @@
// E.3.6 Max/Min
def VMAX : SInst<"vmax", "ddd", "csiUcUsUifQcQsQiQUcQUsQUiQf">;
def VMIN : SInst<"vmin", "ddd", "csiUcUsUifQcQsQiQUcQUsQUiQf">;
+def VMAXNM : SInst<"vmaxnm", "ddd", "fQf">;
+def VMINNM : SInst<"vminnm", "ddd", "fQf">;
////////////////////////////////////////////////////////////////////////////////
// E.3.7 Pairwise Addition
@@ -357,6 +359,16 @@
def VCVT_N_U32 : SInst<"vcvt_n_u32", "udi", "fQf">;
def VCVT_N_F32 : SInst<"vcvt_n_f32", "fdi", "iUiQiQUi">;
}
+
+def VCVTA_S32 : SInst<"vcvta_s32", "xd", "fQf">;
+def VCVTA_U32 : SInst<"vcvta_u32", "ud", "fQf">;
+def VCVTN_S32 : SInst<"vcvtn_s32", "xd", "fQf">;
+def VCVTN_U32 : SInst<"vcvtn_u32", "ud", "fQf">;
+def VCVTP_S32 : SInst<"vcvtp_s32", "xd", "fQf">;
+def VCVTP_U32 : SInst<"vcvtp_u32", "ud", "fQf">;
+def VCVTM_S32 : SInst<"vcvtm_s32", "xd", "fQf">;
+def VCVTM_U32 : SInst<"vcvtm_u32", "ud", "fQf">;
+
def VMOVN : IInst<"vmovn", "hk", "silUsUiUl">;
def VMOVL : SInst<"vmovl", "wd", "csiUcUsUi">;
def VQMOVN : SInst<"vqmovn", "hk", "silUsUiUl">;
@@ -458,6 +470,27 @@
def VFMA : SInst<"vfma", "dddd", "fQf">;
+///////////////////////////////////////////////////////////////////////////////
+// Vector round intrinsics
+def VRNDN : SInst<"vrndn", "dd", "fQf"> {
+ let InstName = "vrintn";
+}
+def VRNDX : SInst<"vrndx", "dd", "fQf"> {
+ let InstName = "vrintx";
+}
+def VRNDA : SInst<"vrnda", "dd", "fQf"> {
+ let InstName = "vrinta";
+}
+def VRNDZ : SInst<"vrnd", "dd", "fQf"> {
+ let InstName = "vrintz";
+}
+def VRNDM : SInst<"vrndm", "dd", "fQf"> {
+ let InstName = "vrintm";
+}
+def VRNDP : SInst<"vrndp", "dd", "fQf"> {
+ let InstName = "vrintp";
+}
+
////////////////////////////////////////////////////////////////////////////////
// AArch64 Intrinsics
Index: lib/CodeGen/CGBuiltin.cpp
===================================================================
--- lib/CodeGen/CGBuiltin.cpp
+++ lib/CodeGen/CGBuiltin.cpp
@@ -2313,6 +2313,78 @@
Function *F = CGM.getIntrinsic(Int, Tys);
return EmitNeonCall(F, Ops, "vcvt_n");
}
+ case ARM::BI__builtin_neon_vcvta_s32_v:
+ case ARM::BI__builtin_neon_vcvtaq_s32_v: {
+ llvm::Type *FloatTy =
+ GetNeonType(this, NeonTypeFlags(NeonTypeFlags::Float32, false, quad));
+ llvm::Type *Tys[2] = { Ty, FloatTy };
+ Int = Intrinsic::arm_neon_vcvtas;
+ Function *F = CGM.getIntrinsic(Int, Tys);
+ return EmitNeonCall(F, Ops, "vcvta");
+ }
+ case ARM::BI__builtin_neon_vcvta_u32_v:
+ case ARM::BI__builtin_neon_vcvtaq_u32_v: {
+ llvm::Type *FloatTy =
+ GetNeonType(this, NeonTypeFlags(NeonTypeFlags::Float32, false, quad));
+ llvm::Type *Tys[2] = { Ty, FloatTy };
+ Int = Intrinsic::arm_neon_vcvtau;
+ Function *F = CGM.getIntrinsic(Int, Tys);
+ return EmitNeonCall(F, Ops, "vcvta");
+ }
+ case ARM::BI__builtin_neon_vcvtn_s32_v:
+ case ARM::BI__builtin_neon_vcvtnq_s32_v: {
+ llvm::Type *FloatTy =
+ GetNeonType(this, NeonTypeFlags(NeonTypeFlags::Float32, false, quad));
+ llvm::Type *Tys[2] = { Ty, FloatTy };
+ Int = Intrinsic::arm_neon_vcvtns;
+ Function *F = CGM.getIntrinsic(Int, Tys);
+ return EmitNeonCall(F, Ops, "vcvtn");
+ }
+ case ARM::BI__builtin_neon_vcvtn_u32_v:
+ case ARM::BI__builtin_neon_vcvtnq_u32_v: {
+ llvm::Type *FloatTy =
+ GetNeonType(this, NeonTypeFlags(NeonTypeFlags::Float32, false, quad));
+ llvm::Type *Tys[2] = { Ty, FloatTy };
+ Int = Intrinsic::arm_neon_vcvtnu;
+ Function *F = CGM.getIntrinsic(Int, Tys);
+ return EmitNeonCall(F, Ops, "vcvtn");
+ }
+ case ARM::BI__builtin_neon_vcvtp_s32_v:
+ case ARM::BI__builtin_neon_vcvtpq_s32_v: {
+ llvm::Type *FloatTy =
+ GetNeonType(this, NeonTypeFlags(NeonTypeFlags::Float32, false, quad));
+ llvm::Type *Tys[2] = { Ty, FloatTy };
+ Int = Intrinsic::arm_neon_vcvtps;
+ Function *F = CGM.getIntrinsic(Int, Tys);
+ return EmitNeonCall(F, Ops, "vcvtp");
+ }
+ case ARM::BI__builtin_neon_vcvtp_u32_v:
+ case ARM::BI__builtin_neon_vcvtpq_u32_v: {
+ llvm::Type *FloatTy =
+ GetNeonType(this, NeonTypeFlags(NeonTypeFlags::Float32, false, quad));
+ llvm::Type *Tys[2] = { Ty, FloatTy };
+ Int = Intrinsic::arm_neon_vcvtpu;
+ Function *F = CGM.getIntrinsic(Int, Tys);
+ return EmitNeonCall(F, Ops, "vcvtp");
+ }
+ case ARM::BI__builtin_neon_vcvtm_s32_v:
+ case ARM::BI__builtin_neon_vcvtmq_s32_v: {
+ llvm::Type *FloatTy =
+ GetNeonType(this, NeonTypeFlags(NeonTypeFlags::Float32, false, quad));
+ llvm::Type *Tys[2] = { Ty, FloatTy };
+ Int = Intrinsic::arm_neon_vcvtms;
+ Function *F = CGM.getIntrinsic(Int, Tys);
+ return EmitNeonCall(F, Ops, "vcvtm");
+ }
+ case ARM::BI__builtin_neon_vcvtm_u32_v:
+ case ARM::BI__builtin_neon_vcvtmq_u32_v: {
+ llvm::Type *FloatTy =
+ GetNeonType(this, NeonTypeFlags(NeonTypeFlags::Float32, false, quad));
+ llvm::Type *Tys[2] = { Ty, FloatTy };
+ Int = Intrinsic::arm_neon_vcvtmu;
+ Function *F = CGM.getIntrinsic(Int, Tys);
+ return EmitNeonCall(F, Ops, "vcvtm");
+ }
case ARM::BI__builtin_neon_vext_v:
case ARM::BI__builtin_neon_vextq_v: {
int CV = cast<ConstantInt>(Ops[2])->getSExtValue();
@@ -2501,10 +2573,18 @@
case ARM::BI__builtin_neon_vmaxq_v:
Int = usgn ? Intrinsic::arm_neon_vmaxu : Intrinsic::arm_neon_vmaxs;
return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vmax");
+ case ARM::BI__builtin_neon_vmaxnm_v:
+ case ARM::BI__builtin_neon_vmaxnmq_v:
+ Int = Intrinsic::arm_neon_vmaxnm;
+ return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vmaxnm");
case ARM::BI__builtin_neon_vmin_v:
case ARM::BI__builtin_neon_vminq_v:
Int = usgn ? Intrinsic::arm_neon_vminu : Intrinsic::arm_neon_vmins;
return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vmin");
+ case ARM::BI__builtin_neon_vminnm_v:
+ case ARM::BI__builtin_neon_vminnmq_v:
+ Int = Intrinsic::arm_neon_vminnm;
+ return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vminnm");
case ARM::BI__builtin_neon_vmovl_v: {
llvm::Type *DTy =llvm::VectorType::getTruncatedElementVectorType(VTy);
Ops[0] = Builder.CreateBitCast(Ops[0], DTy);
@@ -2912,6 +2992,36 @@
}
return SV;
}
+ case ARM::BI__builtin_neon_vrndn_v:
+ case ARM::BI__builtin_neon_vrndnq_v: {
+ Int = Intrinsic::arm_neon_vrintn;
+ return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrndn");
+ }
+ case ARM::BI__builtin_neon_vrndx_v:
+ case ARM::BI__builtin_neon_vrndxq_v: {
+ Int = Intrinsic::arm_neon_vrintx;
+ return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrndx");
+ }
+ case ARM::BI__builtin_neon_vrnda_v:
+ case ARM::BI__builtin_neon_vrndaq_v: {
+ Int = Intrinsic::arm_neon_vrinta;
+ return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrnda");
+ }
+ case ARM::BI__builtin_neon_vrnd_v:
+ case ARM::BI__builtin_neon_vrndq_v: {
+ Int = Intrinsic::arm_neon_vrintz;
+ return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrnd");
+ }
+ case ARM::BI__builtin_neon_vrndm_v:
+ case ARM::BI__builtin_neon_vrndmq_v: {
+ Int = Intrinsic::arm_neon_vrintm;
+ return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrndm");
+ }
+ case ARM::BI__builtin_neon_vrndp_v:
+ case ARM::BI__builtin_neon_vrndpq_v: {
+ Int = Intrinsic::arm_neon_vrintp;
+ return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrndp");
+ }
}
}
Index: test/CodeGen/arm_neon_intrinsics.c
===================================================================
--- test/CodeGen/arm_neon_intrinsics.c
+++ test/CodeGen/arm_neon_intrinsics.c
@@ -1592,6 +1592,110 @@
}
+// CHECK: test_vcvta_s32_f32
+// CHECK: vcvta.f32 d{{[0-9]+}}, d{{[0-9]+}}
+int32x2_t test_vcvta_s32_f32(float32x2_t a) {
+ return vcvta_s32_f32(a);
+}
+
+// CHECK: test_vcvtaq_s32_f32
+// CHECK: vcvta.f32 q{{[0-9]+}}, q{{[0-9]+}}
+int32x4_t test_vcvtaq_s32_f32(float32x4_t a) {
+ return vcvtaq_s32_f32(a);
+}
+
+
+// CHECK: test_vcvta_u32_f32
+// CHECK: vcvta.f32 d{{[0-9]+}}, d{{[0-9]+}}
+uint32x2_t test_vcvta_u32_f32(float32x2_t a) {
+ return vcvta_u32_f32(a);
+}
+
+// CHECK: test_vcvtaq_u32_f32
+// CHECK: vcvta.f32 q{{[0-9]+}}, q{{[0-9]+}}
+uint32x4_t test_vcvtaq_u32_f32(float32x4_t a) {
+ return vcvtaq_u32_f32(a);
+}
+
+
+// CHECK: test_vcvtm_s32_f32
+// CHECK: vcvtm.f32 d{{[0-9]+}}, d{{[0-9]+}}
+int32x2_t test_vcvtm_s32_f32(float32x2_t a) {
+ return vcvtm_s32_f32(a);
+}
+
+// CHECK: test_vcvtmq_s32_f32
+// CHECK: vcvtm.f32 q{{[0-9]+}}, q{{[0-9]+}}
+int32x4_t test_vcvtmq_s32_f32(float32x4_t a) {
+ return vcvtmq_s32_f32(a);
+}
+
+
+// CHECK: test_vcvtm_u32_f32
+// CHECK: vcvtm.f32 d{{[0-9]+}}, d{{[0-9]+}}
+uint32x2_t test_vcvtm_u32_f32(float32x2_t a) {
+ return vcvtm_u32_f32(a);
+}
+
+// CHECK: test_vcvtmq_u32_f32
+// CHECK: vcvtm.f32 q{{[0-9]+}}, q{{[0-9]+}}
+uint32x4_t test_vcvtmq_u32_f32(float32x4_t a) {
+ return vcvtmq_u32_f32(a);
+}
+
+
+// CHECK: test_vcvtn_s32_f32
+// CHECK: vcvtn.f32 d{{[0-9]+}}, d{{[0-9]+}}
+int32x2_t test_vcvtn_s32_f32(float32x2_t a) {
+ return vcvtn_s32_f32(a);
+}
+
+// CHECK: test_vcvtnq_s32_f32
+// CHECK: vcvtn.f32 q{{[0-9]+}}, q{{[0-9]+}}
+int32x4_t test_vcvtnq_s32_f32(float32x4_t a) {
+ return vcvtnq_s32_f32(a);
+}
+
+
+// CHECK: test_vcvtn_u32_f32
+// CHECK: vcvtn.f32 d{{[0-9]+}}, d{{[0-9]+}}
+uint32x2_t test_vcvtn_u32_f32(float32x2_t a) {
+ return vcvtn_u32_f32(a);
+}
+
+// CHECK: test_vcvtnq_u32_f32
+// CHECK: vcvtn.f32 q{{[0-9]+}}, q{{[0-9]+}}
+uint32x4_t test_vcvtnq_u32_f32(float32x4_t a) {
+ return vcvtnq_u32_f32(a);
+}
+
+
+// CHECK: test_vcvtp_s32_f32
+// CHECK: vcvtp.f32 d{{[0-9]+}}, d{{[0-9]+}}
+int32x2_t test_vcvtp_s32_f32(float32x2_t a) {
+ return vcvtp_s32_f32(a);
+}
+
+// CHECK: test_vcvtpq_s32_f32
+// CHECK: vcvtp.f32 q{{[0-9]+}}, q{{[0-9]+}}
+int32x4_t test_vcvtpq_s32_f32(float32x4_t a) {
+ return vcvtpq_s32_f32(a);
+}
+
+
+// CHECK: test_vcvtp_u32_f32
+// CHECK: vcvtp.f32 d{{[0-9]+}}, d{{[0-9]+}}
+uint32x2_t test_vcvtp_u32_f32(float32x2_t a) {
+ return vcvtp_u32_f32(a);
+}
+
+// CHECK: test_vcvtpq_u32_f32
+// CHECK: vcvtp.f32 q{{[0-9]+}}, q{{[0-9]+}}
+uint32x4_t test_vcvtpq_u32_f32(float32x4_t a) {
+ return vcvtpq_u32_f32(a);
+}
+
+
// CHECK: test_vcvt_f16_f32
// CHECK: vcvt.f16.f32 d{{[0-9]+}}, q{{[0-9]+}}
float16x4_t test_vcvt_f16_f32(float32x4_t a) {
@@ -4061,6 +4165,19 @@
}
+// CHECK: test_vmaxnm_f32
+// CHECK: vmaxnm.f32 d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}
+float32x2_t test_vmaxnm_f32(float32x2_t a, float32x2_t b) {
+ return vmaxnm_f32(a, b);
+}
+
+// CHECK: test_vmaxnmq_f32
+// CHECK: vmaxnm.f32 q{{[0-9]+}}, q{{[0-9]+}}, q{{[0-9]+}}
+float32x4_t test_vmaxnmq_f32(float32x4_t a, float32x4_t b) {
+ return vmaxnmq_f32(a, b);
+}
+
+
// CHECK: test_vmin_s8
// CHECK: vmin.s8 d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}
int8x8_t test_vmin_s8(int8x8_t a, int8x8_t b) {
@@ -4146,6 +4263,19 @@
}
+// CHECK: test_vminnm_f32
+// CHECK: vminnm.f32 d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}
+float32x2_t test_vminnm_f32(float32x2_t a, float32x2_t b) {
+ return vminnm_f32(a, b);
+}
+
+// CHECK: test_vminnmq_f32
+// CHECK: vminnm.f32 q{{[0-9]+}}, q{{[0-9]+}}, q{{[0-9]+}}
+float32x4_t test_vminnmq_f32(float32x4_t a, float32x4_t b) {
+ return vminnmq_f32(a, b);
+}
+
+
// CHECK: test_vmla_s8
// CHECK: vmla.i8 d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}
int8x8_t test_vmla_s8(int8x8_t a, int8x8_t b, int8x8_t c) {
@@ -8600,6 +8730,84 @@
}
+// CHECK: test_vrnda_f32
+// CHECK: vrinta.f32 d{{[0-9]+}}, d{{[0-9]+}}
+float32x2_t test_vrnda_f32(float32x2_t a) {
+ return vrnda_f32(a);
+}
+
+// CHECK: test_vrndaq_f32
+// CHECK: vrinta.f32 q{{[0-9]+}}, q{{[0-9]+}}
+float32x4_t test_vrndaq_f32(float32x4_t a) {
+ return vrndaq_f32(a);
+}
+
+
+// CHECK: test_vrndm_f32
+// CHECK: vrintm.f32 d{{[0-9]+}}, d{{[0-9]+}}
+float32x2_t test_vrndm_f32(float32x2_t a) {
+ return vrndm_f32(a);
+}
+
+// CHECK: test_vrndmq_f32
+// CHECK: vrintm.f32 q{{[0-9]+}}, q{{[0-9]+}}
+float32x4_t test_vrndmq_f32(float32x4_t a) {
+ return vrndmq_f32(a);
+}
+
+
+// CHECK: test_vrndn_f32
+// CHECK: vrintn.f32 d{{[0-9]+}}, d{{[0-9]+}}
+float32x2_t test_vrndn_f32(float32x2_t a) {
+ return vrndn_f32(a);
+}
+
+// CHECK: test_vrndnq_f32
+// CHECK: vrintn.f32 q{{[0-9]+}}, q{{[0-9]+}}
+float32x4_t test_vrndnq_f32(float32x4_t a) {
+ return vrndnq_f32(a);
+}
+
+
+// CHECK: test_vrndp_f32
+// CHECK: vrintp.f32 d{{[0-9]+}}, d{{[0-9]+}}
+float32x2_t test_vrndp_f32(float32x2_t a) {
+ return vrndp_f32(a);
+}
+
+// CHECK: test_vrndpq_f32
+// CHECK: vrintp.f32 q{{[0-9]+}}, q{{[0-9]+}}
+float32x4_t test_vrndpq_f32(float32x4_t a) {
+ return vrndpq_f32(a);
+}
+
+
+// CHECK: test_vrndx_f32
+// CHECK: vrintx.f32 d{{[0-9]+}}, d{{[0-9]+}}
+float32x2_t test_vrndx_f32(float32x2_t a) {
+ return vrndx_f32(a);
+}
+
+// CHECK: test_vrndxq_f32
+// CHECK: vrintx.f32 q{{[0-9]+}}, q{{[0-9]+}}
+float32x4_t test_vrndxq_f32(float32x4_t a) {
+ return vrndxq_f32(a);
+}
+
+
+// CHECK: test_vrnd_f32
+// CHECK: vrintz.f32 d{{[0-9]+}}, d{{[0-9]+}}
+float32x2_t test_vrnd_f32(float32x2_t a) {
+ return vrnd_f32(a);
+}
+
+// CHECK: test_vrndq_f32
+// CHECK: vrintz.f32 q{{[0-9]+}}, q{{[0-9]+}}
+float32x4_t test_vrndq_f32(float32x4_t a) {
+ return vrndq_f32(a);
+}
+
+
// CHECK: test_vrshl_s8
// CHECK: vrshl.s8 d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}
int8x8_t test_vrshl_s8(int8x8_t a, int8x8_t b) {
_______________________________________________
cfe-commits mailing list
[email protected]
http://lists.cs.uiuc.edu/mailman/listinfo/cfe-commits