efriedma created this revision. efriedma added reviewers: rsmith, SjoerdMeijer. Herald added a subscriber: kristof.beyls. Herald added a project: clang.
It's completely impossible to check that I've actually found all the issues, due to the use of macros in arm_neon.h, but hopefully this time it'll take more than a few hours for someone to find another issue.

I have no idea why, but apparently there's a rule that some, but not all, builtins which should take an fp16 vector actually take an int8 vector as an argument. Fix this, and add test coverage.

Repository:
  rC Clang

https://reviews.llvm.org/D68838

Files:
  test/CodeGen/aarch64-v8.2a-neon-intrinsics.c
  utils/TableGen/NeonEmitter.cpp

Index: utils/TableGen/NeonEmitter.cpp
===================================================================
--- utils/TableGen/NeonEmitter.cpp
+++ utils/TableGen/NeonEmitter.cpp
@@ -1442,7 +1442,8 @@
   }
 
   // Check if an explicit cast is needed.
-  if (CastToType.isVector() && LocalCK == ClassB) {
+  if (CastToType.isVector() &&
+      (LocalCK == ClassB || (T.isHalf() && !T.isScalarForMangling()))) {
     CastToType.makeInteger(8, true);
     Arg = "(" + CastToType.str() + ")" + Arg;
   } else if (CastToType.isVector() && LocalCK == ClassI) {
Index: test/CodeGen/aarch64-v8.2a-neon-intrinsics.c
===================================================================
--- test/CodeGen/aarch64-v8.2a-neon-intrinsics.c
+++ test/CodeGen/aarch64-v8.2a-neon-intrinsics.c
@@ -1,5 +1,5 @@
 // RUN: %clang_cc1 -triple arm64-none-linux-gnu -target-feature +neon -target-feature +fullfp16 -target-feature +v8.2a\
-// RUN: -fallow-half-arguments-and-returns -S -disable-O0-optnone -emit-llvm -o - %s \
+// RUN: -fallow-half-arguments-and-returns -flax-vector-conversions=none -S -disable-O0-optnone -emit-llvm -o - %s \
 // RUN: | opt -S -mem2reg \
 // RUN: | FileCheck %s
 
@@ -146,14 +146,14 @@
 // CHECK-LABEL: test_vcvt_u16_f16
 // CHECK: [[VCVT:%.*]] = fptoui <4 x half> %a to <4 x i16>
 // CHECK: ret <4 x i16> [[VCVT]]
-int16x4_t test_vcvt_u16_f16 (float16x4_t a) {
+uint16x4_t test_vcvt_u16_f16 (float16x4_t a) {
   return vcvt_u16_f16(a);
 }
 
 // CHECK-LABEL: test_vcvtq_u16_f16
 // CHECK: [[VCVT:%.*]] = fptoui <8 x half> %a to <8 x i16>
 // CHECK: ret <8 x i16> [[VCVT]]
-int16x8_t test_vcvtq_u16_f16 (float16x8_t a) {
+uint16x8_t test_vcvtq_u16_f16 (float16x8_t a) {
   return vcvtq_u16_f16(a);
 }
 
@@ -167,7 +167,7 @@
 // CHECK-LABEL: test_vcvta_u16_f16
 // CHECK: [[VCVT:%.*]] = call <4 x i16> @llvm.aarch64.neon.fcvtau.v4i16.v4f16(<4 x half> %a)
 // CHECK: ret <4 x i16> [[VCVT]]
-int16x4_t test_vcvta_u16_f16 (float16x4_t a) {
+uint16x4_t test_vcvta_u16_f16 (float16x4_t a) {
   return vcvta_u16_f16(a);
 }
 
Index: utils/TableGen/NeonEmitter.cpp
===================================================================
--- utils/TableGen/NeonEmitter.cpp
+++ utils/TableGen/NeonEmitter.cpp
@@ -1442,7 +1442,8 @@
   }
 
   // Check if an explicit cast is needed.
-  if (CastToType.isVector() && LocalCK == ClassB) {
+  if (CastToType.isVector() &&
+      (LocalCK == ClassB || (T.isHalf() && !T.isScalarForMangling()))) {
     CastToType.makeInteger(8, true);
     Arg = "(" + CastToType.str() + ")" + Arg;
   } else if (CastToType.isVector() && LocalCK == ClassI) {
Index: test/CodeGen/aarch64-v8.2a-neon-intrinsics.c
===================================================================
--- test/CodeGen/aarch64-v8.2a-neon-intrinsics.c
+++ test/CodeGen/aarch64-v8.2a-neon-intrinsics.c
@@ -1,5 +1,5 @@
 // RUN: %clang_cc1 -triple arm64-none-linux-gnu -target-feature +neon -target-feature +fullfp16 -target-feature +v8.2a\
-// RUN: -fallow-half-arguments-and-returns -S -disable-O0-optnone -emit-llvm -o - %s \
+// RUN: -fallow-half-arguments-and-returns -flax-vector-conversions=none -S -disable-O0-optnone -emit-llvm -o - %s \
 // RUN: | opt -S -mem2reg \
 // RUN: | FileCheck %s
 
@@ -146,14 +146,14 @@
 // CHECK-LABEL: test_vcvt_u16_f16
 // CHECK: [[VCVT:%.*]] = fptoui <4 x half> %a to <4 x i16>
 // CHECK: ret <4 x i16> [[VCVT]]
-int16x4_t test_vcvt_u16_f16 (float16x4_t a) {
+uint16x4_t test_vcvt_u16_f16 (float16x4_t a) {
   return vcvt_u16_f16(a);
 }
 
 // CHECK-LABEL: test_vcvtq_u16_f16
 // CHECK: [[VCVT:%.*]] = fptoui <8 x half> %a to <8 x i16>
 // CHECK: ret <8 x i16> [[VCVT]]
-int16x8_t test_vcvtq_u16_f16 (float16x8_t a) {
+uint16x8_t test_vcvtq_u16_f16 (float16x8_t a) {
   return vcvtq_u16_f16(a);
 }
 
@@ -167,7 +167,7 @@
 // CHECK-LABEL: test_vcvta_u16_f16
 // CHECK: [[VCVT:%.*]] = call <4 x i16> @llvm.aarch64.neon.fcvtau.v4i16.v4f16(<4 x half> %a)
 // CHECK: ret <4 x i16> [[VCVT]]
-int16x4_t test_vcvta_u16_f16 (float16x4_t a) {
+uint16x4_t test_vcvta_u16_f16 (float16x4_t a) {
   return vcvta_u16_f16(a);
 }
 
_______________________________________________ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits