Author: aemerson Date: Thu Mar 21 15:31:37 2019 New Revision: 356722 URL: http://llvm.org/viewvc/llvm-project?rev=356722&view=rev Log: [AArch64] Split the neon.addp intrinsic into integer and fp variants.
This is the result of discussions on the list about how to deal with intrinsics which require codegen to disambiguate them via only the integer/fp overloads. It causes problems for GlobalISel as some of that information is lost during translation, while with other operations like IR instructions the information is encoded into the instruction opcode. This patch changes clang to emit the new faddp intrinsic if the vector operands to the builtin have FP element types. LLVM IR AutoUpgrade has been taught to upgrade existing calls to aarch64.neon.addp with fp vector arguments, and we remove the workarounds introduced for GlobalISel in r355865. This is a more permanent solution to PR40968. Differential Revision: https://reviews.llvm.org/D59655 Modified: cfe/trunk/lib/CodeGen/CGBuiltin.cpp cfe/trunk/test/CodeGen/aarch64-neon-intrinsics.c cfe/trunk/test/CodeGen/aarch64-v8.2a-neon-intrinsics.c Modified: cfe/trunk/lib/CodeGen/CGBuiltin.cpp URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/CodeGen/CGBuiltin.cpp?rev=356722&r1=356721&r2=356722&view=diff ============================================================================== --- cfe/trunk/lib/CodeGen/CGBuiltin.cpp (original) +++ cfe/trunk/lib/CodeGen/CGBuiltin.cpp Thu Mar 21 15:31:37 2019 @@ -5095,6 +5095,13 @@ Value *CodeGenFunction::EmitCommonNeonBu switch (BuiltinID) { default: break; + case NEON::BI__builtin_neon_vpadd_v: + case NEON::BI__builtin_neon_vpaddq_v: + // We don't allow fp/int overloading of intrinsics. + if (VTy->getElementType()->isFloatingPointTy() && + Int == Intrinsic::aarch64_neon_addp) + Int = Intrinsic::aarch64_neon_faddp; + break; case NEON::BI__builtin_neon_vabs_v: case NEON::BI__builtin_neon_vabsq_v: if (VTy->getElementType()->isFloatingPointTy()) Modified: cfe/trunk/test/CodeGen/aarch64-neon-intrinsics.c URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/test/CodeGen/aarch64-neon-intrinsics.c?rev=356722&r1=356721&r2=356722&view=diff ============================================================================== --- cfe/trunk/test/CodeGen/aarch64-neon-intrinsics.c (original) +++ cfe/trunk/test/CodeGen/aarch64-neon-intrinsics.c Thu Mar 21 15:31:37 2019 @@ -4411,7 +4411,7 @@ uint32x2_t test_vpadd_u32(uint32x2_t a, // CHECK-LABEL: @test_vpadd_f32( // CHECK: [[TMP0:%.*]] = bitcast <2 x float> %a to <8 x i8> // CHECK: [[TMP1:%.*]] = bitcast <2 x float> %b to <8 x i8> -// CHECK: [[VPADD_V2_I:%.*]] = call <2 x float> @llvm.aarch64.neon.addp.v2f32(<2 x float> %a, <2 x float> %b) +// CHECK: [[VPADD_V2_I:%.*]] = call <2 x float> @llvm.aarch64.neon.faddp.v2f32(<2 x float> %a, <2 x float> %b) // CHECK: [[VPADD_V3_I:%.*]] = bitcast <2 x float> [[VPADD_V2_I]] to <8 x i8> // CHECK: ret <2 x float> [[VPADD_V2_I]] float32x2_t test_vpadd_f32(float32x2_t a, float32x2_t b) { @@ -4475,7 +4475,7 @@ uint32x4_t test_vpaddq_u32(uint32x4_t a, // CHECK-LABEL: @test_vpaddq_f32( // CHECK: [[TMP0:%.*]] = bitcast <4 x float> %a to <16 x i8> // CHECK: [[TMP1:%.*]] = bitcast <4 x float> %b to <16 x i8> -// CHECK: [[VPADDQ_V2_I:%.*]] = call <4 x float> @llvm.aarch64.neon.addp.v4f32(<4 x float> %a, <4 x float> %b) +// CHECK: [[VPADDQ_V2_I:%.*]] = call <4 x float> @llvm.aarch64.neon.faddp.v4f32(<4 x float> %a, <4 x float> %b) // CHECK: [[VPADDQ_V3_I:%.*]] = bitcast <4 x float> [[VPADDQ_V2_I]] to <16 x i8> // CHECK: ret <4 x float> [[VPADDQ_V2_I]] float32x4_t test_vpaddq_f32(float32x4_t a, float32x4_t b) { @@ -4485,7 +4485,7 @@ float32x4_t test_vpaddq_f32(float32x4_t // CHECK-LABEL: @test_vpaddq_f64( // CHECK: [[TMP0:%.*]] = bitcast <2 x double> %a to <16 x i8> // CHECK: [[TMP1:%.*]] = bitcast <2 x double> %b to <16 x i8> -// CHECK: [[VPADDQ_V2_I:%.*]] = call <2 x double> @llvm.aarch64.neon.addp.v2f64(<2 x double> %a, <2 x double> %b) +// CHECK: [[VPADDQ_V2_I:%.*]] = call <2 x double> @llvm.aarch64.neon.faddp.v2f64(<2 x double> %a, <2 x double> %b) // CHECK: [[VPADDQ_V3_I:%.*]] = bitcast <2 x double> [[VPADDQ_V2_I]] to <16 x i8> // CHECK: ret <2 x double> [[VPADDQ_V2_I]] float64x2_t test_vpaddq_f64(float64x2_t a, float64x2_t b) { Modified: cfe/trunk/test/CodeGen/aarch64-v8.2a-neon-intrinsics.c URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/test/CodeGen/aarch64-v8.2a-neon-intrinsics.c?rev=356722&r1=356721&r2=356722&view=diff ============================================================================== --- cfe/trunk/test/CodeGen/aarch64-v8.2a-neon-intrinsics.c (original) +++ cfe/trunk/test/CodeGen/aarch64-v8.2a-neon-intrinsics.c Thu Mar 21 15:31:37 2019 @@ -736,14 +736,14 @@ float16x8_t test_vmulxq_f16(float16x8_t } // CHECK-LABEL: test_vpadd_f16 -// CHECK: [[ADD:%.*]] = call <4 x half> @llvm.aarch64.neon.addp.v4f16(<4 x half> %a, <4 x half> %b) +// CHECK: [[ADD:%.*]] = call <4 x half> @llvm.aarch64.neon.faddp.v4f16(<4 x half> %a, <4 x half> %b) // CHECK: ret <4 x half> [[ADD]] float16x4_t test_vpadd_f16(float16x4_t a, float16x4_t b) { return vpadd_f16(a, b); } // CHECK-LABEL: test_vpaddq_f16 -// CHECK: [[ADD:%.*]] = call <8 x half> @llvm.aarch64.neon.addp.v8f16(<8 x half> %a, <8 x half> %b) +// CHECK: [[ADD:%.*]] = call <8 x half> @llvm.aarch64.neon.faddp.v8f16(<8 x half> %a, <8 x half> %b) // CHECK: ret <8 x half> [[ADD]] float16x8_t test_vpaddq_f16(float16x8_t a, float16x8_t b) { return vpaddq_f16(a, b); _______________________________________________ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits