https://github.com/ecnelises updated https://github.com/llvm/llvm-project/pull/67299
>From 2d628587b9cede36e7a93ecb1414cc0c16596934 Mon Sep 17 00:00:00 2001 From: Qiu Chaofan <qiuco...@cn.ibm.com> Date: Mon, 25 Sep 2023 17:06:26 +0800 Subject: [PATCH 1/2] [PowerPC] Fix use of FPSCR builtins in smmintrin.h smmintrin.h uses __builtin_mffs, __builtin_mffsl, __builtin_mtfsf and __builtin_set_fpscr_rn. This patch replaces these uses with the ppc-prefixed builtins and implements the missing ones. This fixes issue #64664. --- clang/include/clang/Basic/BuiltinsPPC.def | 2 + clang/lib/Basic/Targets/PPC.cpp | 4 ++ clang/lib/CodeGen/CGBuiltin.cpp | 5 ++ clang/lib/Headers/ppc_wrappers/smmintrin.h | 50 +++++++++++++------ clang/test/CodeGen/PowerPC/builtins-ppc.c | 13 ++++- clang/test/CodeGen/PowerPC/ppc-emmintrin.c | 5 ++ clang/test/CodeGen/PowerPC/ppc-mmintrin.c | 5 ++ clang/test/CodeGen/PowerPC/ppc-pmmintrin.c | 3 ++ clang/test/CodeGen/PowerPC/ppc-smmintrin.c | 37 ++++++++------ clang/test/CodeGen/PowerPC/ppc-tmmintrin.c | 3 ++ clang/test/CodeGen/PowerPC/ppc-x86gprintrin.c | 3 ++ 11 files changed, 99 insertions(+), 31 deletions(-) diff --git a/clang/include/clang/Basic/BuiltinsPPC.def b/clang/include/clang/Basic/BuiltinsPPC.def index 18a1186053481ed..a35488ed3dfa565 100644 --- a/clang/include/clang/Basic/BuiltinsPPC.def +++ b/clang/include/clang/Basic/BuiltinsPPC.def @@ -151,9 +151,11 @@ TARGET_BUILTIN(__builtin_ppc_extract_exp, "Uid", "", "power9-vector") TARGET_BUILTIN(__builtin_ppc_extract_sig, "ULLid", "", "power9-vector") BUILTIN(__builtin_ppc_mtfsb0, "vUIi", "") BUILTIN(__builtin_ppc_mtfsb1, "vUIi", "") +BUILTIN(__builtin_ppc_mffs, "d", "") TARGET_BUILTIN(__builtin_ppc_mffsl, "d", "", "isa-v30-instructions") BUILTIN(__builtin_ppc_mtfsf, "vUIiUi", "") BUILTIN(__builtin_ppc_mtfsfi, "vUIiUIi", "") +BUILTIN(__builtin_ppc_set_fpscr_rn, "di", "") TARGET_BUILTIN(__builtin_ppc_insert_exp, "ddULLi", "", "power9-vector") BUILTIN(__builtin_ppc_fmsub, "dddd", "") BUILTIN(__builtin_ppc_fmsubs, "ffff", "") diff --git a/clang/lib/Basic/Targets/PPC.cpp b/clang/lib/Basic/Targets/PPC.cpp 
index 4e895cc7310c00e..b8bc920c45f40a2 100644 --- a/clang/lib/Basic/Targets/PPC.cpp +++ b/clang/lib/Basic/Targets/PPC.cpp @@ -264,6 +264,10 @@ static void defineXLCompatMacros(MacroBuilder &Builder) { Builder.defineMacro("__builtin_minfe", "__builtin_ppc_minfe"); Builder.defineMacro("__builtin_minfl", "__builtin_ppc_minfl"); Builder.defineMacro("__builtin_minfs", "__builtin_ppc_minfs"); + Builder.defineMacro("__builtin_mffs", "__builtin_ppc_mffs"); + Builder.defineMacro("__builtin_mffsl", "__builtin_ppc_mffsl"); + Builder.defineMacro("__builtin_mtfsf", "__builtin_ppc_mtfsf"); + Builder.defineMacro("__builtin_set_fpscr_rn", "__builtin_ppc_set_fpscr_rn"); } /// PPCTargetInfo::getTargetDefines - Return a set of the PowerPC-specific diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp index bf984861bccb5cc..b80c5d9e7c01dc0 100644 --- a/clang/lib/CodeGen/CGBuiltin.cpp +++ b/clang/lib/CodeGen/CGBuiltin.cpp @@ -17258,6 +17258,11 @@ Value *CodeGenFunction::EmitPPCBuiltinExpr(unsigned BuiltinID, Value *Op1 = EmitScalarExpr(E->getArg(1)); return Builder.CreateFDiv(Op0, Op1, "swdiv"); } + case PPC::BI__builtin_ppc_set_fpscr_rn: + return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::ppc_setrnd), + {EmitScalarExpr(E->getArg(0))}); + case PPC::BI__builtin_ppc_mffs: + return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::ppc_readflm)); } } diff --git a/clang/lib/Headers/ppc_wrappers/smmintrin.h b/clang/lib/Headers/ppc_wrappers/smmintrin.h index 349b395c4f00b92..19cdecb18d2b83f 100644 --- a/clang/lib/Headers/ppc_wrappers/smmintrin.h +++ b/clang/lib/Headers/ppc_wrappers/smmintrin.h @@ -14,7 +14,7 @@ #ifndef NO_WARN_X86_INTRINSICS /* This header is distributed to simplify porting x86_64 code that - makes explicit use of Intel intrinsics to powerp64/powerpc64le. + makes explicit use of Intel intrinsics to powerpc64/powerpc64le. It is the user's responsibility to determine if the results are acceptable and make additional changes as necessary. 
@@ -68,10 +68,10 @@ extern __inline __m128d __asm__("mffsce %0" : "=f"(__fpscr_save.__fr)); __enables_save.__fpscr = __fpscr_save.__fpscr & 0xf8; #else - __fpscr_save.__fr = __builtin_mffs(); + __fpscr_save.__fr = __builtin_ppc_mffs(); __enables_save.__fpscr = __fpscr_save.__fpscr & 0xf8; __fpscr_save.__fpscr &= ~0xf8; - __builtin_mtfsf(0b00000011, __fpscr_save.__fr); + __builtin_ppc_mtfsf(0b00000011, __fpscr_save.__fr); #endif /* Insert an artificial "read/write" reference to the variable read below, to ensure the compiler does not schedule @@ -83,10 +83,15 @@ extern __inline __m128d switch (__rounding) { case _MM_FROUND_TO_NEAREST_INT: - __fpscr_save.__fr = __builtin_mffsl(); +#ifdef _ARCH_PWR9 + __fpscr_save.__fr = __builtin_ppc_mffsl(); +#else + __fpscr_save.__fr = __builtin_ppc_mffs(); + __fpscr_save.__fpscr &= 0x70007f0ffL; +#endif __attribute__((fallthrough)); case _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC: - __builtin_set_fpscr_rn(0b00); + __builtin_ppc_set_fpscr_rn(0b00); /* Insert an artificial "read/write" reference to the variable read below, to ensure the compiler does not schedule a read/use of the variable before the FPSCR is modified, above. @@ -102,7 +107,7 @@ extern __inline __m128d This can be removed if and when GCC PR102783 is fixed. */ __asm__("" : : "wa"(__r)); - __builtin_set_fpscr_rn(__fpscr_save.__fpscr); + __builtin_ppc_set_fpscr_rn(__fpscr_save.__fpscr); break; case _MM_FROUND_TO_NEG_INF: case _MM_FROUND_TO_NEG_INF | _MM_FROUND_NO_EXC: @@ -128,9 +133,14 @@ extern __inline __m128d */ __asm__("" : : "wa"(__r)); /* Restore enabled exceptions. 
*/ - __fpscr_save.__fr = __builtin_mffsl(); +#ifdef _ARCH_PWR9 + __fpscr_save.__fr = __builtin_ppc_mffsl(); +#else + __fpscr_save.__fr = __builtin_ppc_mffs(); + __fpscr_save.__fpscr &= 0x70007f0ffL; +#endif __fpscr_save.__fpscr |= __enables_save.__fpscr; - __builtin_mtfsf(0b00000011, __fpscr_save.__fr); + __builtin_ppc_mtfsf(0b00000011, __fpscr_save.__fr); } return (__m128d)__r; } @@ -159,10 +169,10 @@ extern __inline __m128 __asm__("mffsce %0" : "=f"(__fpscr_save.__fr)); __enables_save.__fpscr = __fpscr_save.__fpscr & 0xf8; #else - __fpscr_save.__fr = __builtin_mffs(); + __fpscr_save.__fr = __builtin_ppc_mffs(); __enables_save.__fpscr = __fpscr_save.__fpscr & 0xf8; __fpscr_save.__fpscr &= ~0xf8; - __builtin_mtfsf(0b00000011, __fpscr_save.__fr); + __builtin_ppc_mtfsf(0b00000011, __fpscr_save.__fr); #endif /* Insert an artificial "read/write" reference to the variable read below, to ensure the compiler does not schedule @@ -174,10 +184,15 @@ extern __inline __m128 switch (__rounding) { case _MM_FROUND_TO_NEAREST_INT: - __fpscr_save.__fr = __builtin_mffsl(); +#ifdef _ARCH_PWR9 + __fpscr_save.__fr = __builtin_ppc_mffsl(); +#else + __fpscr_save.__fr = __builtin_ppc_mffs(); + __fpscr_save.__fpscr &= 0x70007f0ffL; +#endif __attribute__((fallthrough)); case _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC: - __builtin_set_fpscr_rn(0b00); + __builtin_ppc_set_fpscr_rn(0b00); /* Insert an artificial "read/write" reference to the variable read below, to ensure the compiler does not schedule a read/use of the variable before the FPSCR is modified, above. @@ -193,7 +208,7 @@ extern __inline __m128 This can be removed if and when GCC PR102783 is fixed. */ __asm__("" : : "wa"(__r)); - __builtin_set_fpscr_rn(__fpscr_save.__fpscr); + __builtin_ppc_set_fpscr_rn(__fpscr_save.__fpscr); break; case _MM_FROUND_TO_NEG_INF: case _MM_FROUND_TO_NEG_INF | _MM_FROUND_NO_EXC: @@ -219,9 +234,14 @@ extern __inline __m128 */ __asm__("" : : "wa"(__r)); /* Restore enabled exceptions. 
*/ - __fpscr_save.__fr = __builtin_mffsl(); +#ifdef _ARCH_PWR9 + __fpscr_save.__fr = __builtin_ppc_mffsl(); +#else + __fpscr_save.__fr = __builtin_ppc_mffs(); + __fpscr_save.__fpscr &= 0x70007f0ffL; +#endif __fpscr_save.__fpscr |= __enables_save.__fpscr; - __builtin_mtfsf(0b00000011, __fpscr_save.__fr); + __builtin_ppc_mtfsf(0b00000011, __fpscr_save.__fr); } return (__m128)__r; } diff --git a/clang/test/CodeGen/PowerPC/builtins-ppc.c b/clang/test/CodeGen/PowerPC/builtins-ppc.c index ccc91b6560845e2..c13edf44cdcbd2a 100644 --- a/clang/test/CodeGen/PowerPC/builtins-ppc.c +++ b/clang/test/CodeGen/PowerPC/builtins-ppc.c @@ -1,5 +1,8 @@ // REQUIRES: powerpc-registered-target -// RUN: %clang_cc1 -triple powerpc-unknown-unknown -emit-llvm %s -o - | FileCheck %s +// RUN: %clang_cc1 -triple powerpc-unknown-unknown -emit-llvm %s -o - \ +// RUN: | FileCheck %s +// RUN: %clang_cc1 -triple powerpc-unknown-unknown -emit-llvm %s -o - \ +// RUN: -target-cpu pwr9 | FileCheck %s --check-prefixes=P9,CHECK void test_eh_return_data_regno() { @@ -26,6 +29,9 @@ void test_builtin_ppc_setrnd() { // CHECK: call double @llvm.ppc.setrnd(i32 %2) res = __builtin_setrnd(x); + + // CHECK: call double @llvm.ppc.setrnd(i32 %4) + res = __builtin_ppc_set_fpscr_rn(x); } void test_builtin_ppc_flm() { @@ -33,7 +39,10 @@ void test_builtin_ppc_flm() { // CHECK: call double @llvm.ppc.readflm() res = __builtin_readflm(); - // CHECK: call double @llvm.ppc.setflm(double %1) + // CHECK: call double @llvm.ppc.readflm() + res = __builtin_ppc_mffs(); + + // CHECK: call double @llvm.ppc.setflm(double %2) res = __builtin_setflm(res); #ifdef _ARCH_PWR9 diff --git a/clang/test/CodeGen/PowerPC/ppc-emmintrin.c b/clang/test/CodeGen/PowerPC/ppc-emmintrin.c index e2d26e611ac81c4..15d291496c20a4a 100644 --- a/clang/test/CodeGen/PowerPC/ppc-emmintrin.c +++ b/clang/test/CodeGen/PowerPC/ppc-emmintrin.c @@ -8,6 +8,11 @@ // RUN: %clang -S -emit-llvm -target powerpc64le-unknown-linux-gnu -mcpu=pwr10 -ffreestanding 
-DNO_WARN_X86_INTRINSICS %s \ // RUN: -ffp-contract=off -fno-discard-value-names -mllvm -disable-llvm-optzns -o - | llvm-cxxfilt -n | FileCheck %s --check-prefixes=CHECK-P10 +// RUN: %clang -x c++ -S -emit-llvm -target powerpc64le-unknown-linux-gnu -mcpu=pwr8 -ffreestanding -DNO_WARN_X86_INTRINSICS %s \ +// RUN: -ffp-contract=off -fno-discard-value-names -mllvm -disable-llvm-optzns -fsyntax-only +// RUN: %clang -x c++ -S -emit-llvm -target powerpc64le-unknown-linux-gnu -mcpu=pwr10 -ffreestanding -DNO_WARN_X86_INTRINSICS %s \ +// RUN: -ffp-contract=off -fno-discard-value-names -mllvm -disable-llvm-optzns -fsyntax-only + // RUN: %clang -S -emit-llvm -target powerpc64-ibm-aix -mcpu=pwr8 -ffreestanding -DNO_WARN_X86_INTRINSICS %s \ // RUN: -ffp-contract=off -fno-discard-value-names -mllvm -disable-llvm-optzns -o - | llvm-cxxfilt -n | FileCheck %s --check-prefixes=CHECK,CHECK-BE // RUN: %clang -S -emit-llvm -target powerpc64-ibm-aix -mcpu=pwr10 -ffreestanding -DNO_WARN_X86_INTRINSICS %s \ diff --git a/clang/test/CodeGen/PowerPC/ppc-mmintrin.c b/clang/test/CodeGen/PowerPC/ppc-mmintrin.c index 4cb5b8540092f9b..1dc6292ae3244c3 100644 --- a/clang/test/CodeGen/PowerPC/ppc-mmintrin.c +++ b/clang/test/CodeGen/PowerPC/ppc-mmintrin.c @@ -9,6 +9,11 @@ // RUN: %clang -S -emit-llvm -target powerpc64le-unknown-linux-gnu -mcpu=pwr9 -DNO_WARN_X86_INTRINSICS %s \ // RUN: -fno-discard-value-names -mllvm -disable-llvm-optzns -o - | llvm-cxxfilt -n| FileCheck %s --check-prefixes=CHECK-P9,CHECK,CHECK-LE +// RUN: %clang -x c++ -S -emit-llvm -target powerpc64le-unknown-linux-gnu -mcpu=pwr8 -DNO_WARN_X86_INTRINSICS %s \ +// RUN: -fno-discard-value-names -mllvm -disable-llvm-optzns -fsyntax-only +// RUN: %clang -x c++ -S -emit-llvm -target powerpc64le-unknown-linux-gnu -mcpu=pwr9 -DNO_WARN_X86_INTRINSICS %s \ +// RUN: -fno-discard-value-names -mllvm -disable-llvm-optzns -fsyntax-only + // RUN: %clang -S -emit-llvm -target powerpc64-unknown-freebsd13.0 -mcpu=pwr8 -DNO_WARN_X86_INTRINSICS %s \ 
// RUN: -fno-discard-value-names -mllvm -disable-llvm-optzns -o - | llvm-cxxfilt -n | FileCheck %s --check-prefixes=CHECK-P8,CHECK,CHECK-BE // RUN: %clang -S -emit-llvm -target powerpc64le-unknown-freebsd13.0 -mcpu=pwr8 -DNO_WARN_X86_INTRINSICS %s \ diff --git a/clang/test/CodeGen/PowerPC/ppc-pmmintrin.c b/clang/test/CodeGen/PowerPC/ppc-pmmintrin.c index 39194427978ad42..6e152c549498d23 100644 --- a/clang/test/CodeGen/PowerPC/ppc-pmmintrin.c +++ b/clang/test/CodeGen/PowerPC/ppc-pmmintrin.c @@ -13,6 +13,9 @@ // RUN: %clang -S -emit-llvm -target powerpc64-ibm-aix -mcpu=pwr8 -DNO_MM_MALLOC -ffreestanding -DNO_WARN_X86_INTRINSICS %s \ // RUN: -fno-discard-value-names -mllvm -disable-llvm-optzns -o - | llvm-cxxfilt -n | FileCheck %s +// RUN: %clang -x c++ -S -emit-llvm -target powerpc64le-gnu-linux -mcpu=pwr8 -DNO_MM_MALLOC -ffreestanding -DNO_WARN_X86_INTRINSICS %s \ +// RUN: -fno-discard-value-names -mllvm -disable-llvm-optzns -fsyntax-only + #include <pmmintrin.h> __m128d resd, md1, md2; diff --git a/clang/test/CodeGen/PowerPC/ppc-smmintrin.c b/clang/test/CodeGen/PowerPC/ppc-smmintrin.c index 220b65c1ce16495..7daef71a61c329e 100644 --- a/clang/test/CodeGen/PowerPC/ppc-smmintrin.c +++ b/clang/test/CodeGen/PowerPC/ppc-smmintrin.c @@ -15,6 +15,11 @@ // RUN: %clang -S -emit-llvm -target powerpc64-unknown-linux-gnu -mcpu=pwr10 -ffreestanding -DNO_WARN_X86_INTRINSICS %s \ // RUN: -fno-discard-value-names -mllvm -disable-llvm-optzns -o - | llvm-cxxfilt -n | FileCheck %s --check-prefix=P10 +// RUN: %clang -x c++ -S -emit-llvm -target powerpc64le-unknown-linux-gnu -mcpu=pwr8 -ffreestanding -DNO_WARN_X86_INTRINSICS %s \ +// RUN: -fno-discard-value-names -mllvm -disable-llvm-optzns -fsyntax-only +// RUN: %clang -x c++ -S -emit-llvm -target powerpc64le-unknown-linux-gnu -mcpu=pwr10 -ffreestanding -DNO_WARN_X86_INTRINSICS %s \ +// RUN: -fno-discard-value-names -mllvm -disable-llvm-optzns -fsyntax-only + // RUN: %clang -S -emit-llvm -target powerpc64le-unknown-freebsd13.0 
-mcpu=pwr8 -ffreestanding -DNO_WARN_X86_INTRINSICS %s \ // RUN: -fno-discard-value-names -mllvm -disable-llvm-optzns -o - | llvm-cxxfilt -n | FileCheck %s // RUN: %clang -S -emit-llvm -target powerpc64-unknown-freebsd13.0 -mcpu=pwr8 -ffreestanding -DNO_WARN_X86_INTRINSICS %s \ @@ -239,44 +244,48 @@ test_round() { // CHECK-LABEL: @test_round // CHECK-LABEL: define available_externally <4 x float> @_mm_round_ps(<4 x float> noundef %{{[0-9a-zA-Z_.]+}}, i32 noundef signext %{{[0-9a-zA-Z_.]+}}) -// CHECK: call signext i32 @__builtin_mffs() -// CHECK: call signext i32 @__builtin_mtfsf(i32 noundef signext 3, double noundef %{{[0-9a-zA-Z_.]+}}) +// CHECK: call double @llvm.ppc.readflm() +// CHECK: call void @llvm.ppc.mtfsf(i32 3, double %{{[0-9a-zA-Z_.]+}}) // CHECK: %{{[0-9a-zA-Z_.]+}} = call <4 x float> asm "", "=^wa,0" -// CHECK: call signext i32 @__builtin_mffsl() -// CHECK: call signext i32 @__builtin_set_fpscr_rn(i32 noundef signext 0) +// CHECK: call double @llvm.ppc.readflm() +// P10: call double @llvm.ppc.mffsl() +// CHECK: call double @llvm.ppc.setrnd(i32 0) // CHECK: %{{[0-9a-zA-Z_.]+}} = call <4 x float> asm "", "=^wa,0" // CHECK: call <4 x float> @vec_rint(float vector[4]) // CHECK: call void asm sideeffect "", "^wa" -// CHECK: call signext i32 @__builtin_set_fpscr_rn(i64 noundef %{{[0-9a-zA-Z_.]+}}) +// CHECK: call double @llvm.ppc.setrnd(i32 %{{[0-9a-zA-Z_.]+}}) // CHECK: call <4 x float> @vec_floor(float vector[4]) // CHECK: call <4 x float> @vec_ceil(float vector[4]) // CHECK: call <4 x float> @vec_trunc(float vector[4]) // CHECK: call <4 x float> @vec_rint(float vector[4]) // CHECK: call void asm sideeffect "", "^wa" -// CHECK: call signext i32 @__builtin_mffsl() -// CHECK: call signext i32 @__builtin_mtfsf(i32 noundef signext 3, double noundef %{{[0-9a-zA-Z_.]+}}) +// CHECK: call double @llvm.ppc.readflm() +// P10: call double @llvm.ppc.mffsl() +// CHECK: call void @llvm.ppc.mtfsf(i32 3, double %{{[0-9a-zA-Z_.]+}}) // CHECK-LABEL: define 
available_externally <4 x float> @_mm_round_ss(<4 x float> noundef %{{[0-9a-zA-Z_.]+}}, <4 x float> noundef %{{[0-9a-zA-Z_.]+}}, i32 noundef signext %{{[0-9a-zA-Z_.]+}}) // CHECK: call <4 x float> @_mm_round_ps(<4 x float> noundef %{{[0-9a-zA-Z_.]+}}, i32 noundef signext %{{[0-9a-zA-Z_.]+}}) // CHECK: extractelement <4 x float> %{{[0-9a-zA-Z_.]+}}, i32 0 // CHECK-LABEL: define available_externally <2 x double> @_mm_round_pd(<2 x double> noundef %{{[0-9a-zA-Z_.]+}}, i32 noundef signext %{{[0-9a-zA-Z_.]+}}) -// CHECK: call signext i32 @__builtin_mffs() -// CHECK: call signext i32 @__builtin_mtfsf(i32 noundef signext 3, double noundef %{{[0-9a-zA-Z_.]+}}) +// CHECK: call double @llvm.ppc.readflm() +// CHECK: call void @llvm.ppc.mtfsf(i32 3, double %{{[0-9a-zA-Z_.]+}}) // CHECK: %{{[0-9a-zA-Z_.]+}} = call <2 x double> asm "", "=^wa,0" -// CHECK: call signext i32 @__builtin_mffsl() -// CHECK: call signext i32 @__builtin_set_fpscr_rn(i32 noundef signext 0) +// CHECK: call double @llvm.ppc.readflm() +// P10: call double @llvm.ppc.mffsl() +// CHECK: call double @llvm.ppc.setrnd(i32 0) // CHECK: %{{[0-9a-zA-Z_.]+}} = call <2 x double> asm "", "=^wa,0" // CHECK: call <2 x double> @vec_rint(double vector[2]) // CHECK: call void asm sideeffect "", "^wa" -// CHECK: call signext i32 @__builtin_set_fpscr_rn(i64 noundef %{{[0-9a-zA-Z_.]+}}) +// CHECK: call double @llvm.ppc.setrnd(i32 %{{[0-9a-zA-Z_.]+}}) // CHECK: call <2 x double> @vec_floor(double vector[2]) // CHECK: call <2 x double> @vec_ceil(double vector[2]) // CHECK: call <2 x double> @vec_trunc(double vector[2]) // CHECK: call <2 x double> @vec_rint(double vector[2]) // CHECK: call void asm sideeffect "", "^wa" -// CHECK: call signext i32 @__builtin_mffsl() -// CHECK: call signext i32 @__builtin_mtfsf(i32 noundef signext 3, double noundef %{{[0-9a-zA-Z_.]+}}) +// CHECK: call double @llvm.ppc.readflm() +// P10: call double @llvm.ppc.mffsl() +// CHECK: call void @llvm.ppc.mtfsf(i32 3, double %{{[0-9a-zA-Z_.]+}}) // 
CHECK-LABEL: define available_externally <2 x double> @_mm_round_sd(<2 x double> noundef %{{[0-9a-zA-Z_.]+}}, <2 x double> noundef %{{[0-9a-zA-Z_.]+}}, i32 noundef signext %{{[0-9a-zA-Z_.]+}}) // CHECK: call <2 x double> @_mm_round_pd(<2 x double> noundef %{{[0-9a-zA-Z_.]+}}, i32 noundef signext %{{[0-9a-zA-Z_.]+}}) diff --git a/clang/test/CodeGen/PowerPC/ppc-tmmintrin.c b/clang/test/CodeGen/PowerPC/ppc-tmmintrin.c index 60633e34b56b9a1..40d3839dcf026f7 100644 --- a/clang/test/CodeGen/PowerPC/ppc-tmmintrin.c +++ b/clang/test/CodeGen/PowerPC/ppc-tmmintrin.c @@ -13,6 +13,9 @@ // RUN: %clang -S -emit-llvm -target powerpc64-ibm-aix -mcpu=pwr8 -ffreestanding -DNO_WARN_X86_INTRINSICS %s \ // RUN: -fno-discard-value-names -mllvm -disable-llvm-optzns -o - | llvm-cxxfilt -n | FileCheck %s --check-prefixes=CHECK,CHECK-BE +// RUN: %clang -x c++ -S -emit-llvm -target powerpc64le-gnu-linux -mcpu=pwr8 -ffreestanding -DNO_WARN_X86_INTRINSICS %s \ +// RUN: -fno-discard-value-names -mllvm -disable-llvm-optzns -fsyntax-only + #include <tmmintrin.h> __m64 res, m1, m2; diff --git a/clang/test/CodeGen/PowerPC/ppc-x86gprintrin.c b/clang/test/CodeGen/PowerPC/ppc-x86gprintrin.c index 238ce7c7ee574ef..ac90a5f8c530ba4 100644 --- a/clang/test/CodeGen/PowerPC/ppc-x86gprintrin.c +++ b/clang/test/CodeGen/PowerPC/ppc-x86gprintrin.c @@ -12,6 +12,9 @@ // RUN: %clang -S -emit-llvm -target powerpc64-ibm-aix -mcpu=pwr7 -ffreestanding -DNO_WARN_X86_INTRINSICS %s \ // RUN: -fno-discard-value-names -mllvm -disable-llvm-optzns -o - | llvm-cxxfilt -n | FileCheck %s +// RUN: %clang -x c++ -S -emit-llvm -target powerpc64le-unknown-linux-gnu -mcpu=pwr7 -ffreestanding -DNO_WARN_X86_INTRINSICS %s \ +// RUN: -fno-discard-value-names -mllvm -disable-llvm-optzns -fsyntax-only + #include <x86gprintrin.h> unsigned short us; >From 2c7688c2076cce1d9e33a6881b70e042041078a8 Mon Sep 17 00:00:00 2001 From: Qiu Chaofan <qiuco...@cn.ibm.com> Date: Tue, 10 Oct 2023 12:10:27 +0800 Subject: [PATCH 2/2] Implement mffsl in 
pre-Power9 targets --- clang/include/clang/Basic/BuiltinsPPC.def | 2 +- clang/lib/Headers/ppc_wrappers/smmintrin.h | 20 -------- clang/test/CodeGen/PowerPC/builtins-ppc.c | 6 +-- clang/test/CodeGen/PowerPC/ppc-smmintrin.c | 12 ++--- llvm/lib/Target/PowerPC/PPCISelLowering.cpp | 54 +++++++++++++++++++-- llvm/lib/Target/PowerPC/PPCInstrInfo.td | 4 +- llvm/test/CodeGen/PowerPC/read-set-flm.ll | 46 +++++++++++++++++- 7 files changed, 104 insertions(+), 40 deletions(-) diff --git a/clang/include/clang/Basic/BuiltinsPPC.def b/clang/include/clang/Basic/BuiltinsPPC.def index a35488ed3dfa565..f7dcf3df7b7dd58 100644 --- a/clang/include/clang/Basic/BuiltinsPPC.def +++ b/clang/include/clang/Basic/BuiltinsPPC.def @@ -152,7 +152,7 @@ TARGET_BUILTIN(__builtin_ppc_extract_sig, "ULLid", "", "power9-vector") BUILTIN(__builtin_ppc_mtfsb0, "vUIi", "") BUILTIN(__builtin_ppc_mtfsb1, "vUIi", "") BUILTIN(__builtin_ppc_mffs, "d", "") -TARGET_BUILTIN(__builtin_ppc_mffsl, "d", "", "isa-v30-instructions") +TARGET_BUILTIN(__builtin_ppc_mffsl, "d", "", "") BUILTIN(__builtin_ppc_mtfsf, "vUIiUi", "") BUILTIN(__builtin_ppc_mtfsfi, "vUIiUIi", "") BUILTIN(__builtin_ppc_set_fpscr_rn, "di", "") diff --git a/clang/lib/Headers/ppc_wrappers/smmintrin.h b/clang/lib/Headers/ppc_wrappers/smmintrin.h index 19cdecb18d2b83f..7174a83af1b8120 100644 --- a/clang/lib/Headers/ppc_wrappers/smmintrin.h +++ b/clang/lib/Headers/ppc_wrappers/smmintrin.h @@ -83,12 +83,7 @@ extern __inline __m128d switch (__rounding) { case _MM_FROUND_TO_NEAREST_INT: -#ifdef _ARCH_PWR9 __fpscr_save.__fr = __builtin_ppc_mffsl(); -#else - __fpscr_save.__fr = __builtin_ppc_mffs(); - __fpscr_save.__fpscr &= 0x70007f0ffL; -#endif __attribute__((fallthrough)); case _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC: __builtin_ppc_set_fpscr_rn(0b00); @@ -133,12 +128,7 @@ extern __inline __m128d */ __asm__("" : : "wa"(__r)); /* Restore enabled exceptions. 
*/ -#ifdef _ARCH_PWR9 __fpscr_save.__fr = __builtin_ppc_mffsl(); -#else - __fpscr_save.__fr = __builtin_ppc_mffs(); - __fpscr_save.__fpscr &= 0x70007f0ffL; -#endif __fpscr_save.__fpscr |= __enables_save.__fpscr; __builtin_ppc_mtfsf(0b00000011, __fpscr_save.__fr); } @@ -184,12 +174,7 @@ extern __inline __m128 switch (__rounding) { case _MM_FROUND_TO_NEAREST_INT: -#ifdef _ARCH_PWR9 __fpscr_save.__fr = __builtin_ppc_mffsl(); -#else - __fpscr_save.__fr = __builtin_ppc_mffs(); - __fpscr_save.__fpscr &= 0x70007f0ffL; -#endif __attribute__((fallthrough)); case _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC: __builtin_ppc_set_fpscr_rn(0b00); @@ -234,12 +219,7 @@ extern __inline __m128 */ __asm__("" : : "wa"(__r)); /* Restore enabled exceptions. */ -#ifdef _ARCH_PWR9 __fpscr_save.__fr = __builtin_ppc_mffsl(); -#else - __fpscr_save.__fr = __builtin_ppc_mffs(); - __fpscr_save.__fpscr &= 0x70007f0ffL; -#endif __fpscr_save.__fpscr |= __enables_save.__fpscr; __builtin_ppc_mtfsf(0b00000011, __fpscr_save.__fr); } diff --git a/clang/test/CodeGen/PowerPC/builtins-ppc.c b/clang/test/CodeGen/PowerPC/builtins-ppc.c index c13edf44cdcbd2a..b94e79910650a6d 100644 --- a/clang/test/CodeGen/PowerPC/builtins-ppc.c +++ b/clang/test/CodeGen/PowerPC/builtins-ppc.c @@ -1,8 +1,6 @@ // REQUIRES: powerpc-registered-target // RUN: %clang_cc1 -triple powerpc-unknown-unknown -emit-llvm %s -o - \ // RUN: | FileCheck %s -// RUN: %clang_cc1 -triple powerpc-unknown-unknown -emit-llvm %s -o - \ -// RUN: -target-cpu pwr9 | FileCheck %s --check-prefixes=P9,CHECK void test_eh_return_data_regno() { @@ -45,10 +43,8 @@ void test_builtin_ppc_flm() { // CHECK: call double @llvm.ppc.setflm(double %2) res = __builtin_setflm(res); -#ifdef _ARCH_PWR9 - // P9: call double @llvm.ppc.mffsl() + // CHECK: call double @llvm.ppc.mffsl() res = __builtin_ppc_mffsl(); -#endif } double test_builtin_unpack_ldbl(long double x) { diff --git a/clang/test/CodeGen/PowerPC/ppc-smmintrin.c b/clang/test/CodeGen/PowerPC/ppc-smmintrin.c 
index 7daef71a61c329e..9bee798ff1c071b 100644 --- a/clang/test/CodeGen/PowerPC/ppc-smmintrin.c +++ b/clang/test/CodeGen/PowerPC/ppc-smmintrin.c @@ -247,8 +247,7 @@ test_round() { // CHECK: call double @llvm.ppc.readflm() // CHECK: call void @llvm.ppc.mtfsf(i32 3, double %{{[0-9a-zA-Z_.]+}}) // CHECK: %{{[0-9a-zA-Z_.]+}} = call <4 x float> asm "", "=^wa,0" -// CHECK: call double @llvm.ppc.readflm() -// P10: call double @llvm.ppc.mffsl() +// CHECK: call double @llvm.ppc.mffsl() // CHECK: call double @llvm.ppc.setrnd(i32 0) // CHECK: %{{[0-9a-zA-Z_.]+}} = call <4 x float> asm "", "=^wa,0" // CHECK: call <4 x float> @vec_rint(float vector[4]) @@ -259,8 +258,7 @@ test_round() { // CHECK: call <4 x float> @vec_trunc(float vector[4]) // CHECK: call <4 x float> @vec_rint(float vector[4]) // CHECK: call void asm sideeffect "", "^wa" -// CHECK: call double @llvm.ppc.readflm() -// P10: call double @llvm.ppc.mffsl() +// CHECK: call double @llvm.ppc.mffsl() // CHECK: call void @llvm.ppc.mtfsf(i32 3, double %{{[0-9a-zA-Z_.]+}}) // CHECK-LABEL: define available_externally <4 x float> @_mm_round_ss(<4 x float> noundef %{{[0-9a-zA-Z_.]+}}, <4 x float> noundef %{{[0-9a-zA-Z_.]+}}, i32 noundef signext %{{[0-9a-zA-Z_.]+}}) @@ -271,8 +269,7 @@ test_round() { // CHECK: call double @llvm.ppc.readflm() // CHECK: call void @llvm.ppc.mtfsf(i32 3, double %{{[0-9a-zA-Z_.]+}}) // CHECK: %{{[0-9a-zA-Z_.]+}} = call <2 x double> asm "", "=^wa,0" -// CHECK: call double @llvm.ppc.readflm() -// P10: call double @llvm.ppc.mffsl() +// CHECK: call double @llvm.ppc.mffsl() // CHECK: call double @llvm.ppc.setrnd(i32 0) // CHECK: %{{[0-9a-zA-Z_.]+}} = call <2 x double> asm "", "=^wa,0" // CHECK: call <2 x double> @vec_rint(double vector[2]) @@ -283,8 +280,7 @@ test_round() { // CHECK: call <2 x double> @vec_trunc(double vector[2]) // CHECK: call <2 x double> @vec_rint(double vector[2]) // CHECK: call void asm sideeffect "", "^wa" -// CHECK: call double @llvm.ppc.readflm() -// P10: call double 
@llvm.ppc.mffsl() +// CHECK: call double @llvm.ppc.mffsl() // CHECK: call void @llvm.ppc.mtfsf(i32 3, double %{{[0-9a-zA-Z_.]+}}) // CHECK-LABEL: define available_externally <2 x double> @_mm_round_sd(<2 x double> noundef %{{[0-9a-zA-Z_.]+}}, <2 x double> noundef %{{[0-9a-zA-Z_.]+}}, i32 noundef signext %{{[0-9a-zA-Z_.]+}}) diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp index c6257dcdf76f633..526f7b1419d9924 100644 --- a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp +++ b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp @@ -646,8 +646,8 @@ PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM, setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::v4f32, Custom); setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::v2f64, Custom); - // To handle counter-based loop conditions. setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::i1, Custom); + setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::Other, Custom); setOperationAction(ISD::INTRINSIC_VOID, MVT::i8, Custom); setOperationAction(ISD::INTRINSIC_VOID, MVT::i16, Custom); @@ -11595,6 +11595,50 @@ SDValue PPCTargetLowering::LowerFP_EXTEND(SDValue Op, SelectionDAG &DAG) const { llvm_unreachable("ERROR:Should return for all cases within swtich."); } +// Lower mffsl intrinsic with mffs in targets without ISA 3.0 +static SDValue lowerMFFSL(SDValue Op, SelectionDAG &DAG, + const PPCSubtarget &Subtarget) { + assert(cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue() == + Intrinsic::ppc_mffsl && + "Should only be called on int_ppc_mffsl"); + if (Subtarget.isISA3_0()) + return Op; + + SDLoc dl(Op); + SDValue Chain = Op.getOperand(0); + SDValue MFFS = DAG.getNode(PPCISD::MFFS, dl, {MVT::f64, MVT::Other}, Chain); + Chain = MFFS.getValue(1); + + if (Subtarget.isPPC64()) { + SDValue Int = DAG.getNode(ISD::BITCAST, dl, MVT::i64, MFFS); + // Mask 29-31, 45-51 and 56-63 bits + SDValue Masked = DAG.getNode(ISD::AND, dl, MVT::i64, Int, + DAG.getConstant(0x70007f0ffULL, dl, MVT::i64)); + 
SDValue Cast = DAG.getNode(ISD::BITCAST, dl, MVT::f64, Masked); + return DAG.getMergeValues({Cast, Chain}, dl); + } + + MachineFunction &MF = DAG.getMachineFunction(); + MachinePointerInfo PtrInfo; + int SSFI = MF.getFrameInfo().CreateStackObject(8, Align(8), false); + SDValue Base = DAG.getFrameIndex(SSFI, MVT::i32); + Chain = DAG.getStore(Chain, dl, MFFS, Base, PtrInfo); + + assert(!Subtarget.isLittleEndian() && "32-bit little endian is unsupported!"); + SDValue Offset4 = DAG.getNode(ISD::ADD, dl, MVT::i32, Base, + DAG.getConstant(4, dl, MVT::i32)); + SDValue Hi = DAG.getLoad(MVT::i32, dl, Chain, Base, PtrInfo); + SDValue Lo = DAG.getLoad(MVT::i32, dl, Hi.getValue(1), Offset4, PtrInfo); + Chain = Lo.getValue(1); + Hi = + DAG.getNode(ISD::AND, dl, MVT::i32, Hi, DAG.getConstant(7, dl, MVT::i32)); + Lo = DAG.getNode(ISD::AND, dl, MVT::i32, Lo, + DAG.getConstant(0x7f0ffULL, dl, MVT::i32)); + Chain = DAG.getStore(Chain, dl, Hi, Base, PtrInfo); + Chain = DAG.getStore(Chain, dl, Lo, Offset4, PtrInfo); + return DAG.getLoad(MVT::f64, dl, Chain, Base, PtrInfo); +} + /// LowerOperation - Provide custom lowering hooks for some operations. /// SDValue PPCTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const { @@ -11669,8 +11713,12 @@ SDValue PPCTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const { return LowerFP_ROUND(Op, DAG); case ISD::ROTL: return LowerROTL(Op, DAG); - // For counter-based loop handling. 
- case ISD::INTRINSIC_W_CHAIN: return SDValue(); + case ISD::INTRINSIC_W_CHAIN: { + if (cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue() == + Intrinsic::ppc_mffsl) + return lowerMFFSL(Op, DAG, Subtarget); + return SDValue(); + } case ISD::BITCAST: return LowerBITCAST(Op, DAG); diff --git a/llvm/lib/Target/PowerPC/PPCInstrInfo.td b/llvm/lib/Target/PowerPC/PPCInstrInfo.td index a97062e0c643fb2..74311b00fe7ffe9 100644 --- a/llvm/lib/Target/PowerPC/PPCInstrInfo.td +++ b/llvm/lib/Target/PowerPC/PPCInstrInfo.td @@ -3188,7 +3188,6 @@ def : Pat<(PPCtc_return CTRRC:$dst, imm:$imm), (TCRETURNri CTRRC:$dst, imm:$imm)>; def : Pat<(int_ppc_readflm), (MFFS)>; -def : Pat<(int_ppc_mffsl), (MFFSL)>; // Hi and Lo for Darwin Global Addresses. def : Pat<(PPChi tglobaladdr:$in, 0), (LIS tglobaladdr:$in)>; @@ -4510,6 +4509,9 @@ def : Pat<(int_ppc_dcbfl xoaddr:$dst), def : Pat<(int_ppc_dcbflp xoaddr:$dst), (DCBF 3, xoaddr:$dst)>; +let Predicates = [IsISA3_0] in +def : Pat<(int_ppc_mffsl), (MFFSL)>; + let Predicates = [IsISA3_1] in { def DCBFPS : PPCAsmPseudo<"dcbfps $dst", (ins memrr:$dst)>; def DCBSTPS : PPCAsmPseudo<"dcbstps $dst", (ins memrr:$dst)>; diff --git a/llvm/test/CodeGen/PowerPC/read-set-flm.ll b/llvm/test/CodeGen/PowerPC/read-set-flm.ll index d9f392474555288..71747e282372cf1 100644 --- a/llvm/test/CodeGen/PowerPC/read-set-flm.ll +++ b/llvm/test/CodeGen/PowerPC/read-set-flm.ll @@ -1,5 +1,8 @@ -; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc < %s -mtriple powerpc64le-unknown-linux | FileCheck %s +; RUN: llc < %s -mtriple powerpc64le-unknown-linux -mcpu=pwr9 | FileCheck %s \ +; RUN: --check-prefix=P9 +; RUN: llc < %s -mtriple powerpc64-ibm-aix | FileCheck %s --check-prefix=BE +; RUN: llc < %s -mtriple powerpc-ibm-aix | FileCheck %s --check-prefix=BE32 ; RUN: llc < %s -mtriple powerpc64le-unknown-linux -debug-only=machine-scheduler \ ; RUN: 2>&1 | FileCheck %s --check-prefix=LOG ; REQUIRES: asserts @@ -151,8 +154,47 @@ entry: define 
double @mffsl() { ; CHECK-LABEL: mffsl: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: mffsl 1 +; CHECK-NEXT: mffs 0 +; CHECK-NEXT: lis 4, -8192 +; CHECK-NEXT: mffprd 3, 0 +; CHECK-NEXT: ori 4, 4, 65055 +; CHECK-NEXT: rldicl 4, 4, 3, 29 +; CHECK-NEXT: and 3, 3, 4 +; CHECK-NEXT: mtfprd 1, 3 ; CHECK-NEXT: blr +; +; P9-LABEL: mffsl: +; P9: # %bb.0: # %entry +; P9-NEXT: mffsl 1 +; P9-NEXT: blr +; +; BE-LABEL: mffsl: +; BE: # %bb.0: # %entry +; BE-NEXT: mffs 0 +; BE-NEXT: stfd 0, -16(1) +; BE-NEXT: ld 3, -16(1) +; BE-NEXT: lis 4, -8192 +; BE-NEXT: ori 4, 4, 65055 +; BE-NEXT: rldicl 4, 4, 3, 29 +; BE-NEXT: and 3, 3, 4 +; BE-NEXT: std 3, -8(1) +; BE-NEXT: lfd 1, -8(1) +; BE-NEXT: blr +; +; BE32-LABEL: mffsl: +; BE32: # %bb.0: # %entry +; BE32-NEXT: mffs 0 +; BE32-NEXT: stfd 0, -8(1) +; BE32-NEXT: lis 4, 7 +; BE32-NEXT: lwz 3, -4(1) +; BE32-NEXT: ori 4, 4, 61695 +; BE32-NEXT: lwz 5, -8(1) +; BE32-NEXT: and 3, 3, 4 +; BE32-NEXT: stw 3, -4(1) +; BE32-NEXT: clrlwi 3, 5, 29 +; BE32-NEXT: stw 3, -8(1) +; BE32-NEXT: lfd 1, -8(1) +; BE32-NEXT: blr entry: %x = call double @llvm.ppc.mffsl() ret double %x _______________________________________________ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits