https://github.com/ecnelises updated https://github.com/llvm/llvm-project/pull/85040
>From 4977659b16a7f220e1a738a0b9841102fe9f1d07 Mon Sep 17 00:00:00 2001 From: Qiu Chaofan <qiuco...@cn.ibm.com> Date: Wed, 13 Mar 2024 15:46:51 +0800 Subject: [PATCH] [PowerPC] Fix behavior of rldimi/rlwimi/rlwnm builtins rldimi is a 64-bit instruction, so the corresponding builtin should not be available in 32-bit mode. The rotate amount should be in range, and cases where the mask is zero need special handling. This change also swaps the first and second operands of rldimi/rlwimi to match previous behavior. For masks not ending at bit 63-SH, rotation will be inserted before rldimi. --- clang/lib/Sema/SemaChecking.cpp | 5 ++- .../PowerPC/builtins-ppc-xlcompat-error.c | 7 ++++ .../PowerPC/builtins-ppc-xlcompat-rotate.c | 22 +++++++----- llvm/lib/Target/PowerPC/PPCISelLowering.cpp | 35 +++++++++++++++---- llvm/test/CodeGen/PowerPC/rlwimi.ll | 3 +- 5 files changed, 54 insertions(+), 18 deletions(-) diff --git a/clang/lib/Sema/SemaChecking.cpp b/clang/lib/Sema/SemaChecking.cpp index a5f42b630c3fa2..b032ea1db344a8 100644 --- a/clang/lib/Sema/SemaChecking.cpp +++ b/clang/lib/Sema/SemaChecking.cpp @@ -4992,6 +4992,7 @@ static bool isPPC_64Builtin(unsigned BuiltinID) { case PPC::BI__builtin_ppc_fetch_and_andlp: case PPC::BI__builtin_ppc_fetch_and_orlp: case PPC::BI__builtin_ppc_fetch_and_swaplp: + case PPC::BI__builtin_ppc_rldimi: return true; } return false; @@ -5093,8 +5094,10 @@ bool Sema::CheckPPCBuiltinFunctionCall(const TargetInfo &TI, unsigned BuiltinID, case PPC::BI__builtin_ppc_rlwnm: return SemaValueIsRunOfOnes(TheCall, 2); case PPC::BI__builtin_ppc_rlwimi: + return SemaBuiltinConstantArgRange(TheCall, 2, 0, 31) || + SemaValueIsRunOfOnes(TheCall, 3); case PPC::BI__builtin_ppc_rldimi: - return SemaBuiltinConstantArg(TheCall, 2, Result) || + return SemaBuiltinConstantArgRange(TheCall, 2, 0, 63) || SemaValueIsRunOfOnes(TheCall, 3); case PPC::BI__builtin_ppc_addex: { if (SemaBuiltinConstantArgRange(TheCall, 2, 0, 3)) diff --git 
a/clang/test/CodeGen/PowerPC/builtins-ppc-xlcompat-error.c b/clang/test/CodeGen/PowerPC/builtins-ppc-xlcompat-error.c index 5f57d7575c859a..272e0222dc9e41 100644 --- a/clang/test/CodeGen/PowerPC/builtins-ppc-xlcompat-error.c +++ b/clang/test/CodeGen/PowerPC/builtins-ppc-xlcompat-error.c @@ -24,13 +24,16 @@ void test_trap(void) { __tw(ia, ib, 0); //expected-error {{argument value 0 is outside the valid range [1, 31]}} } +#ifdef __PPC64__ void test_builtin_ppc_rldimi() { unsigned int shift; unsigned long long mask; unsigned long long res = __builtin_ppc_rldimi(ull, ull, shift, 7); // expected-error {{argument to '__builtin_ppc_rldimi' must be a constant integer}} res = __builtin_ppc_rldimi(ull, ull, 63, mask); // expected-error {{argument to '__builtin_ppc_rldimi' must be a constant integer}} res = __builtin_ppc_rldimi(ull, ull, 63, 0xFFFF000000000F00); // expected-error {{argument 3 value should represent a contiguous bit field}} + res = __builtin_ppc_rldimi(ull, ull, 64, 0xFFFF000000000000); // expected-error {{argument value 64 is outside the valid range [0, 63]}} } +#endif void test_builtin_ppc_rlwimi() { unsigned int shift; @@ -83,6 +86,10 @@ void testalignx(const void *pointer, unsigned int alignment) { } #ifndef __PPC64__ +unsigned long long testrldimi32() { + return __rldimi(ull, ui, 3, 0x7ffff8ULL); //expected-error {{this builtin is only available on 64-bit targets}} +} + long long testbpermd(long long bit_selector, long long source) { return __bpermd(bit_selector, source); //expected-error {{this builtin is only available on 64-bit targets}} } diff --git a/clang/test/CodeGen/PowerPC/builtins-ppc-xlcompat-rotate.c b/clang/test/CodeGen/PowerPC/builtins-ppc-xlcompat-rotate.c index b218547c00d931..4773d6cb1a0cfd 100644 --- a/clang/test/CodeGen/PowerPC/builtins-ppc-xlcompat-rotate.c +++ b/clang/test/CodeGen/PowerPC/builtins-ppc-xlcompat-rotate.c @@ -1,8 +1,10 @@ // REQUIRES: powerpc-registered-target // RUN: %clang_cc1 -triple powerpc64-unknown-linux-gnu \ -// 
RUN: -emit-llvm %s -o - -target-cpu pwr7 | FileCheck %s +// RUN: -emit-llvm %s -o - -target-cpu pwr7 | FileCheck %s \ +// RUN: -check-prefixes=PPC64,CHECK // RUN: %clang_cc1 -triple powerpc64le-unknown-linux-gnu \ -// RUN: -emit-llvm %s -o - -target-cpu pwr8 | FileCheck %s +// RUN: -emit-llvm %s -o - -target-cpu pwr8 | FileCheck %s \ +// RUN: -check-prefixes=PPC64,CHECK // RUN: %clang_cc1 -triple powerpc-unknown-aix \ // RUN: -emit-llvm %s -o - -target-cpu pwr7 | FileCheck %s // RUN: %clang_cc1 -triple powerpc64-unknown-aix \ @@ -11,18 +13,20 @@ extern unsigned int ui; extern unsigned long long ull; +#ifdef __PPC64__ void test_builtin_ppc_rldimi() { - // CHECK-LABEL: test_builtin_ppc_rldimi - // CHECK: %res = alloca i64, align 8 - // CHECK-NEXT: [[RA:%[0-9]+]] = load i64, ptr @ull, align 8 - // CHECK-NEXT: [[RB:%[0-9]+]] = load i64, ptr @ull, align 8 - // CHECK-NEXT: [[RC:%[0-9]+]] = call i64 @llvm.ppc.rldimi(i64 [[RA]], i64 [[RB]], i32 63, i64 72057593769492480) - // CHECK-NEXT: store i64 [[RC]], ptr %res, align 8 - // CHECK-NEXT: ret void + // PPC64-LABEL: test_builtin_ppc_rldimi + // PPC64: %res = alloca i64, align 8 + // PPC64-NEXT: [[RA:%[0-9]+]] = load i64, ptr @ull, align 8 + // PPC64-NEXT: [[RB:%[0-9]+]] = load i64, ptr @ull, align 8 + // PPC64-NEXT: [[RC:%[0-9]+]] = call i64 @llvm.ppc.rldimi(i64 [[RA]], i64 [[RB]], i32 63, i64 72057593769492480) + // PPC64-NEXT: store i64 [[RC]], ptr %res, align 8 + // PPC64-NEXT: ret void /*shift = 63, mask = 0x00FFFFFFF0000000 = 72057593769492480, ~mask = 0xFF0000000FFFFFFF = -72057593769492481*/ unsigned long long res = __builtin_ppc_rldimi(ull, ull, 63, 0x00FFFFFFF0000000); } +#endif void test_builtin_ppc_rlwimi() { // CHECK-LABEL: test_builtin_ppc_rlwimi diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp index 68c80dd9aa5c76..306a04f47ee84d 100644 --- a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp +++ b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp @@ -10764,30 
+10764,51 @@ SDValue PPCTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op, return DAG.getRegister(PPC::R2, MVT::i32); case Intrinsic::ppc_rldimi: { + assert(Subtarget.isPPC64() && "rldimi is only available in 64-bit!"); + if (Op.getConstantOperandVal(4) == 0) + return Op.getOperand(2); uint64_t SH = Op.getConstantOperandVal(3); unsigned MB = 0, ME = 0; - if (!isRunOfOnes64(Op.getConstantOperandVal(4), MB, ME) || ME != 63 - SH) + if (!isRunOfOnes64(Op.getConstantOperandVal(4), MB, ME)) report_fatal_error("invalid rldimi mask!"); - return SDValue(DAG.getMachineNode( - PPC::RLDIMI, dl, MVT::i64, - {Op.getOperand(1), Op.getOperand(2), Op.getOperand(3), - DAG.getTargetConstant(MB, dl, MVT::i32)}), - 0); + + // For all-one mask, MB will be set to 0, adjust it next to 63-SH. + if (MB == 0 && ME == 63 && SH != 0) + MB = 64 - SH; + SDValue Src = Op.getOperand(1); + // rldimi requires ME=63-SH, otherwise rotation is needed before rldimi. + if (ME < 63 - SH) { + Src = DAG.getNode(ISD::ROTL, dl, MVT::i64, Src, + DAG.getConstant(ME + SH + 1, dl, MVT::i32)); + } else if (ME > 63 - SH) { + Src = DAG.getNode(ISD::ROTL, dl, MVT::i64, Src, + DAG.getConstant(ME + SH - 63, dl, MVT::i32)); + } + return SDValue( + DAG.getMachineNode(PPC::RLDIMI, dl, MVT::i64, + {Op.getOperand(2), Src, + DAG.getTargetConstant(63 - ME, dl, MVT::i32), + DAG.getTargetConstant(MB, dl, MVT::i32)}), + 0); } case Intrinsic::ppc_rlwimi: { + if (Op.getConstantOperandVal(4) == 0) + return Op.getOperand(2); unsigned MB = 0, ME = 0; if (!isRunOfOnes(Op.getConstantOperandVal(4), MB, ME)) report_fatal_error("invalid rlwimi mask!"); return SDValue(DAG.getMachineNode( PPC::RLWIMI, dl, MVT::i32, - {Op.getOperand(1), Op.getOperand(2), Op.getOperand(3), + {Op.getOperand(2), Op.getOperand(1), Op.getOperand(3), DAG.getTargetConstant(MB, dl, MVT::i32), DAG.getTargetConstant(ME, dl, MVT::i32)}), 0); } case Intrinsic::ppc_rlwnm: { + if (Op.getConstantOperandVal(3) == 0) + return DAG.getConstant(0, dl, MVT::i32); unsigned MB = 
0, ME = 0; if (!isRunOfOnes(Op.getConstantOperandVal(3), MB, ME)) report_fatal_error("invalid rlwnm mask!"); diff --git a/llvm/test/CodeGen/PowerPC/rlwimi.ll b/llvm/test/CodeGen/PowerPC/rlwimi.ll index 8b126cd3393c10..b7a2ded7bd8df4 100644 --- a/llvm/test/CodeGen/PowerPC/rlwimi.ll +++ b/llvm/test/CodeGen/PowerPC/rlwimi.ll @@ -107,7 +107,8 @@ entry: define i32 @test9(i32 %a, i32 %b) { ; CHECK-LABEL: test9: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: rlwimi 3, 4, 8, 20, 26 +; CHECK-NEXT: rlwimi 4, 3, 8, 20, 26 +; CHECK-NEXT: mr 3, 4 ; CHECK-NEXT: blr entry: %r = call i32 @llvm.ppc.rlwimi(i32 %a, i32 %b, i32 8, i32 4064) _______________________________________________ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits