Author: Qiu Chaofan
Date: 2024-03-04T21:13:59+08:00
New Revision: 906580bad3a68e3877f4ff7ac2b7fc1b7ee84fd5
URL: https://github.com/llvm/llvm-project/commit/906580bad3a68e3877f4ff7ac2b7fc1b7ee84fd5
DIFF: https://github.com/llvm/llvm-project/commit/906580bad3a68e3877f4ff7ac2b7fc1b7ee84fd5.diff

LOG: [PowerPC] Add intrinsics for rldimi/rlwimi/rlwnm (#82968)

These builtins are already there in Clang, however current codegen may
produce suboptimal results due to their complex behavior. Implement them
as intrinsics to ensure expected instructions are emitted.

Added: 


Modified: 
    clang/lib/CodeGen/CGBuiltin.cpp
    clang/test/CodeGen/PowerPC/builtins-ppc-xlcompat-rotate.c
    llvm/include/llvm/IR/IntrinsicsPowerPC.td
    llvm/lib/Target/PowerPC/PPCISelLowering.cpp
    llvm/test/CodeGen/PowerPC/rldimi.ll
    llvm/test/CodeGen/PowerPC/rlwimi.ll
    llvm/test/CodeGen/PowerPC/rlwinm.ll

Removed: 


################################################################################
diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp
index e90014261217bc..9ee51ca7142c77 100644
--- a/clang/lib/CodeGen/CGBuiltin.cpp
+++ b/clang/lib/CodeGen/CGBuiltin.cpp
@@ -17091,37 +17091,24 @@ Value *CodeGenFunction::EmitPPCBuiltinExpr(unsigned BuiltinID,
     }
     return Builder.CreateCall(CGM.getIntrinsic(ID), Ops, "");
   }
-  // Rotate and insert under mask operation.
-  // __rldimi(rs, is, shift, mask)
-  //    (rotl64(rs, shift) & mask) | (is & ~mask)
-  // __rlwimi(rs, is, shift, mask)
-  //    (rotl(rs, shift) & mask) | (is & ~mask)
   case PPC::BI__builtin_ppc_rldimi:
   case PPC::BI__builtin_ppc_rlwimi: {
     Value *Op0 = EmitScalarExpr(E->getArg(0));
     Value *Op1 = EmitScalarExpr(E->getArg(1));
     Value *Op2 = EmitScalarExpr(E->getArg(2));
     Value *Op3 = EmitScalarExpr(E->getArg(3));
-    llvm::Type *Ty = Op0->getType();
-    Function *F = CGM.getIntrinsic(Intrinsic::fshl, Ty);
-    if (BuiltinID == PPC::BI__builtin_ppc_rldimi)
-      Op2 = Builder.CreateZExt(Op2, Int64Ty);
-    Value *Shift = Builder.CreateCall(F, {Op0, Op0, Op2});
-    Value *X = Builder.CreateAnd(Shift, Op3);
-    Value *Y = Builder.CreateAnd(Op1, Builder.CreateNot(Op3));
-    return Builder.CreateOr(X, Y);
-  }
-  // Rotate and insert under mask operation.
-  // __rlwnm(rs, shift, mask)
-  //    rotl(rs, shift) & mask
+    return Builder.CreateCall(
+        CGM.getIntrinsic(BuiltinID == PPC::BI__builtin_ppc_rldimi
+                             ? Intrinsic::ppc_rldimi
+                             : Intrinsic::ppc_rlwimi),
+        {Op0, Op1, Op2, Op3});
+  }
   case PPC::BI__builtin_ppc_rlwnm: {
     Value *Op0 = EmitScalarExpr(E->getArg(0));
     Value *Op1 = EmitScalarExpr(E->getArg(1));
     Value *Op2 = EmitScalarExpr(E->getArg(2));
-    llvm::Type *Ty = Op0->getType();
-    Function *F = CGM.getIntrinsic(Intrinsic::fshl, Ty);
-    Value *Shift = Builder.CreateCall(F, {Op0, Op0, Op1});
-    return Builder.CreateAnd(Shift, Op2);
+    return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::ppc_rlwnm),
+                              {Op0, Op1, Op2});
   }
   case PPC::BI__builtin_ppc_poppar4:
   case PPC::BI__builtin_ppc_poppar8: {

diff --git a/clang/test/CodeGen/PowerPC/builtins-ppc-xlcompat-rotate.c b/clang/test/CodeGen/PowerPC/builtins-ppc-xlcompat-rotate.c
index d96bfb4621421e..b218547c00d931 100644
--- a/clang/test/CodeGen/PowerPC/builtins-ppc-xlcompat-rotate.c
+++ b/clang/test/CodeGen/PowerPC/builtins-ppc-xlcompat-rotate.c
@@ -16,11 +16,8 @@ void test_builtin_ppc_rldimi() {
   // CHECK: %res = alloca i64, align 8
   // CHECK-NEXT: [[RA:%[0-9]+]] = load i64, ptr @ull, align 8
   // CHECK-NEXT: [[RB:%[0-9]+]] = load i64, ptr @ull, align 8
-  // CHECK-NEXT: [[RC:%[0-9]+]] = call i64 @llvm.fshl.i64(i64 [[RA]], i64 [[RA]], i64 63)
-  // CHECK-NEXT: [[RD:%[0-9]+]] = and i64 [[RC]], 72057593769492480
-  // CHECK-NEXT: [[RE:%[0-9]+]] = and i64 [[RB]], -72057593769492481
-  // CHECK-NEXT: [[RF:%[0-9]+]] = or i64 [[RD]], [[RE]]
-  // CHECK-NEXT: store i64 [[RF]], ptr %res, align 8
+  // CHECK-NEXT: [[RC:%[0-9]+]] = call i64 @llvm.ppc.rldimi(i64 [[RA]], i64 [[RB]], i32 63, i64 72057593769492480)
+  // CHECK-NEXT: store i64 [[RC]], ptr %res, align 8
   // CHECK-NEXT: ret void

   /*shift = 63, mask = 0x00FFFFFFF0000000 = 72057593769492480, ~mask = 0xFF0000000FFFFFFF = -72057593769492481*/
@@ -32,11 +29,8 @@ void test_builtin_ppc_rlwimi() {
   // CHECK: %res = alloca i32, align 4
   // CHECK-NEXT: [[RA:%[0-9]+]] = load i32, ptr @ui, align 4
   // CHECK-NEXT: [[RB:%[0-9]+]] = load i32, ptr @ui, align 4
-  // CHECK-NEXT: [[RC:%[0-9]+]] = call i32 @llvm.fshl.i32(i32 [[RA]], i32 [[RA]], i32 31)
-  // CHECK-NEXT: [[RD:%[0-9]+]] = and i32 [[RC]], 16776960
-  // CHECK-NEXT: [[RE:%[0-9]+]] = and i32 [[RB]], -16776961
-  // CHECK-NEXT: [[RF:%[0-9]+]] = or i32 [[RD]], [[RE]]
-  // CHECK-NEXT: store i32 [[RF]], ptr %res, align 4
+  // CHECK-NEXT: [[RC:%[0-9]+]] = call i32 @llvm.ppc.rlwimi(i32 [[RA]], i32 [[RB]], i32 31, i32 16776960)
+  // CHECK-NEXT: store i32 [[RC]], ptr %res, align 4
   // CHECK-NEXT: ret void

   /*shift = 31, mask = 0xFFFF00 = 16776960, ~mask = 0xFFFFFFFFFF0000FF = -16776961*/
@@ -47,9 +41,8 @@ void test_builtin_ppc_rlwnm() {
   // CHECK-LABEL: test_builtin_ppc_rlwnm
   // CHECK: %res = alloca i32, align 4
   // CHECK-NEXT: [[RA:%[0-9]+]] = load i32, ptr @ui, align 4
-  // CHECK-NEXT: [[RB:%[0-9]+]] = call i32 @llvm.fshl.i32(i32 [[RA]], i32 [[RA]], i32 31)
-  // CHECK-NEXT: [[RC:%[0-9]+]] = and i32 [[RB]], 511
-  // CHECK-NEXT: store i32 [[RC]], ptr %res, align 4
+  // CHECK-NEXT: [[RB:%[0-9]+]] = call i32 @llvm.ppc.rlwnm(i32 [[RA]], i32 31, i32 511)
+  // CHECK-NEXT: store i32 [[RB]], ptr %res, align 4
   // CHECK-NEXT: ret void

   /*shift = 31, mask = 0x1FF = 511*/
@@ -63,9 +56,8 @@ void test_builtin_ppc_rlwnm2(unsigned int shift) {
   // CHECK-NEXT: store i32 %shift, ptr %shift.addr, align 4
   // CHECK-NEXT: [[RA:%[0-9]+]] = load i32, ptr @ui, align 4
   // CHECK-NEXT: [[RB:%[0-9]+]] = load i32, ptr %shift.addr, align 4
-  // CHECK-NEXT: [[RC:%[0-9]+]] = call i32 @llvm.fshl.i32(i32 [[RA]], i32 [[RA]], i32 [[RB]])
-  // CHECK-NEXT: [[RD:%[0-9]+]] = and i32 [[RC]], 511
-  // CHECK-NEXT: store i32 [[RD]], ptr %res, align 4
+  // CHECK-NEXT: [[RC:%[0-9]+]] = call i32 @llvm.ppc.rlwnm(i32 [[RA]], i32 [[RB]], i32 511)
+  // CHECK-NEXT: store i32 [[RC]], ptr %res, align 4
   // CHECK-NEXT: ret void

   /*mask = 0x1FF = 511*/

diff --git a/llvm/include/llvm/IR/IntrinsicsPowerPC.td b/llvm/include/llvm/IR/IntrinsicsPowerPC.td
index bfc2b17043bc79..ee9a04241ac2ec 100644
--- a/llvm/include/llvm/IR/IntrinsicsPowerPC.td
+++ b/llvm/include/llvm/IR/IntrinsicsPowerPC.td
@@ -182,6 +182,18 @@ let TargetPrefix = "ppc" in {  // All intrinsics start with "llvm.ppc.".
   def int_ppc_fctuwz
       : ClangBuiltin<"__builtin_ppc_fctuwz">,
         DefaultAttrsIntrinsic<[llvm_double_ty], [llvm_double_ty], [IntrNoMem]>;
+  def int_ppc_rldimi
+      : ClangBuiltin<"__builtin_ppc_rldimi">,
+        DefaultAttrsIntrinsic<[llvm_i64_ty], [llvm_i64_ty, llvm_i64_ty, llvm_i32_ty, llvm_i64_ty],
+                              [IntrNoMem, ImmArg<ArgIndex<2>>, ImmArg<ArgIndex<3>>]>;
+  def int_ppc_rlwimi
+      : ClangBuiltin<"__builtin_ppc_rlwimi">,
+        DefaultAttrsIntrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
+                              [IntrNoMem, ImmArg<ArgIndex<2>>, ImmArg<ArgIndex<3>>]>;
+  def int_ppc_rlwnm
+      : ClangBuiltin<"__builtin_ppc_rlwnm">,
+        DefaultAttrsIntrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
+                              [IntrNoMem, ImmArg<ArgIndex<2>>]>;

   // XL compatible select functions
   // TODO: Add llvm_f128_ty support.

diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
index 9fa17bac55450d..68c80dd9aa5c76 100644
--- a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
+++ b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
@@ -11,6 +11,7 @@
 //===----------------------------------------------------------------------===//

 #include "PPCISelLowering.h"
+#include "MCTargetDesc/PPCMCTargetDesc.h"
 #include "MCTargetDesc/PPCPredicates.h"
 #include "PPC.h"
 #include "PPCCCState.h"
@@ -10762,6 +10763,42 @@ SDValue PPCTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
       return DAG.getRegister(PPC::X13, MVT::i64);
     return DAG.getRegister(PPC::R2, MVT::i32);

+  case Intrinsic::ppc_rldimi: {
+    uint64_t SH = Op.getConstantOperandVal(3);
+    unsigned MB = 0, ME = 0;
+    if (!isRunOfOnes64(Op.getConstantOperandVal(4), MB, ME) || ME != 63 - SH)
+      report_fatal_error("invalid rldimi mask!");
+    return SDValue(DAG.getMachineNode(
+                       PPC::RLDIMI, dl, MVT::i64,
+                       {Op.getOperand(1), Op.getOperand(2), Op.getOperand(3),
+                        DAG.getTargetConstant(MB, dl, MVT::i32)}),
+                   0);
+  }
+
+  case Intrinsic::ppc_rlwimi: {
+    unsigned MB = 0, ME = 0;
+    if (!isRunOfOnes(Op.getConstantOperandVal(4), MB, ME))
+      report_fatal_error("invalid rlwimi mask!");
+    return SDValue(DAG.getMachineNode(
+                       PPC::RLWIMI, dl, MVT::i32,
+                       {Op.getOperand(1), Op.getOperand(2), Op.getOperand(3),
+                        DAG.getTargetConstant(MB, dl, MVT::i32),
+                        DAG.getTargetConstant(ME, dl, MVT::i32)}),
+                   0);
+  }
+
+  case Intrinsic::ppc_rlwnm: {
+    unsigned MB = 0, ME = 0;
+    if (!isRunOfOnes(Op.getConstantOperandVal(3), MB, ME))
+      report_fatal_error("invalid rlwnm mask!");
+    return SDValue(
+        DAG.getMachineNode(PPC::RLWNM, dl, MVT::i32,
+                           {Op.getOperand(1), Op.getOperand(2),
+                            DAG.getTargetConstant(MB, dl, MVT::i32),
+                            DAG.getTargetConstant(ME, dl, MVT::i32)}),
+        0);
+  }
+
   case Intrinsic::ppc_mma_disassemble_acc: {
     if (Subtarget.isISAFuture()) {
       EVT ReturnTypes[] = {MVT::v256i1, MVT::v256i1};

diff --git a/llvm/test/CodeGen/PowerPC/rldimi.ll b/llvm/test/CodeGen/PowerPC/rldimi.ll
index 4e26ddfc37f99e..322975f547c996 100644
--- a/llvm/test/CodeGen/PowerPC/rldimi.ll
+++ b/llvm/test/CodeGen/PowerPC/rldimi.ll
@@ -58,3 +58,18 @@ entry:
   %8 = or i64 %6, %7
   ret i64 %8
 }
+
+define i64 @rldimi_intrinsic(i64 %a) {
+; CHECK-LABEL: rldimi_intrinsic:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    rldimi 3, 3, 8, 0
+; CHECK-NEXT:    rldimi 3, 3, 16, 0
+; CHECK-NEXT:    rldimi 3, 3, 32, 0
+; CHECK-NEXT:    blr
+  %r1 = call i64 @llvm.ppc.rldimi(i64 %a, i64 %a, i32 8, i64 -256)
+  %r2 = call i64 @llvm.ppc.rldimi(i64 %r1, i64 %r1, i32 16, i64 -65536)
+  %r3 = call i64 @llvm.ppc.rldimi(i64 %r2, i64 %r2, i32 32, i64 -4294967296)
+  ret i64 %r3
+}
+
+declare i64 @llvm.ppc.rldimi(i64, i64, i32 immarg, i64 immarg)

diff --git a/llvm/test/CodeGen/PowerPC/rlwimi.ll b/llvm/test/CodeGen/PowerPC/rlwimi.ll
index e701236b840b2c..8b126cd3393c10 100644
--- a/llvm/test/CodeGen/PowerPC/rlwimi.ll
+++ b/llvm/test/CodeGen/PowerPC/rlwimi.ll
@@ -1,70 +1,117 @@
-; All of these ands and shifts should be folded into rlwimi's
-; RUN: llc -verify-machineinstrs < %s -mtriple=ppc32-- | not grep and
-; RUN: llc -verify-machineinstrs < %s -mtriple=ppc32-- | grep rlwimi | count 8
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4
+; RUN: llc -verify-machineinstrs < %s -mtriple=powerpc64le-unknown-linux-gnu | FileCheck %s

 define i32 @test1(i32 %x, i32 %y) {
+; CHECK-LABEL: test1:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    rlwimi 4, 3, 16, 0, 15
+; CHECK-NEXT:    mr 3, 4
+; CHECK-NEXT:    blr
 entry:
-  %tmp.3 = shl i32 %x, 16 ; <i32> [#uses=1]
-  %tmp.7 = and i32 %y, 65535 ; <i32> [#uses=1]
-  %tmp.9 = or i32 %tmp.7, %tmp.3 ; <i32> [#uses=1]
-  ret i32 %tmp.9
+  %tmp.3 = shl i32 %x, 16
+  %tmp.7 = and i32 %y, 65535
+  %tmp.9 = or i32 %tmp.7, %tmp.3
+  ret i32 %tmp.9
 }

 define i32 @test2(i32 %x, i32 %y) {
+; CHECK-LABEL: test2:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    rlwimi 3, 4, 16, 0, 15
+; CHECK-NEXT:    blr
 entry:
-  %tmp.7 = and i32 %x, 65535 ; <i32> [#uses=1]
-  %tmp.3 = shl i32 %y, 16 ; <i32> [#uses=1]
-  %tmp.9 = or i32 %tmp.7, %tmp.3 ; <i32> [#uses=1]
-  ret i32 %tmp.9
+  %tmp.7 = and i32 %x, 65535
+  %tmp.3 = shl i32 %y, 16
+  %tmp.9 = or i32 %tmp.7, %tmp.3
+  ret i32 %tmp.9
 }

 define i32 @test3(i32 %x, i32 %y) {
+; CHECK-LABEL: test3:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    rlwimi 4, 3, 16, 16, 31
+; CHECK-NEXT:    mr 3, 4
+; CHECK-NEXT:    blr
 entry:
-  %tmp.3 = lshr i32 %x, 16 ; <i32> [#uses=1]
-  %tmp.6 = and i32 %y, -65536 ; <i32> [#uses=1]
-  %tmp.7 = or i32 %tmp.6, %tmp.3 ; <i32> [#uses=1]
-  ret i32 %tmp.7
+  %tmp.3 = lshr i32 %x, 16
+  %tmp.6 = and i32 %y, -65536
+  %tmp.7 = or i32 %tmp.6, %tmp.3
+  ret i32 %tmp.7
 }

 define i32 @test4(i32 %x, i32 %y) {
+; CHECK-LABEL: test4:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    rlwimi 3, 4, 16, 16, 31
+; CHECK-NEXT:    blr
 entry:
-  %tmp.6 = and i32 %x, -65536 ; <i32> [#uses=1]
-  %tmp.3 = lshr i32 %y, 16 ; <i32> [#uses=1]
-  %tmp.7 = or i32 %tmp.6, %tmp.3 ; <i32> [#uses=1]
-  ret i32 %tmp.7
+  %tmp.6 = and i32 %x, -65536
+  %tmp.3 = lshr i32 %y, 16
+  %tmp.7 = or i32 %tmp.6, %tmp.3
+  ret i32 %tmp.7
 }

 define i32 @test5(i32 %x, i32 %y) {
+; CHECK-LABEL: test5:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    rlwimi 4, 3, 1, 0, 15
+; CHECK-NEXT:    mr 3, 4
+; CHECK-NEXT:    blr
 entry:
-  %tmp.3 = shl i32 %x, 1 ; <i32> [#uses=1]
-  %tmp.4 = and i32 %tmp.3, -65536 ; <i32> [#uses=1]
-  %tmp.7 = and i32 %y, 65535 ; <i32> [#uses=1]
-  %tmp.9 = or i32 %tmp.4, %tmp.7 ; <i32> [#uses=1]
-  ret i32 %tmp.9
+  %tmp.3 = shl i32 %x, 1
+  %tmp.4 = and i32 %tmp.3, -65536
+  %tmp.7 = and i32 %y, 65535
+  %tmp.9 = or i32 %tmp.4, %tmp.7
+  ret i32 %tmp.9
 }

 define i32 @test6(i32 %x, i32 %y) {
+; CHECK-LABEL: test6:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    rlwimi 3, 4, 1, 0, 15
+; CHECK-NEXT:    blr
 entry:
-  %tmp.7 = and i32 %x, 65535 ; <i32> [#uses=1]
-  %tmp.3 = shl i32 %y, 1 ; <i32> [#uses=1]
-  %tmp.4 = and i32 %tmp.3, -65536 ; <i32> [#uses=1]
-  %tmp.9 = or i32 %tmp.4, %tmp.7 ; <i32> [#uses=1]
-  ret i32 %tmp.9
+  %tmp.7 = and i32 %x, 65535
+  %tmp.3 = shl i32 %y, 1
+  %tmp.4 = and i32 %tmp.3, -65536
+  %tmp.9 = or i32 %tmp.4, %tmp.7
+  ret i32 %tmp.9
 }

 define i32 @test7(i32 %x, i32 %y) {
+; CHECK-LABEL: test7:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    andis. 3, 3, 65535
+; CHECK-NEXT:    rldimi 3, 4, 0, 48
+; CHECK-NEXT:    blr
 entry:
-  %tmp.2 = and i32 %x, -65536 ; <i32> [#uses=1]
-  %tmp.5 = and i32 %y, 65535 ; <i32> [#uses=1]
-  %tmp.7 = or i32 %tmp.5, %tmp.2 ; <i32> [#uses=1]
-  ret i32 %tmp.7
+  %tmp.2 = and i32 %x, -65536
+  %tmp.5 = and i32 %y, 65535
+  %tmp.7 = or i32 %tmp.5, %tmp.2
+  ret i32 %tmp.7
 }

 define i32 @test8(i32 %bar) {
+; CHECK-LABEL: test8:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    rlwimi 3, 3, 1, 30, 30
+; CHECK-NEXT:    blr
 entry:
-  %tmp.3 = shl i32 %bar, 1 ; <i32> [#uses=1]
-  %tmp.4 = and i32 %tmp.3, 2 ; <i32> [#uses=1]
-  %tmp.6 = and i32 %bar, -3 ; <i32> [#uses=1]
-  %tmp.7 = or i32 %tmp.4, %tmp.6 ; <i32> [#uses=1]
-  ret i32 %tmp.7
+  %tmp.3 = shl i32 %bar, 1
+  %tmp.4 = and i32 %tmp.3, 2
+  %tmp.6 = and i32 %bar, -3
+  %tmp.7 = or i32 %tmp.4, %tmp.6
+  ret i32 %tmp.7
 }
+
+define i32 @test9(i32 %a, i32 %b) {
+; CHECK-LABEL: test9:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    rlwimi 3, 4, 8, 20, 26
+; CHECK-NEXT:    blr
+entry:
+  %r = call i32 @llvm.ppc.rlwimi(i32 %a, i32 %b, i32 8, i32 4064)
+  ret i32 %r
+}
+
+declare i32 @llvm.ppc.rlwimi(i32, i32, i32 immarg, i32 immarg)

diff --git a/llvm/test/CodeGen/PowerPC/rlwinm.ll b/llvm/test/CodeGen/PowerPC/rlwinm.ll
index 2f3b3bf003cf65..c6d4e5bb000040 100644
--- a/llvm/test/CodeGen/PowerPC/rlwinm.ll
+++ b/llvm/test/CodeGen/PowerPC/rlwinm.ll
@@ -1,61 +1,100 @@
-; All of these ands and shifts should be folded into rlwimi's
-; RUN: llc -verify-machineinstrs < %s -mtriple=ppc32-- -o %t
-; RUN: not grep and %t
-; RUN: not grep srawi %t
-; RUN: not grep srwi %t
-; RUN: not grep slwi %t
-; RUN: grep rlwinm %t | count 8
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4
+; RUN: llc -verify-machineinstrs < %s -mtriple=powerpc64le-unknown-linux-gnu | FileCheck %s

 define i32 @test1(i32 %a) {
+; CHECK-LABEL: test1:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    rlwinm 3, 3, 0, 4, 19
+; CHECK-NEXT:    blr
 entry:
-  %tmp.1 = and i32 %a, 268431360 ; <i32> [#uses=1]
-  ret i32 %tmp.1
+  %tmp.1 = and i32 %a, 268431360
+  ret i32 %tmp.1
 }

 define i32 @test2(i32 %a) {
+; CHECK-LABEL: test2:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    rlwinm 3, 3, 24, 24, 31
+; CHECK-NEXT:    blr
 entry:
-  %tmp.1 = and i32 %a, -268435441 ; <i32> [#uses=1]
-  ret i32 %tmp.1
+  %tmp.2 = ashr i32 %a, 8
+  %tmp.3 = and i32 %tmp.2, 255
+  ret i32 %tmp.3
 }

 define i32 @test3(i32 %a) {
+; CHECK-LABEL: test3:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    rlwinm 3, 3, 24, 24, 31
+; CHECK-NEXT:    blr
 entry:
-  %tmp.2 = ashr i32 %a, 8 ; <i32> [#uses=1]
-  %tmp.3 = and i32 %tmp.2, 255 ; <i32> [#uses=1]
-  ret i32 %tmp.3
+  %tmp.3 = lshr i32 %a, 8
+  %tmp.4 = and i32 %tmp.3, 255
+  ret i32 %tmp.4
 }

 define i32 @test4(i32 %a) {
+; CHECK-LABEL: test4:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    rlwinm 3, 3, 8, 0, 8
+; CHECK-NEXT:    blr
 entry:
-  %tmp.3 = lshr i32 %a, 8 ; <i32> [#uses=1]
-  %tmp.4 = and i32 %tmp.3, 255 ; <i32> [#uses=1]
-  ret i32 %tmp.4
+  %tmp.2 = shl i32 %a, 8
+  %tmp.3 = and i32 %tmp.2, -8388608
+  ret i32 %tmp.3
 }

 define i32 @test5(i32 %a) {
+; CHECK-LABEL: test5:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    rlwinm 3, 3, 24, 24, 31
+; CHECK-NEXT:    blr
 entry:
-  %tmp.2 = shl i32 %a, 8 ; <i32> [#uses=1]
-  %tmp.3 = and i32 %tmp.2, -8388608 ; <i32> [#uses=1]
-  ret i32 %tmp.3
+  %tmp.1 = and i32 %a, 65280
+  %tmp.2 = ashr i32 %tmp.1, 8
+  ret i32 %tmp.2
 }

 define i32 @test6(i32 %a) {
+; CHECK-LABEL: test6:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    rlwinm 3, 3, 24, 24, 31
+; CHECK-NEXT:    blr
 entry:
-  %tmp.1 = and i32 %a, 65280 ; <i32> [#uses=1]
-  %tmp.2 = ashr i32 %tmp.1, 8 ; <i32> [#uses=1]
-  ret i32 %tmp.2
+  %tmp.1 = and i32 %a, 65280
+  %tmp.2 = lshr i32 %tmp.1, 8
+  ret i32 %tmp.2
 }

 define i32 @test7(i32 %a) {
+; CHECK-LABEL: test7:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    rlwinm 3, 3, 8, 0, 7
+; CHECK-NEXT:    blr
 entry:
-  %tmp.1 = and i32 %a, 65280 ; <i32> [#uses=1]
-  %tmp.2 = lshr i32 %tmp.1, 8 ; <i32> [#uses=1]
-  ret i32 %tmp.2
+  %tmp.1 = and i32 %a, 16711680
+  %tmp.2 = shl i32 %tmp.1, 8
+  ret i32 %tmp.2
 }

-define i32 @test8(i32 %a) {
+define i32 @test8(i32 %a, i32 %s) {
+; CHECK-LABEL: test8:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    rlwnm 3, 3, 4, 23, 31
+; CHECK-NEXT:    blr
 entry:
-  %tmp.1 = and i32 %a, 16711680 ; <i32> [#uses=1]
-  %tmp.2 = shl i32 %tmp.1, 8 ; <i32> [#uses=1]
-  ret i32 %tmp.2
+  %r = call i32 @llvm.ppc.rlwnm(i32 %a, i32 %s, i32 511)
+  ret i32 %r
 }
+
+define i32 @test9(i32 %a) {
+; CHECK-LABEL: test9:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    rlwinm 3, 3, 31, 23, 31
+; CHECK-NEXT:    blr
+entry:
+  %r = call i32 @llvm.ppc.rlwnm(i32 %a, i32 31, i32 511)
+  ret i32 %r
+}
+
+declare i32 @llvm.ppc.rlwnm(i32, i32, i32 immarg)
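For reference, a minimal C usage sketch (illustrative only, not part of the commit; the function names are made up here, and it assumes a PowerPC target where the __builtin_ppc_rldimi/__builtin_ppc_rlwimi/__builtin_ppc_rlwnm builtins are available). The shift and mask arguments must be constants, the masks must be contiguous runs of ones, and for __builtin_ppc_rldimi the run must end at bit 63 - shift; with this change each call below should select a single rldimi/rlwimi/rlwnm instruction rather than going through fshl/and/or:

/* Rotate rs left by 8 and insert it into is under the given mask:
   (rotl64(rs, 8) & 0xFFFFFFFFFFFFFF00) | (is & 0xFF) -> rldimi */
unsigned long long insert_dword(unsigned long long rs, unsigned long long is) {
  return __builtin_ppc_rldimi(rs, is, 8, 0xFFFFFFFFFFFFFF00ULL);
}

/* (rotl32(rs, 31) & 0x00FFFF00) | (is & ~0x00FFFF00) -> rlwimi */
unsigned int insert_word(unsigned int rs, unsigned int is) {
  return __builtin_ppc_rlwimi(rs, is, 31, 0x00FFFF00);
}

/* rotl32(rs, shift) & 0x1FF; the shift may be a runtime value -> rlwnm */
unsigned int rotate_and_mask(unsigned int rs, unsigned int shift) {
  return __builtin_ppc_rlwnm(rs, shift, 0x1FF);
}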