[clang] [llvm] [PowerPC] Add intrinsics for rldimi/rlwimi/rlwnm (PR #82968)

2024-03-04 Thread Qiu Chaofan via cfe-commits

https://github.com/ecnelises closed 
https://github.com/llvm/llvm-project/pull/82968
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [llvm] [PowerPC] Add intrinsics for rldimi/rlwimi/rlwnm (PR #82968)

2024-03-03 Thread Qiu Chaofan via cfe-commits

https://github.com/ecnelises updated 
https://github.com/llvm/llvm-project/pull/82968

>From a06fa5e18313ad50019d50006e34a6b8249d95cd Mon Sep 17 00:00:00 2001
From: Qiu Chaofan 
Date: Mon, 26 Feb 2024 16:32:28 +0800
Subject: [PATCH 1/4] [PowerPC] Add intrinsics for rldimi/rlwimi/rlwnm

These builtins are already there in Clang, however current codegen may
produce suboptimal results due to their complex behavior. Implement them
as intrinsics to ensure expected instructions are emitted.
---
 clang/lib/CodeGen/CGBuiltin.cpp   |  29 ++---
 .../PowerPC/builtins-ppc-xlcompat-rotate.c|  24 ++--
 llvm/include/llvm/IR/IntrinsicsPowerPC.td |  12 ++
 llvm/lib/Target/PowerPC/PPCISelLowering.cpp   |  52 
 llvm/test/CodeGen/PowerPC/rldimi.ll   |  15 +++
 llvm/test/CodeGen/PowerPC/rlwimi.ll   | 123 --
 llvm/test/CodeGen/PowerPC/rlwinm.ll   | 108 ++-
 7 files changed, 259 insertions(+), 104 deletions(-)

diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp
index 734eb5a035ca49..5d55be6e9e99df 100644
--- a/clang/lib/CodeGen/CGBuiltin.cpp
+++ b/clang/lib/CodeGen/CGBuiltin.cpp
@@ -17080,37 +17080,24 @@ Value *CodeGenFunction::EmitPPCBuiltinExpr(unsigned 
BuiltinID,
 }
 return Builder.CreateCall(CGM.getIntrinsic(ID), Ops, "");
   }
-  // Rotate and insert under mask operation.
-  // __rldimi(rs, is, shift, mask)
-  // (rotl64(rs, shift) & mask) | (is & ~mask)
-  // __rlwimi(rs, is, shift, mask)
-  // (rotl(rs, shift) & mask) | (is & ~mask)
   case PPC::BI__builtin_ppc_rldimi:
   case PPC::BI__builtin_ppc_rlwimi: {
 Value *Op0 = EmitScalarExpr(E->getArg(0));
 Value *Op1 = EmitScalarExpr(E->getArg(1));
 Value *Op2 = EmitScalarExpr(E->getArg(2));
 Value *Op3 = EmitScalarExpr(E->getArg(3));
-llvm::Type *Ty = Op0->getType();
-Function *F = CGM.getIntrinsic(Intrinsic::fshl, Ty);
-if (BuiltinID == PPC::BI__builtin_ppc_rldimi)
-  Op2 = Builder.CreateZExt(Op2, Int64Ty);
-Value *Shift = Builder.CreateCall(F, {Op0, Op0, Op2});
-Value *X = Builder.CreateAnd(Shift, Op3);
-Value *Y = Builder.CreateAnd(Op1, Builder.CreateNot(Op3));
-return Builder.CreateOr(X, Y);
-  }
-  // Rotate and insert under mask operation.
-  // __rlwnm(rs, shift, mask)
-  // rotl(rs, shift) & mask
+return Builder.CreateCall(
+CGM.getIntrinsic(BuiltinID == PPC::BI__builtin_ppc_rldimi
+ ? Intrinsic::ppc_rldimi
+ : Intrinsic::ppc_rlwimi),
+{Op0, Op1, Op2, Op3});
+  }
   case PPC::BI__builtin_ppc_rlwnm: {
 Value *Op0 = EmitScalarExpr(E->getArg(0));
 Value *Op1 = EmitScalarExpr(E->getArg(1));
 Value *Op2 = EmitScalarExpr(E->getArg(2));
-llvm::Type *Ty = Op0->getType();
-Function *F = CGM.getIntrinsic(Intrinsic::fshl, Ty);
-Value *Shift = Builder.CreateCall(F, {Op0, Op0, Op1});
-return Builder.CreateAnd(Shift, Op2);
+return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::ppc_rlwnm),
+  {Op0, Op1, Op2});
   }
   case PPC::BI__builtin_ppc_poppar4:
   case PPC::BI__builtin_ppc_poppar8: {
diff --git a/clang/test/CodeGen/PowerPC/builtins-ppc-xlcompat-rotate.c 
b/clang/test/CodeGen/PowerPC/builtins-ppc-xlcompat-rotate.c
index d96bfb4621421e..b218547c00d931 100644
--- a/clang/test/CodeGen/PowerPC/builtins-ppc-xlcompat-rotate.c
+++ b/clang/test/CodeGen/PowerPC/builtins-ppc-xlcompat-rotate.c
@@ -16,11 +16,8 @@ void test_builtin_ppc_rldimi() {
   // CHECK:   %res = alloca i64, align 8
   // CHECK-NEXT:  [[RA:%[0-9]+]] = load i64, ptr @ull, align 8
   // CHECK-NEXT:  [[RB:%[0-9]+]] = load i64, ptr @ull, align 8
-  // CHECK-NEXT:  [[RC:%[0-9]+]] = call i64 @llvm.fshl.i64(i64 [[RA]], i64 
[[RA]], i64 63)
-  // CHECK-NEXT:  [[RD:%[0-9]+]] = and i64 [[RC]], 72057593769492480
-  // CHECK-NEXT:  [[RE:%[0-9]+]] = and i64 [[RB]], -72057593769492481
-  // CHECK-NEXT:  [[RF:%[0-9]+]] = or i64 [[RD]], [[RE]]
-  // CHECK-NEXT:  store i64 [[RF]], ptr %res, align 8
+  // CHECK-NEXT:  [[RC:%[0-9]+]] = call i64 @llvm.ppc.rldimi(i64 [[RA]], i64 
[[RB]], i32 63, i64 72057593769492480)
+  // CHECK-NEXT:  store i64 [[RC]], ptr %res, align 8
   // CHECK-NEXT:  ret void
 
   /*shift = 63, mask = 0x00FFFFFFF0000000 = 72057593769492480, ~mask = 
0xFF0000000FFFFFFF = -72057593769492481*/
@@ -32,11 +29,8 @@ void test_builtin_ppc_rlwimi() {
   // CHECK:   %res = alloca i32, align 4
   // CHECK-NEXT:  [[RA:%[0-9]+]] = load i32, ptr @ui, align 4
   // CHECK-NEXT:  [[RB:%[0-9]+]] = load i32, ptr @ui, align 4
-  // CHECK-NEXT:  [[RC:%[0-9]+]] = call i32 @llvm.fshl.i32(i32 [[RA]], i32 
[[RA]], i32 31)
-  // CHECK-NEXT:  [[RD:%[0-9]+]] = and i32 [[RC]], 16776960
-  // CHECK-NEXT:  [[RE:%[0-9]+]] = and i32 [[RB]], -16776961
-  // CHECK-NEXT:  [[RF:%[0-9]+]] = or i32 [[RD]], [[RE]]
-  // CHECK-NEXT:  store i32 [[RF]], ptr %res, align 4
+  // CHECK-NEXT:  [[RC:%[0-9]+]] = call i32 @llvm.ppc.rlwimi(i32 [[RA]], i32 
[[R

[clang] [llvm] [PowerPC] Add intrinsics for rldimi/rlwimi/rlwnm (PR #82968)

2024-03-03 Thread Chen Zheng via cfe-commits


@@ -58,3 +58,18 @@ entry:
   %8 = or i64 %6, %7
   ret i64 %8
 }
+
+define i64 @rldimi_intrinsic(i64 %a) {
+; CHECK-LABEL: rldimi_intrinsic:
+; CHECK:   # %bb.0:
+; CHECK-NEXT:rldimi 3, 3, 8, 0
+; CHECK-NEXT:rldimi 3, 3, 16, 0
+; CHECK-NEXT:rldimi 3, 3, 32, 0
+; CHECK-NEXT:blr
+  %r1 = call i64 @llvm.ppc.rldimi(i64 %a, i64 %a, i32 8, i64 -256)
+  %r2 = call i64 @llvm.ppc.rldimi(i64 %r1, i64 %r1, i32 16, i64 -65536)
+  %r3 = call i64 @llvm.ppc.rldimi(i64 %r2, i64 %r2, i32 32, i64 -4294967296)
+  ret i64 %r3
+}
+
+declare i64 @llvm.ppc.rldimi(i64, i64, i32 immarg, i64 immarg)

chenzheng1030 wrote:

OK. Thanks. TIL : )

https://github.com/llvm/llvm-project/pull/82968
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [llvm] [PowerPC] Add intrinsics for rldimi/rlwimi/rlwnm (PR #82968)

2024-03-03 Thread Qiu Chaofan via cfe-commits


@@ -1,61 +1,111 @@
-; All of these ands and shifts should be folded into rlwimi's
-; RUN: llc -verify-machineinstrs < %s -mtriple=ppc32-- -o %t
-; RUN: not grep and %t
-; RUN: not grep srawi %t
-; RUN: not grep srwi %t
-; RUN: not grep slwi %t
-; RUN: grep rlwinm %t | count 8
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py 
UTC_ARGS: --version 4
+; RUN: llc -verify-machineinstrs < %s -mtriple=powerpc64le-unknown-linux-gnu | 
FileCheck %s
 
 define i32 @test1(i32 %a) {
+; CHECK-LABEL: test1:
+; CHECK:   # %bb.0: # %entry
+; CHECK-NEXT:rlwinm 3, 3, 0, 4, 19
+; CHECK-NEXT:blr
 entry:
-   %tmp.1 = and i32 %a, 268431360  ;  [#uses=1]
-   ret i32 %tmp.1
+  %tmp.1 = and i32 %a, 268431360
+  ret i32 %tmp.1
 }
 
 define i32 @test2(i32 %a) {
+; CHECK-LABEL: test2:
+; CHECK:   # %bb.0: # %entry
+; CHECK-NEXT:rldicl 3, 3, 36, 24
+; CHECK-NEXT:rldicl 3, 3, 28, 32

ecnelises wrote:

Yes, I just meant that `rlwinm` does not sign-extend the higher 32 bits.

https://github.com/llvm/llvm-project/pull/82968
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [llvm] [PowerPC] Add intrinsics for rldimi/rlwimi/rlwnm (PR #82968)

2024-03-03 Thread Chen Zheng via cfe-commits


@@ -1,61 +1,111 @@
-; All of these ands and shifts should be folded into rlwimi's
-; RUN: llc -verify-machineinstrs < %s -mtriple=ppc32-- -o %t
-; RUN: not grep and %t
-; RUN: not grep srawi %t
-; RUN: not grep srwi %t
-; RUN: not grep slwi %t
-; RUN: grep rlwinm %t | count 8
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py 
UTC_ARGS: --version 4
+; RUN: llc -verify-machineinstrs < %s -mtriple=powerpc64le-unknown-linux-gnu | 
FileCheck %s
 
 define i32 @test1(i32 %a) {
+; CHECK-LABEL: test1:
+; CHECK:   # %bb.0: # %entry
+; CHECK-NEXT:rlwinm 3, 3, 0, 4, 19
+; CHECK-NEXT:blr
 entry:
-   %tmp.1 = and i32 %a, 268431360  ;  [#uses=1]
-   ret i32 %tmp.1
+  %tmp.1 = and i32 %a, 268431360
+  ret i32 %tmp.1
 }
 
 define i32 @test2(i32 %a) {
+; CHECK-LABEL: test2:
+; CHECK:   # %bb.0: # %entry
+; CHECK-NEXT:rldicl 3, 3, 36, 24
+; CHECK-NEXT:rldicl 3, 3, 28, 32

chenzheng1030 wrote:

Hmm, OK. `rlwinm` does not match the 64-bit semantics here. Would you please 
just remove this case? It would be strange to keep it in the rlwinm file now.

Please don't treat `rlwinm` as a 32-bit instruction. It also modifies the high 
32 bits of a GPR, with well-defined results, especially when MB/ME wrap around.

https://github.com/llvm/llvm-project/pull/82968
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [llvm] [PowerPC] Add intrinsics for rldimi/rlwimi/rlwnm (PR #82968)

2024-03-03 Thread Qiu Chaofan via cfe-commits


@@ -1,61 +1,111 @@
-; All of these ands and shifts should be folded into rlwimi's
-; RUN: llc -verify-machineinstrs < %s -mtriple=ppc32-- -o %t
-; RUN: not grep and %t
-; RUN: not grep srawi %t
-; RUN: not grep srwi %t
-; RUN: not grep slwi %t
-; RUN: grep rlwinm %t | count 8
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py 
UTC_ARGS: --version 4
+; RUN: llc -verify-machineinstrs < %s -mtriple=powerpc64le-unknown-linux-gnu | 
FileCheck %s
 
 define i32 @test1(i32 %a) {
+; CHECK-LABEL: test1:
+; CHECK:   # %bb.0: # %entry
+; CHECK-NEXT:rlwinm 3, 3, 0, 4, 19
+; CHECK-NEXT:blr
 entry:
-   %tmp.1 = and i32 %a, 268431360  ;  [#uses=1]
-   ret i32 %tmp.1
+  %tmp.1 = and i32 %a, 268431360
+  ret i32 %tmp.1
 }
 
 define i32 @test2(i32 %a) {
+; CHECK-LABEL: test2:
+; CHECK:   # %bb.0: # %entry
+; CHECK-NEXT:rldicl 3, 3, 36, 24
+; CHECK-NEXT:rldicl 3, 3, 28, 32

ecnelises wrote:

`rlwinm` is a 32-bit instruction; in 64-bit mode both the input argument and 
the return value are sign-extended. `rldicl` here does more work, including 
sign-extending the value (which is why no `extsw` appears here).

https://github.com/llvm/llvm-project/pull/82968
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [llvm] [PowerPC] Add intrinsics for rldimi/rlwimi/rlwnm (PR #82968)

2024-03-03 Thread Qiu Chaofan via cfe-commits


@@ -58,3 +58,18 @@ entry:
   %8 = or i64 %6, %7
   ret i64 %8
 }
+
+define i64 @rldimi_intrinsic(i64 %a) {
+; CHECK-LABEL: rldimi_intrinsic:
+; CHECK:   # %bb.0:
+; CHECK-NEXT:rldimi 3, 3, 8, 0
+; CHECK-NEXT:rldimi 3, 3, 16, 0
+; CHECK-NEXT:rldimi 3, 3, 32, 0
+; CHECK-NEXT:blr
+  %r1 = call i64 @llvm.ppc.rldimi(i64 %a, i64 %a, i32 8, i64 -256)
+  %r2 = call i64 @llvm.ppc.rldimi(i64 %r1, i64 %r1, i32 16, i64 -65536)
+  %r3 = call i64 @llvm.ppc.rldimi(i64 %r2, i64 %r2, i32 32, i64 -4294967296)
+  ret i64 %r3
+}
+
+declare i64 @llvm.ppc.rldimi(i64, i64, i32 immarg, i64 immarg)

ecnelises wrote:

`immarg` specifies that the argument must be a constant, as in 
PowerPC/builtins-ppc-xlcompat-test.ll.

https://github.com/llvm/llvm-project/pull/82968
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [llvm] [PowerPC] Add intrinsics for rldimi/rlwimi/rlwnm (PR #82968)

2024-03-03 Thread Chen Zheng via cfe-commits

chenzheng1030 wrote:

The failure in Buildkite should be unrelated, but it would be better to 
double-check.

https://github.com/llvm/llvm-project/pull/82968
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [llvm] [PowerPC] Add intrinsics for rldimi/rlwimi/rlwnm (PR #82968)

2024-03-03 Thread Chen Zheng via cfe-commits


@@ -1,70 +1,117 @@
-; All of these ands and shifts should be folded into rlwimi's
-; RUN: llc -verify-machineinstrs < %s -mtriple=ppc32-- | not grep and
-; RUN: llc -verify-machineinstrs < %s -mtriple=ppc32-- | grep rlwimi | count 8
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py 
UTC_ARGS: --version 4
+; RUN: llc -verify-machineinstrs < %s -mtriple=powerpc64le-unknown-linux-gnu | 
FileCheck %s
 
 define i32 @test1(i32 %x, i32 %y) {
+; CHECK-LABEL: test1:
+; CHECK:   # %bb.0: # %entry
+; CHECK-NEXT:rlwimi 4, 3, 16, 0, 15
+; CHECK-NEXT:mr 3, 4
+; CHECK-NEXT:blr
 entry:
-   %tmp.3 = shl i32 %x, 16 ;  [#uses=1]
-   %tmp.7 = and i32 %y, 65535  ;  [#uses=1]
-   %tmp.9 = or i32 %tmp.7, %tmp.3  ;  [#uses=1]
-   ret i32 %tmp.9
+  %tmp.3 = shl i32 %x, 16
+  %tmp.7 = and i32 %y, 65535
+  %tmp.9 = or i32 %tmp.7, %tmp.3
+  ret i32 %tmp.9
 }
 
 define i32 @test2(i32 %x, i32 %y) {
+; CHECK-LABEL: test2:
+; CHECK:   # %bb.0: # %entry
+; CHECK-NEXT:rlwimi 3, 4, 16, 0, 15
+; CHECK-NEXT:blr
 entry:
-   %tmp.7 = and i32 %x, 65535  ;  [#uses=1]
-   %tmp.3 = shl i32 %y, 16 ;  [#uses=1]
-   %tmp.9 = or i32 %tmp.7, %tmp.3  ;  [#uses=1]
-   ret i32 %tmp.9
+  %tmp.7 = and i32 %x, 65535
+  %tmp.3 = shl i32 %y, 16
+  %tmp.9 = or i32 %tmp.7, %tmp.3
+  ret i32 %tmp.9
 }
 
 define i32 @test3(i32 %x, i32 %y) {
+; CHECK-LABEL: test3:
+; CHECK:   # %bb.0: # %entry
+; CHECK-NEXT:rlwimi 4, 3, 16, 16, 31
+; CHECK-NEXT:mr 3, 4
+; CHECK-NEXT:blr
 entry:
-   %tmp.3 = lshr i32 %x, 16;  [#uses=1]
-   %tmp.6 = and i32 %y, -65536 ;  [#uses=1]
-   %tmp.7 = or i32 %tmp.6, %tmp.3  ;  [#uses=1]
-   ret i32 %tmp.7
+  %tmp.3 = lshr i32 %x, 16
+  %tmp.6 = and i32 %y, -65536
+  %tmp.7 = or i32 %tmp.6, %tmp.3
+  ret i32 %tmp.7
 }
 
 define i32 @test4(i32 %x, i32 %y) {
+; CHECK-LABEL: test4:
+; CHECK:   # %bb.0: # %entry
+; CHECK-NEXT:rlwimi 3, 4, 16, 16, 31
+; CHECK-NEXT:blr
 entry:
-   %tmp.6 = and i32 %x, -65536 ;  [#uses=1]
-   %tmp.3 = lshr i32 %y, 16;  [#uses=1]
-   %tmp.7 = or i32 %tmp.6, %tmp.3  ;  [#uses=1]
-   ret i32 %tmp.7
+  %tmp.6 = and i32 %x, -65536
+  %tmp.3 = lshr i32 %y, 16
+  %tmp.7 = or i32 %tmp.6, %tmp.3
+  ret i32 %tmp.7
 }
 
 define i32 @test5(i32 %x, i32 %y) {
+; CHECK-LABEL: test5:
+; CHECK:   # %bb.0: # %entry
+; CHECK-NEXT:rlwimi 4, 3, 1, 0, 15
+; CHECK-NEXT:mr 3, 4
+; CHECK-NEXT:blr
 entry:
-   %tmp.3 = shl i32 %x, 1  ;  [#uses=1]
-   %tmp.4 = and i32 %tmp.3, -65536 ;  [#uses=1]
-   %tmp.7 = and i32 %y, 65535  ;  [#uses=1]
-   %tmp.9 = or i32 %tmp.4, %tmp.7  ;  [#uses=1]
-   ret i32 %tmp.9
+  %tmp.3 = shl i32 %x, 1
+  %tmp.4 = and i32 %tmp.3, -65536
+  %tmp.7 = and i32 %y, 65535
+  %tmp.9 = or i32 %tmp.4, %tmp.7
+  ret i32 %tmp.9
 }
 
 define i32 @test6(i32 %x, i32 %y) {
+; CHECK-LABEL: test6:
+; CHECK:   # %bb.0: # %entry
+; CHECK-NEXT:rlwimi 3, 4, 1, 0, 15
+; CHECK-NEXT:blr
 entry:
-   %tmp.7 = and i32 %x, 65535  ;  [#uses=1]
-   %tmp.3 = shl i32 %y, 1  ;  [#uses=1]
-   %tmp.4 = and i32 %tmp.3, -65536 ;  [#uses=1]
-   %tmp.9 = or i32 %tmp.4, %tmp.7  ;  [#uses=1]
-   ret i32 %tmp.9
+  %tmp.7 = and i32 %x, 65535
+  %tmp.3 = shl i32 %y, 1
+  %tmp.4 = and i32 %tmp.3, -65536
+  %tmp.9 = or i32 %tmp.4, %tmp.7
+  ret i32 %tmp.9
 }
 
 define i32 @test7(i32 %x, i32 %y) {
+; CHECK-LABEL: test7:
+; CHECK:   # %bb.0: # %entry
+; CHECK-NEXT:andis. 3, 3, 65535
+; CHECK-NEXT:rldimi 3, 4, 0, 48
+; CHECK-NEXT:blr
 entry:
-   %tmp.2 = and i32 %x, -65536 ;  [#uses=1]
-   %tmp.5 = and i32 %y, 65535  ;  [#uses=1]
-   %tmp.7 = or i32 %tmp.5, %tmp.2  ;  [#uses=1]
-   ret i32 %tmp.7
+  %tmp.2 = and i32 %x, -65536
+  %tmp.5 = and i32 %y, 65535
+  %tmp.7 = or i32 %tmp.5, %tmp.2
+  ret i32 %tmp.7
 }
 
 define i32 @test8(i32 %bar) {
+; CHECK-LABEL: test8:
+; CHECK:   # %bb.0: # %entry
+; CHECK-NEXT:rlwimi 3, 3, 1, 30, 30
+; CHECK-NEXT:blr
 entry:
-   %tmp.3 = shl i32 %bar, 1;  [#uses=1]
-   %tmp.4 = and i32 %tmp.3, 2  ;  [#uses=1]
-   %tmp.6 = and i32 %bar, -3   ;  [#uses=1]
-   %tmp.7 = or i32 %tmp.4, %tmp.6  ;  [#uses=1]
-   ret i32 %tmp.7
+  %tmp.3 = shl i32 %bar, 1
+  %tmp.4 = and i32 %tmp.3, 2
+  %tmp.6 = and i32 %bar, -3
+  %tmp.7 = or i32 %tmp.4, %tmp.6
+  ret i32 %tmp.7
 }
+
+define i32 @test9(i32 %a, i32 %b) {
+; CHECK-LABEL: test9:
+; CHECK:   # %bb.0: # %entry
+; CHECK-NEXT:rlwimi 3, 4, 8, 20, 26
+; CHECK-NEXT:blr
+entry:
+  %r = call i32 @llvm.ppc.rlwimi(i32 %a, i32 %b, i32 8, i32 4064)
+  ret i32 %r
+}
+
+declare i32 @llvm.ppc.rlwimi(i32, i32, i32 immarg, i32 i

[clang] [llvm] [PowerPC] Add intrinsics for rldimi/rlwimi/rlwnm (PR #82968)

2024-03-03 Thread Chen Zheng via cfe-commits


@@ -1,61 +1,111 @@
-; All of these ands and shifts should be folded into rlwimi's
-; RUN: llc -verify-machineinstrs < %s -mtriple=ppc32-- -o %t
-; RUN: not grep and %t
-; RUN: not grep srawi %t
-; RUN: not grep srwi %t
-; RUN: not grep slwi %t
-; RUN: grep rlwinm %t | count 8
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py 
UTC_ARGS: --version 4
+; RUN: llc -verify-machineinstrs < %s -mtriple=powerpc64le-unknown-linux-gnu | 
FileCheck %s
 
 define i32 @test1(i32 %a) {
+; CHECK-LABEL: test1:
+; CHECK:   # %bb.0: # %entry
+; CHECK-NEXT:rlwinm 3, 3, 0, 4, 19
+; CHECK-NEXT:blr
 entry:
-   %tmp.1 = and i32 %a, 268431360  ;  [#uses=1]
-   ret i32 %tmp.1
+  %tmp.1 = and i32 %a, 268431360
+  ret i32 %tmp.1
 }
 
 define i32 @test2(i32 %a) {
+; CHECK-LABEL: test2:
+; CHECK:   # %bb.0: # %entry
+; CHECK-NEXT:rldicl 3, 3, 36, 24
+; CHECK-NEXT:rldicl 3, 3, 28, 32

chenzheng1030 wrote:

After the triple change, two `rldicl` instructions are now emitted instead of a 
single `rlwinm`. Would you please add a FIXME here, or create an issue on 
GitHub? Thanks.

https://github.com/llvm/llvm-project/pull/82968
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [llvm] [PowerPC] Add intrinsics for rldimi/rlwimi/rlwnm (PR #82968)

2024-03-03 Thread Chen Zheng via cfe-commits


@@ -1,61 +1,111 @@
-; All of these ands and shifts should be folded into rlwimi's
-; RUN: llc -verify-machineinstrs < %s -mtriple=ppc32-- -o %t
-; RUN: not grep and %t
-; RUN: not grep srawi %t
-; RUN: not grep srwi %t
-; RUN: not grep slwi %t
-; RUN: grep rlwinm %t | count 8
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py 
UTC_ARGS: --version 4
+; RUN: llc -verify-machineinstrs < %s -mtriple=powerpc64le-unknown-linux-gnu | 
FileCheck %s
 
 define i32 @test1(i32 %a) {
+; CHECK-LABEL: test1:
+; CHECK:   # %bb.0: # %entry
+; CHECK-NEXT:rlwinm 3, 3, 0, 4, 19
+; CHECK-NEXT:blr
 entry:
-   %tmp.1 = and i32 %a, 268431360  ;  [#uses=1]
-   ret i32 %tmp.1
+  %tmp.1 = and i32 %a, 268431360
+  ret i32 %tmp.1
 }
 
 define i32 @test2(i32 %a) {
+; CHECK-LABEL: test2:
+; CHECK:   # %bb.0: # %entry
+; CHECK-NEXT:rldicl 3, 3, 36, 24
+; CHECK-NEXT:rldicl 3, 3, 28, 32
+; CHECK-NEXT:blr
 entry:
-   %tmp.1 = and i32 %a, -268435441 ;  [#uses=1]
-   ret i32 %tmp.1
+  %tmp.1 = and i32 %a, -268435441
+  ret i32 %tmp.1
 }
 
 define i32 @test3(i32 %a) {
+; CHECK-LABEL: test3:
+; CHECK:   # %bb.0: # %entry
+; CHECK-NEXT:rlwinm 3, 3, 24, 24, 31
+; CHECK-NEXT:blr
 entry:
-   %tmp.2 = ashr i32 %a, 8 ;  [#uses=1]
-   %tmp.3 = and i32 %tmp.2, 255;  [#uses=1]
-   ret i32 %tmp.3
+  %tmp.2 = ashr i32 %a, 8
+  %tmp.3 = and i32 %tmp.2, 255
+  ret i32 %tmp.3
 }
 
 define i32 @test4(i32 %a) {
+; CHECK-LABEL: test4:
+; CHECK:   # %bb.0: # %entry
+; CHECK-NEXT:rlwinm 3, 3, 24, 24, 31
+; CHECK-NEXT:blr
 entry:
-   %tmp.3 = lshr i32 %a, 8 ;  [#uses=1]
-   %tmp.4 = and i32 %tmp.3, 255;  [#uses=1]
-   ret i32 %tmp.4
+  %tmp.3 = lshr i32 %a, 8
+  %tmp.4 = and i32 %tmp.3, 255
+  ret i32 %tmp.4
 }
 
 define i32 @test5(i32 %a) {
+; CHECK-LABEL: test5:
+; CHECK:   # %bb.0: # %entry
+; CHECK-NEXT:rlwinm 3, 3, 8, 0, 8
+; CHECK-NEXT:blr
 entry:
-   %tmp.2 = shl i32 %a, 8  ;  [#uses=1]
-   %tmp.3 = and i32 %tmp.2, -8388608   ;  [#uses=1]
-   ret i32 %tmp.3
+  %tmp.2 = shl i32 %a, 8
+  %tmp.3 = and i32 %tmp.2, -8388608
+  ret i32 %tmp.3
 }
 
 define i32 @test6(i32 %a) {
+; CHECK-LABEL: test6:
+; CHECK:   # %bb.0: # %entry
+; CHECK-NEXT:rlwinm 3, 3, 24, 24, 31
+; CHECK-NEXT:blr
 entry:
-   %tmp.1 = and i32 %a, 65280  ;  [#uses=1]
-   %tmp.2 = ashr i32 %tmp.1, 8 ;  [#uses=1]
-   ret i32 %tmp.2
+  %tmp.1 = and i32 %a, 65280
+  %tmp.2 = ashr i32 %tmp.1, 8
+  ret i32 %tmp.2
 }
 
 define i32 @test7(i32 %a) {
+; CHECK-LABEL: test7:
+; CHECK:   # %bb.0: # %entry
+; CHECK-NEXT:rlwinm 3, 3, 24, 24, 31
+; CHECK-NEXT:blr
 entry:
-   %tmp.1 = and i32 %a, 65280  ;  [#uses=1]
-   %tmp.2 = lshr i32 %tmp.1, 8 ;  [#uses=1]
-   ret i32 %tmp.2
+  %tmp.1 = and i32 %a, 65280
+  %tmp.2 = lshr i32 %tmp.1, 8
+  ret i32 %tmp.2
 }
 
 define i32 @test8(i32 %a) {
+; CHECK-LABEL: test8:
+; CHECK:   # %bb.0: # %entry
+; CHECK-NEXT:rlwinm 3, 3, 8, 0, 7
+; CHECK-NEXT:blr
 entry:
-   %tmp.1 = and i32 %a, 16711680   ;  [#uses=1]
-   %tmp.2 = shl i32 %tmp.1, 8  ;  [#uses=1]
-   ret i32 %tmp.2
+  %tmp.1 = and i32 %a, 16711680
+  %tmp.2 = shl i32 %tmp.1, 8
+  ret i32 %tmp.2
 }
+
+define i32 @test9(i32 %a, i32 %s) {
+; CHECK-LABEL: test9:
+; CHECK:   # %bb.0: # %entry
+; CHECK-NEXT:rlwnm 3, 3, 4, 23, 31
+; CHECK-NEXT:blr
+entry:
+  %r = call i32 @llvm.ppc.rlwnm(i32 %a, i32 %s, i32 511)
+  ret i32 %r
+}
+
+define i32 @test10(i32 %a) {
+; CHECK-LABEL: test10:
+; CHECK:   # %bb.0: # %entry
+; CHECK-NEXT:rlwinm 3, 3, 31, 23, 31
+; CHECK-NEXT:blr
+entry:
+  %r = call i32 @llvm.ppc.rlwnm(i32 %a, i32 31, i32 511)
+  ret i32 %r
+}
+
+declare i32 @llvm.ppc.rlwnm(i32, i32, i32 immarg)

chenzheng1030 wrote:

nit: ditto


https://github.com/llvm/llvm-project/pull/82968
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [llvm] [PowerPC] Add intrinsics for rldimi/rlwimi/rlwnm (PR #82968)

2024-03-03 Thread Chen Zheng via cfe-commits

https://github.com/chenzheng1030 edited 
https://github.com/llvm/llvm-project/pull/82968
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [llvm] [PowerPC] Add intrinsics for rldimi/rlwimi/rlwnm (PR #82968)

2024-03-03 Thread Chen Zheng via cfe-commits

https://github.com/chenzheng1030 approved this pull request.

LGTM except for two comments on the test case changes. One is a nit, and the 
other is a separate issue unrelated to this patch.

Thanks for implementing this.

https://github.com/llvm/llvm-project/pull/82968
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [llvm] [PowerPC] Add intrinsics for rldimi/rlwimi/rlwnm (PR #82968)

2024-03-03 Thread Chen Zheng via cfe-commits


@@ -58,3 +58,18 @@ entry:
   %8 = or i64 %6, %7
   ret i64 %8
 }
+
+define i64 @rldimi_intrinsic(i64 %a) {
+; CHECK-LABEL: rldimi_intrinsic:
+; CHECK:   # %bb.0:
+; CHECK-NEXT:rldimi 3, 3, 8, 0
+; CHECK-NEXT:rldimi 3, 3, 16, 0
+; CHECK-NEXT:rldimi 3, 3, 32, 0
+; CHECK-NEXT:blr
+  %r1 = call i64 @llvm.ppc.rldimi(i64 %a, i64 %a, i32 8, i64 -256)
+  %r2 = call i64 @llvm.ppc.rldimi(i64 %r1, i64 %r1, i32 16, i64 -65536)
+  %r3 = call i64 @llvm.ppc.rldimi(i64 %r2, i64 %r2, i32 32, i64 -4294967296)
+  ret i64 %r3
+}
+
+declare i64 @llvm.ppc.rldimi(i64, i64, i32 immarg, i64 immarg)

chenzheng1030 wrote:

nit: is a `%` missing before `immarg`? Why not just remove them?


https://github.com/llvm/llvm-project/pull/82968
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [llvm] [PowerPC] Add intrinsics for rldimi/rlwimi/rlwnm (PR #82968)

2024-02-26 Thread Qiu Chaofan via cfe-commits

https://github.com/ecnelises updated 
https://github.com/llvm/llvm-project/pull/82968

>From a06fa5e18313ad50019d50006e34a6b8249d95cd Mon Sep 17 00:00:00 2001
From: Qiu Chaofan 
Date: Mon, 26 Feb 2024 16:32:28 +0800
Subject: [PATCH 1/3] [PowerPC] Add intrinsics for rldimi/rlwimi/rlwnm

These builtins are already there in Clang, however current codegen may
produce suboptimal results due to their complex behavior. Implement them
as intrinsics to ensure expected instructions are emitted.
---
 clang/lib/CodeGen/CGBuiltin.cpp   |  29 ++---
 .../PowerPC/builtins-ppc-xlcompat-rotate.c|  24 ++--
 llvm/include/llvm/IR/IntrinsicsPowerPC.td |  12 ++
 llvm/lib/Target/PowerPC/PPCISelLowering.cpp   |  52 
 llvm/test/CodeGen/PowerPC/rldimi.ll   |  15 +++
 llvm/test/CodeGen/PowerPC/rlwimi.ll   | 123 --
 llvm/test/CodeGen/PowerPC/rlwinm.ll   | 108 ++-
 7 files changed, 259 insertions(+), 104 deletions(-)

diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp
index 734eb5a035ca49..5d55be6e9e99df 100644
--- a/clang/lib/CodeGen/CGBuiltin.cpp
+++ b/clang/lib/CodeGen/CGBuiltin.cpp
@@ -17080,37 +17080,24 @@ Value *CodeGenFunction::EmitPPCBuiltinExpr(unsigned 
BuiltinID,
 }
 return Builder.CreateCall(CGM.getIntrinsic(ID), Ops, "");
   }
-  // Rotate and insert under mask operation.
-  // __rldimi(rs, is, shift, mask)
-  // (rotl64(rs, shift) & mask) | (is & ~mask)
-  // __rlwimi(rs, is, shift, mask)
-  // (rotl(rs, shift) & mask) | (is & ~mask)
   case PPC::BI__builtin_ppc_rldimi:
   case PPC::BI__builtin_ppc_rlwimi: {
 Value *Op0 = EmitScalarExpr(E->getArg(0));
 Value *Op1 = EmitScalarExpr(E->getArg(1));
 Value *Op2 = EmitScalarExpr(E->getArg(2));
 Value *Op3 = EmitScalarExpr(E->getArg(3));
-llvm::Type *Ty = Op0->getType();
-Function *F = CGM.getIntrinsic(Intrinsic::fshl, Ty);
-if (BuiltinID == PPC::BI__builtin_ppc_rldimi)
-  Op2 = Builder.CreateZExt(Op2, Int64Ty);
-Value *Shift = Builder.CreateCall(F, {Op0, Op0, Op2});
-Value *X = Builder.CreateAnd(Shift, Op3);
-Value *Y = Builder.CreateAnd(Op1, Builder.CreateNot(Op3));
-return Builder.CreateOr(X, Y);
-  }
-  // Rotate and insert under mask operation.
-  // __rlwnm(rs, shift, mask)
-  // rotl(rs, shift) & mask
+return Builder.CreateCall(
+CGM.getIntrinsic(BuiltinID == PPC::BI__builtin_ppc_rldimi
+ ? Intrinsic::ppc_rldimi
+ : Intrinsic::ppc_rlwimi),
+{Op0, Op1, Op2, Op3});
+  }
   case PPC::BI__builtin_ppc_rlwnm: {
 Value *Op0 = EmitScalarExpr(E->getArg(0));
 Value *Op1 = EmitScalarExpr(E->getArg(1));
 Value *Op2 = EmitScalarExpr(E->getArg(2));
-llvm::Type *Ty = Op0->getType();
-Function *F = CGM.getIntrinsic(Intrinsic::fshl, Ty);
-Value *Shift = Builder.CreateCall(F, {Op0, Op0, Op1});
-return Builder.CreateAnd(Shift, Op2);
+return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::ppc_rlwnm),
+  {Op0, Op1, Op2});
   }
   case PPC::BI__builtin_ppc_poppar4:
   case PPC::BI__builtin_ppc_poppar8: {
diff --git a/clang/test/CodeGen/PowerPC/builtins-ppc-xlcompat-rotate.c 
b/clang/test/CodeGen/PowerPC/builtins-ppc-xlcompat-rotate.c
index d96bfb4621421e..b218547c00d931 100644
--- a/clang/test/CodeGen/PowerPC/builtins-ppc-xlcompat-rotate.c
+++ b/clang/test/CodeGen/PowerPC/builtins-ppc-xlcompat-rotate.c
@@ -16,11 +16,8 @@ void test_builtin_ppc_rldimi() {
   // CHECK:   %res = alloca i64, align 8
   // CHECK-NEXT:  [[RA:%[0-9]+]] = load i64, ptr @ull, align 8
   // CHECK-NEXT:  [[RB:%[0-9]+]] = load i64, ptr @ull, align 8
-  // CHECK-NEXT:  [[RC:%[0-9]+]] = call i64 @llvm.fshl.i64(i64 [[RA]], i64 
[[RA]], i64 63)
-  // CHECK-NEXT:  [[RD:%[0-9]+]] = and i64 [[RC]], 72057593769492480
-  // CHECK-NEXT:  [[RE:%[0-9]+]] = and i64 [[RB]], -72057593769492481
-  // CHECK-NEXT:  [[RF:%[0-9]+]] = or i64 [[RD]], [[RE]]
-  // CHECK-NEXT:  store i64 [[RF]], ptr %res, align 8
+  // CHECK-NEXT:  [[RC:%[0-9]+]] = call i64 @llvm.ppc.rldimi(i64 [[RA]], i64 
[[RB]], i32 63, i64 72057593769492480)
+  // CHECK-NEXT:  store i64 [[RC]], ptr %res, align 8
   // CHECK-NEXT:  ret void
 
   /*shift = 63, mask = 0x00FFFFFFF0000000 = 72057593769492480, ~mask = 
0xFF0000000FFFFFFF = -72057593769492481*/
@@ -32,11 +29,8 @@ void test_builtin_ppc_rlwimi() {
   // CHECK:   %res = alloca i32, align 4
   // CHECK-NEXT:  [[RA:%[0-9]+]] = load i32, ptr @ui, align 4
   // CHECK-NEXT:  [[RB:%[0-9]+]] = load i32, ptr @ui, align 4
-  // CHECK-NEXT:  [[RC:%[0-9]+]] = call i32 @llvm.fshl.i32(i32 [[RA]], i32 
[[RA]], i32 31)
-  // CHECK-NEXT:  [[RD:%[0-9]+]] = and i32 [[RC]], 16776960
-  // CHECK-NEXT:  [[RE:%[0-9]+]] = and i32 [[RB]], -16776961
-  // CHECK-NEXT:  [[RF:%[0-9]+]] = or i32 [[RD]], [[RE]]
-  // CHECK-NEXT:  store i32 [[RF]], ptr %res, align 4
+  // CHECK-NEXT:  [[RC:%[0-9]+]] = call i32 @llvm.ppc.rlwimi(i32 [[RA]], i32 
[[R

[clang] [llvm] [PowerPC] Add intrinsics for rldimi/rlwimi/rlwnm (PR #82968)

2024-02-26 Thread via cfe-commits

github-actions[bot] wrote:




:warning: C/C++ code formatter, clang-format found issues in your code. 
:warning:



You can test this locally with the following command:


```bash
git-clang-format --diff c67a4ae47c86f1f390db7ba0ea9c021abff130f8 
d9c9b4eb91ca3cec0bc469364914706b89ab1eeb -- clang/lib/CodeGen/CGBuiltin.cpp 
clang/test/CodeGen/PowerPC/builtins-ppc-xlcompat-rotate.c 
llvm/lib/Target/PowerPC/PPCISelLowering.cpp
```





View the diff from clang-format here.


```diff
diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp 
b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
index 178904d1e3..7a9b1520ec 100644
--- a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
+++ b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
@@ -10741,8 +10741,7 @@ SDValue 
PPCTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
   case Intrinsic::ppc_rldimi: {
 uint64_t SH = Op.getConstantOperandVal(3);
 unsigned MB = 0, ME = 0;
-if (!isRunOfOnes64(Op.getConstantOperandVal(4), MB, ME) ||
-ME != 63 - SH)
+if (!isRunOfOnes64(Op.getConstantOperandVal(4), MB, ME) || ME != 63 - SH)
   report_fatal_error("invalid rldimi mask!");
 return SDValue(DAG.getMachineNode(
PPC::RLDIMI, dl, MVT::i64,

```




https://github.com/llvm/llvm-project/pull/82968
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [llvm] [PowerPC] Add intrinsics for rldimi/rlwimi/rlwnm (PR #82968)

2024-02-26 Thread Qiu Chaofan via cfe-commits

https://github.com/ecnelises updated 
https://github.com/llvm/llvm-project/pull/82968

>From a06fa5e18313ad50019d50006e34a6b8249d95cd Mon Sep 17 00:00:00 2001
From: Qiu Chaofan 
Date: Mon, 26 Feb 2024 16:32:28 +0800
Subject: [PATCH 1/2] [PowerPC] Add intrinsics for rldimi/rlwimi/rlwnm

These builtins are already there in Clang, however current codegen may
produce suboptimal results due to their complex behavior. Implement them
as intrinsics to ensure expected instructions are emitted.
---
 clang/lib/CodeGen/CGBuiltin.cpp   |  29 ++---
 .../PowerPC/builtins-ppc-xlcompat-rotate.c|  24 ++--
 llvm/include/llvm/IR/IntrinsicsPowerPC.td |  12 ++
 llvm/lib/Target/PowerPC/PPCISelLowering.cpp   |  52 
 llvm/test/CodeGen/PowerPC/rldimi.ll   |  15 +++
 llvm/test/CodeGen/PowerPC/rlwimi.ll   | 123 --
 llvm/test/CodeGen/PowerPC/rlwinm.ll   | 108 ++-
 7 files changed, 259 insertions(+), 104 deletions(-)

diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp
index 734eb5a035ca49..5d55be6e9e99df 100644
--- a/clang/lib/CodeGen/CGBuiltin.cpp
+++ b/clang/lib/CodeGen/CGBuiltin.cpp
@@ -17080,37 +17080,24 @@ Value *CodeGenFunction::EmitPPCBuiltinExpr(unsigned 
BuiltinID,
 }
 return Builder.CreateCall(CGM.getIntrinsic(ID), Ops, "");
   }
-  // Rotate and insert under mask operation.
-  // __rldimi(rs, is, shift, mask)
-  // (rotl64(rs, shift) & mask) | (is & ~mask)
-  // __rlwimi(rs, is, shift, mask)
-  // (rotl(rs, shift) & mask) | (is & ~mask)
   case PPC::BI__builtin_ppc_rldimi:
   case PPC::BI__builtin_ppc_rlwimi: {
 Value *Op0 = EmitScalarExpr(E->getArg(0));
 Value *Op1 = EmitScalarExpr(E->getArg(1));
 Value *Op2 = EmitScalarExpr(E->getArg(2));
 Value *Op3 = EmitScalarExpr(E->getArg(3));
-llvm::Type *Ty = Op0->getType();
-Function *F = CGM.getIntrinsic(Intrinsic::fshl, Ty);
-if (BuiltinID == PPC::BI__builtin_ppc_rldimi)
-  Op2 = Builder.CreateZExt(Op2, Int64Ty);
-Value *Shift = Builder.CreateCall(F, {Op0, Op0, Op2});
-Value *X = Builder.CreateAnd(Shift, Op3);
-Value *Y = Builder.CreateAnd(Op1, Builder.CreateNot(Op3));
-return Builder.CreateOr(X, Y);
-  }
-  // Rotate and insert under mask operation.
-  // __rlwnm(rs, shift, mask)
-  // rotl(rs, shift) & mask
+return Builder.CreateCall(
+CGM.getIntrinsic(BuiltinID == PPC::BI__builtin_ppc_rldimi
+ ? Intrinsic::ppc_rldimi
+ : Intrinsic::ppc_rlwimi),
+{Op0, Op1, Op2, Op3});
+  }
   case PPC::BI__builtin_ppc_rlwnm: {
 Value *Op0 = EmitScalarExpr(E->getArg(0));
 Value *Op1 = EmitScalarExpr(E->getArg(1));
 Value *Op2 = EmitScalarExpr(E->getArg(2));
-llvm::Type *Ty = Op0->getType();
-Function *F = CGM.getIntrinsic(Intrinsic::fshl, Ty);
-Value *Shift = Builder.CreateCall(F, {Op0, Op0, Op1});
-return Builder.CreateAnd(Shift, Op2);
+return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::ppc_rlwnm),
+  {Op0, Op1, Op2});
   }
   case PPC::BI__builtin_ppc_poppar4:
   case PPC::BI__builtin_ppc_poppar8: {
diff --git a/clang/test/CodeGen/PowerPC/builtins-ppc-xlcompat-rotate.c 
b/clang/test/CodeGen/PowerPC/builtins-ppc-xlcompat-rotate.c
index d96bfb4621421e..b218547c00d931 100644
--- a/clang/test/CodeGen/PowerPC/builtins-ppc-xlcompat-rotate.c
+++ b/clang/test/CodeGen/PowerPC/builtins-ppc-xlcompat-rotate.c
@@ -16,11 +16,8 @@ void test_builtin_ppc_rldimi() {
   // CHECK:   %res = alloca i64, align 8
   // CHECK-NEXT:  [[RA:%[0-9]+]] = load i64, ptr @ull, align 8
   // CHECK-NEXT:  [[RB:%[0-9]+]] = load i64, ptr @ull, align 8
-  // CHECK-NEXT:  [[RC:%[0-9]+]] = call i64 @llvm.fshl.i64(i64 [[RA]], i64 
[[RA]], i64 63)
-  // CHECK-NEXT:  [[RD:%[0-9]+]] = and i64 [[RC]], 72057593769492480
-  // CHECK-NEXT:  [[RE:%[0-9]+]] = and i64 [[RB]], -72057593769492481
-  // CHECK-NEXT:  [[RF:%[0-9]+]] = or i64 [[RD]], [[RE]]
-  // CHECK-NEXT:  store i64 [[RF]], ptr %res, align 8
+  // CHECK-NEXT:  [[RC:%[0-9]+]] = call i64 @llvm.ppc.rldimi(i64 [[RA]], i64 
[[RB]], i32 63, i64 72057593769492480)
+  // CHECK-NEXT:  store i64 [[RC]], ptr %res, align 8
   // CHECK-NEXT:  ret void
 
   /*shift = 63, mask = 0x00FFFFFFF0000000 = 72057593769492480, ~mask = 
0xFF0000000FFFFFFF = -72057593769492481*/
@@ -32,11 +29,8 @@ void test_builtin_ppc_rlwimi() {
   // CHECK:   %res = alloca i32, align 4
   // CHECK-NEXT:  [[RA:%[0-9]+]] = load i32, ptr @ui, align 4
   // CHECK-NEXT:  [[RB:%[0-9]+]] = load i32, ptr @ui, align 4
-  // CHECK-NEXT:  [[RC:%[0-9]+]] = call i32 @llvm.fshl.i32(i32 [[RA]], i32 
[[RA]], i32 31)
-  // CHECK-NEXT:  [[RD:%[0-9]+]] = and i32 [[RC]], 16776960
-  // CHECK-NEXT:  [[RE:%[0-9]+]] = and i32 [[RB]], -16776961
-  // CHECK-NEXT:  [[RF:%[0-9]+]] = or i32 [[RD]], [[RE]]
-  // CHECK-NEXT:  store i32 [[RF]], ptr %res, align 4
+  // CHECK-NEXT:  [[RC:%[0-9]+]] = call i32 @llvm.ppc.rlwimi(i32 [[RA]], i32 
[[R

[clang] [llvm] [PowerPC] Add intrinsics for rldimi/rlwimi/rlwnm (PR #82968)

2024-02-26 Thread Chen Zheng via cfe-commits

chenzheng1030 wrote:

> If you run into issues using normal integer ops, please file bugs. Most 
> people aren't going to hand-tune their code like this; builtins like this are 
> at best an ugly workaround.

Yes, a user should not try to write source code (using compiler builtins) 
just to emit one "powerful" instruction. Instead, it is the compiler's 
responsibility to generate these instructions from the user's source code.

I think the reason these builtins were added is that the LLVM PowerPC target 
wants to stay compatible with the XL C/C++ commercial compiler with respect 
to these builtins.

https://github.com/llvm/llvm-project/pull/82968
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [llvm] [PowerPC] Add intrinsics for rldimi/rlwimi/rlwnm (PR #82968)

2024-02-26 Thread Chen Zheng via cfe-commits


@@ -641,6 +641,7 @@ PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine 
&TM,
 
   // We want to custom lower some of our intrinsics.
   setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom);
+  // setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::i64, Custom);

chenzheng1030 wrote:

Is this needed?

https://github.com/llvm/llvm-project/pull/82968
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [llvm] [PowerPC] Add intrinsics for rldimi/rlwimi/rlwnm (PR #82968)

2024-02-26 Thread Chen Zheng via cfe-commits


@@ -10722,6 +10723,20 @@ static bool getVectorCompareInfo(SDValue Intrin, int 
&CompareOpc,
   return true;
 }
 
+bool isContiguousMask(const APInt &Val, unsigned &MB, unsigned &ME,

chenzheng1030 wrote:

Is it possible to reuse `isRunOfOnes()`/`isRunOfOnes64()` in 
`llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.h`?

https://github.com/llvm/llvm-project/pull/82968
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [llvm] [PowerPC] Add intrinsics for rldimi/rlwimi/rlwnm (PR #82968)

2024-02-26 Thread Eli Friedman via cfe-commits

https://github.com/efriedma-quic commented:

If you run into issues using normal integer ops, please file bugs.  Most people 
aren't going to hand-tune their code like this; builtins like this are at best 
an ugly workaround.

That said, I guess I'm not strongly against adding a backdoor to force a 
particular instruction here.

https://github.com/llvm/llvm-project/pull/82968
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [llvm] [PowerPC] Add intrinsics for rldimi/rlwimi/rlwnm (PR #82968)

2024-02-26 Thread Eli Friedman via cfe-commits


@@ -10737,6 +10752,43 @@ SDValue 
PPCTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
   return DAG.getRegister(PPC::X13, MVT::i64);
 return DAG.getRegister(PPC::R2, MVT::i32);
 
+  case Intrinsic::ppc_rldimi: {
+uint64_t SH = Op.getConstantOperandVal(3);
+unsigned MB = 0, ME = 0;
+if (!isContiguousMask(Op.getConstantOperandAPInt(4), MB, ME, 64) ||
+ME != 63 - SH)
+  llvm_unreachable("invalid rldimi mask!");

efriedma-quic wrote:

Please use report_fatal_error for this sort of check.

https://github.com/llvm/llvm-project/pull/82968
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [llvm] [PowerPC] Add intrinsics for rldimi/rlwimi/rlwnm (PR #82968)

2024-02-26 Thread Eli Friedman via cfe-commits

https://github.com/efriedma-quic edited 
https://github.com/llvm/llvm-project/pull/82968
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [llvm] [PowerPC] Add intrinsics for rldimi/rlwimi/rlwnm (PR #82968)

2024-02-26 Thread via cfe-commits

llvmbot wrote:



@llvm/pr-subscribers-clang-codegen
@llvm/pr-subscribers-llvm-ir

@llvm/pr-subscribers-backend-powerpc

Author: Qiu Chaofan (ecnelises)


Changes

These builtins already exist in Clang; however, the current codegen may 
produce suboptimal results due to their complex behavior. Implement them as 
intrinsics to ensure the expected instructions are emitted.

---
Full diff: https://github.com/llvm/llvm-project/pull/82968.diff


7 Files Affected:

- (modified) clang/lib/CodeGen/CGBuiltin.cpp (+8-21) 
- (modified) clang/test/CodeGen/PowerPC/builtins-ppc-xlcompat-rotate.c (+8-16) 
- (modified) llvm/include/llvm/IR/IntrinsicsPowerPC.td (+12) 
- (modified) llvm/lib/Target/PowerPC/PPCISelLowering.cpp (+52) 
- (modified) llvm/test/CodeGen/PowerPC/rldimi.ll (+15) 
- (modified) llvm/test/CodeGen/PowerPC/rlwimi.ll (+85-38) 
- (modified) llvm/test/CodeGen/PowerPC/rlwinm.ll (+79-29) 


``diff
diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp
index 734eb5a035ca49..5d55be6e9e99df 100644
--- a/clang/lib/CodeGen/CGBuiltin.cpp
+++ b/clang/lib/CodeGen/CGBuiltin.cpp
@@ -17080,37 +17080,24 @@ Value *CodeGenFunction::EmitPPCBuiltinExpr(unsigned 
BuiltinID,
 }
 return Builder.CreateCall(CGM.getIntrinsic(ID), Ops, "");
   }
-  // Rotate and insert under mask operation.
-  // __rldimi(rs, is, shift, mask)
-  // (rotl64(rs, shift) & mask) | (is & ~mask)
-  // __rlwimi(rs, is, shift, mask)
-  // (rotl(rs, shift) & mask) | (is & ~mask)
   case PPC::BI__builtin_ppc_rldimi:
   case PPC::BI__builtin_ppc_rlwimi: {
 Value *Op0 = EmitScalarExpr(E->getArg(0));
 Value *Op1 = EmitScalarExpr(E->getArg(1));
 Value *Op2 = EmitScalarExpr(E->getArg(2));
 Value *Op3 = EmitScalarExpr(E->getArg(3));
-llvm::Type *Ty = Op0->getType();
-Function *F = CGM.getIntrinsic(Intrinsic::fshl, Ty);
-if (BuiltinID == PPC::BI__builtin_ppc_rldimi)
-  Op2 = Builder.CreateZExt(Op2, Int64Ty);
-Value *Shift = Builder.CreateCall(F, {Op0, Op0, Op2});
-Value *X = Builder.CreateAnd(Shift, Op3);
-Value *Y = Builder.CreateAnd(Op1, Builder.CreateNot(Op3));
-return Builder.CreateOr(X, Y);
-  }
-  // Rotate and insert under mask operation.
-  // __rlwnm(rs, shift, mask)
-  // rotl(rs, shift) & mask
+return Builder.CreateCall(
+CGM.getIntrinsic(BuiltinID == PPC::BI__builtin_ppc_rldimi
+ ? Intrinsic::ppc_rldimi
+ : Intrinsic::ppc_rlwimi),
+{Op0, Op1, Op2, Op3});
+  }
   case PPC::BI__builtin_ppc_rlwnm: {
 Value *Op0 = EmitScalarExpr(E->getArg(0));
 Value *Op1 = EmitScalarExpr(E->getArg(1));
 Value *Op2 = EmitScalarExpr(E->getArg(2));
-llvm::Type *Ty = Op0->getType();
-Function *F = CGM.getIntrinsic(Intrinsic::fshl, Ty);
-Value *Shift = Builder.CreateCall(F, {Op0, Op0, Op1});
-return Builder.CreateAnd(Shift, Op2);
+return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::ppc_rlwnm),
+  {Op0, Op1, Op2});
   }
   case PPC::BI__builtin_ppc_poppar4:
   case PPC::BI__builtin_ppc_poppar8: {
diff --git a/clang/test/CodeGen/PowerPC/builtins-ppc-xlcompat-rotate.c 
b/clang/test/CodeGen/PowerPC/builtins-ppc-xlcompat-rotate.c
index d96bfb4621421e..b218547c00d931 100644
--- a/clang/test/CodeGen/PowerPC/builtins-ppc-xlcompat-rotate.c
+++ b/clang/test/CodeGen/PowerPC/builtins-ppc-xlcompat-rotate.c
@@ -16,11 +16,8 @@ void test_builtin_ppc_rldimi() {
   // CHECK:   %res = alloca i64, align 8
   // CHECK-NEXT:  [[RA:%[0-9]+]] = load i64, ptr @ull, align 8
   // CHECK-NEXT:  [[RB:%[0-9]+]] = load i64, ptr @ull, align 8
-  // CHECK-NEXT:  [[RC:%[0-9]+]] = call i64 @llvm.fshl.i64(i64 [[RA]], i64 
[[RA]], i64 63)
-  // CHECK-NEXT:  [[RD:%[0-9]+]] = and i64 [[RC]], 72057593769492480
-  // CHECK-NEXT:  [[RE:%[0-9]+]] = and i64 [[RB]], -72057593769492481
-  // CHECK-NEXT:  [[RF:%[0-9]+]] = or i64 [[RD]], [[RE]]
-  // CHECK-NEXT:  store i64 [[RF]], ptr %res, align 8
+  // CHECK-NEXT:  [[RC:%[0-9]+]] = call i64 @llvm.ppc.rldimi(i64 [[RA]], i64 
[[RB]], i32 63, i64 72057593769492480)
+  // CHECK-NEXT:  store i64 [[RC]], ptr %res, align 8
   // CHECK-NEXT:  ret void
 
   /*shift = 63, mask = 0x00FFFFFFF0000000 = 72057593769492480, ~mask = 
0xFF0000000FFFFFFF = -72057593769492481*/
@@ -32,11 +29,8 @@ void test_builtin_ppc_rlwimi() {
   // CHECK:   %res = alloca i32, align 4
   // CHECK-NEXT:  [[RA:%[0-9]+]] = load i32, ptr @ui, align 4
   // CHECK-NEXT:  [[RB:%[0-9]+]] = load i32, ptr @ui, align 4
-  // CHECK-NEXT:  [[RC:%[0-9]+]] = call i32 @llvm.fshl.i32(i32 [[RA]], i32 
[[RA]], i32 31)
-  // CHECK-NEXT:  [[RD:%[0-9]+]] = and i32 [[RC]], 16776960
-  // CHECK-NEXT:  [[RE:%[0-9]+]] = and i32 [[RB]], -16776961
-  // CHECK-NEXT:  [[RF:%[0-9]+]] = or i32 [[RD]], [[RE]]
-  // CHECK-NEXT:  store i32 [[RF]], ptr %res, align 4
+  // CHECK-NEXT:  [[RC:%[0-9]+]] = call i32 @llvm.ppc.rlwimi(i32 [[RA]], i32 
[[RB]], i32 31, i32 16776960)
+  // CHECK-NEXT:  store i32 [[RC]], ptr %res

[clang] [llvm] [PowerPC] Add intrinsics for rldimi/rlwimi/rlwnm (PR #82968)

2024-02-26 Thread Qiu Chaofan via cfe-commits

https://github.com/ecnelises created 
https://github.com/llvm/llvm-project/pull/82968

These builtins already exist in Clang; however, the current codegen may 
produce suboptimal results due to their complex behavior. Implement them as 
intrinsics to ensure the expected instructions are emitted.

>From a06fa5e18313ad50019d50006e34a6b8249d95cd Mon Sep 17 00:00:00 2001
From: Qiu Chaofan 
Date: Mon, 26 Feb 2024 16:32:28 +0800
Subject: [PATCH] [PowerPC] Add intrinsics for rldimi/rlwimi/rlwnm

These builtins are already there in Clang, however current codegen may
produce suboptimal results due to their complex behavior. Implement them
as intrinsics to ensure expected instructions are emitted.
---
 clang/lib/CodeGen/CGBuiltin.cpp   |  29 ++---
 .../PowerPC/builtins-ppc-xlcompat-rotate.c|  24 ++--
 llvm/include/llvm/IR/IntrinsicsPowerPC.td |  12 ++
 llvm/lib/Target/PowerPC/PPCISelLowering.cpp   |  52 
 llvm/test/CodeGen/PowerPC/rldimi.ll   |  15 +++
 llvm/test/CodeGen/PowerPC/rlwimi.ll   | 123 --
 llvm/test/CodeGen/PowerPC/rlwinm.ll   | 108 ++-
 7 files changed, 259 insertions(+), 104 deletions(-)

diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp
index 734eb5a035ca49..5d55be6e9e99df 100644
--- a/clang/lib/CodeGen/CGBuiltin.cpp
+++ b/clang/lib/CodeGen/CGBuiltin.cpp
@@ -17080,37 +17080,24 @@ Value *CodeGenFunction::EmitPPCBuiltinExpr(unsigned 
BuiltinID,
 }
 return Builder.CreateCall(CGM.getIntrinsic(ID), Ops, "");
   }
-  // Rotate and insert under mask operation.
-  // __rldimi(rs, is, shift, mask)
-  // (rotl64(rs, shift) & mask) | (is & ~mask)
-  // __rlwimi(rs, is, shift, mask)
-  // (rotl(rs, shift) & mask) | (is & ~mask)
   case PPC::BI__builtin_ppc_rldimi:
   case PPC::BI__builtin_ppc_rlwimi: {
 Value *Op0 = EmitScalarExpr(E->getArg(0));
 Value *Op1 = EmitScalarExpr(E->getArg(1));
 Value *Op2 = EmitScalarExpr(E->getArg(2));
 Value *Op3 = EmitScalarExpr(E->getArg(3));
-llvm::Type *Ty = Op0->getType();
-Function *F = CGM.getIntrinsic(Intrinsic::fshl, Ty);
-if (BuiltinID == PPC::BI__builtin_ppc_rldimi)
-  Op2 = Builder.CreateZExt(Op2, Int64Ty);
-Value *Shift = Builder.CreateCall(F, {Op0, Op0, Op2});
-Value *X = Builder.CreateAnd(Shift, Op3);
-Value *Y = Builder.CreateAnd(Op1, Builder.CreateNot(Op3));
-return Builder.CreateOr(X, Y);
-  }
-  // Rotate and insert under mask operation.
-  // __rlwnm(rs, shift, mask)
-  // rotl(rs, shift) & mask
+return Builder.CreateCall(
+CGM.getIntrinsic(BuiltinID == PPC::BI__builtin_ppc_rldimi
+ ? Intrinsic::ppc_rldimi
+ : Intrinsic::ppc_rlwimi),
+{Op0, Op1, Op2, Op3});
+  }
   case PPC::BI__builtin_ppc_rlwnm: {
 Value *Op0 = EmitScalarExpr(E->getArg(0));
 Value *Op1 = EmitScalarExpr(E->getArg(1));
 Value *Op2 = EmitScalarExpr(E->getArg(2));
-llvm::Type *Ty = Op0->getType();
-Function *F = CGM.getIntrinsic(Intrinsic::fshl, Ty);
-Value *Shift = Builder.CreateCall(F, {Op0, Op0, Op1});
-return Builder.CreateAnd(Shift, Op2);
+return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::ppc_rlwnm),
+  {Op0, Op1, Op2});
   }
   case PPC::BI__builtin_ppc_poppar4:
   case PPC::BI__builtin_ppc_poppar8: {
diff --git a/clang/test/CodeGen/PowerPC/builtins-ppc-xlcompat-rotate.c 
b/clang/test/CodeGen/PowerPC/builtins-ppc-xlcompat-rotate.c
index d96bfb4621421e..b218547c00d931 100644
--- a/clang/test/CodeGen/PowerPC/builtins-ppc-xlcompat-rotate.c
+++ b/clang/test/CodeGen/PowerPC/builtins-ppc-xlcompat-rotate.c
@@ -16,11 +16,8 @@ void test_builtin_ppc_rldimi() {
   // CHECK:   %res = alloca i64, align 8
   // CHECK-NEXT:  [[RA:%[0-9]+]] = load i64, ptr @ull, align 8
   // CHECK-NEXT:  [[RB:%[0-9]+]] = load i64, ptr @ull, align 8
-  // CHECK-NEXT:  [[RC:%[0-9]+]] = call i64 @llvm.fshl.i64(i64 [[RA]], i64 
[[RA]], i64 63)
-  // CHECK-NEXT:  [[RD:%[0-9]+]] = and i64 [[RC]], 72057593769492480
-  // CHECK-NEXT:  [[RE:%[0-9]+]] = and i64 [[RB]], -72057593769492481
-  // CHECK-NEXT:  [[RF:%[0-9]+]] = or i64 [[RD]], [[RE]]
-  // CHECK-NEXT:  store i64 [[RF]], ptr %res, align 8
+  // CHECK-NEXT:  [[RC:%[0-9]+]] = call i64 @llvm.ppc.rldimi(i64 [[RA]], i64 
[[RB]], i32 63, i64 72057593769492480)
+  // CHECK-NEXT:  store i64 [[RC]], ptr %res, align 8
   // CHECK-NEXT:  ret void
 
   /*shift = 63, mask = 0x00FFFFFFF0000000 = 72057593769492480, ~mask = 
0xFF0000000FFFFFFF = -72057593769492481*/
@@ -32,11 +29,8 @@ void test_builtin_ppc_rlwimi() {
   // CHECK:   %res = alloca i32, align 4
   // CHECK-NEXT:  [[RA:%[0-9]+]] = load i32, ptr @ui, align 4
   // CHECK-NEXT:  [[RB:%[0-9]+]] = load i32, ptr @ui, align 4
-  // CHECK-NEXT:  [[RC:%[0-9]+]] = call i32 @llvm.fshl.i32(i32 [[RA]], i32 
[[RA]], i32 31)
-  // CHECK-NEXT:  [[RD:%[0-9]+]] = and i32 [[RC]], 16776960
-  // CHECK-NEXT:  [[RE:%[0-9]+]] = and i32 [[RB]], -16