[PATCH] D105194: [PowerPC] Add PowerPC cmpb builtin and emit target independent code for XL compatibility

Victor Huang via Phabricator via cfe-commits Mon, 19 Jul 2021 10:52:51 -0700

NeHuang updated this revision to Diff 359843.
NeHuang added a comment.

Address review comments from Nemanja.



Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D105194/new/

https://reviews.llvm.org/D105194

Files:
  clang/include/clang/Basic/BuiltinsPPC.def
  clang/lib/Basic/Targets/PPC.cpp
  clang/lib/CodeGen/CGBuiltin.cpp
  clang/test/CodeGen/builtins-ppc-xlcompat-compare.c
  llvm/include/llvm/IR/IntrinsicsPowerPC.td
  llvm/lib/Target/PowerPC/PPCInstr64Bit.td
  llvm/lib/Target/PowerPC/PPCInstrInfo.td
  llvm/test/CodeGen/PowerPC/builtins-ppc-xlcompat-cmpb-32.ll
  llvm/test/CodeGen/PowerPC/builtins-ppc-xlcompat-cmpb-64.ll

Index: llvm/test/CodeGen/PowerPC/builtins-ppc-xlcompat-cmpb-64.ll
===================================================================
--- /dev/null
+++ llvm/test/CodeGen/PowerPC/builtins-ppc-xlcompat-cmpb-64.ll
@@ -0,0 +1,16 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -verify-machineinstrs -mtriple=powerpc64-unknown-unknown -mcpu=pwr7 < %s | FileCheck %s
+; RUN: llc -verify-machineinstrs -mtriple=powerpc64le-unknown-unknown -mcpu=pwr8 < %s | FileCheck %s
+; RUN: llc -verify-machineinstrs -mtriple=powerpc64-unknown-aix -mcpu=pwr7 < %s | FileCheck %s
+
+define i64 @test_cmpb(i64 %a, i64 %b) {
+; CHECK-LABEL: test_cmpb:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    cmpb 3, 3, 4
+; CHECK-NEXT:    blr
+entry:
+  %cmpb = tail call i64 @llvm.ppc.cmpb.i64.i64.i64(i64 %a, i64 %b)
+  ret i64 %cmpb
+}
+
+declare i64 @llvm.ppc.cmpb.i64.i64.i64(i64, i64)
Index: llvm/test/CodeGen/PowerPC/builtins-ppc-xlcompat-cmpb-32.ll
===================================================================
--- /dev/null
+++ llvm/test/CodeGen/PowerPC/builtins-ppc-xlcompat-cmpb-32.ll
@@ -0,0 +1,27 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -verify-machineinstrs -mtriple=powerpc-unknown-aix \
+; RUN:   --ppc-asm-full-reg-names -mcpu=pwr7 < %s | FileCheck %s
+
+define i64 @test_cmpb(i64 %a, i64 %b) {
+; CHECK-LABEL: test_cmpb:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    cmpb 4, 4, 6
+; CHECK-NEXT:    cmpb 3, 3, 5
+; CHECK-NEXT:    blr
+entry:
+  %0 = trunc i64 %a to i32
+  %1 = trunc i64 %b to i32
+  %2 = lshr i64 %a, 32
+  %3 = trunc i64 %2 to i32
+  %4 = lshr i64 %b, 32
+  %5 = trunc i64 %4 to i32
+  %cmpb = tail call i32 @llvm.ppc.cmpb.i32.i32.i32(i32 %0, i32 %1)
+  %6 = zext i32 %cmpb to i64
+  %cmpb1 = tail call i32 @llvm.ppc.cmpb.i32.i32.i32(i32 %3, i32 %5)
+  %7 = zext i32 %cmpb1 to i64
+  %8 = shl nuw i64 %7, 32
+  %9 = or i64 %8, %6
+  ret i64 %9
+}
+
+declare i32 @llvm.ppc.cmpb.i32.i32.i32(i32, i32)
Index: llvm/lib/Target/PowerPC/PPCInstrInfo.td
===================================================================
--- llvm/lib/Target/PowerPC/PPCInstrInfo.td
+++ llvm/lib/Target/PowerPC/PPCInstrInfo.td
@@ -5291,6 +5291,8 @@
           (i32 (MULHW $a, $b))>;
 def : Pat<(i32 (int_ppc_mulhwu gprc:$a, gprc:$b)),
           (i32 (MULHWU $a, $b))>;
+def : Pat<(i32 (int_ppc_cmpb gprc:$a, gprc:$b)),
+          (i32 (CMPB $a, $b))>;
 
 def : Pat<(int_ppc_load2r ForceXForm:$ptr),
           (LHBRX ForceXForm:$ptr)>;
Index: llvm/lib/Target/PowerPC/PPCInstr64Bit.td
===================================================================
--- llvm/lib/Target/PowerPC/PPCInstr64Bit.td
+++ llvm/lib/Target/PowerPC/PPCInstr64Bit.td
@@ -1796,6 +1796,9 @@
           (STDBRX g8rc:$a, ForceXForm:$ptr)>;
 }
 
+def : Pat<(i64 (int_ppc_cmpb g8rc:$a, g8rc:$b)),
+          (i64 (CMPB8 $a, $b))>;
+
 let Predicates = [IsISA3_0] in {
 // DARN (deliver random number)
 // L=0 for 32-bit, L=1 for conditioned random, L=2 for raw random
Index: llvm/include/llvm/IR/IntrinsicsPowerPC.td
===================================================================
--- llvm/include/llvm/IR/IntrinsicsPowerPC.td
+++ llvm/include/llvm/IR/IntrinsicsPowerPC.td
@@ -1576,6 +1576,9 @@
   def int_ppc_setb
       : GCCBuiltin<"__builtin_ppc_setb">,
         Intrinsic<[llvm_i64_ty], [llvm_i64_ty, llvm_i64_ty], [IntrNoMem]>;
+  def int_ppc_cmpb
+      : Intrinsic<[llvm_anyint_ty], [llvm_anyint_ty, llvm_anyint_ty],
+                  [IntrNoMem]>;
   // multiply
   def int_ppc_mulhd
       : GCCBuiltin<"__builtin_ppc_mulhd">,
Index: clang/test/CodeGen/builtins-ppc-xlcompat-compare.c
===================================================================
--- /dev/null
+++ clang/test/CodeGen/builtins-ppc-xlcompat-compare.c
@@ -0,0 +1,44 @@
+// REQUIRES: powerpc-registered-target
+// RUN: %clang_cc1 -triple powerpc64-unknown-unknown \
+// RUN:   -emit-llvm %s -o - -target-cpu pwr7 | FileCheck %s --check-prefix=CHECK-64B
+// RUN: %clang_cc1 -triple powerpc64le-unknown-unknown \
+// RUN:   -emit-llvm %s -o - -target-cpu pwr8 | FileCheck %s --check-prefix=CHECK-64B
+// RUN: %clang_cc1 -triple powerpc-unknown-aix \
+// RUN:   -emit-llvm %s -o - -target-cpu pwr7 | FileCheck %s --check-prefix=CHECK-32B
+// RUN: %clang_cc1 -triple powerpc64-unknown-aix \
+// RUN:   -emit-llvm %s -o - -target-cpu pwr7 | FileCheck %s --check-prefix=CHECK-64B
+
+// CHECK-64B-LABEL: @test_builtin_ppc_cmpb(
+// CHECK-64B:         [[LLA_ADDR:%.*]] = alloca i64, align 8
+// CHECK-64B-NEXT:    [[LLB_ADDR:%.*]] = alloca i64, align 8
+// CHECK-64B-NEXT:    store i64 [[LLA:%.*]], i64* [[LLA_ADDR]], align 8
+// CHECK-64B-NEXT:    store i64 [[LLB:%.*]], i64* [[LLB_ADDR]], align 8
+// CHECK-64B-NEXT:    [[TMP0:%.*]] = load i64, i64* [[LLA_ADDR]], align 8
+// CHECK-64B-NEXT:    [[TMP1:%.*]] = load i64, i64* [[LLB_ADDR]], align 8
+// CHECK-64B-NEXT:    [[CMPB:%.*]] = call i64 @llvm.ppc.cmpb.i64.i64.i64(i64 [[TMP0]], i64 [[TMP1]])
+// CHECK-64B-NEXT:    ret i64 [[CMPB]]
+//
+// CHECK-32B-LABEL: @test_builtin_ppc_cmpb(
+// CHECK-32B:         [[LLA_ADDR:%.*]] = alloca i64, align 8
+// CHECK-32B-NEXT:    [[LLB_ADDR:%.*]] = alloca i64, align 8
+// CHECK-32B-NEXT:    store i64 [[LLA:%.*]], i64* [[LLA_ADDR]], align 8
+// CHECK-32B-NEXT:    store i64 [[LLB:%.*]], i64* [[LLB_ADDR]], align 8
+// CHECK-32B-NEXT:    [[TMP0:%.*]] = load i64, i64* [[LLA_ADDR]], align 8
+// CHECK-32B-NEXT:    [[TMP1:%.*]] = load i64, i64* [[LLB_ADDR]], align 8
+// CHECK-32B-NEXT:    [[TMP2:%.*]] = trunc i64 [[TMP0]] to i32
+// CHECK-32B-NEXT:    [[TMP3:%.*]] = trunc i64 [[TMP1]] to i32
+// CHECK-32B-NEXT:    [[TMP4:%.*]] = lshr i64 [[TMP0]], 32
+// CHECK-32B-NEXT:    [[TMP5:%.*]] = trunc i64 [[TMP4]] to i32
+// CHECK-32B-NEXT:    [[TMP6:%.*]] = lshr i64 [[TMP1]], 32
+// CHECK-32B-NEXT:    [[TMP7:%.*]] = trunc i64 [[TMP6]] to i32
+// CHECK-32B-NEXT:    [[CMPB:%.*]] = call i32 @llvm.ppc.cmpb.i32.i32.i32(i32 [[TMP2]], i32 [[TMP3]])
+// CHECK-32B-NEXT:    [[TMP8:%.*]] = zext i32 [[CMPB]] to i64
+// CHECK-32B-NEXT:    [[CMPB1:%.*]] = call i32 @llvm.ppc.cmpb.i32.i32.i32(i32 [[TMP5]], i32 [[TMP7]])
+// CHECK-32B-NEXT:    [[TMP9:%.*]] = zext i32 [[CMPB1]] to i64
+// CHECK-32B-NEXT:    [[TMP10:%.*]] = shl i64 [[TMP9]], 32
+// CHECK-32B-NEXT:    [[TMP11:%.*]] = or i64 [[TMP8]], [[TMP10]]
+// CHECK-32B-NEXT:    ret i64 [[TMP11]]
+//
+long long test_builtin_ppc_cmpb(long long lla, long long llb) {
+  return __builtin_ppc_cmpb(lla, llb);
+}
Index: clang/lib/CodeGen/CGBuiltin.cpp
===================================================================
--- clang/lib/CodeGen/CGBuiltin.cpp
+++ clang/lib/CodeGen/CGBuiltin.cpp
@@ -15212,7 +15212,42 @@
                                      "cast");
     return Result;
   }
-
+  case PPC::BI__builtin_ppc_cmpb: {
+    if (getTarget().getTriple().isPPC64()) {
+      Function *F =
+          CGM.getIntrinsic(Intrinsic::ppc_cmpb, {Int64Ty, Int64Ty, Int64Ty});
+      return Builder.CreateCall(F, Ops, "cmpb");
+    }
+    // 32-bit: apply the i32 intrinsic to each half and recombine, i.e. emit:
+    // %conv = trunc i64 %a to i32
+    // %conv1 = trunc i64 %b to i32
+    // %shr = lshr i64 %a, 32
+    // %conv2 = trunc i64 %shr to i32
+    // %shr3 = lshr i64 %b, 32
+    // %conv4 = trunc i64 %shr3 to i32
+    // %0 = call i32 @llvm.ppc.cmpb.i32.i32.i32(i32 %conv, i32 %conv1)
+    // %conv5 = zext i32 %0 to i64
+    // %1 = call i32 @llvm.ppc.cmpb.i32.i32.i32(i32 %conv2, i32 %conv4)
+    // %conv614 = zext i32 %1 to i64
+    // %shl = shl i64 %conv614, 32
+    // %or = or i64 %conv5, %shl
+    // ret i64 %or
+    Function *F =
+        CGM.getIntrinsic(Intrinsic::ppc_cmpb, {Int32Ty, Int32Ty, Int32Ty});
+    Value *ArgOneLo = Builder.CreateTrunc(Ops[0], Int32Ty);
+    Value *ArgTwoLo = Builder.CreateTrunc(Ops[1], Int32Ty);
+    Constant *ShiftAmt = ConstantInt::get(Int64Ty, 32);
+    Value *ArgOneHi =
+        Builder.CreateTrunc(Builder.CreateLShr(Ops[0], ShiftAmt), Int32Ty);
+    Value *ArgTwoHi =
+        Builder.CreateTrunc(Builder.CreateLShr(Ops[1], ShiftAmt), Int32Ty);
+    Value *ResLo =
+        Builder.CreateZExt(Builder.CreateCall(F, {ArgOneLo, ArgTwoLo}, "cmpb"), Int64Ty);
+    Value *ResHiShift =
+        Builder.CreateZExt(Builder.CreateCall(F, {ArgOneHi, ArgTwoHi}, "cmpb"), Int64Ty);
+    Value *ResHi = Builder.CreateShl(ResHiShift, ShiftAmt);
+    return Builder.CreateOr(ResLo, ResHi);
+  }
   // Copy sign
   case PPC::BI__builtin_vsx_xvcpsgnsp:
   case PPC::BI__builtin_vsx_xvcpsgndp: {
Index: clang/lib/Basic/Targets/PPC.cpp
===================================================================
--- clang/lib/Basic/Targets/PPC.cpp
+++ clang/lib/Basic/Targets/PPC.cpp
@@ -135,6 +135,7 @@
   Builder.defineMacro("__cmpeqb", "__builtin_ppc_cmpeqb");
   Builder.defineMacro("__cmprb", "__builtin_ppc_cmprb");
   Builder.defineMacro("__setb", "__builtin_ppc_setb");
+  Builder.defineMacro("__cmpb", "__builtin_ppc_cmpb");
   Builder.defineMacro("__mulhd", "__builtin_ppc_mulhd");
   Builder.defineMacro("__mulhdu", "__builtin_ppc_mulhdu");
   Builder.defineMacro("__mulhw", "__builtin_ppc_mulhw");
Index: clang/include/clang/Basic/BuiltinsPPC.def
===================================================================
--- clang/include/clang/Basic/BuiltinsPPC.def
+++ clang/include/clang/Basic/BuiltinsPPC.def
@@ -77,6 +77,7 @@
 BUILTIN(__builtin_ppc_cmpeqb, "LLiLLiLLi", "")
 BUILTIN(__builtin_ppc_cmprb, "iCIiii", "")
 BUILTIN(__builtin_ppc_setb, "LLiLLiLLi", "")
+BUILTIN(__builtin_ppc_cmpb, "LLiLLiLLi", "")
 // Multiply
 BUILTIN(__builtin_ppc_mulhd, "LLiLiLi", "")
 BUILTIN(__builtin_ppc_mulhdu, "ULLiULiULi", "")

_______________________________________________
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

[PATCH] D105194: [PowerPC] Add PowerPC cmpb builtin and emit target independent code for XL compatibility

Reply via email to