nemanjai updated this revision to Diff 361658.
nemanjai added a comment.

Cleaned up some of the control flow.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D106757/new/

https://reviews.llvm.org/D106757

Files:
  clang/include/clang/Basic/BuiltinsPPC.def
  clang/lib/CodeGen/CGBuiltin.cpp
  clang/lib/Headers/altivec.h
  clang/lib/Sema/SemaChecking.cpp
  clang/test/CodeGen/builtins-ppc-xlcompat-pwr8.c

Index: clang/test/CodeGen/builtins-ppc-xlcompat-pwr8.c
===================================================================
--- clang/test/CodeGen/builtins-ppc-xlcompat-pwr8.c
+++ clang/test/CodeGen/builtins-ppc-xlcompat-pwr8.c
@@ -44,3 +44,13 @@
 // CHECK-NOPWR8: error: this builtin is only valid on POWER8 or later CPUs
   return __builtin_ppc_stbcx(c_addr, c);
 }
+
+vector unsigned char test_ldrmb(char *ptr) { // ldrmb with a constant count of 14
+  // CHECK-NOPWR8: error: this builtin is only valid on POWER8 or later CPUs
+  return __builtin_vsx_ldrmb(ptr, 14);
+}
+
+void test_strmbb(char *ptr, vector unsigned char data) { // strmb, constant count of 14
+  // CHECK-NOPWR8: error: this builtin is only valid on POWER8 or later CPUs
+  __builtin_vsx_strmb(ptr, 14, data);
+}
Index: clang/lib/Sema/SemaChecking.cpp
===================================================================
--- clang/lib/Sema/SemaChecking.cpp
+++ clang/lib/Sema/SemaChecking.cpp
@@ -3440,6 +3440,11 @@
   case PPC::BI__builtin_ppc_lbarx:
     return SemaFeatureCheck(*this, TheCall, "isa-v207-instructions",
                             diag::err_ppc_builtin_only_on_arch, "8");
+  case PPC::BI__builtin_vsx_ldrmb:
+  case PPC::BI__builtin_vsx_strmb:
+    return SemaFeatureCheck(*this, TheCall, "isa-v207-instructions",
+                            diag::err_ppc_builtin_only_on_arch, "8") ||
+           SemaBuiltinConstantArgRange(TheCall, 1, 1, 16); // Arg 1: constant in [1, 16].
 #define CUSTOM_BUILTIN(Name, Intr, Types, Acc) \
   case PPC::BI__builtin_##Name: \
     return SemaBuiltinPPCMMACall(TheCall, Types);
Index: clang/lib/Headers/altivec.h
===================================================================
--- clang/lib/Headers/altivec.h
+++ clang/lib/Headers/altivec.h
@@ -3143,6 +3143,15 @@
 #endif
 #endif
 
+#if defined(__POWER9_VECTOR__) && defined(__powerpc64__) /* use vec_x*_len_r */
+#define __vec_ldrmb(PTR, CNT) vec_xl_len_r((const unsigned char *)(PTR), (CNT))
+#define __vec_strmb(PTR, CNT, VAL)                                             \
+  vec_xst_len_r((VAL), (unsigned char *)(PTR), (CNT))
+#else /* otherwise map the names directly onto the VSX builtins */
+#define __vec_ldrmb __builtin_vsx_ldrmb
+#define __vec_strmb __builtin_vsx_strmb
+#endif /* __POWER9_VECTOR__ && __powerpc64__ */
+
 /* vec_cpsgn */
 
 #ifdef __VSX__
Index: clang/lib/CodeGen/CGBuiltin.cpp
===================================================================
--- clang/lib/CodeGen/CGBuiltin.cpp
+++ clang/lib/CodeGen/CGBuiltin.cpp
@@ -15069,6 +15069,143 @@
     llvm::Function *F = CGM.getIntrinsic(ID);
     return Builder.CreateCall(F, Ops, "");
   }
+  case PPC::BI__builtin_vsx_ldrmb: {
+    // Loads the constant number of bytes Ops[1] from Ops[0]: an unaligned VMX
+    // load sequence (lvx, lvx, vperm) so as to avoid crossing a page boundary,
+    // then a shuffle against zero into the right side of the vector register.
+    int64_t NumBytes = cast<ConstantInt>(Ops[1])->getZExtValue();
+    llvm::Type *ResTy = ConvertType(E->getType());
+    bool IsLE = getTarget().isLittleEndian();
+
+    // All 16 bytes requested: one vector load with alignment 1 is sufficient.
+    if (NumBytes == 16) {
+      Value *BC = Builder.CreateBitCast(Ops[0], ResTy->getPointerTo());
+      Value *LD = Builder.CreateLoad(Address(BC, CharUnits::fromQuantity(1)));
+      if (!IsLE)
+        return LD;
+
+      // On LE, reverse the byte order: result lane Idx takes lane 15 - Idx.
+      SmallVector<int, 16> RevMask;
+      for (int Idx = 0; Idx < 16; Idx++)
+        RevMask.push_back(15 - Idx);
+      return Builder.CreateShuffleVector(LD, LD, RevMask);
+    }
+
+    llvm::Function *Lvx = CGM.getIntrinsic(Intrinsic::ppc_altivec_lvx);
+    llvm::Function *Lvs = CGM.getIntrinsic(IsLE ? Intrinsic::ppc_altivec_lvsr
+                                                : Intrinsic::ppc_altivec_lvsl);
+    llvm::Function *Vperm = CGM.getIntrinsic(Intrinsic::ppc_altivec_vperm);
+    Value *HiMem = Builder.CreateGEP( // Address of the last requested byte.
+        Int8Ty, Ops[0], ConstantInt::get(Ops[1]->getType(), NumBytes - 1));
+    Value *LoLd = Builder.CreateCall(Lvx, Ops[0], "ld.lo");
+    Value *HiLd = Builder.CreateCall(Lvx, HiMem, "ld.hi");
+    Value *Mask1 = Builder.CreateCall(Lvs, Ops[0], "mask1");
+
+    Ops.clear(); // Ops is reused as the operand list of the merging vperm.
+    Ops.push_back(IsLE ? HiLd : LoLd);
+    Ops.push_back(IsLE ? LoLd : HiLd);
+    Ops.push_back(Mask1);
+    Value *AllElts = Builder.CreateCall(Vperm, Ops, "shuffle1");
+    Constant *Zero = llvm::Constant::getNullValue(IsLE ? ResTy : AllElts->getType());
+
+    if (IsLE) { // LE: position the bytes with a shufflevector against Zero.
+      SmallVector<int, 16> Consts;
+      for (int Idx = 0; Idx < 16; Idx++) {
+        int Val = (NumBytes - Idx - 1 >= 0) ? (NumBytes - Idx - 1)
+                                            : 16 - (NumBytes - Idx);
+        Consts.push_back(Val); // Val >= 16 selects a lane of Zero.
+      }
+      return Builder.CreateShuffleVector(Builder.CreateBitCast(AllElts, ResTy),
+                                         Zero, Consts);
+    }
+    SmallVector<Constant *, 16> Consts; // BE: byte selectors for the final vperm.
+    for (int Idx = 0; Idx < 16; Idx++)
+      Consts.push_back(Builder.getInt8(NumBytes + Idx));
+    Value *Mask2 = ConstantVector::get(Consts);
+    return Builder.CreateBitCast(
+        Builder.CreateCall(Vperm, {Zero, AllElts, Mask2}, "shuffle2"), ResTy);
+  }
+  case PPC::BI__builtin_vsx_strmb: { // Store Ops[1] bytes of Ops[2] at Ops[0].
+    int64_t NumBytes = cast<ConstantInt>(Ops[1])->getZExtValue();
+    bool IsLE = getTarget().isLittleEndian();
+    auto StoreSubVec = [&](unsigned Width, unsigned Offset, unsigned EltNo) {
+      // Width == 16 stores the whole vector (Offset/EltNo unused): as is on BE,
+      // byte-reversed on LE. Else element EltNo goes to byte offset Offset.
+      if (Width == 16) {
+        Value *BC =
+            Builder.CreateBitCast(Ops[0], Ops[2]->getType()->getPointerTo());
+        Value *StVec = Ops[2];
+        if (IsLE) {
+          SmallVector<int, 16> RevMask;
+          for (int Idx = 0; Idx < 16; Idx++)
+            RevMask.push_back(15 - Idx);
+          StVec = Builder.CreateShuffleVector(Ops[2], Ops[2], RevMask);
+        }
+        return Builder.CreateStore(StVec,
+                                   Address(BC, CharUnits::fromQuantity(1)));
+      }
+      auto *ConvTy = Int64Ty; // Integer type that is Width bytes wide.
+      unsigned NumElts = 0; // Number of such elements in the 16-byte vector.
+      switch (Width) {
+      default:
+        llvm_unreachable("width for stores must be a power of 2");
+      case 8:
+        ConvTy = Int64Ty;
+        NumElts = 2;
+        break;
+      case 4:
+        ConvTy = Int32Ty;
+        NumElts = 4;
+        break;
+      case 2:
+        ConvTy = Int16Ty;
+        NumElts = 8;
+        break;
+      case 1:
+        ConvTy = Int8Ty;
+        NumElts = 16;
+        break;
+      }
+      Value *Vec = Builder.CreateBitCast(
+          Ops[2], llvm::FixedVectorType::get(ConvTy, NumElts));
+      Value *Ptr = Builder.CreateGEP(Int8Ty, Ops[0],
+                                     ConstantInt::get(Int64Ty, Offset));
+      Value *PtrBC = Builder.CreateBitCast(Ptr, ConvTy->getPointerTo());
+      Value *Elt = Builder.CreateExtractElement(Vec, EltNo);
+      if (IsLE && Width > 1) { // LE: byte-swap multi-byte elements.
+        Function *F = CGM.getIntrinsic(Intrinsic::bswap, ConvTy);
+        Elt = Builder.CreateCall(F, Elt);
+      }
+      return Builder.CreateStore(Elt,
+                                 Address(PtrBC, CharUnits::fromQuantity(1)));
+    };
+    unsigned Stored = 0; // Bytes already covered by emitted stores.
+    unsigned RemainingBytes = NumBytes;
+    Value *Result; // Last store emitted; left unset if NumBytes == 0.
+    if (NumBytes == 16)
+      return StoreSubVec(16, 0, 0);
+    if (NumBytes >= 8) { // Widest piece first, placed at the highest offset.
+      Result = StoreSubVec(8, NumBytes - 8, IsLE ? 0 : 1);
+      RemainingBytes -= 8;
+      Stored += 8;
+    }
+    if (RemainingBytes >= 4) {
+      Result = StoreSubVec(4, NumBytes - Stored - 4,
+                           IsLE ? (Stored >> 2) : 3 - (Stored >> 2));
+      RemainingBytes -= 4;
+      Stored += 4;
+    }
+    if (RemainingBytes >= 2) {
+      Result = StoreSubVec(2, NumBytes - Stored - 2,
+                           IsLE ? (Stored >> 1) : 7 - (Stored >> 1));
+      RemainingBytes -= 2;
+      Stored += 2;
+    }
+    if (RemainingBytes)
+      Result =
+          StoreSubVec(1, NumBytes - Stored - 1, IsLE ? Stored : 15 - Stored);
+    return Result;
+  }
   // Square root
   case PPC::BI__builtin_vsx_xvsqrtsp:
   case PPC::BI__builtin_vsx_xvsqrtdp: {
Index: clang/include/clang/Basic/BuiltinsPPC.def
===================================================================
--- clang/include/clang/Basic/BuiltinsPPC.def
+++ clang/include/clang/Basic/BuiltinsPPC.def
@@ -573,6 +573,8 @@
 BUILTIN(__builtin_vsx_lxvll, "V4ivC*ULLi", "")
 BUILTIN(__builtin_vsx_stxvl, "vV4iv*ULLi", "")
 BUILTIN(__builtin_vsx_stxvll, "vV4iv*ULLi", "")
+BUILTIN(__builtin_vsx_ldrmb, "V16UccC*Ii", "") // vector unsigned char (const char *, constant int)
+BUILTIN(__builtin_vsx_strmb, "vc*IiV16Uc", "") // void (char *, constant int, vector unsigned char)
 
 BUILTIN(__builtin_vsx_xvmaxdp, "V2dV2dV2d", "")
 BUILTIN(__builtin_vsx_xvmaxsp, "V4fV4fV4f", "")
_______________________________________________
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

Reply via email to