Conanap created this revision.
Conanap added reviewers: power-llvm-team, PowerPC, nemanjai, saghir, hfinkel.
Conanap added projects: LLVM, clang, PowerPC.

Implemented the following functions for the Load VSX Vector Sign Extend and
Zero Extend instructions:

  vector signed __int128 vec_xl_sext (signed long long, signed char *);
  vector signed __int128 vec_xl_sext (signed long long, signed short *);
  vector signed __int128 vec_xl_sext (signed long long, signed int *);
  vector signed __int128 vec_xl_sext (signed long long, signed long long *);
  
  vector unsigned __int128 vec_xl_zext (signed long long, unsigned char *);
  vector unsigned __int128 vec_xl_zext (signed long long, unsigned short *);
  vector unsigned __int128 vec_xl_zext (signed long long, unsigned int *);
  vector unsigned __int128 vec_xl_zext (signed long long, unsigned long long *);


Repository:
  rG LLVM Github Monorepo

https://reviews.llvm.org/D82502

Files:
  clang/lib/Headers/altivec.h
  clang/test/CodeGen/builtins-ppc-p10vector.c
  llvm/lib/Target/PowerPC/PPCISelLowering.cpp
  llvm/lib/Target/PowerPC/PPCISelLowering.h
  llvm/lib/Target/PowerPC/PPCInstrPrefix.td
  llvm/test/CodeGen/PowerPC/p10-vsx-builtins.ll
  llvm/test/MC/Disassembler/PowerPC/p10insts.txt
  llvm/test/MC/PowerPC/p10.s

Index: llvm/test/MC/PowerPC/p10.s
===================================================================
--- llvm/test/MC/PowerPC/p10.s
+++ llvm/test/MC/PowerPC/p10.s
@@ -33,3 +33,15 @@
 # CHECK-BE: vclrrb 1, 4, 3                        # encoding: [0x10,0x24,0x19,0xcd]
 # CHECK-LE: vclrrb 1, 4, 3                        # encoding: [0xcd,0x19,0x24,0x10]
             vclrrb 1, 4, 3
+# CHECK-BE: lxvrbx 32, 1, 2                       # encoding: [0x7c,0x01,0x10,0x1b]
+# CHECK-LE: lxvrbx 32, 1, 2                       # encoding: [0x1b,0x10,0x01,0x7c]
+            lxvrbx 32, 1, 2
+# CHECK-BE: lxvrhx 33, 1, 2                       # encoding: [0x7c,0x21,0x10,0x5b]
+# CHECK-LE: lxvrhx 33, 1, 2                       # encoding: [0x5b,0x10,0x21,0x7c]
+            lxvrhx 33, 1, 2
+# CHECK-BE: lxvrdx 34, 1, 2                       # encoding: [0x7c,0x41,0x10,0xdb]
+# CHECK-LE: lxvrdx 34, 1, 2                       # encoding: [0xdb,0x10,0x41,0x7c]
+            lxvrdx 34, 1, 2
+# CHECK-BE: lxvrwx 35, 1, 2                       # encoding: [0x7c,0x61,0x10,0x9b]
+# CHECK-LE: lxvrwx 35, 1, 2                       # encoding: [0x9b,0x10,0x61,0x7c]
+            lxvrwx 35, 1, 2
Index: llvm/test/MC/Disassembler/PowerPC/p10insts.txt
===================================================================
--- llvm/test/MC/Disassembler/PowerPC/p10insts.txt
+++ llvm/test/MC/Disassembler/PowerPC/p10insts.txt
@@ -30,3 +30,15 @@
 
 # CHECK: vclrrb 1, 4, 3
 0x10 0x24 0x19 0xcd
+
+# CHECK: lxvrbx 32, 1, 2
+0x7c 0x01 0x10 0x1b
+
+# CHECK: lxvrhx 33, 1, 2
+0x7c 0x21 0x10 0x5b
+
+# CHECK: lxvrdx 34, 1, 2
+0x7c 0x41 0x10 0xdb
+
+# CHECK: lxvrwx 35, 1, 2
+0x7c 0x61 0x10 0x9b
Index: llvm/test/CodeGen/PowerPC/p10-vsx-builtins.ll
===================================================================
--- /dev/null
+++ llvm/test/CodeGen/PowerPC/p10-vsx-builtins.ll
@@ -0,0 +1,74 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -verify-machineinstrs -mtriple=powerpc64le-unknown-linux-gnu \
+; RUN:   -mcpu=pwr10 -ppc-asm-full-reg-names -ppc-vsr-nums-as-vr < %s | \
+; RUN:   FileCheck %s
+
+; ZEXT TEST CASES
+
+; i8
+; CHECK: lxvrbx
+; Function Attrs: norecurse nounwind readonly
+define dso_local <1 x i128> @vec_xl_zext(i64 %__offset, i8* nocapture readonly %__pointer) local_unnamed_addr #0 {
+; CHECK-LABEL: vec_xl_zext:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    lxvrbx v2, r4, r3
+; CHECK-NEXT:    blr
+entry:
+  %add.ptr = getelementptr inbounds i8, i8* %__pointer, i64 %__offset
+  %0 = load i8, i8* %add.ptr, align 1
+  %conv = zext i8 %0 to i128
+  %splat.splatinsert = insertelement <1 x i128> undef, i128 %conv, i32 0
+  ret <1 x i128> %splat.splatinsert
+}
+
+; i16
+; CHECK: lxvrhx
+; Function Attrs: norecurse nounwind readonly
+define dso_local <1 x i128> @vec_xl_zext_short(i64 %__offset, i16* nocapture readonly %__pointer) local_unnamed_addr #0 {
+; CHECK-LABEL: vec_xl_zext_short:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    sldi r3, r3, 1
+; CHECK-NEXT:    lxvrhx v2, r4, r3
+; CHECK-NEXT:    blr
+entry:
+  %add.ptr = getelementptr inbounds i16, i16* %__pointer, i64 %__offset
+  %0 = load i16, i16* %add.ptr, align 2
+  %conv = zext i16 %0 to i128
+  %splat.splatinsert = insertelement <1 x i128> undef, i128 %conv, i32 0
+  ret <1 x i128> %splat.splatinsert
+}
+
+; i32
+; CHECK: lxvrwx
+; Function Attrs: norecurse nounwind readonly
+define dso_local <1 x i128> @vec_xl_zext_word(i64 %__offset, i32* nocapture readonly %__pointer) local_unnamed_addr #0 {
+; CHECK-LABEL: vec_xl_zext_word:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    sldi r3, r3, 2
+; CHECK-NEXT:    lxvrwx v2, r4, r3
+; CHECK-NEXT:    blr
+entry:
+  %add.ptr = getelementptr inbounds i32, i32* %__pointer, i64 %__offset
+  %0 = load i32, i32* %add.ptr, align 4
+  %conv = zext i32 %0 to i128
+  %splat.splatinsert = insertelement <1 x i128> undef, i128 %conv, i32 0
+  ret <1 x i128> %splat.splatinsert
+}
+
+; i64
+; CHECK: lxvrdx
+; Function Attrs: norecurse nounwind readonly
+define dso_local <1 x i128> @vec_xl_zext_dw(i64 %__offset, i64* nocapture readonly %__pointer) local_unnamed_addr #0 {
+; CHECK-LABEL: vec_xl_zext_dw:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    sldi r3, r3, 3
+; CHECK-NEXT:    lxvrdx v2, r4, r3
+; CHECK-NEXT:    blr
+entry:
+  %add.ptr = getelementptr inbounds i64, i64* %__pointer, i64 %__offset
+  %0 = load i64, i64* %add.ptr, align 8
+  %conv = zext i64 %0 to i128
+  %splat.splatinsert = insertelement <1 x i128> undef, i128 %conv, i32 0
+  ret <1 x i128> %splat.splatinsert
+}
+
Index: llvm/lib/Target/PowerPC/PPCInstrPrefix.td
===================================================================
--- llvm/lib/Target/PowerPC/PPCInstrPrefix.td
+++ llvm/lib/Target/PowerPC/PPCInstrPrefix.td
@@ -209,6 +209,21 @@
 def PrefixInstrs : Predicate<"PPCSubTarget->hasPrefixInstrs()">;
 def IsISA3_1 : Predicate<"PPCSubTarget->isISA3_1()">;
 
+let mayLoad = 1, mayStore = 0, Predicates = [IsISA3_1] in {
+  // The XFormMemOp flag is set on the instruction format.
+  def LXVRBX : X_XT6_RA5_RB5<31, 13, "lxvrbx", vsrc, []>;
+  def LXVRHX : X_XT6_RA5_RB5<31, 45, "lxvrhx", vsrc, []>;
+  def LXVRWX : X_XT6_RA5_RB5<31, 77, "lxvrwx", vsrc, []>;
+  def LXVRDX : X_XT6_RA5_RB5<31, 109, "lxvrdx", vsrc, []>;
+}
+
+def SDT_PPCLXVRZX : SDTypeProfile<1, 2, [
+  SDTCisVT<0, v1i128>, SDTCisPtrTy<1>, SDTCisPtrTy<2>
+]>;
+
+def PPClxvrzx : SDNode<"PPCISD::LXVRZX", SDT_PPCLXVRZX,
+                    [SDNPHasChain, SDNPMayLoad]>;
+
 let Predicates = [PrefixInstrs] in {
   let Interpretation64Bit = 1, isCodeGenOnly = 1 in {
     defm PADDI8 :
@@ -512,7 +527,7 @@
               (PPCmatpcreladdr pcreladdr:$dst), 8),
             (PSTXSDpc (COPY_TO_REGCLASS (XSCVQPUDZ f128:$src), VFRC), $dst, 0)>;
 
-  // If the PPCmatpcreladdr node is not caught by any other pattern it should be
+    // If the PPCmatpcreladdr node is not caught by any other pattern it should be
   // caught here and turned into a paddi instruction to materialize the address.
   def : Pat<(PPCmatpcreladdr pcreladdr:$addr), (PADDI8pc 0, $addr)>;
 }
@@ -564,4 +579,16 @@
             (v4i32 (COPY_TO_REGCLASS (XXGENPCVWM $VRB, imm:$IMM), VRRC))>;
   def : Pat<(v2i64 (int_ppc_vsx_xxgenpcvdm v2i64:$VRB, imm:$IMM)),
             (v2i64 (COPY_TO_REGCLASS (XXGENPCVDM $VRB, imm:$IMM), VRRC))>;
+
+  // Special instructions for Power10 load VSX vector with zero extend.
+  // Utilize the appropriate Load VSX Vector Rightmost instruction depending
+  // on the width of PPClxvrzx.
+  def : Pat <(v1i128 (PPClxvrzx xoaddr:$src, 8)),
+             (v1i128 (COPY_TO_REGCLASS (LXVRBX xoaddr:$src), VRRC))>;
+  def : Pat <(v1i128 (PPClxvrzx xoaddr:$src, 16)),
+             (v1i128 (COPY_TO_REGCLASS (LXVRHX xoaddr:$src), VRRC))>;
+  def : Pat <(v1i128 (PPClxvrzx xoaddr:$src, 32)),
+             (v1i128 (COPY_TO_REGCLASS (LXVRWX xoaddr:$src), VRRC))>;
+  def : Pat <(v1i128 (PPClxvrzx xoaddr:$src, 64)),
+             (v1i128 (COPY_TO_REGCLASS (LXVRDX xoaddr:$src), VRRC))>;
 }
Index: llvm/lib/Target/PowerPC/PPCISelLowering.h
===================================================================
--- llvm/lib/Target/PowerPC/PPCISelLowering.h
+++ llvm/lib/Target/PowerPC/PPCISelLowering.h
@@ -519,7 +519,13 @@
     /// GPRC = TOC_ENTRY GA, TOC
     /// Loads the entry for GA from the TOC, where the TOC base is given by
     /// the last operand.
-    TOC_ENTRY
+    TOC_ENTRY,
+
+    /// LXVRZX - Load VSX Vector Rightmost and Zero Extend
+    /// This node represents v1i128 BUILD_VECTOR of a zero extending load
+    /// instruction from <byte, halfword, word, or doubleword> to i128.
+    /// Allows utilization of the Load VSX Vector Rightmost Instructions.
+    LXVRZX
   };
 
   } // end namespace PPCISD
Index: llvm/lib/Target/PowerPC/PPCISelLowering.cpp
===================================================================
--- llvm/lib/Target/PowerPC/PPCISelLowering.cpp
+++ llvm/lib/Target/PowerPC/PPCISelLowering.cpp
@@ -1573,6 +1573,7 @@
   case PPCISD::MAT_PCREL_ADDR:  return "PPCISD::MAT_PCREL_ADDR";
   case PPCISD::LD_SPLAT:        return "PPCISD::LD_SPLAT";
   case PPCISD::FNMSUB:          return "PPCISD::FNMSUB";
+  case PPCISD::LXVRZX:          return "PPCISD::LXVRZX";
   }
   return nullptr;
 }
@@ -13760,6 +13761,51 @@
   return SDValue();
 }
 
+// Fold a v1i128 BUILD_VECTOR whose only operand is a zero-extending load of a
+// byte, halfword, word, or doubleword to i128 into a PPCISD::LXVRZX node, so
+// that it can be matched to the Load VSX Vector Rightmost instructions.
+// Returns an empty SDValue when the pattern does not apply.
+static SDValue combineBVZEXTLOAD(SDNode *N, SelectionDAG &DAG) {
+  SDLoc dl(N);
+
+  // This combine is only eligible for a BUILD_VECTOR of v1i128.
+  // Other return types are not valid for the LXVRZX replacement.
+  if (N->getValueType(0) != MVT::v1i128)
+    return SDValue();
+
+  // Proceed with the transformation only if the operand to the BUILD_VECTOR
+  // is a load instruction.
+  SDValue Operand = N->getOperand(0);
+  if (Operand.getOpcode() != ISD::LOAD)
+    return SDValue();
+
+  // The opcode check above guarantees a LoadSDNode, so use cast<> (which
+  // asserts) rather than dereferencing an unchecked dyn_cast<> result.
+  auto *LD = cast<LoadSDNode>(Operand);
+  EVT MemoryType = LD->getMemoryVT();
+
+  // This transformation is only valid if we are loading either a byte,
+  // halfword, word, or doubleword.
+  bool ValidLDType = MemoryType == MVT::i8 || MemoryType == MVT::i16 ||
+                     MemoryType == MVT::i32 || MemoryType == MVT::i64;
+
+  // Ensure that the load from the narrow width is being zero extended to i128.
+  if (LD->getValueType(0) != MVT::i128 ||
+      LD->getExtensionType() != ISD::ZEXTLOAD || !ValidLDType)
+    return SDValue();
+
+  // The width of the narrow type becomes an operand of the LXVRZX node
+  // we are creating in order to pattern match to the appropriate instruction
+  // in the backend.
+  SDValue Width = DAG.getIntPtrConstant(MemoryType.getScalarSizeInBits(), dl);
+  SDValue LoadOps[] = {LD->getChain(), LD->getBasePtr(), Width};
+
+  // Carry over the original load's memory type and memory operand.
+  return DAG.getMemIntrinsicNode(PPCISD::LXVRZX, dl,
+                                 DAG.getVTList(MVT::v1i128, MVT::Other),
+                                 LoadOps, MemoryType, LD->getMemOperand());
+}
+
 SDValue PPCTargetLowering::DAGCombineBuildVector(SDNode *N,
                                                  DAGCombinerInfo &DCI) const {
   assert(N->getOpcode() == ISD::BUILD_VECTOR &&
@@ -13797,6 +13843,14 @@
       return Reduced;
   }
 
+  // On Power10, the Load VSX Vector Rightmost instructions can be utilized
+  // if this is a BUILD_VECTOR of v1i128, and if the operand to the BUILD_VECTOR
+  // is a load from <valid narrow width> to i128.
+  if (Subtarget.isISA3_1()) {
+    SDValue BVOfZLoad = combineBVZEXTLOAD(N, DAG);
+    if (BVOfZLoad)
+      return BVOfZLoad;
+  }
 
   if (N->getValueType(0) != MVT::v2f64)
     return SDValue();
Index: clang/test/CodeGen/builtins-ppc-p10vector.c
===================================================================
--- clang/test/CodeGen/builtins-ppc-p10vector.c
+++ clang/test/CodeGen/builtins-ppc-p10vector.c
@@ -9,8 +9,16 @@
 vector unsigned char vuca;
 vector unsigned short vusa;
 vector unsigned int vuia;
+signed char *ca;
+unsigned char *uca;
+
 vector unsigned long long vulla, vullb;
-unsigned int uia;
+unsigned int uia, *uiap;
+signed int *ia;
+signed short *sia;
+unsigned short *usia;
+signed long long *llia, llib;
+unsigned long long *ullia;
 
 vector unsigned long long test_vpdepd(void) {
   // CHECK: @llvm.ppc.altivec.vpdepd(<2 x i64>
@@ -79,3 +87,59 @@
   // CHECK-LE-NEXT: ret <16 x i8>
   return vec_clrr(vuca, uia);
 }
+
+vector signed __int128 test_vec_xl_sext_i8(void) {
+  // CHECK: load i8
+  // CHECK: sext i8
+  // CHECK: ret <1 x i128>
+  return vec_xl_sext(llib, ca);
+}
+
+vector signed __int128 test_vec_xl_sext_i16(void) {
+  // CHECK: load i16
+  // CHECK: sext i16
+  // CHECK: ret <1 x i128>
+  return vec_xl_sext(llib, sia);
+}
+
+vector signed __int128 test_vec_xl_sext_i32(void) {
+  // CHECK: load i32
+  // CHECK: sext i32
+  // CHECK: ret <1 x i128>
+  return vec_xl_sext(llib, ia);
+}
+
+vector signed __int128 test_vec_xl_sext_i64(void) {
+  // CHECK: load i64
+  // CHECK: sext i64
+  // CHECK: ret <1 x i128>
+  return vec_xl_sext(llib, llia);
+}
+
+vector unsigned __int128 test_vec_xl_zext_i8(void) {
+  // CHECK: load i8
+  // CHECK: zext i8
+  // CHECK: ret <1 x i128>
+  return vec_xl_zext(llib, uca);
+}
+
+vector unsigned __int128 test_vec_xl_zext_i16(void) {
+  // CHECK: load i16
+  // CHECK: zext i16
+  // CHECK: ret <1 x i128>
+  return vec_xl_zext(llib, usia);
+}
+
+vector unsigned __int128 test_vec_xl_zext_i32(void) {
+  // CHECK: load i32
+  // CHECK: zext i32
+  // CHECK: ret <1 x i128>
+  return vec_xl_zext(llib, uiap);
+}
+
+vector unsigned __int128 test_vec_xl_zext_i64(void) {
+  // CHECK: load i64
+  // CHECK: zext i64
+  // CHECK: ret <1 x i128>
+  return vec_xl_zext(llib, ullia);
+}
Index: clang/lib/Headers/altivec.h
===================================================================
--- clang/lib/Headers/altivec.h
+++ clang/lib/Headers/altivec.h
@@ -16509,6 +16509,52 @@
   #define vec_xl_be vec_xl
 #endif
 
+#if defined(__POWER10_VECTOR__) && defined(__VSX__)
+
+/* vec_xl_sext: load *(__pointer + __offset) and sign-extend it to 128 bits */
+static __inline__ vector signed __int128 __ATTRS_o_ai
+vec_xl_sext(signed long long __offset, signed char *__pointer) {
+  return (vector signed __int128)*(__pointer + __offset);
+}
+
+static __inline__ vector signed __int128 __ATTRS_o_ai
+vec_xl_sext(signed long long __offset, signed short *__pointer) {
+  return (vector signed __int128)*(__pointer + __offset);
+}
+
+static __inline__ vector signed __int128 __ATTRS_o_ai
+vec_xl_sext(signed long long __offset, signed int *__pointer) {
+  return (vector signed __int128)*(__pointer + __offset);
+}
+
+static __inline__ vector signed __int128 __ATTRS_o_ai
+vec_xl_sext(signed long long __offset, signed long long *__pointer) {
+  return (vector signed __int128)*(__pointer + __offset);
+}
+
+/* vec_xl_zext: load *(__pointer + __offset) and zero-extend it to 128 bits */
+static __inline__ vector unsigned __int128 __ATTRS_o_ai
+vec_xl_zext(signed long long __offset, unsigned char *__pointer) {
+  return (vector unsigned __int128)*(__pointer + __offset);
+}
+
+static __inline__ vector unsigned __int128 __ATTRS_o_ai
+vec_xl_zext(signed long long __offset, unsigned short *__pointer) {
+  return (vector unsigned __int128)*(__pointer + __offset);
+}
+
+static __inline__ vector unsigned __int128 __ATTRS_o_ai
+vec_xl_zext(signed long long __offset, unsigned int *__pointer) {
+  return (vector unsigned __int128)*(__pointer + __offset);
+}
+
+static __inline__ vector unsigned __int128 __ATTRS_o_ai
+vec_xl_zext(signed long long __offset, unsigned long long *__pointer) {
+  return (vector unsigned __int128)*(__pointer + __offset);
+}
+
+#endif /* __POWER10_VECTOR__ && __VSX__ */
+
 /* vec_xst */
 
 static inline __ATTRS_o_ai void vec_xst(vector signed char __vec,
@@ -16830,6 +16876,7 @@
   return __builtin_altivec_vclrrb((vector signed char)__a, __n);
 #endif
 }
+
 #endif /* __POWER10_VECTOR__ */
 
 #undef __ATTRS_o_ai
_______________________________________________
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

Reply via email to