[clang] [llvm] [SystemZ] Support i128 as legal type in VRs (PR #74625)

2023-12-14 Thread Ulrich Weigand via cfe-commits

uweigand wrote:

Fixed merge conflicts, updated as described above, and fixed support for i128 
parameters in the z/OS XPLINK ABI.

https://github.com/llvm/llvm-project/pull/74625
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [llvm] [SystemZ] Support i128 as legal type in VRs (PR #74625)

2023-12-14 Thread Ulrich Weigand via cfe-commits

uweigand wrote:

> I have looked through the changes and made some comments inline.

Thanks for the review!

> Commenting:

Fixed, thanks!

> I happened to notice some cases with room for improvement:

Good catch.  I've not addressed these right now; this can be done as a 
follow-up.  (The memory case is a bit tedious due to TableGen pattern 
limitations ...)

> As a side question: I forgot why we can get CCMask '5' here: it seems it 
> should be CCMASK_CMP_NE ('6'), if we reverse the LOC operation..?

No, 5 is correct here.  Reversing XORs the mask with the set of valid bits, so 
we have 13 ^ 8 == 5.

Looking at the VTM instruction, we have the following valid condition codes 
(0, 1, and 3, which together make up the valid-bits mask 13 = 8 + 4 + 1):
0 - Selected bits all zeros; or all mask bits zero
1 - Selected bits a mix of zeros and ones
2 - n/a
3 - Selected bits all ones

The original mask is 8, i.e. condition code 0 ("selected bits all zeros").  
Reversing this needs to check for condition codes 1 or 3, i.e. mask 5 
("selected bits a mix of zeros and ones" or "selected bits all ones").


https://github.com/llvm/llvm-project/pull/74625
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [llvm] [SystemZ] Support i128 as legal type in VRs (PR #74625)

2023-12-14 Thread Ulrich Weigand via cfe-commits


@@ -1516,48 +1536,206 @@ let Predicates = [FeatureVector] in {
   }
 }
 
+//===--===//
+// Support for 128-bit integer values in vector registers
+//===--===//
+
+// Loads and stores.
+let Predicates = [FeatureVector] in {
+  def : Pat<(i128 (load bdxaddr12only:$addr)),
+(VL bdxaddr12only:$addr)>;
+  def : Pat<(store (i128 VR128:$src), bdxaddr12only:$addr),
+(VST VR128:$src, bdxaddr12only:$addr)>;
+}
+
+// Full i128 move from GPR pair.
+let Predicates = [FeatureVector] in
+  def : Pat<(i128 (or (zext GR64:$x), (shl (anyext GR64:$y), (i32 64,
+(VLVGP GR64:$y, GR64:$x)>;
+
+// Any-extensions from GPR to i128.
+let Predicates = [FeatureVector] in {
+  def : Pat<(i128 (anyext GR32:$x)), (VLVGP32 GR32:$x, GR32:$x)>;
+  def : Pat<(i128 (anyext GR64:$x)), (VLVGP GR64:$x, GR64:$x)>;
+}
+
+// Any-extending loads into i128.
+let Predicates = [FeatureVector] in {
+  def : Pat<(i128 (extloadi8 bdxaddr12only:$addr)),
+(VLREPB bdxaddr12only:$addr)>;
+  def : Pat<(i128 (extloadi16 bdxaddr12only:$addr)),
+(VLREPH bdxaddr12only:$addr)>;
+  def : Pat<(i128 (extloadi32 bdxaddr12only:$addr)),
+(VLREPF bdxaddr12only:$addr)>;
+  def : Pat<(i128 (extloadi64 bdxaddr12only:$addr)),
+(VLREPG bdxaddr12only:$addr)>;
+}
+
+// Truncations from i128 to GPR.
+let Predicates = [FeatureVector] in {
+  def : Pat<(i32 (trunc (i128 VR128:$vec))),
+(EXTRACT_SUBREG (VLGVF VR128:$vec, zero_reg, 3), subreg_l32)>;
+  def : Pat<(i32 (trunc (srl (i128 VR128:$vec), (i32 32,
+(EXTRACT_SUBREG (VLGVF VR128:$vec, zero_reg, 2), subreg_l32)>;
+  def : Pat<(i32 (trunc (srl (i128 VR128:$vec), (i32 64,
+(EXTRACT_SUBREG (VLGVF VR128:$vec, zero_reg, 1), subreg_l32)>;
+  def : Pat<(i32 (trunc (srl (i128 VR128:$vec), (i32 96,
+(EXTRACT_SUBREG (VLGVF VR128:$vec, zero_reg, 0), subreg_l32)>;
+  def : Pat<(i64 (trunc (i128 VR128:$vec))),
+(VLGVG VR128:$vec, zero_reg, 1)>;
+  def : Pat<(i64 (trunc (srl (i128 VR128:$vec), (i32 64,
+(VLGVG VR128:$vec, zero_reg, 0)>;
+}
+
+// Truncating stores from i128.
+let Predicates = [FeatureVector] in {
+  def : Pat<(truncstorei8 (i128 VR128:$x), bdxaddr12only:$addr),
+(VSTEB VR128:$x, bdxaddr12only:$addr, 15)>;
+  def : Pat<(truncstorei16 (i128 VR128:$x), bdxaddr12only:$addr),
+(VSTEH VR128:$x, bdxaddr12only:$addr, 7)>;
+  def : Pat<(truncstorei32 (i128 VR128:$x), bdxaddr12only:$addr),
+(VSTEF VR128:$x, bdxaddr12only:$addr, 3)>;
+  def : Pat<(truncstorei32 (srl (i128 VR128:$x), (i32 32)), 
bdxaddr12only:$addr),
+(VSTEF VR128:$x, bdxaddr12only:$addr, 2)>;
+  def : Pat<(truncstorei32 (srl (i128 VR128:$x), (i32 64)), 
bdxaddr12only:$addr),
+(VSTEF VR128:$x, bdxaddr12only:$addr, 1)>;
+  def : Pat<(truncstorei32 (srl (i128 VR128:$x), (i32 96)), 
bdxaddr12only:$addr),
+(VSTEF VR128:$x, bdxaddr12only:$addr, 0)>;
+  def : Pat<(truncstorei64 (i128 VR128:$x), bdxaddr12only:$addr),
+(VSTEG VR128:$x, bdxaddr12only:$addr, 1)>;
+  def : Pat<(truncstorei64 (srl (i128 VR128:$x), (i32 64)), 
bdxaddr12only:$addr),
+(VSTEG VR128:$x, bdxaddr12only:$addr, 0)>;
+}
+
+// Zero-extensions from GPR to i128.
+let Predicates = [FeatureVector] in {
+  def : Pat<(i128 (zext8 (anyext GR32:$x))),
+(VLVGB (VGBM 0), GR32:$x, zero_reg, 15)>;
+  def : Pat<(i128 (zext16 (anyext GR32:$x))),
+(VLVGH (VGBM 0), GR32:$x, zero_reg, 7)>;
+  def : Pat<(i128 (zext GR32:$x)),
+(VLVGF (VGBM 0), GR32:$x, zero_reg, 3)>;
+  def : Pat<(i128 (zext GR64:$x)),
+(VLVGG (VGBM 0), GR64:$x, zero_reg, 1)>;
+}
+
+// Zero-extending loads into i128.
+let Predicates = [FeatureVector] in {
+  def : Pat<(i128 (zextloadi8 bdxaddr12only:$addr)),
+(VLEB (VGBM 0), bdxaddr12only:$addr, 15)>;
+  def : Pat<(i128 (zextloadi16 bdxaddr12only:$addr)),
+(VLEH (VGBM 0), bdxaddr12only:$addr, 7)>;
+  def : Pat<(i128 (zextloadi32 bdxaddr12only:$addr)),
+(VLEF (VGBM 0), bdxaddr12only:$addr, 3)>;
+  def : Pat<(i128 (zextloadi64 bdxaddr12only:$addr)),
+(VLEG (VGBM 0), bdxaddr12only:$addr, 1)>;
+}
+
+// In-register i128 sign-extensions.
+let Predicates = [FeatureVector] in {
+  def : Pat<(i128 (sext_inreg VR128:$x, i8)),
+(VSRAB (VREPB VR128:$x, 15), (VREPIB 120))>;
+  def : Pat<(i128 (sext_inreg VR128:$x, i16)),
+(VSRAB (VREPH VR128:$x, 7), (VREPIB 112))>;
+  def : Pat<(i128 (sext_inreg VR128:$x, i32)),
+(VSRAB (VREPF VR128:$x, 3), (VREPIB 96))>;
+  def : Pat<(i128 (sext_inreg VR128:$x, i64)),
+(VSRAB (VREPG VR128:$x, 1), (VREPIB 64))>;
+}
+
+// Sign-extensions from GPR to i128.
+let Predicates = [FeatureVector] in {
+  def : Pat<(i128 (sext_inreg 

[clang] [llvm] [SystemZ] Support i128 as legal type in VRs (PR #74625)

2023-12-14 Thread Ulrich Weigand via cfe-commits


@@ -6481,6 +6737,71 @@ SDValue SystemZTargetLowering::combineLOAD(
 SDNode *N, DAGCombinerInfo ) const {
   SelectionDAG  = DCI.DAG;
   EVT LdVT = N->getValueType(0);
+  SDLoc DL(N);
+
+  // Replace an i128 load that is used solely to move its value into GPRs
+  // by separate loads of both halves.
+  if (LdVT == MVT::i128) {
+LoadSDNode *LD = cast(N);
+if (!LD->isSimple() || !ISD::isNormalLoad(LD))
+  return SDValue();
+
+// Scan through all users.
+SmallVector, 2> Users;
+int UsedElements = 0;
+for (SDNode::use_iterator UI = LD->use_begin(), UIEnd = LD->use_end();
+ UI != UIEnd; ++UI) {
+  // Skip the uses of the chain.
+  if (UI.getUse().getResNo() != 0)
+continue;
+
+  // Verify every user is a TRUNCATE to i64 of the low or high half ...
+  SDNode *User = *UI;
+  int Index = 1;
+  if (User->getOpcode() == ISD::SRL &&
+  User->getOperand(1).getOpcode() == ISD::Constant &&
+  cast(User->getOperand(1))->getZExtValue() == 64 &&
+  User->hasOneUse()) {
+User = *User->use_begin();
+Index = 0;
+  }
+  if (User->getOpcode() != ISD::TRUNCATE ||
+  User->getValueType(0) != MVT::i64)
+return SDValue();
+
+  // ... and no half is extracted twice.
+  if (UsedElements & (1 << Index))
+return SDValue();
+
+  UsedElements |= 1 << Index;
+  Users.push_back(std::make_pair(User, Index));
+}
+
+// Rewrite each extraction as an independent load.
+SmallVector ArgChains;
+for (auto UserAndIndex : Users) {
+  SDNode *User = UserAndIndex.first;
+  unsigned Offset = User->getValueType(0).getStoreSize() * 
UserAndIndex.second;
+  SDValue Ptr =

uweigand wrote:

Well, `getStoreSize` should be 8 here, but it seems clearer to refer to it 
symbolically.

https://github.com/llvm/llvm-project/pull/74625
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [llvm] [SystemZ] Support i128 as legal type in VRs (PR #74625)

2023-12-14 Thread Ulrich Weigand via cfe-commits


@@ -2918,16 +3049,17 @@ static Comparison getCmp(SelectionDAG , SDValue 
CmpOp0, SDValue CmpOp1,
  bool IsSignaling = false) {
   if (CmpOp1.getOpcode() == ISD::Constant) {
 assert(!Chain);
-uint64_t Constant = cast(CmpOp1)->getZExtValue();
 unsigned Opcode, CCValid;

uweigand wrote:

`getZExtValue` will crash when the constant is 128-bit, which can now happen.  
Therefore I'm now only evaluating it in code paths where we know the constant 
cannot be 128-bit.

https://github.com/llvm/llvm-project/pull/74625
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [llvm] [SystemZ] Support i128 as legal type in VRs (PR #74625)

2023-12-14 Thread Ulrich Weigand via cfe-commits


@@ -2772,6 +2837,27 @@ static unsigned getTestUnderMaskCond(unsigned BitSize, 
unsigned CCMask,
 // Update the arguments with the TM version if so.
 static void adjustForTestUnderMask(SelectionDAG , const SDLoc ,
Comparison ) {
+  // Use VECTOR TEST UNDER MASK for i128 operations.
+  if (C.Op0.getValueType() == MVT::i128) {

uweigand wrote:

Well, we need `VCEQS` to set CC, which takes an extra cycle, so both sequences 
should take three cycles, and the VTM sequence is shorter.

https://github.com/llvm/llvm-project/pull/74625
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [llvm] [SystemZ] Support i128 as legal type in VRs (PR #74625)

2023-12-14 Thread Ulrich Weigand via cfe-commits


@@ -1466,7 +1509,15 @@ static SDValue convertValVTToLocVT(SelectionDAG , 
const SDLoc ,
 static SDValue lowerI128ToGR128(SelectionDAG , SDValue In) {
   SDLoc DL(In);
   SDValue Lo, Hi;
-  std::tie(Lo, Hi) = DAG.SplitScalar(In, DL, MVT::i64, MVT::i64);
+  if (DAG.getTargetLoweringInfo().isTypeLegal(MVT::i128)) {
+Lo = DAG.getNode(ISD::TRUNCATE, DL, MVT::i64, In);

uweigand wrote:

Hmmm.  According to the specs, `EXTRACT_ELEMENT` (and therefore `SplitScalar`) 
is only supposed to be used on non-legal types, for values that will be broken 
up into multiple registers.   However, the actual implementation doesn't appear 
to verify this, and expands `EXTRACT_ELEMENT` even for a legal `i128`, to the 
same code ...

I would still prefer to not rely on that undocumented behavior, and use the 
explicit expansion.  Also, this keeps the implementation symmetrical to 
`lowerGR128ToI128`, and may be slightly more efficient as it omits generating 
and then immediately eliminating the `EXTRACT_ELEMENT` nodes.

https://github.com/llvm/llvm-project/pull/74625
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [llvm] [SystemZ] Support i128 as legal type in VRs (PR #74625)

2023-12-14 Thread Ulrich Weigand via cfe-commits


@@ -1183,6 +1187,35 @@ void SystemZDAGToDAGISel::loadVectorConstant(
   SelectCode(Op.getNode());
 }
 
+SDNode *SystemZDAGToDAGISel::loadPoolVectorConstant(APInt Val, EVT VT, SDLoc 
DL) {
+  SDNode *ResNode;
+  assert (VT.getSizeInBits() == 128);
+
+  SDValue CP = CurDAG->getTargetConstantPool(
+  ConstantInt::get(Type::getInt128Ty(*CurDAG->getContext()), Val),
+  TLI->getPointerTy(CurDAG->getDataLayout()));
+
+  EVT PtrVT = CP.getValueType();
+  SDValue Ops[] = {
+SDValue(CurDAG->getMachineNode(SystemZ::LARL, DL, PtrVT, CP), 0),
+CurDAG->getTargetConstant(0, DL, PtrVT),
+CurDAG->getRegister(0, PtrVT),
+CurDAG->getEntryNode()
+  };
+  ResNode = CurDAG->getMachineNode(SystemZ::VL, DL, VT, MVT::Other, Ops);
+
+  // Annotate ResNode with memory operand information so that MachineInstr
+  // queries work properly. This e.g. gives the register allocation the
+  // required information for rematerialization.
+  MachineFunction& MF = CurDAG->getMachineFunction();
+  MachineMemOperand *MemOp =
+  MF.getMachineMemOperand(MachinePointerInfo::getConstantPool(MF),
+  MachineMemOperand::MOLoad, 8, Align(8));
+

uweigand wrote:

Fixed, thanks!

https://github.com/llvm/llvm-project/pull/74625
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [llvm] [SystemZ] Support i128 as legal type in VRs (PR #74625)

2023-12-13 Thread Jonas Paulsson via cfe-commits

JonPsson1 wrote:

I have looked through the changes and made some comments inline.

I built this with expensive checks enabled with all checks passing, and SPEC 
built successfully.

Commenting:

```
  @@ -293,7 +293,7 @@ SystemZTargetLowering::SystemZTargetLowering(const 
TargetMachine ,
   setOperationAction(ISD::ATOMIC_LOAD_UMIN, MVT::i32, Custom);
   setOperationAction(ISD::ATOMIC_LOAD_UMAX, MVT::i32, Custom);
 
-  // Even though i128 is not a legal type, we still need to custom lower
+  // Even though i128 is not a legal type, we still need to custom lower   
**// Update comment**

@@ -2144,7 +2145,7 @@ CanLowerReturn(CallingConv::ID CallConv,
 VerifyVectorTypes(Outs);
 
   // Special case that we cannot easily detect in RetCC_SystemZ since
-  // i128 is not a legal type.
+  // i128 is not a legal type.   **// Update comment**


+++ b/llvm/lib/Target/SystemZ/SystemZRegisterInfo.td
@@ -124,7 +124,7 @@ defm GRX32 : SystemZRegClass<"GRX32", [i32], 32,
R12L,R12H,R13L,R13H,R14L,R14H,R15L,R15H)
  ]>;
 
-// The architecture doesn't really have any i128 support, so model the
+// The architecture doesn't really have any i128 support, so model the  **// 
Update comment**

```

I happened to notice some cases with room for improvement:
```
; Scalar load + insertion + replication could be just a vlrepb.
define i128 @fun0(i128 %a, i128 %sh) {
; CHECK-LABEL: fun0:
; CHECK:   # %bb.0:
; CHECK-NEXT:l %r0, 12(%r4)   //
; CHECK-NEXT:vlvgp %v1, %r0, %r0  //
; CHECK-NEXT:vl %v0, 0(%r3), 3
; CHECK-NEXT:vrepb %v1, %v1, 15   // ===> vlrepb %v1, 12(%r4)  ?
; CHECK-NEXT:vslb %v0, %v0, %v1
; CHECK-NEXT:vsl %v0, %v0, %v1
; CHECK-NEXT:vst %v0, 0(%r2), 3
; CHECK-NEXT:br %r14
  %res = shl i128 %a, %sh
  ret i128 %res
}

; %v1 is the shift amount in a VR already.
define i128 @fun1(i128 %a, i128 %sh, i128 %t) {
; CHECK-LABEL: fun1:
; CHECK:   # %bb.0:
; CHECK-NEXT:vl %v1, 0(%r5), 3
; CHECK-NEXT:vl %v2, 0(%r4), 3
; CHECK-NEXT:vaq %v1, %v2, %v1
; CHECK-NEXT:vlgvf %r0, %v1, 3 //
; CHECK-NEXT:vlvgp %v1, %r0, %r0   //
; CHECK-NEXT:vl %v0, 0(%r3), 3
; CHECK-NEXT:vrepb %v1, %v1, 15// ===> vrepb %v1, %v1, 15
; CHECK-NEXT:vslb %v0, %v0, %v1
; CHECK-NEXT:vsl %v0, %v0, %v1
; CHECK-NEXT:vst %v0, 0(%r2), 3
; CHECK-NEXT:br %r14
  %s = add i128 %sh, %t
  %res = shl i128 %a, %s
  ret i128 %res
}
```
As a side question:   I forgot why we can get CCMask '5' here: it seems it 
should be CCMASK_CMP_NE ('6'), if we reverse the LOC operation..?
```
 VTM killed %5:vr128bit, killed %4:vr128bit, implicit-def $cc
  %6:gr64bit = LOCGR killed %3:gr64bit(tied-def 0), killed %2:gr64bit, 13, 8, 
implicit killed $cc

# *** IR Dump After Two-Address instruction pass (twoaddressinstruction) ***:
(SystemZInstrInfo::commuteInstructionImpl)

  VTM killed %5:vr128bit, killed %4:vr128bit, implicit-def $cc
  %6:gr64bit = COPY killed %2:gr64bit
  %6:gr64bit = LOCGR %6:gr64bit(tied-def 0), killed %3:gr64bit, 13, 5, implicit 
killed $cc
```


https://github.com/llvm/llvm-project/pull/74625
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [llvm] [SystemZ] Support i128 as legal type in VRs (PR #74625)

2023-12-13 Thread Jonas Paulsson via cfe-commits


@@ -6481,6 +6737,71 @@ SDValue SystemZTargetLowering::combineLOAD(
 SDNode *N, DAGCombinerInfo ) const {
   SelectionDAG  = DCI.DAG;
   EVT LdVT = N->getValueType(0);
+  SDLoc DL(N);
+
+  // Replace an i128 load that is used solely to move its value into GPRs
+  // by separate loads of both halves.
+  if (LdVT == MVT::i128) {
+LoadSDNode *LD = cast(N);
+if (!LD->isSimple() || !ISD::isNormalLoad(LD))
+  return SDValue();
+
+// Scan through all users.
+SmallVector, 2> Users;
+int UsedElements = 0;
+for (SDNode::use_iterator UI = LD->use_begin(), UIEnd = LD->use_end();
+ UI != UIEnd; ++UI) {
+  // Skip the uses of the chain.
+  if (UI.getUse().getResNo() != 0)
+continue;
+
+  // Verify every user is a TRUNCATE to i64 of the low or high half ...
+  SDNode *User = *UI;
+  int Index = 1;
+  if (User->getOpcode() == ISD::SRL &&
+  User->getOperand(1).getOpcode() == ISD::Constant &&
+  cast(User->getOperand(1))->getZExtValue() == 64 &&
+  User->hasOneUse()) {
+User = *User->use_begin();
+Index = 0;
+  }
+  if (User->getOpcode() != ISD::TRUNCATE ||
+  User->getValueType(0) != MVT::i64)
+return SDValue();
+
+  // ... and no half is extracted twice.
+  if (UsedElements & (1 << Index))
+return SDValue();
+
+  UsedElements |= 1 << Index;
+  Users.push_back(std::make_pair(User, Index));
+}
+
+// Rewrite each extraction as an independent load.
+SmallVector ArgChains;
+for (auto UserAndIndex : Users) {
+  SDNode *User = UserAndIndex.first;
+  unsigned Offset = User->getValueType(0).getStoreSize() * 
UserAndIndex.second;
+  SDValue Ptr =

JonPsson1 wrote:

Offset = 8 * ...   ?

https://github.com/llvm/llvm-project/pull/74625
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [llvm] [SystemZ] Support i128 as legal type in VRs (PR #74625)

2023-12-13 Thread Jonas Paulsson via cfe-commits


@@ -2918,16 +3049,17 @@ static Comparison getCmp(SelectionDAG , SDValue 
CmpOp0, SDValue CmpOp1,
  bool IsSignaling = false) {
   if (CmpOp1.getOpcode() == ISD::Constant) {
 assert(!Chain);
-uint64_t Constant = cast(CmpOp1)->getZExtValue();
 unsigned Opcode, CCValid;

JonPsson1 wrote:

Was 'Constant = ...' removed on purpose? (I would think that normally it would 
be factored out like it was, but maybe it doesn't matter.)

https://github.com/llvm/llvm-project/pull/74625
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [llvm] [SystemZ] Support i128 as legal type in VRs (PR #74625)

2023-12-13 Thread Jonas Paulsson via cfe-commits


@@ -2772,6 +2837,27 @@ static unsigned getTestUnderMaskCond(unsigned BitSize, 
unsigned CCMask,
 // Update the arguments with the TM version if so.
 static void adjustForTestUnderMask(SelectionDAG , const SDLoc ,
Comparison ) {
+  // Use VECTOR TEST UNDER MASK for i128 operations.
+  if (C.Op0.getValueType() == MVT::i128) {

JonPsson1 wrote:

Beneficial? VTM is 3 cycles, while VN + VCEQ = 2...

https://github.com/llvm/llvm-project/pull/74625
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [llvm] [SystemZ] Support i128 as legal type in VRs (PR #74625)

2023-12-13 Thread Jonas Paulsson via cfe-commits


@@ -1466,7 +1509,15 @@ static SDValue convertValVTToLocVT(SelectionDAG , 
const SDLoc ,
 static SDValue lowerI128ToGR128(SelectionDAG , SDValue In) {
   SDLoc DL(In);
   SDValue Lo, Hi;
-  std::tie(Lo, Hi) = DAG.SplitScalar(In, DL, MVT::i64, MVT::i64);
+  if (DAG.getTargetLoweringInfo().isTypeLegal(MVT::i128)) {
+Lo = DAG.getNode(ISD::TRUNCATE, DL, MVT::i64, In);

JonPsson1 wrote:

All tests pass even if I remove this entire clause...?

https://github.com/llvm/llvm-project/pull/74625
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [llvm] [SystemZ] Support i128 as legal type in VRs (PR #74625)

2023-12-13 Thread Jonas Paulsson via cfe-commits


@@ -1183,6 +1187,35 @@ void SystemZDAGToDAGISel::loadVectorConstant(
   SelectCode(Op.getNode());
 }
 
+SDNode *SystemZDAGToDAGISel::loadPoolVectorConstant(APInt Val, EVT VT, SDLoc 
DL) {
+  SDNode *ResNode;
+  assert (VT.getSizeInBits() == 128);
+
+  SDValue CP = CurDAG->getTargetConstantPool(
+  ConstantInt::get(Type::getInt128Ty(*CurDAG->getContext()), Val),
+  TLI->getPointerTy(CurDAG->getDataLayout()));
+
+  EVT PtrVT = CP.getValueType();
+  SDValue Ops[] = {
+SDValue(CurDAG->getMachineNode(SystemZ::LARL, DL, PtrVT, CP), 0),
+CurDAG->getTargetConstant(0, DL, PtrVT),
+CurDAG->getRegister(0, PtrVT),
+CurDAG->getEntryNode()
+  };
+  ResNode = CurDAG->getMachineNode(SystemZ::VL, DL, VT, MVT::Other, Ops);
+
+  // Annotate ResNode with memory operand information so that MachineInstr
+  // queries work properly. This e.g. gives the register allocation the
+  // required information for rematerialization.
+  MachineFunction& MF = CurDAG->getMachineFunction();
+  MachineMemOperand *MemOp =
+  MF.getMachineMemOperand(MachinePointerInfo::getConstantPool(MF),
+  MachineMemOperand::MOLoad, 8, Align(8));
+

JonPsson1 wrote:

I think the third argument is the size in bytes, and it should be 16.

https://github.com/llvm/llvm-project/pull/74625
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [llvm] [SystemZ] Support i128 as legal type in VRs (PR #74625)

2023-12-06 Thread via cfe-commits

github-actions[bot] wrote:




:warning: C/C++ code formatter, clang-format found issues in your code. 
:warning:



You can test this locally with the following command:


```bash
git-clang-format --diff 384f916ea899ea6ac9af4a3fb9d0a5b03937acfe 
a3e9b3a923df055c5ebe9ec045ed99c909a16dd2 -- clang/lib/Headers/vecintrin.h 
clang/test/CodeGen/SystemZ/builtins-systemz-error2.c 
clang/test/CodeGen/SystemZ/builtins-systemz-vector.c 
clang/test/CodeGen/SystemZ/builtins-systemz-vector2-error.c 
clang/test/CodeGen/SystemZ/builtins-systemz-vector2.c 
clang/test/CodeGen/SystemZ/builtins-systemz-zvector.c 
clang/test/CodeGen/SystemZ/builtins-systemz-zvector2-error.c 
clang/test/CodeGen/SystemZ/builtins-systemz-zvector2.c 
llvm/lib/Target/SystemZ/SystemZISelDAGToDAG.cpp 
llvm/lib/Target/SystemZ/SystemZISelLowering.cpp 
llvm/lib/Target/SystemZ/SystemZISelLowering.h
```





View the diff from clang-format here.


``diff
diff --git a/clang/lib/Headers/vecintrin.h b/clang/lib/Headers/vecintrin.h
index 1f51e32c0d..886770b262 100644
--- a/clang/lib/Headers/vecintrin.h
+++ b/clang/lib/Headers/vecintrin.h
@@ -8388,8 +8388,8 @@ vec_addc(__vector unsigned long long __a, __vector 
unsigned long long __b) {
 
 static inline __ATTRS_ai __vector unsigned char
 vec_addc_u128(__vector unsigned char __a, __vector unsigned char __b) {
-  return (__vector unsigned char)
- __builtin_s390_vaccq((unsigned __int128)__a, (unsigned __int128)__b);
+  return (__vector unsigned char)__builtin_s390_vaccq((unsigned __int128)__a,
+  (unsigned __int128)__b);
 }
 
 /*-- vec_adde_u128 --*/
@@ -8397,9 +8397,8 @@ vec_addc_u128(__vector unsigned char __a, __vector 
unsigned char __b) {
 static inline __ATTRS_ai __vector unsigned char
 vec_adde_u128(__vector unsigned char __a, __vector unsigned char __b,
   __vector unsigned char __c) {
-  return (__vector unsigned char)
- __builtin_s390_vacq((unsigned __int128)__a, (unsigned __int128)__b,
- (unsigned __int128)__c);
+  return (__vector unsigned char)__builtin_s390_vacq(
+  (unsigned __int128)__a, (unsigned __int128)__b, (unsigned __int128)__c);
 }
 
 /*-- vec_addec_u128 -*/
@@ -8407,9 +8406,8 @@ vec_adde_u128(__vector unsigned char __a, __vector 
unsigned char __b,
 static inline __ATTRS_ai __vector unsigned char
 vec_addec_u128(__vector unsigned char __a, __vector unsigned char __b,
__vector unsigned char __c) {
-  return (__vector unsigned char)
- __builtin_s390_vacccq((unsigned __int128)__a, (unsigned __int128)__b,
-   (unsigned __int128)__c);
+  return (__vector unsigned char)__builtin_s390_vacccq(
+  (unsigned __int128)__a, (unsigned __int128)__b, (unsigned __int128)__c);
 }
 
 /*-- vec_avg */
@@ -8512,8 +8510,8 @@ static inline __ATTRS_o_ai __vector unsigned char
 vec_gfmsum_accum_128(__vector unsigned long long __a,
  __vector unsigned long long __b,
  __vector unsigned char __c) {
-  return (__vector unsigned char)
- __builtin_s390_vgfmag(__a, __b, (unsigned __int128)__c);
+  return (__vector unsigned char)__builtin_s390_vgfmag(__a, __b,
+   (unsigned __int128)__c);
 }
 
 /*-- vec_mladd --*/
@@ -8805,12 +8803,11 @@ vec_mulo(__vector unsigned int __a, __vector unsigned 
int __b) {
 #if __ARCH__ >= 12
 extern __ATTRS_o __vector unsigned char
 vec_msum_u128(__vector unsigned long long __a, __vector unsigned long long __b,
-  __vector unsigned char __c, int __d)
-  __constant_range(__d, 0, 15);
+  __vector unsigned char __c, int __d) __constant_range(__d, 0, 
15);
 
-#define vec_msum_u128(X, Y, Z, W) \
-  ((__typeof__((vec_msum_u128)((X), (Y), (Z), (W \
-   __builtin_s390_vmslg((X), (Y), (unsigned __int128)(Z), (W)))
+#define vec_msum_u128(X, Y, Z, W)  
\
+  ((__typeof__((vec_msum_u128)((X), (Y), (Z), (W__builtin_s390_vmslg(  
\
+  (X), (Y), (unsigned __int128)(Z), (W)))
 #endif
 
 /*-- vec_sub_u128 ---*/
@@ -8846,8 +8843,8 @@ vec_subc(__vector unsigned long long __a, __vector 
unsigned long long __b) {
 
 static inline __ATTRS_ai __vector unsigned char
 vec_subc_u128(__vector unsigned char __a, __vector unsigned char __b) {
-  return (__vector unsigned char)
- __builtin_s390_vscbiq((unsigned __int128)__a, (unsigned __int128)__b);
+  return (__vector unsigned char)__builtin_s390_vscbiq((unsigned __int128)__a,
+   (unsigned __int128)__b);
 }
 
 /*-- vec_sube_u128