[clang] [llvm] [SystemZ] Support i128 as legal type in VRs (PR #74625)
uweigand wrote: Fixed merge conflicts, updated as described above, and fixed support for i128 parameters in the z/OS XPLINK ABI. https://github.com/llvm/llvm-project/pull/74625 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [llvm] [SystemZ] Support i128 as legal type in VRs (PR #74625)
uweigand wrote: > I have looked through the changes and made some comments inline. Thanks for the review! > Commenting: Fixed, thanks! > I happened to notice some cases with room for improvement: Good catch. I've not addressed these right now, this can be done as a follow-up. (The memory case is a bit tedious due to TableGen pattern limitations ...) > As a side question: I forgot why we can get CCMask '5' here: it seems it > should be CCMASK_CMP_NE ('6'), if we reverse the LOC operation..? No, 5 is correct here. Reversing XORs the mask with the set of valid bits, so we have 13 ^ 8 == 5. Looking at the VTM instruction, we have the following valid condition codes (making up the 13, i.e. 0, 1, or 3): 0 - Selected bits all zeros; or all mask bits zero 1 - Selected bits a mix of zeros and ones 2 - n/a 3 - Selected bits all ones The original mask is 8, i.e. condition code 0 ("selected bits all zeros"). Reversing this needs to check for condition codes 1 or 3, i.e. mask 5 ("selected bits a mix of zeros and ones" or "selected bits all ones"). https://github.com/llvm/llvm-project/pull/74625 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [llvm] [SystemZ] Support i128 as legal type in VRs (PR #74625)
@@ -1516,48 +1536,206 @@ let Predicates = [FeatureVector] in { } } +//===--===// +// Support for 128-bit integer values in vector registers +//===--===// + +// Loads and stores. +let Predicates = [FeatureVector] in { + def : Pat<(i128 (load bdxaddr12only:$addr)), +(VL bdxaddr12only:$addr)>; + def : Pat<(store (i128 VR128:$src), bdxaddr12only:$addr), +(VST VR128:$src, bdxaddr12only:$addr)>; +} + +// Full i128 move from GPR pair. +let Predicates = [FeatureVector] in + def : Pat<(i128 (or (zext GR64:$x), (shl (anyext GR64:$y), (i32 64)))), +(VLVGP GR64:$y, GR64:$x)>; + +// Any-extensions from GPR to i128. +let Predicates = [FeatureVector] in { + def : Pat<(i128 (anyext GR32:$x)), (VLVGP32 GR32:$x, GR32:$x)>; + def : Pat<(i128 (anyext GR64:$x)), (VLVGP GR64:$x, GR64:$x)>; +} + +// Any-extending loads into i128. +let Predicates = [FeatureVector] in { + def : Pat<(i128 (extloadi8 bdxaddr12only:$addr)), +(VLREPB bdxaddr12only:$addr)>; + def : Pat<(i128 (extloadi16 bdxaddr12only:$addr)), +(VLREPH bdxaddr12only:$addr)>; + def : Pat<(i128 (extloadi32 bdxaddr12only:$addr)), +(VLREPF bdxaddr12only:$addr)>; + def : Pat<(i128 (extloadi64 bdxaddr12only:$addr)), +(VLREPG bdxaddr12only:$addr)>; +} + +// Truncations from i128 to GPR. +let Predicates = [FeatureVector] in { + def : Pat<(i32 (trunc (i128 VR128:$vec))), +(EXTRACT_SUBREG (VLGVF VR128:$vec, zero_reg, 3), subreg_l32)>; + def : Pat<(i32 (trunc (srl (i128 VR128:$vec), (i32 32)))), +(EXTRACT_SUBREG (VLGVF VR128:$vec, zero_reg, 2), subreg_l32)>; + def : Pat<(i32 (trunc (srl (i128 VR128:$vec), (i32 64)))), +(EXTRACT_SUBREG (VLGVF VR128:$vec, zero_reg, 1), subreg_l32)>; + def : Pat<(i32 (trunc (srl (i128 VR128:$vec), (i32 96)))), +(EXTRACT_SUBREG (VLGVF VR128:$vec, zero_reg, 0), subreg_l32)>; + def : Pat<(i64 (trunc (i128 VR128:$vec))), +(VLGVG VR128:$vec, zero_reg, 1)>; + def : Pat<(i64 (trunc (srl (i128 VR128:$vec), (i32 64)))), +(VLGVG VR128:$vec, zero_reg, 0)>; +} + +// Truncating stores from i128. 
+let Predicates = [FeatureVector] in { + def : Pat<(truncstorei8 (i128 VR128:$x), bdxaddr12only:$addr), +(VSTEB VR128:$x, bdxaddr12only:$addr, 15)>; + def : Pat<(truncstorei16 (i128 VR128:$x), bdxaddr12only:$addr), +(VSTEH VR128:$x, bdxaddr12only:$addr, 7)>; + def : Pat<(truncstorei32 (i128 VR128:$x), bdxaddr12only:$addr), +(VSTEF VR128:$x, bdxaddr12only:$addr, 3)>; + def : Pat<(truncstorei32 (srl (i128 VR128:$x), (i32 32)), bdxaddr12only:$addr), +(VSTEF VR128:$x, bdxaddr12only:$addr, 2)>; + def : Pat<(truncstorei32 (srl (i128 VR128:$x), (i32 64)), bdxaddr12only:$addr), +(VSTEF VR128:$x, bdxaddr12only:$addr, 1)>; + def : Pat<(truncstorei32 (srl (i128 VR128:$x), (i32 96)), bdxaddr12only:$addr), +(VSTEF VR128:$x, bdxaddr12only:$addr, 0)>; + def : Pat<(truncstorei64 (i128 VR128:$x), bdxaddr12only:$addr), +(VSTEG VR128:$x, bdxaddr12only:$addr, 1)>; + def : Pat<(truncstorei64 (srl (i128 VR128:$x), (i32 64)), bdxaddr12only:$addr), +(VSTEG VR128:$x, bdxaddr12only:$addr, 0)>; +} + +// Zero-extensions from GPR to i128. +let Predicates = [FeatureVector] in { + def : Pat<(i128 (zext8 (anyext GR32:$x))), +(VLVGB (VGBM 0), GR32:$x, zero_reg, 15)>; + def : Pat<(i128 (zext16 (anyext GR32:$x))), +(VLVGH (VGBM 0), GR32:$x, zero_reg, 7)>; + def : Pat<(i128 (zext GR32:$x)), +(VLVGF (VGBM 0), GR32:$x, zero_reg, 3)>; + def : Pat<(i128 (zext GR64:$x)), +(VLVGG (VGBM 0), GR64:$x, zero_reg, 1)>; +} + +// Zero-extending loads into i128. +let Predicates = [FeatureVector] in { + def : Pat<(i128 (zextloadi8 bdxaddr12only:$addr)), +(VLEB (VGBM 0), bdxaddr12only:$addr, 15)>; + def : Pat<(i128 (zextloadi16 bdxaddr12only:$addr)), +(VLEH (VGBM 0), bdxaddr12only:$addr, 7)>; + def : Pat<(i128 (zextloadi32 bdxaddr12only:$addr)), +(VLEF (VGBM 0), bdxaddr12only:$addr, 3)>; + def : Pat<(i128 (zextloadi64 bdxaddr12only:$addr)), +(VLEG (VGBM 0), bdxaddr12only:$addr, 1)>; +} + +// In-register i128 sign-extensions. 
+let Predicates = [FeatureVector] in { + def : Pat<(i128 (sext_inreg VR128:$x, i8)), +(VSRAB (VREPB VR128:$x, 15), (VREPIB 120))>; + def : Pat<(i128 (sext_inreg VR128:$x, i16)), +(VSRAB (VREPH VR128:$x, 7), (VREPIB 112))>; + def : Pat<(i128 (sext_inreg VR128:$x, i32)), +(VSRAB (VREPF VR128:$x, 3), (VREPIB 96))>; + def : Pat<(i128 (sext_inreg VR128:$x, i64)), +(VSRAB (VREPG VR128:$x, 1), (VREPIB 64))>; +} + +// Sign-extensions from GPR to i128. +let Predicates = [FeatureVector] in { + def : Pat<(i128 (sext_inreg
[clang] [llvm] [SystemZ] Support i128 as legal type in VRs (PR #74625)
@@ -6481,6 +6737,71 @@ SDValue SystemZTargetLowering::combineLOAD( SDNode *N, DAGCombinerInfo &DCI) const { SelectionDAG &DAG = DCI.DAG; EVT LdVT = N->getValueType(0); + SDLoc DL(N); + + // Replace an i128 load that is used solely to move its value into GPRs + // by separate loads of both halves. + if (LdVT == MVT::i128) { +LoadSDNode *LD = cast<LoadSDNode>(N); +if (!LD->isSimple() || !ISD::isNormalLoad(LD)) + return SDValue(); + +// Scan through all users. +SmallVector<std::pair<SDNode *, int>, 2> Users; +int UsedElements = 0; +for (SDNode::use_iterator UI = LD->use_begin(), UIEnd = LD->use_end(); + UI != UIEnd; ++UI) { + // Skip the uses of the chain. + if (UI.getUse().getResNo() != 0) +continue; + + // Verify every user is a TRUNCATE to i64 of the low or high half ... + SDNode *User = *UI; + int Index = 1; + if (User->getOpcode() == ISD::SRL && + User->getOperand(1).getOpcode() == ISD::Constant && + cast<ConstantSDNode>(User->getOperand(1))->getZExtValue() == 64 && + User->hasOneUse()) { +User = *User->use_begin(); +Index = 0; + } + if (User->getOpcode() != ISD::TRUNCATE || + User->getValueType(0) != MVT::i64) +return SDValue(); + + // ... and no half is extracted twice. + if (UsedElements & (1 << Index)) +return SDValue(); + + UsedElements |= 1 << Index; + Users.push_back(std::make_pair(User, Index)); +} + +// Rewrite each extraction as an independent load. +SmallVector<SDValue, 2> ArgChains; +for (auto UserAndIndex : Users) { + SDNode *User = UserAndIndex.first; + unsigned Offset = User->getValueType(0).getStoreSize() * UserAndIndex.second; + SDValue Ptr = uweigand wrote: Well, `getStoreSize` should be 8 here, but it seems clearer to refer to it symbolically. https://github.com/llvm/llvm-project/pull/74625 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [llvm] [SystemZ] Support i128 as legal type in VRs (PR #74625)
@@ -2918,16 +3049,17 @@ static Comparison getCmp(SelectionDAG &DAG, SDValue CmpOp0, SDValue CmpOp1, bool IsSignaling = false) { if (CmpOp1.getOpcode() == ISD::Constant) { assert(!Chain); -uint64_t Constant = cast<ConstantSDNode>(CmpOp1)->getZExtValue(); unsigned Opcode, CCValid; uweigand wrote: `getZExtValue` will crash when the constant is 128-bit, which can now happen. Therefore I'm now only evaluating it in code paths where we know the constant cannot be 128-bit. https://github.com/llvm/llvm-project/pull/74625 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [llvm] [SystemZ] Support i128 as legal type in VRs (PR #74625)
@@ -2772,6 +2837,27 @@ static unsigned getTestUnderMaskCond(unsigned BitSize, unsigned CCMask, // Update the arguments with the TM version if so. static void adjustForTestUnderMask(SelectionDAG &DAG, const SDLoc &DL, Comparison &C) { + // Use VECTOR TEST UNDER MASK for i128 operations. + if (C.Op0.getValueType() == MVT::i128) { uweigand wrote: Well, we need `VCEQS` to set CC which takes an extra cycle, so both should be three cycles, but then VTM is shorter. https://github.com/llvm/llvm-project/pull/74625 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [llvm] [SystemZ] Support i128 as legal type in VRs (PR #74625)
@@ -1466,7 +1509,15 @@ static SDValue convertValVTToLocVT(SelectionDAG &DAG, const SDLoc &DL, static SDValue lowerI128ToGR128(SelectionDAG &DAG, SDValue In) { SDLoc DL(In); SDValue Lo, Hi; - std::tie(Lo, Hi) = DAG.SplitScalar(In, DL, MVT::i64, MVT::i64); + if (DAG.getTargetLoweringInfo().isTypeLegal(MVT::i128)) { +Lo = DAG.getNode(ISD::TRUNCATE, DL, MVT::i64, In); uweigand wrote: Hmmm. According to the specs, `EXTRACT_ELEMENT` (and therefore `SplitScalar`) is only supposed to be used on non-legal types, for values that will be broken up into multiple registers. However, the actual implementation doesn't appear to verify this, and expands `EXTRACT_ELEMENT` even for a legal `i128`, to the same code ... I would still prefer to not rely on that undocumented behavior, and use the explicit expansion. Also, this keeps the implementation symmetrical to `lowerGR128ToI128`, and may be slightly more efficient as it omits generating and then immediately eliminating the `EXTRACT_ELEMENT` nodes. https://github.com/llvm/llvm-project/pull/74625 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [llvm] [SystemZ] Support i128 as legal type in VRs (PR #74625)
@@ -1183,6 +1187,35 @@ void SystemZDAGToDAGISel::loadVectorConstant( SelectCode(Op.getNode()); } +SDNode *SystemZDAGToDAGISel::loadPoolVectorConstant(APInt Val, EVT VT, SDLoc DL) { + SDNode *ResNode; + assert (VT.getSizeInBits() == 128); + + SDValue CP = CurDAG->getTargetConstantPool( + ConstantInt::get(Type::getInt128Ty(*CurDAG->getContext()), Val), + TLI->getPointerTy(CurDAG->getDataLayout())); + + EVT PtrVT = CP.getValueType(); + SDValue Ops[] = { +SDValue(CurDAG->getMachineNode(SystemZ::LARL, DL, PtrVT, CP), 0), +CurDAG->getTargetConstant(0, DL, PtrVT), +CurDAG->getRegister(0, PtrVT), +CurDAG->getEntryNode() + }; + ResNode = CurDAG->getMachineNode(SystemZ::VL, DL, VT, MVT::Other, Ops); + + // Annotate ResNode with memory operand information so that MachineInstr + // queries work properly. This e.g. gives the register allocation the + // required information for rematerialization. + MachineFunction& MF = CurDAG->getMachineFunction(); + MachineMemOperand *MemOp = + MF.getMachineMemOperand(MachinePointerInfo::getConstantPool(MF), + MachineMemOperand::MOLoad, 8, Align(8)); + uweigand wrote: Fixed, thanks! https://github.com/llvm/llvm-project/pull/74625 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [llvm] [SystemZ] Support i128 as legal type in VRs (PR #74625)
JonPsson1 wrote: I have looked through the changes and made some comments inline. I built this with expensive checks enabled with all checks passing, and SPEC built successfully. Commenting: ``` @@ -293,7 +293,7 @@ SystemZTargetLowering::SystemZTargetLowering(const TargetMachine , setOperationAction(ISD::ATOMIC_LOAD_UMIN, MVT::i32, Custom); setOperationAction(ISD::ATOMIC_LOAD_UMAX, MVT::i32, Custom); - // Even though i128 is not a legal type, we still need to custom lower + // Even though i128 is not a legal type, we still need to custom lower **// Update comment** @@ -2144,7 +2145,7 @@ CanLowerReturn(CallingConv::ID CallConv, VerifyVectorTypes(Outs); // Special case that we cannot easily detect in RetCC_SystemZ since - // i128 is not a legal type. + // i128 is not a legal type. **// Update comment** +++ b/llvm/lib/Target/SystemZ/SystemZRegisterInfo.td @@ -124,7 +124,7 @@ defm GRX32 : SystemZRegClass<"GRX32", [i32], 32, R12L,R12H,R13L,R13H,R14L,R14H,R15L,R15H) ]>; -// The architecture doesn't really have any i128 support, so model the +// The architecture doesn't really have any i128 support, so model the **// Update comment** ``` I happened to notice some cases with room for improvement: ``` ; Scalar load + insertion + replication could be just a vlrepb. define i128 @fun0(i128 %a, i128 %sh) { ; CHECK-LABEL: fun0: ; CHECK: # %bb.0: ; CHECK-NEXT:l %r0, 12(%r4) // ; CHECK-NEXT:vlvgp %v1, %r0, %r0 // ; CHECK-NEXT:vl %v0, 0(%r3), 3 ; CHECK-NEXT:vrepb %v1, %v1, 15 // ===> vlrepb %v1, 12(%r4) ? ; CHECK-NEXT:vslb %v0, %v0, %v1 ; CHECK-NEXT:vsl %v0, %v0, %v1 ; CHECK-NEXT:vst %v0, 0(%r2), 3 ; CHECK-NEXT:br %r14 %res = shl i128 %a, %sh ret i128 %res } ; %v1 is the shift amount in a VR already. 
define i128 @fun1(i128 %a, i128 %sh, i128 %t) { ; CHECK-LABEL: fun1: ; CHECK: # %bb.0: ; CHECK-NEXT:vl %v1, 0(%r5), 3 ; CHECK-NEXT:vl %v2, 0(%r4), 3 ; CHECK-NEXT:vaq %v1, %v2, %v1 ; CHECK-NEXT:vlgvf %r0, %v1, 3 // ; CHECK-NEXT:vlvgp %v1, %r0, %r0 // ; CHECK-NEXT:vl %v0, 0(%r3), 3 ; CHECK-NEXT:vrepb %v1, %v1, 15// ===> vrepb %v1, %v1, 15 ; CHECK-NEXT:vslb %v0, %v0, %v1 ; CHECK-NEXT:vsl %v0, %v0, %v1 ; CHECK-NEXT:vst %v0, 0(%r2), 3 ; CHECK-NEXT:br %r14 %s = add i128 %sh, %t %res = shl i128 %a, %s ret i128 %res } ``` As a side question: I forgot why we can get CCMask '5' here: it seems it should be CCMASK_CMP_NE ('6'), if we reverse the LOC operation..? ``` VTM killed %5:vr128bit, killed %4:vr128bit, implicit-def $cc %6:gr64bit = LOCGR killed %3:gr64bit(tied-def 0), killed %2:gr64bit, 13, 8, implicit killed $cc # *** IR Dump After Two-Address instruction pass (twoaddressinstruction) ***: (SystemZInstrInfo::commuteInstructionImpl) VTM killed %5:vr128bit, killed %4:vr128bit, implicit-def $cc %6:gr64bit = COPY killed %2:gr64bit %6:gr64bit = LOCGR %6:gr64bit(tied-def 0), killed %3:gr64bit, 13, 5, implicit killed $cc ``` https://github.com/llvm/llvm-project/pull/74625 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [llvm] [SystemZ] Support i128 as legal type in VRs (PR #74625)
@@ -6481,6 +6737,71 @@ SDValue SystemZTargetLowering::combineLOAD( SDNode *N, DAGCombinerInfo &DCI) const { SelectionDAG &DAG = DCI.DAG; EVT LdVT = N->getValueType(0); + SDLoc DL(N); + + // Replace an i128 load that is used solely to move its value into GPRs + // by separate loads of both halves. + if (LdVT == MVT::i128) { +LoadSDNode *LD = cast<LoadSDNode>(N); +if (!LD->isSimple() || !ISD::isNormalLoad(LD)) + return SDValue(); + +// Scan through all users. +SmallVector<std::pair<SDNode *, int>, 2> Users; +int UsedElements = 0; +for (SDNode::use_iterator UI = LD->use_begin(), UIEnd = LD->use_end(); + UI != UIEnd; ++UI) { + // Skip the uses of the chain. + if (UI.getUse().getResNo() != 0) +continue; + + // Verify every user is a TRUNCATE to i64 of the low or high half ... + SDNode *User = *UI; + int Index = 1; + if (User->getOpcode() == ISD::SRL && + User->getOperand(1).getOpcode() == ISD::Constant && + cast<ConstantSDNode>(User->getOperand(1))->getZExtValue() == 64 && + User->hasOneUse()) { +User = *User->use_begin(); +Index = 0; + } + if (User->getOpcode() != ISD::TRUNCATE || + User->getValueType(0) != MVT::i64) +return SDValue(); + + // ... and no half is extracted twice. + if (UsedElements & (1 << Index)) +return SDValue(); + + UsedElements |= 1 << Index; + Users.push_back(std::make_pair(User, Index)); +} + +// Rewrite each extraction as an independent load. +SmallVector<SDValue, 2> ArgChains; +for (auto UserAndIndex : Users) { + SDNode *User = UserAndIndex.first; + unsigned Offset = User->getValueType(0).getStoreSize() * UserAndIndex.second; + SDValue Ptr = JonPsson1 wrote: Offset = 8 * ... ? https://github.com/llvm/llvm-project/pull/74625 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [llvm] [SystemZ] Support i128 as legal type in VRs (PR #74625)
@@ -2918,16 +3049,17 @@ static Comparison getCmp(SelectionDAG &DAG, SDValue CmpOp0, SDValue CmpOp1, bool IsSignaling = false) { if (CmpOp1.getOpcode() == ISD::Constant) { assert(!Chain); -uint64_t Constant = cast<ConstantSDNode>(CmpOp1)->getZExtValue(); unsigned Opcode, CCValid; JonPsson1 wrote: Was 'Constant = ...' removed on purpose? (I would think that normally it would be factored out like it was, but maybe doesn't matter) https://github.com/llvm/llvm-project/pull/74625 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [llvm] [SystemZ] Support i128 as legal type in VRs (PR #74625)
@@ -2772,6 +2837,27 @@ static unsigned getTestUnderMaskCond(unsigned BitSize, unsigned CCMask, // Update the arguments with the TM version if so. static void adjustForTestUnderMask(SelectionDAG &DAG, const SDLoc &DL, Comparison &C) { + // Use VECTOR TEST UNDER MASK for i128 operations. + if (C.Op0.getValueType() == MVT::i128) { JonPsson1 wrote: Beneficial? VTM is 3 cycles, while VN + VCEQ = 2... https://github.com/llvm/llvm-project/pull/74625 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [llvm] [SystemZ] Support i128 as legal type in VRs (PR #74625)
@@ -1466,7 +1509,15 @@ static SDValue convertValVTToLocVT(SelectionDAG &DAG, const SDLoc &DL, static SDValue lowerI128ToGR128(SelectionDAG &DAG, SDValue In) { SDLoc DL(In); SDValue Lo, Hi; - std::tie(Lo, Hi) = DAG.SplitScalar(In, DL, MVT::i64, MVT::i64); + if (DAG.getTargetLoweringInfo().isTypeLegal(MVT::i128)) { +Lo = DAG.getNode(ISD::TRUNCATE, DL, MVT::i64, In); JonPsson1 wrote: All tests pass even if I remove this entire clause...? https://github.com/llvm/llvm-project/pull/74625 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [llvm] [SystemZ] Support i128 as legal type in VRs (PR #74625)
@@ -1183,6 +1187,35 @@ void SystemZDAGToDAGISel::loadVectorConstant( SelectCode(Op.getNode()); } +SDNode *SystemZDAGToDAGISel::loadPoolVectorConstant(APInt Val, EVT VT, SDLoc DL) { + SDNode *ResNode; + assert (VT.getSizeInBits() == 128); + + SDValue CP = CurDAG->getTargetConstantPool( + ConstantInt::get(Type::getInt128Ty(*CurDAG->getContext()), Val), + TLI->getPointerTy(CurDAG->getDataLayout())); + + EVT PtrVT = CP.getValueType(); + SDValue Ops[] = { +SDValue(CurDAG->getMachineNode(SystemZ::LARL, DL, PtrVT, CP), 0), +CurDAG->getTargetConstant(0, DL, PtrVT), +CurDAG->getRegister(0, PtrVT), +CurDAG->getEntryNode() + }; + ResNode = CurDAG->getMachineNode(SystemZ::VL, DL, VT, MVT::Other, Ops); + + // Annotate ResNode with memory operand information so that MachineInstr + // queries work properly. This e.g. gives the register allocation the + // required information for rematerialization. + MachineFunction& MF = CurDAG->getMachineFunction(); + MachineMemOperand *MemOp = + MF.getMachineMemOperand(MachinePointerInfo::getConstantPool(MF), + MachineMemOperand::MOLoad, 8, Align(8)); + JonPsson1 wrote: I think the third argument is the size in bytes, and it should be 16. https://github.com/llvm/llvm-project/pull/74625 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [llvm] [SystemZ] Support i128 as legal type in VRs (PR #74625)
github-actions[bot] wrote: :warning: C/C++ code formatter, clang-format found issues in your code. :warning: You can test this locally with the following command: ``bash git-clang-format --diff 384f916ea899ea6ac9af4a3fb9d0a5b03937acfe a3e9b3a923df055c5ebe9ec045ed99c909a16dd2 -- clang/lib/Headers/vecintrin.h clang/test/CodeGen/SystemZ/builtins-systemz-error2.c clang/test/CodeGen/SystemZ/builtins-systemz-vector.c clang/test/CodeGen/SystemZ/builtins-systemz-vector2-error.c clang/test/CodeGen/SystemZ/builtins-systemz-vector2.c clang/test/CodeGen/SystemZ/builtins-systemz-zvector.c clang/test/CodeGen/SystemZ/builtins-systemz-zvector2-error.c clang/test/CodeGen/SystemZ/builtins-systemz-zvector2.c llvm/lib/Target/SystemZ/SystemZISelDAGToDAG.cpp llvm/lib/Target/SystemZ/SystemZISelLowering.cpp llvm/lib/Target/SystemZ/SystemZISelLowering.h `` View the diff from clang-format here. ``diff diff --git a/clang/lib/Headers/vecintrin.h b/clang/lib/Headers/vecintrin.h index 1f51e32c0d..886770b262 100644 --- a/clang/lib/Headers/vecintrin.h +++ b/clang/lib/Headers/vecintrin.h @@ -8388,8 +8388,8 @@ vec_addc(__vector unsigned long long __a, __vector unsigned long long __b) { static inline __ATTRS_ai __vector unsigned char vec_addc_u128(__vector unsigned char __a, __vector unsigned char __b) { - return (__vector unsigned char) - __builtin_s390_vaccq((unsigned __int128)__a, (unsigned __int128)__b); + return (__vector unsigned char)__builtin_s390_vaccq((unsigned __int128)__a, + (unsigned __int128)__b); } /*-- vec_adde_u128 --*/ @@ -8397,9 +8397,8 @@ vec_addc_u128(__vector unsigned char __a, __vector unsigned char __b) { static inline __ATTRS_ai __vector unsigned char vec_adde_u128(__vector unsigned char __a, __vector unsigned char __b, __vector unsigned char __c) { - return (__vector unsigned char) - __builtin_s390_vacq((unsigned __int128)__a, (unsigned __int128)__b, - (unsigned __int128)__c); + return (__vector unsigned char)__builtin_s390_vacq( + (unsigned __int128)__a, (unsigned 
__int128)__b, (unsigned __int128)__c); } /*-- vec_addec_u128 -*/ @@ -8407,9 +8406,8 @@ vec_adde_u128(__vector unsigned char __a, __vector unsigned char __b, static inline __ATTRS_ai __vector unsigned char vec_addec_u128(__vector unsigned char __a, __vector unsigned char __b, __vector unsigned char __c) { - return (__vector unsigned char) - __builtin_s390_vacccq((unsigned __int128)__a, (unsigned __int128)__b, - (unsigned __int128)__c); + return (__vector unsigned char)__builtin_s390_vacccq( + (unsigned __int128)__a, (unsigned __int128)__b, (unsigned __int128)__c); } /*-- vec_avg */ @@ -8512,8 +8510,8 @@ static inline __ATTRS_o_ai __vector unsigned char vec_gfmsum_accum_128(__vector unsigned long long __a, __vector unsigned long long __b, __vector unsigned char __c) { - return (__vector unsigned char) - __builtin_s390_vgfmag(__a, __b, (unsigned __int128)__c); + return (__vector unsigned char)__builtin_s390_vgfmag(__a, __b, + (unsigned __int128)__c); } /*-- vec_mladd --*/ @@ -8805,12 +8803,11 @@ vec_mulo(__vector unsigned int __a, __vector unsigned int __b) { #if __ARCH__ >= 12 extern __ATTRS_o __vector unsigned char vec_msum_u128(__vector unsigned long long __a, __vector unsigned long long __b, - __vector unsigned char __c, int __d) - __constant_range(__d, 0, 15); + __vector unsigned char __c, int __d) __constant_range(__d, 0, 15); -#define vec_msum_u128(X, Y, Z, W) \ - ((__typeof__((vec_msum_u128)((X), (Y), (Z), (W \ - __builtin_s390_vmslg((X), (Y), (unsigned __int128)(Z), (W))) +#define vec_msum_u128(X, Y, Z, W) \ + ((__typeof__((vec_msum_u128)((X), (Y), (Z), (W__builtin_s390_vmslg( \ + (X), (Y), (unsigned __int128)(Z), (W))) #endif /*-- vec_sub_u128 ---*/ @@ -8846,8 +8843,8 @@ vec_subc(__vector unsigned long long __a, __vector unsigned long long __b) { static inline __ATTRS_ai __vector unsigned char vec_subc_u128(__vector unsigned char __a, __vector unsigned char __b) { - return (__vector unsigned char) - __builtin_s390_vscbiq((unsigned __int128)__a, (unsigned 
__int128)__b); + return (__vector unsigned char)__builtin_s390_vscbiq((unsigned __int128)__a, + (unsigned __int128)__b); } /*-- vec_sube_u128