================
@@ -422,6 +433,33 @@ bool AMDGPURewriteAGPRCopyMFMAImpl::tryFoldCopiesFromAGPR(
return MadeChange;
}
+unsigned
+AMDGPURewriteAGPRCopyMFMAImpl::getSubRegFromReload(MachineInstr &MI,
+ Register Reg) const {
+ unsigned NumRegs = TRI.getRegSizeInBits(*MRI.getRegClass(Reg)) / 32;
+ unsigned SubReg = 0;
+ // SubReg accesses for the tuple registers are of interest here.
+  // Note: We don't support 16-bit subreg reloads. If that assumption
+  // changes in the future, this function should be revised.
+ if (NumRegs == 1)
+ return SubReg;
+
+ unsigned NumSpilledRegs = TRI.getNumSubRegsForSpillOp(MI);
+ // Skip if the entire tuple is reloaded.
+ if (NumRegs == NumSpilledRegs)
+ return SubReg;
+
+ // Construct the covering lanes for the reloaded portion.
+ unsigned SubRegIdx =
+ TII.getNamedOperand(MI, AMDGPU::OpName::offset)->getImm() / 4;
+ // Subreg lane masks are maintained in terms of regunits and each 32-bit
+ // register consists of two regunits.
+ uint64_t Lanes = (1ULL << NumSpilledRegs * 2) - 1;
+ LaneBitmask CoveringLanes = LaneBitmask(Lanes << SubRegIdx * 2);
----------------
arsenm wrote:
You shouldn't be making LaneBitmask layout assumptions like this. You are not
supposed to use the value directly; only perform overlap checks.
https://github.com/llvm/llvm-project/pull/174998
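
For illustration, the pattern the review points toward is to treat LaneBitmask
as opaque: build the mask from the target's sub-register indices and query it
only through overlap checks. A minimal sketch, assuming the reloaded slice maps
onto consecutive 32-bit sub0+i indices (a common AMDGPU convention) and using a
hypothetical OtherSubReg as the index being tested:

  // Build the covering mask from sub-register indices rather than by
  // shifting raw bits, so no LaneBitmask layout is assumed.
  LaneBitmask CoveringLanes = LaneBitmask::getNone();
  for (unsigned I = 0; I != NumSpilledRegs; ++I)
    CoveringLanes |= TRI.getSubRegIndexLaneMask(AMDGPU::sub0 + SubRegIdx + I);

  // Consumers only ask whether lanes overlap; they never inspect the value.
  if ((CoveringLanes & TRI.getSubRegIndexLaneMask(OtherSubReg)).any()) {
    // The reloaded slice overlaps OtherSubReg.
  }

getSubRegIndexLaneMask and the LaneBitmask overlap operators are the supported
interface here; the raw encoding of the mask stays an implementation detail of
the register-info tables.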