================
@@ -1626,35 +1644,66 @@ void
SITargetLowering::getTgtMemIntrinsic(SmallVectorImpl<IntrinsicInfo> &Infos,
   case Intrinsic::amdgcn_cluster_load_async_to_lds_b32:
   case Intrinsic::amdgcn_cluster_load_async_to_lds_b64:
   case Intrinsic::amdgcn_cluster_load_async_to_lds_b128: {
+    // Entry 0: Load from source (global/flat).
     Info.opc = ISD::INTRINSIC_VOID;
     Info.memVT = EVT::getIntegerVT(CI.getContext(), getIntrMemWidth(IntrID));
-    Info.ptrVal = CI.getArgOperand(1);
-    Info.flags |= MachineMemOperand::MOLoad | MachineMemOperand::MOStore;
+    Info.ptrVal = CI.getArgOperand(0); // Global pointer
+    Info.offset = cast<ConstantInt>(CI.getArgOperand(2))->getSExtValue();
+    Info.flags |= MachineMemOperand::MOLoad;
+    Infos.push_back(Info);
+
+    // Entry 1: Store to LDS (same offset).
+    Info.flags &= ~MachineMemOperand::MOLoad;
+    Info.flags |= MachineMemOperand::MOStore;
+    Info.ptrVal = CI.getArgOperand(1); // LDS pointer
     Infos.push_back(Info);
     return;
   }
   case Intrinsic::amdgcn_global_store_async_from_lds_b8:
   case Intrinsic::amdgcn_global_store_async_from_lds_b32:
   case Intrinsic::amdgcn_global_store_async_from_lds_b64:
   case Intrinsic::amdgcn_global_store_async_from_lds_b128: {
+    // Entry 0: Load from LDS.
     Info.opc = ISD::INTRINSIC_VOID;
     Info.memVT = EVT::getIntegerVT(CI.getContext(), getIntrMemWidth(IntrID));
-    Info.ptrVal = CI.getArgOperand(0);
-    Info.flags |= MachineMemOperand::MOLoad | MachineMemOperand::MOStore;
+    Info.ptrVal = CI.getArgOperand(1); // LDS pointer
+    Info.offset = cast<ConstantInt>(CI.getArgOperand(2))->getSExtValue();
+    Info.flags |= MachineMemOperand::MOLoad;
+    Infos.push_back(Info);
+
+    // Entry 1: Store to global (same offset).
+    Info.flags &= ~MachineMemOperand::MOLoad;
+    Info.flags |= MachineMemOperand::MOStore;
+    Info.ptrVal = CI.getArgOperand(0); // Global pointer
     Infos.push_back(Info);
     return;
   }
   case Intrinsic::amdgcn_load_to_lds:
   case Intrinsic::amdgcn_global_load_lds: {
-    Info.opc = ISD::INTRINSIC_VOID;
     unsigned Width = cast<ConstantInt>(CI.getArgOperand(2))->getZExtValue();
-    Info.memVT = EVT::getIntegerVT(CI.getContext(), Width * 8);
-    Info.ptrVal = CI.getArgOperand(1);
-    Info.flags |= MachineMemOperand::MOLoad | MachineMemOperand::MOStore;
     auto *Aux = cast<ConstantInt>(CI.getArgOperand(CI.arg_size() - 1));
-    if (Aux->getZExtValue() & AMDGPU::CPol::VOLATILE)
+    bool IsVolatile = Aux->getZExtValue() & AMDGPU::CPol::VOLATILE;
+
+    // Entry 0: Load from source (global/flat).
+    Info.opc = ISD::INTRINSIC_VOID;
+    Info.memVT = EVT::getIntegerVT(CI.getContext(), Width * 8);
+    Info.ptrVal = CI.getArgOperand(0); // Source pointer
+    Info.offset = cast<ConstantInt>(CI.getArgOperand(3))->getSExtValue();
+    Info.flags |= MachineMemOperand::MOLoad;
+    if (IsVolatile)
       Info.flags |= MachineMemOperand::MOVolatile;
     Infos.push_back(Info);
+
+    // Entry 1: Store to LDS.
+    // Same offset from the instruction, but an additional per-lane offset is
+    // added. Represent that using a wider memory type.
+    auto &ST = MF.getSubtarget<GCNSubtarget>();
+    Info.memVT =
+        EVT::getIntegerVT(CI.getContext(), Width * 8 * ST.getWavefrontSize());
----------------
arsenm wrote:
```suggestion
EVT::getIntegerVT(CI.getContext(), Width * 8 *
Subtarget->getWavefrontSize());
```
https://github.com/llvm/llvm-project/pull/175845
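
For reference, here is a rough sketch of how the tail of this case could read with the suggestion applied. This is an illustration, not the PR's actual code: the rest of Entry 1 is cut off at the review anchor above, so the pointer/flag/push lines below simply mirror the cluster_load_async_to_lds case earlier in the hunk, and `Subtarget` is assumed to be SITargetLowering's cached GCNSubtarget pointer (which the suggestion itself relies on), making the MF.getSubtarget<GCNSubtarget>() lookup and the local `ST` unnecessary:

```cpp
    // Entry 1: Store to LDS.
    // Same offset from the instruction, but an additional per-lane offset is
    // added. Represent that using a wider memory type.
    // (Sketch only: the lines below are modeled on the cluster_load case
    // earlier in the hunk, not taken from the PR.)
    Info.memVT = EVT::getIntegerVT(CI.getContext(),
                                   Width * 8 * Subtarget->getWavefrontSize());
    Info.flags &= ~MachineMemOperand::MOLoad;
    Info.flags |= MachineMemOperand::MOStore;
    Info.ptrVal = CI.getArgOperand(1); // LDS pointer
    // MOVolatile, if set for Entry 0, carries over because Info is reused.
    Infos.push_back(Info);
```

Either form resolves to the same GCNSubtarget at run time; the suggestion just avoids re-deriving it from MF inside the switch and matches how most of SITargetLowering already queries subtarget features. As a concrete data point for the wider memVT: a 4-byte load on a wave64 subtarget gets a 4 * 8 * 64 = 2048-bit LDS entry, conservatively covering the per-lane destinations across the wave.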