Matthew Poremba has uploaded this change for review. (
https://gem5-review.googlesource.com/c/public/gem5/+/66753?usp=email )
Change subject: arch-vega: Add DPP support for V_AND_B32
......................................................................
arch-vega: Add DPP support for V_AND_B32
A DPP variant of V_AND_B32 was found in rocPRIM. With this changeset the
unit tests for rocPRIM scan_inclusive are passing.
Change-Id: I5a65f2cf6b56ac13609b191e3b3dfeb55e630942
---
M src/arch/amdgpu/vega/insts/instructions.cc
1 file changed, 42 insertions(+), 4 deletions(-)
diff --git a/src/arch/amdgpu/vega/insts/instructions.cc b/src/arch/amdgpu/vega/insts/instructions.cc
index 5612f29..3570e32 100644
--- a/src/arch/amdgpu/vega/insts/instructions.cc
+++ b/src/arch/amdgpu/vega/insts/instructions.cc
@@ -6838,15 +6838,41 @@
{
Wavefront *wf = gpuDynInst->wavefront();
ConstVecOperandU32 src0(gpuDynInst, instData.SRC0);
- ConstVecOperandU32 src1(gpuDynInst, instData.VSRC1);
+ VecOperandU32 src1(gpuDynInst, instData.VSRC1);
VecOperandU32 vdst(gpuDynInst, instData.VDST);
src0.readSrc();
src1.read();
- for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
- if (wf->execMask(lane)) {
- vdst[lane] = src0[lane] & src1[lane];
+ if (isDPPInst()) {
+ VecOperandU32 src0_dpp(gpuDynInst, extData.iFmt_VOP_DPP.SRC0);
+ src0_dpp.read();
+
+ DPRINTF(VEGA, "Handling V_AND_B32 SRC DPP. SRC0: register v[%d], "
+ "DPP_CTRL: 0x%#x, SRC0_ABS: %d, SRC0_NEG: %d, "
+ "SRC1_ABS: %d, SRC1_NEG: %d, BC: %d, "
+ "BANK_MASK: %d, ROW_MASK: %d\n", extData.iFmt_VOP_DPP.SRC0,
+ extData.iFmt_VOP_DPP.DPP_CTRL,
+ extData.iFmt_VOP_DPP.SRC0_ABS,
+ extData.iFmt_VOP_DPP.SRC0_NEG,
+ extData.iFmt_VOP_DPP.SRC1_ABS,
+ extData.iFmt_VOP_DPP.SRC1_NEG,
+ extData.iFmt_VOP_DPP.BC,
+ extData.iFmt_VOP_DPP.BANK_MASK,
+ extData.iFmt_VOP_DPP.ROW_MASK);
+
+ processDPP(gpuDynInst, extData.iFmt_VOP_DPP, src0_dpp, src1);
+
+ for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
+ if (wf->execMask(lane)) {
+ vdst[lane] = src0_dpp[lane] & src1[lane];
+ }
+ }
+ } else {
+ for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
+ if (wf->execMask(lane)) {
+ vdst[lane] = src0[lane] & src1[lane];
+ }
}
}
--
To view, visit
https://gem5-review.googlesource.com/c/public/gem5/+/66753?usp=email
To unsubscribe, or for help writing mail filters, visit
https://gem5-review.googlesource.com/settings
Gerrit-Project: public/gem5
Gerrit-Branch: develop
Gerrit-Change-Id: I5a65f2cf6b56ac13609b191e3b3dfeb55e630942
Gerrit-Change-Number: 66753
Gerrit-PatchSet: 1
Gerrit-Owner: Matthew Poremba <matthew.pore...@amd.com>
Gerrit-MessageType: newchange
_______________________________________________
gem5-dev mailing list -- gem5-dev@gem5.org
To unsubscribe send an email to gem5-dev-le...@gem5.org