[gem5-dev] Change in gem5/gem5[release-staging-v21-1]: arch-gcn3: Implement LDS accesses in Flat instructions

2021-07-26 Thread Matt Sinclair (Gerrit) via gem5-dev
Matt Sinclair has submitted this change. ( https://gem5-review.googlesource.com/c/public/gem5/+/48343 )


Change subject: arch-gcn3: Implement LDS accesses in Flat instructions
......................................................................

arch-gcn3: Implement LDS accesses in Flat instructions

Add support for LDS accesses by allowing Flat instructions to dispatch
into the local memory pipeline if the requested address is in the group
aperture.

This requires implementing LDS accesses in the Flat initMemRead/Write
functions, in a similar fashion to the DS functions of the same name.

Because we now can potentially dispatch to the local memory pipeline,
this change also adds a check to regain any tokens we requested as a
flat instruction.

Change-Id: Id26191f7ee43291a5e5ca5f39af06af981ec23ab
Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/48343
Reviewed-by: Matt Sinclair 
Reviewed-by: Matthew Poremba 
Maintainer: Matt Sinclair 
Tested-by: kokoro 
---
M src/arch/amdgpu/gcn3/insts/instructions.cc
M src/arch/amdgpu/gcn3/insts/op_encodings.hh
M src/gpu-compute/gpu_dyn_inst.cc
M src/gpu-compute/local_memory_pipeline.cc
4 files changed, 184 insertions(+), 32 deletions(-)

Approvals:
  Matthew Poremba: Looks good to me, approved
  Matt Sinclair: Looks good to me, but someone else must approve; Looks good to me, approved

  kokoro: Regressions pass



diff --git a/src/arch/amdgpu/gcn3/insts/instructions.cc b/src/arch/amdgpu/gcn3/insts/instructions.cc

index 79af7ac..65d008b 100644
--- a/src/arch/amdgpu/gcn3/insts/instructions.cc
+++ b/src/arch/amdgpu/gcn3/insts/instructions.cc
@@ -36314,7 +36314,7 @@
 gpuDynInst->computeUnit()->globalMemoryPipe.
 issueRequest(gpuDynInst);
 } else {
-fatal("Non global flat instructions not implemented yet.\n");
+fatal("Unsupported scope for flat instruction.\n");
 }
 }

@@ -36363,7 +36363,7 @@
 gpuDynInst->computeUnit()->globalMemoryPipe.
 issueRequest(gpuDynInst);
 } else {
-fatal("Non global flat instructions not implemented yet.\n");
+fatal("Unsupported scope for flat instruction.\n");
 }
 }
 void
@@ -39384,8 +39384,11 @@
 if (gpuDynInst->executedAs() == enums::SC_GLOBAL) {
 gpuDynInst->computeUnit()->globalMemoryPipe
 .issueRequest(gpuDynInst);
+} else if (gpuDynInst->executedAs() == enums::SC_GROUP) {
+gpuDynInst->computeUnit()->localMemoryPipe
+.issueRequest(gpuDynInst);
 } else {
-fatal("Non global flat instructions not implemented yet.\n");
+fatal("Unsupported scope for flat instruction.\n");
 }
 } // execute

@@ -39448,8 +39451,11 @@
 if (gpuDynInst->executedAs() == enums::SC_GLOBAL) {
 gpuDynInst->computeUnit()->globalMemoryPipe
 .issueRequest(gpuDynInst);
+} else if (gpuDynInst->executedAs() == enums::SC_GROUP) {
+gpuDynInst->computeUnit()->localMemoryPipe
+.issueRequest(gpuDynInst);
 } else {
-fatal("Non global flat instructions not implemented yet.\n");
+fatal("Unsupported scope for flat instruction.\n");
 }
 }

@@ -39511,8 +39517,11 @@
 if (gpuDynInst->executedAs() == enums::SC_GLOBAL) {
 gpuDynInst->computeUnit()->globalMemoryPipe
 .issueRequest(gpuDynInst);
+} else if (gpuDynInst->executedAs() == enums::SC_GROUP) {
+gpuDynInst->computeUnit()->localMemoryPipe
+.issueRequest(gpuDynInst);
 } else {
-fatal("Non global flat instructions not implemented yet.\n");
+fatal("Unsupported scope for flat instruction.\n");
 }
 }

@@ -39603,8 +39612,11 @@
 if (gpuDynInst->executedAs() == enums::SC_GLOBAL) {
 gpuDynInst->computeUnit()->globalMemoryPipe
 .issueRequest(gpuDynInst);
+} else if (gpuDynInst->executedAs() == enums::SC_GROUP) {
+gpuDynInst->computeUnit()->localMemoryPipe
+.issueRequest(gpuDynInst);
 } else {
-fatal("Non global flat instructions not implemented yet.\n");
+fatal("Unsupported scope for flat instruction.\n");
 }
 }

@@ -39667,8 +39679,11 @@
 if (gpuDynInst->executedAs() == enums::SC_GLOBAL) {
 gpuDynInst->computeUnit()->globalMemoryPipe
 .issueRequest(gpuDynInst);
+} else if (gpuDynInst->executedAs() == enums::SC_GROUP) {
+gpuDynInst->computeUnit()->localMemoryPipe
+.issueRequest(gpuDynInst);
 } else {
-fatal("Non global flat instructions not implemented yet.\n");
+fatal("Unsupported scope for flat instruction.\n");
 }
 }

@@ -39731,8 +39746,11 @@
 if (gpuDynInst->executedAs() == enums::SC_GLOBAL) {
  

[gem5-dev] Change in gem5/gem5[release-staging-v21-1]: arch-gcn3: Implement LDS accesses in Flat instructions

2021-07-20 Thread Kyle Roarty (Gerrit) via gem5-dev
Kyle Roarty has uploaded this change for review. ( https://gem5-review.googlesource.com/c/public/gem5/+/48343 )



Change subject: arch-gcn3: Implement LDS accesses in Flat instructions
......................................................................

arch-gcn3: Implement LDS accesses in Flat instructions

Add support for LDS accesses by allowing Flat instructions to dispatch
into the local memory pipeline if the requested address is in the group
aperture.

This requires implementing LDS accesses in the Flat initMemRead/Write
functions, in a similar fashion to the DS functions of the same name.

Because we now can potentially dispatch to the local memory pipeline,
this change also adds a check to regain any tokens we requested as a
flat instruction.

Change-Id: Id26191f7ee43291a5e5ca5f39af06af981ec23ab
---
M src/arch/amdgpu/gcn3/insts/instructions.cc
M src/arch/amdgpu/gcn3/insts/op_encodings.hh
M src/gpu-compute/gpu_dyn_inst.cc
M src/gpu-compute/local_memory_pipeline.cc
4 files changed, 156 insertions(+), 6 deletions(-)



diff --git a/src/arch/amdgpu/gcn3/insts/instructions.cc b/src/arch/amdgpu/gcn3/insts/instructions.cc

index 79af7ac..95af790 100644
--- a/src/arch/amdgpu/gcn3/insts/instructions.cc
+++ b/src/arch/amdgpu/gcn3/insts/instructions.cc
@@ -39384,6 +39384,9 @@
 if (gpuDynInst->executedAs() == enums::SC_GLOBAL) {
 gpuDynInst->computeUnit()->globalMemoryPipe
 .issueRequest(gpuDynInst);
+} else if (gpuDynInst->executedAs() == enums::SC_GROUP) {
+gpuDynInst->computeUnit()->localMemoryPipe
+.issueRequest(gpuDynInst);
 } else {
 fatal("Non global flat instructions not implemented yet.\n");
 }
@@ -39448,6 +39451,9 @@
 if (gpuDynInst->executedAs() == enums::SC_GLOBAL) {
 gpuDynInst->computeUnit()->globalMemoryPipe
 .issueRequest(gpuDynInst);
+} else if (gpuDynInst->executedAs() == enums::SC_GROUP) {
+gpuDynInst->computeUnit()->localMemoryPipe
+.issueRequest(gpuDynInst);
 } else {
 fatal("Non global flat instructions not implemented yet.\n");
 }
@@ -39511,6 +39517,9 @@
 if (gpuDynInst->executedAs() == enums::SC_GLOBAL) {
 gpuDynInst->computeUnit()->globalMemoryPipe
 .issueRequest(gpuDynInst);
+} else if (gpuDynInst->executedAs() == enums::SC_GROUP) {
+gpuDynInst->computeUnit()->localMemoryPipe
+.issueRequest(gpuDynInst);
 } else {
 fatal("Non global flat instructions not implemented yet.\n");
 }
@@ -39603,6 +39612,9 @@
 if (gpuDynInst->executedAs() == enums::SC_GLOBAL) {
 gpuDynInst->computeUnit()->globalMemoryPipe
 .issueRequest(gpuDynInst);
+} else if (gpuDynInst->executedAs() == enums::SC_GROUP) {
+gpuDynInst->computeUnit()->localMemoryPipe
+.issueRequest(gpuDynInst);
 } else {
 fatal("Non global flat instructions not implemented yet.\n");
 }
@@ -39667,6 +39679,9 @@
 if (gpuDynInst->executedAs() == enums::SC_GLOBAL) {
 gpuDynInst->computeUnit()->globalMemoryPipe
 .issueRequest(gpuDynInst);
+} else if (gpuDynInst->executedAs() == enums::SC_GROUP) {
+gpuDynInst->computeUnit()->localMemoryPipe
+.issueRequest(gpuDynInst);
 } else {
 fatal("Non global flat instructions not implemented yet.\n");
 }
@@ -39731,6 +39746,9 @@
 if (gpuDynInst->executedAs() == enums::SC_GLOBAL) {
 gpuDynInst->computeUnit()->globalMemoryPipe
 .issueRequest(gpuDynInst);
+} else if (gpuDynInst->executedAs() == enums::SC_GROUP) {
+gpuDynInst->computeUnit()->localMemoryPipe
+.issueRequest(gpuDynInst);
 } else {
 fatal("Non global flat instructions not implemented yet.\n");
 }
@@ -39804,6 +39822,9 @@
 if (gpuDynInst->executedAs() == enums::SC_GLOBAL) {
 gpuDynInst->computeUnit()->globalMemoryPipe
 .issueRequest(gpuDynInst);
+} else if (gpuDynInst->executedAs() == enums::SC_GROUP) {
+gpuDynInst->computeUnit()->localMemoryPipe
+.issueRequest(gpuDynInst);
 } else {
 fatal("Non global flat instructions not implemented yet.\n");
 }
@@ -39889,6 +39910,9 @@
 if (gpuDynInst->executedAs() == enums::SC_GLOBAL) {
 gpuDynInst->computeUnit()->globalMemoryPipe
 .issueRequest(gpuDynInst);
+} else if (gpuDynInst->executedAs() == enums::SC_GROUP) {
+gpuDynInst->computeUnit()->localMemoryPipe
+.issueRequest(gpuDynInst);
 } else {
 fatal("Non global flat instructions not implemented yet.\n");
 }
@@ -39952,6 +39976,9 @@
 if (gpuDynInst->executedAs()