Matthew Poremba has uploaded this change for review. ( https://gem5-review.googlesource.com/c/public/gem5/+/47106 )

Change subject: arch-vega: Add GLOBAL instruction format
......................................................................

arch-vega: Add GLOBAL instruction format

GLOBAL instructions are new in Vega and are essentially FLAT
instructions from GCN3 but guaranteed to go to global memory, whereas
FLAT can go to global or local memory. This adds the op encoding, flags,
and checks in the gpu-compute pipeline for supporting GLOBAL memory
instructions. Actual global memory instructions will be added in a
future patch.

Change-Id: I1db4a3742aeec62424189e54c38c59d6b1a8d3c1
---
M src/arch/amdgpu/vega/gpu_decoder.hh
M src/arch/amdgpu/vega/insts/op_encodings.cc
M src/arch/amdgpu/vega/insts/op_encodings.hh
M src/gpu-compute/GPUStaticInstFlags.py
M src/gpu-compute/compute_unit.cc
M src/gpu-compute/gpu_dyn_inst.cc
M src/gpu-compute/gpu_dyn_inst.hh
M src/gpu-compute/gpu_static_inst.hh
M src/gpu-compute/scoreboard_check_stage.cc
9 files changed, 224 insertions(+), 2 deletions(-)



diff --git a/src/arch/amdgpu/vega/gpu_decoder.hh b/src/arch/amdgpu/vega/gpu_decoder.hh
index 69954f8..31638d8 100644
--- a/src/arch/amdgpu/vega/gpu_decoder.hh
+++ b/src/arch/amdgpu/vega/gpu_decoder.hh
@@ -1648,6 +1648,11 @@
         unsigned int      VDST : 8;
     };

+    // GLOBAL is the same as FLAT in Vega. We use a typedef rather than using
+    // the FLAT format in instructions in case they diverge in the future.
+    typedef InFmt_FLAT InFmt_GLOBAL;
+    typedef InFmt_FLAT_1 InFmt_GLOBAL_1;
+
     struct InFmt_INST {
         unsigned int  ENCODING : 32;
     };
@@ -1904,6 +1909,8 @@
         InFmt_EXP_1         iFmt_EXP_1;
         InFmt_FLAT          iFmt_FLAT;
         InFmt_FLAT_1        iFmt_FLAT_1;
+        InFmt_GLOBAL        iFmt_GLOBAL;
+        InFmt_GLOBAL_1      iFmt_GLOBAL_1;
         InFmt_INST          iFmt_INST;
         InFmt_MIMG          iFmt_MIMG;
         InFmt_MIMG_1        iFmt_MIMG_1;
diff --git a/src/arch/amdgpu/vega/insts/op_encodings.cc b/src/arch/amdgpu/vega/insts/op_encodings.cc
index 1c25f6b..50c6e1a 100644
--- a/src/arch/amdgpu/vega/insts/op_encodings.cc
+++ b/src/arch/amdgpu/vega/insts/op_encodings.cc
@@ -1589,4 +1589,115 @@

         disassembly = dis_stream.str();
     }
+
+    // --- Inst_GLOBAL base class methods ---
+
+    Inst_GLOBAL::Inst_GLOBAL(InFmt_GLOBAL *iFmt, const std::string &opcode)
+        : VEGAGPUStaticInst(opcode)
+    {
+        setFlag(FlatGlobal);
+        // copy first instruction DWORD
+        instData = iFmt[0];
+        // copy second instruction DWORD
+        extData = ((InFmt_GLOBAL_1 *)iFmt)[1];
+        _srcLiteral = *reinterpret_cast<uint32_t*>(&iFmt[1]);
+
+        if (instData.GLC)
+            setFlag(GroupCoherent);
+
+        if (instData.SLC)
+            setFlag(SystemCoherent);
+    } // Inst_GLOBAL
+
+    Inst_GLOBAL::~Inst_GLOBAL()
+    {
+    } // ~Inst_GLOBAL
+
+    void
+    Inst_GLOBAL::initOperandInfo()
+    {
+        //3 formats:
+        // 1 dst + 2 src (load)
+        // 0 dst + 3 src (store)
+        // 1 dst + 3 src (atomic)
+        int opNum = 0;
+
+        // Needed because can't take addr of bitfield
+        int reg = 0;
+
+        if (getNumOperands() > 3)
+            assert(isAtomic());
+
+        reg = extData.ADDR;
+        srcOps.emplace_back(reg, getOperandSize(opNum), true,
+                              false, true, false);
+        opNum++;
+
+        if (numSrcRegOperands() == 2) {
+            reg = extData.SADDR;
+            // 0x7f (off) means the sgpr is not used. Don't read it
+            if (reg != 0x7f) {
+                srcOps.emplace_back(reg, getOperandSize(opNum), true,
+                                      true, false, false);
+            }
+            opNum++;
+        }
+
+        if (numSrcRegOperands() == 3) {
+            reg = extData.DATA;
+            srcOps.emplace_back(reg, getOperandSize(opNum), true,
+                                  false, true, false);
+            opNum++;
+
+            reg = extData.SADDR;
+            // 0x7f (off) means the sgpr is not used. Don't read it
+            if (reg != 0x7f) {
+                srcOps.emplace_back(reg, getOperandSize(opNum), true,
+                                      true, false, false);
+            }
+            opNum++;
+        }
+
+        if (numDstRegOperands()) {
+            reg = extData.VDST;
+            dstOps.emplace_back(reg, getOperandSize(opNum), false,
+                                  false, true, false);
+        }
+
+        reg = extData.SADDR;
+        if (reg != 0x7f) {
+            assert(srcOps.size() == numSrcRegOperands());
+        } else {
+            assert(srcOps.size() == numSrcRegOperands() - 1);
+        }
+        assert(dstOps.size() == numDstRegOperands());
+    }
+
+    int
+    Inst_GLOBAL::instSize() const
+    {
+        return 8;
+    } // instSize
+
+    void
+    Inst_GLOBAL::generateDisassembly()
+    {
+        std::stringstream dis_stream;
+        dis_stream << _opcode << " ";
+
+        if (isLoad())
+            dis_stream << "v" << extData.VDST << ", ";
+
+        dis_stream << "v[" << extData.ADDR << ":" << extData.ADDR + 1 << "]";
+
+        if (isStore())
+            dis_stream << ", v" << extData.DATA;
+
+        if (extData.SADDR == 0x7f)
+            dis_stream << ", off";
+        else
+            dis_stream << ", " << extData.SADDR; // Not sure about fmt here
+
+        disassembly = dis_stream.str();
+    }
 } // namespace VegaISA
diff --git a/src/arch/amdgpu/vega/insts/op_encodings.hh b/src/arch/amdgpu/vega/insts/op_encodings.hh
index 6952c1f..0317ef3 100644
--- a/src/arch/amdgpu/vega/insts/op_encodings.hh
+++ b/src/arch/amdgpu/vega/insts/op_encodings.hh
@@ -806,6 +806,99 @@
         // second instruction DWORD
         InFmt_FLAT_1 extData;
     }; // Inst_FLAT
+
+    class Inst_GLOBAL : public VEGAGPUStaticInst
+    {
+      public:
+        Inst_GLOBAL(InFmt_GLOBAL*, const std::string &opcode);
+        ~Inst_GLOBAL();
+
+        int instSize() const override;
+        void generateDisassembly() override;
+
+        void initOperandInfo() override;
+
+      protected:
+        template<typename T>
+        void
+        initMemRead(GPUDynInstPtr gpuDynInst)
+        {
+            initMemReqHelper<T, 1>(gpuDynInst, MemCmd::ReadReq);
+        }
+
+        template<int N>
+        void
+        initMemRead(GPUDynInstPtr gpuDynInst)
+        {
+            initMemReqHelper<VecElemU32, N>(gpuDynInst, MemCmd::ReadReq);
+        }
+
+        template<typename T>
+        void
+        initMemWrite(GPUDynInstPtr gpuDynInst)
+        {
+            initMemReqHelper<T, 1>(gpuDynInst, MemCmd::WriteReq);
+        }
+
+        template<int N>
+        void
+        initMemWrite(GPUDynInstPtr gpuDynInst)
+        {
+            initMemReqHelper<VecElemU32, N>(gpuDynInst, MemCmd::WriteReq);
+        }
+
+        template<typename T>
+        void
+        initAtomicAccess(GPUDynInstPtr gpuDynInst)
+        {
+            initMemReqHelper<T, 1>(gpuDynInst, MemCmd::SwapReq, true);
+        }
+
+        void
+        calcAddr32(GPUDynInstPtr gpuDynInst, ConstVecOperandU64 &vaddr,
+                   ConstScalarOperandU32 &saddr, ScalarRegU32 offset)
+        {
+            for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
+                if (gpuDynInst->exec_mask[lane]) {
+                    gpuDynInst->addr.at(lane) =
+                        (vaddr[lane] + saddr.rawData() + offset) & 0xffffffff;
+                }
+            }
+        }
+
+        void
+        calcAddr64(GPUDynInstPtr gpuDynInst, ConstVecOperandU64 &vaddr,
+                   ScalarRegU32 offset)
+        {
+            for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
+                if (gpuDynInst->exec_mask[lane]) {
+                    gpuDynInst->addr.at(lane) = vaddr[lane] + offset;
+                }
+            }
+        }
+
+        void
+        calcAddr(GPUDynInstPtr gpuDynInst, ConstVecOperandU64 &vaddr,
+                 ScalarRegU32 saddr, ScalarRegU32 offset)
+        {
+            // If saddr = 0x7f there is no scalar reg to read and address will
+            // be a 64-bit address. Otherwise, saddr is the reg index for a
+            // scalar reg used as the base address for a 32-bit address.
+            if (saddr == 0x7f) {
+                calcAddr64(gpuDynInst, vaddr, offset);
+            } else {
+                ConstScalarOperandU32 sbase(gpuDynInst, saddr);
+                sbase.read();
+
+                calcAddr32(gpuDynInst, vaddr, sbase, offset);
+            }
+        }
+
+        // first instruction DWORD
+        InFmt_GLOBAL instData;
+        // second instruction DWORD
+        InFmt_GLOBAL_1 extData;
+    }; // Inst_GLOBAL
 } // namespace VegaISA

 #endif // __ARCH_VEGA_INSTS_OP_ENCODINGS_HH__
diff --git a/src/gpu-compute/GPUStaticInstFlags.py b/src/gpu-compute/GPUStaticInstFlags.py
index bb3d7b9..b5632ed 100644
--- a/src/gpu-compute/GPUStaticInstFlags.py
+++ b/src/gpu-compute/GPUStaticInstFlags.py
@@ -58,6 +58,7 @@
         'MemSync',           # Synchronizing instruction
         'MemoryRef',         # References memory (load, store, or atomic)
         'Flat',              # Flat memory op
+        'FlatGlobal',        # Global memory op
         'Load',              # Reads from memory
         'Store',             # Writes to memory

diff --git a/src/gpu-compute/compute_unit.cc b/src/gpu-compute/compute_unit.cc
index d6fa2b4..b8ff750 100644
--- a/src/gpu-compute/compute_unit.cc
+++ b/src/gpu-compute/compute_unit.cc
@@ -1826,6 +1826,8 @@
             } else {
                 stats.flatVMemInsts++;
             }
+        } else if (gpuDynInst->isFlatGlobal()) {
+            stats.flatVMemInsts++;
         } else if (gpuDynInst->isLocalMem()) {
             stats.ldsNoFlatInsts++;
         } else if (gpuDynInst->isLoad()) {
diff --git a/src/gpu-compute/gpu_dyn_inst.cc b/src/gpu-compute/gpu_dyn_inst.cc
index ea64640..f251254 100644
--- a/src/gpu-compute/gpu_dyn_inst.cc
+++ b/src/gpu-compute/gpu_dyn_inst.cc
@@ -414,6 +414,12 @@
 }

 bool
+GPUDynInst::isFlatGlobal() const
+{
+    return _staticInst->isFlatGlobal();
+}
+
+bool
 GPUDynInst::isLoad() const
 {
     return _staticInst->isLoad();
diff --git a/src/gpu-compute/gpu_dyn_inst.hh b/src/gpu-compute/gpu_dyn_inst.hh
index b82a47c..15ddfc5 100644
--- a/src/gpu-compute/gpu_dyn_inst.hh
+++ b/src/gpu-compute/gpu_dyn_inst.hh
@@ -232,6 +232,7 @@
     bool isMemSync() const;
     bool isMemRef() const;
     bool isFlat() const;
+    bool isFlatGlobal() const;
     bool isLoad() const;
     bool isStore() const;

diff --git a/src/gpu-compute/gpu_static_inst.hh b/src/gpu-compute/gpu_static_inst.hh
index 6ce0392..491f68d 100644
--- a/src/gpu-compute/gpu_static_inst.hh
+++ b/src/gpu-compute/gpu_static_inst.hh
@@ -128,6 +128,7 @@
     bool isMemSync() const { return _flags[MemSync]; }
     bool isMemRef() const { return _flags[MemoryRef]; }
     bool isFlat() const { return _flags[Flat]; }
+    bool isFlatGlobal() const { return _flags[FlatGlobal]; }
     bool isLoad() const { return _flags[Load]; }
     bool isStore() const { return _flags[Store]; }

@@ -176,7 +177,7 @@
     {
         return _flags[MemoryRef] && (_flags[GlobalSegment] ||
                _flags[PrivateSegment] || _flags[ReadOnlySegment] ||
-               _flags[SpillSegment]);
+               _flags[SpillSegment] || _flags[FlatGlobal]);
     }

     bool
diff --git a/src/gpu-compute/scoreboard_check_stage.cc b/src/gpu-compute/scoreboard_check_stage.cc
index f0ff664..dc8013d 100644
--- a/src/gpu-compute/scoreboard_check_stage.cc
+++ b/src/gpu-compute/scoreboard_check_stage.cc
@@ -153,7 +153,7 @@
     if (!(ii->isBarrier() || ii->isNop() || ii->isReturn() || ii->isBranch() ||
          ii->isALU() || ii->isLoad() || ii->isStore() || ii->isAtomic() ||
          ii->isEndOfKernel() || ii->isMemSync() || ii->isFlat() ||
-         ii->isSleep())) {
+         ii->isFlatGlobal() || ii->isSleep())) {
         panic("next instruction: %s is of unknown type\n", ii->disassemble());
     }


--
To view, visit https://gem5-review.googlesource.com/c/public/gem5/+/47106
To unsubscribe, or for help writing mail filters, visit https://gem5-review.googlesource.com/settings

Gerrit-Project: public/gem5
Gerrit-Branch: develop
Gerrit-Change-Id: I1db4a3742aeec62424189e54c38c59d6b1a8d3c1
Gerrit-Change-Number: 47106
Gerrit-PatchSet: 1
Gerrit-Owner: Matthew Poremba <matthew.pore...@amd.com>
Gerrit-MessageType: newchange
_______________________________________________
gem5-dev mailing list -- gem5-dev@gem5.org
To unsubscribe send an email to gem5-dev-le...@gem5.org
%(web_page_url)slistinfo%(cgiext)s/%(_internal_name)s

Reply via email to