Matthew Poremba has uploaded this change for review. ( https://gem5-review.googlesource.com/c/public/gem5/+/57649 )

Change subject: configs: Add GPU TLBs for GPU full system
......................................................................

configs: Add GPU TLBs for GPU full system

Add the constructors for the Vega TLB and TLB coalescers in the python
config. These need a pointer to the gpu device which is added as a
parameter. The last level TLB's page table walker is added as a dma
device to the system so that the port is connected to the GPU device
memory in the disjoint VIPER configuration file.

A portion of the the GPUFS system configuration file needs to be
shuffled around so that the shader CPU is created before the TLBs are
created so they can be connected to the shader's ports. This means the
real CPU init code needs to break once reaching the shader. The vendor
string must also be set after createThreads is called on real CPUs.

Change-Id: I36ed93db262b21427f3eaf4904a1c897a2894835
---
M configs/common/GPUTLBConfig.py
M configs/example/gpufs/system/system.py
2 files changed, 96 insertions(+), 32 deletions(-)



diff --git a/configs/common/GPUTLBConfig.py b/configs/common/GPUTLBConfig.py
index f6aaccf..0f67de0 100644
--- a/configs/common/GPUTLBConfig.py
+++ b/configs/common/GPUTLBConfig.py
@@ -34,41 +34,66 @@
 import m5
 from m5.objects import *

-def TLB_constructor(level):
+def TLB_constructor(options, level, gpu_ctrl=None):

-    constructor_call = "X86GPUTLB(size = options.L%(level)dTLBentries, \
-            assoc = options.L%(level)dTLBassoc, \
-            hitLatency = options.L%(level)dAccessLatency,\
-            missLatency2 = options.L%(level)dMissLatency,\
-            maxOutstandingReqs = options.L%(level)dMaxOutstandingReqs,\
-            accessDistance = options.L%(level)dAccessDistanceStat,\
-            clk_domain = SrcClockDomain(\
-                clock = options.gpu_clock,\
-                voltage_domain = VoltageDomain(\
-                    voltage = options.gpu_voltage)))" % locals()
-    return constructor_call
-
-def Coalescer_constructor(level):
-
-    constructor_call = "TLBCoalescer(probesPerCycle = \
-                options.L%(level)dProbesPerCycle, \
-                coalescingWindow = options.L%(level)dCoalescingWindow,\
-                disableCoalescing = options.L%(level)dDisableCoalescing,\
+    if options.full_system:
+        constructor_call = "VegaGPUTLB(\
+                gpu_device = gpu_ctrl, \
+                size = options.L%(level)dTLBentries, \
+                assoc = options.L%(level)dTLBassoc, \
+                hitLatency = options.L%(level)dAccessLatency,\
+                missLatency1 = options.L%(level)dMissLatency,\
+                missLatency2 = options.L%(level)dMissLatency,\
+                maxOutstandingReqs = options.L%(level)dMaxOutstandingReqs,\
+                clk_domain = SrcClockDomain(\
+                    clock = options.gpu_clock,\
+                    voltage_domain = VoltageDomain(\
+                        voltage = options.gpu_voltage)))" % locals()
+    else:
+ constructor_call = "X86GPUTLB(size = options.L%(level)dTLBentries, \
+                assoc = options.L%(level)dTLBassoc, \
+                hitLatency = options.L%(level)dAccessLatency,\
+                missLatency2 = options.L%(level)dMissLatency,\
+                maxOutstandingReqs = options.L%(level)dMaxOutstandingReqs,\
+                accessDistance = options.L%(level)dAccessDistanceStat,\
                 clk_domain = SrcClockDomain(\
                     clock = options.gpu_clock,\
                     voltage_domain = VoltageDomain(\
                         voltage = options.gpu_voltage)))" % locals()
     return constructor_call

+def Coalescer_constructor(options, level):
+
+    if options.full_system:
+        constructor_call = "VegaTLBCoalescer(probesPerCycle = \
+            options.L%(level)dProbesPerCycle, \
+            tlb_level  = %(level)d ,\
+            coalescingWindow = options.L%(level)dCoalescingWindow,\
+            disableCoalescing = options.L%(level)dDisableCoalescing,\
+            clk_domain = SrcClockDomain(\
+                clock = options.gpu_clock,\
+                voltage_domain = VoltageDomain(\
+                    voltage = options.gpu_voltage)))" % locals()
+    else:
+        constructor_call = "TLBCoalescer(probesPerCycle = \
+            options.L%(level)dProbesPerCycle, \
+            coalescingWindow = options.L%(level)dCoalescingWindow,\
+            disableCoalescing = options.L%(level)dDisableCoalescing,\
+            clk_domain = SrcClockDomain(\
+                clock = options.gpu_clock,\
+                voltage_domain = VoltageDomain(\
+                    voltage = options.gpu_voltage)))" % locals()
+    return constructor_call
+
 def create_TLB_Coalescer(options, my_level, my_index, tlb_name,
-    coalescer_name):
+    coalescer_name, gpu_ctrl=None):
     # arguments: options, TLB level, number of private structures for this
     # Level, TLB name and  Coalescer name
     for i in range(my_index):
-        tlb_name.append(eval(TLB_constructor(my_level)))
-        coalescer_name.append(eval(Coalescer_constructor(my_level)))
+        tlb_name.append(eval(TLB_constructor(options, my_level, gpu_ctrl)))
+ coalescer_name.append(eval(Coalescer_constructor(options, my_level)))

-def config_tlb_hierarchy(options, system, shader_idx):
+def config_tlb_hierarchy(options, system, shader_idx, gpu_ctrl):
     n_cu = options.num_compute_units

     if options.TLB_config == "perLane":
@@ -121,7 +146,7 @@
                     options.L1TLBassoc = options.L1TLBentries
             # call the constructors for the TLB and the Coalescer
             create_TLB_Coalescer(options, level, TLB_index,\
-                TLB_array, Coalescer_array)
+                TLB_array, Coalescer_array, gpu_ctrl)

             system_TLB_name = TLB_type['name'] + '_tlb'
             system_Coalescer_name = TLB_type['name'] + '_coalescer'
@@ -198,8 +223,19 @@
                     system.l2_coalescer[0].cpu_side_ports[%d]' % \
                     (name, index, l2_coalescer_index))
             l2_coalescer_index += 1
+
     # L2 <-> L3
     system.l2_tlb[0].mem_side_ports[0] = \
         system.l3_coalescer[0].cpu_side_ports[0]

+    # L3 TLB Vega page table walker to memory for full system only
+    if options.full_system:
+        for TLB_type in L3:
+            name = TLB_type['name']
+            for index in range(TLB_type['width']):
+                print('system._dma_ports.append(system.%s_tlb[%d])' % \
+                        (name, index))
+ exec('system._dma_ports.append(system.%s_tlb[%d].walker)' % \
+                        (name, index))
+
     return system
diff --git a/configs/example/gpufs/system/system.py b/configs/example/gpufs/system/system.py
index 3eb8dbc..81dbd2d 100644
--- a/configs/example/gpufs/system/system.py
+++ b/configs/example/gpufs/system/system.py
@@ -33,6 +33,7 @@

 from common.Benchmarks import *
 from common.FSConfig import *
+from common import GPUTLBConfig
 from common import Simulation
 from ruby import Ruby

@@ -90,6 +91,10 @@
     shader = createGPU(system, args)
     connectGPU(system, args)

+ # The shader core will be whatever is after the CPU cores are accounted for
+    shader_idx = args.num_cpus
+    system.cpu.append(shader)
+
     # This arbitrary address is something in the X86 I/O hole
     hsapp_gpu_map_paddr = 0xe00000000
     hsapp_pt_walker = VegaPagetableWalker()
@@ -150,9 +155,12 @@
     device_ih.pio = system.iobus.mem_side_ports
     pm4_pkt_proc.pio = system.iobus.mem_side_ports

-    # Create Ruby system using Ruby.py for now
-    #Ruby.create_system(args, True, system, system.iobus,
-    #                   system._dma_ports)
+    # Full system needs special TLBs for SQC, Scalar, and vector data ports
+    args.full_system = True
+    GPUTLBConfig.config_tlb_hierarchy(args, system, shader_idx,
+                                      system.pc.south_bridge.gpu)
+
+    # Create Ruby system using disjoint VIPER topology
     system.ruby = Disjoint_VIPER()
     system.ruby.create(args, system, system.iobus, system._dma_ports)

@@ -161,6 +169,10 @@
                                    voltage_domain = system.voltage_domain)

     for (i, cpu) in enumerate(system.cpu):
+        # Break once we reach the shader "CPU"
+        if i == args.num_cpus:
+            break
+
         #
         # Tie the cpu ports to the correct ruby system ports
         #
@@ -170,14 +182,9 @@

         system.ruby._cpu_ports[i].connectCpuPorts(cpu)

-    for i in range(len(system.cpu)):
         for j in range(len(system.cpu[i].isa)):
             system.cpu[i].isa[j].vendor_string = "AuthenticAMD"

- # The shader core will be whatever is after the CPU cores are accounted for
-    shader_idx = args.num_cpus
-    system.cpu.append(shader)
-
     gpu_port_idx = len(system.ruby._cpu_ports) \
                    - args.num_compute_units - args.num_sqc \
                    - args.num_scalar_cache

--
To view, visit https://gem5-review.googlesource.com/c/public/gem5/+/57649
To unsubscribe, or for help writing mail filters, visit https://gem5-review.googlesource.com/settings

Gerrit-Project: public/gem5
Gerrit-Branch: develop
Gerrit-Change-Id: I36ed93db262b21427f3eaf4904a1c897a2894835
Gerrit-Change-Number: 57649
Gerrit-PatchSet: 1
Gerrit-Owner: Matthew Poremba <matthew.pore...@amd.com>
Gerrit-MessageType: newchange
_______________________________________________
gem5-dev mailing list -- gem5-dev@gem5.org
To unsubscribe send an email to gem5-dev-le...@gem5.org
%(web_page_url)slistinfo%(cgiext)s/%(_internal_name)s

Reply via email to