changeset 7d8836fd043d in /z/repo/gem5
details: http://repo.gem5.org/gem5?cmd=changeset;node=7d8836fd043d
description:
gpu-compute: AMD's baseline GPU model
diffstat:
SConstruct
| 41 +-
build_opts/HSAIL_X86
| 5 +
build_opts/X86_MOESI_AMD_Base
| 3 +
configs/common/GPUTLBConfig.py
| 203 +
configs/common/GPUTLBOptions.py
| 109 +
configs/example/apu_se.py
| 499 +
configs/example/ruby_gpu_random_test.py
| 187 +
configs/ruby/AMD_Base_Constructor.py
| 134 +
configs/ruby/GPU_RfO.py
| 751 +
configs/ruby/GPU_VIPER.py
| 674 +
configs/ruby/GPU_VIPER_Baseline.py
| 588 +
configs/ruby/GPU_VIPER_Region.py
| 758 +
configs/ruby/MOESI_AMD_Base.py
| 326 +
src/SConscript
| 55 +-
src/arch/SConscript
| 8 +
src/arch/hsail/Brig.h
| 67 +
src/arch/hsail/Brig_new.hpp
| 1587 ++
src/arch/hsail/SConscript
| 54 +
src/arch/hsail/SConsopts
| 40 +
src/arch/hsail/gen.py
| 806 +
src/arch/hsail/generic_types.cc
| 47 +
src/arch/hsail/generic_types.hh
| 16 +
src/arch/hsail/gpu_decoder.hh
| 77 +
src/arch/hsail/gpu_types.hh
| 69 +
src/arch/hsail/insts/branch.cc
| 86 +
src/arch/hsail/insts/branch.hh
| 442 +
src/arch/hsail/insts/decl.hh
| 1106 +
src/arch/hsail/insts/gpu_static_inst.cc
| 64 +
src/arch/hsail/insts/gpu_static_inst.hh
| 65 +
src/arch/hsail/insts/main.cc
| 208 +
src/arch/hsail/insts/mem.cc
| 139 +
src/arch/hsail/insts/mem.hh
| 1629 ++
src/arch/hsail/insts/mem_impl.hh
| 660 +
src/arch/hsail/insts/pseudo_inst.cc
| 787 +
src/arch/hsail/operand.cc
| 449 +
src/arch/hsail/operand.hh
| 768 +
src/gpu-compute/GPU.py
| 310 +
src/gpu-compute/LdsState.py
| 51 +
src/gpu-compute/SConscript
| 99 +
src/gpu-compute/X86GPUTLB.py
| 77 +
src/gpu-compute/brig_object.cc
| 474 +
src/gpu-compute/brig_object.hh
| 134 +
src/gpu-compute/cl_driver.cc
| 272 +
src/gpu-compute/cl_driver.hh
| 77 +
src/gpu-compute/cl_event.hh
| 51 +
src/gpu-compute/code_enums.hh
| 116 +
src/gpu-compute/compute_unit.cc
| 1817 +++
src/gpu-compute/compute_unit.hh
| 767 +
src/gpu-compute/condition_register_state.cc
| 83 +
src/gpu-compute/condition_register_state.hh
| 101 +
src/gpu-compute/dispatcher.cc
| 394 +
src/gpu-compute/dispatcher.hh
| 163 +
src/gpu-compute/exec_stage.cc
| 203 +
src/gpu-compute/exec_stage.hh
| 129 +
src/gpu-compute/fetch_stage.cc
| 106 +
src/gpu-compute/fetch_stage.hh
| 78 +
src/gpu-compute/fetch_unit.cc
| 293 +
src/gpu-compute/fetch_unit.hh
| 89 +
src/gpu-compute/global_memory_pipeline.cc
| 242 +
src/gpu-compute/global_memory_pipeline.hh
| 123 +
src/gpu-compute/gpu_dyn_inst.cc
| 198 +
src/gpu-compute/gpu_dyn_inst.hh
| 464 +
src/gpu-compute/gpu_exec_context.cc
| 53 +
src/gpu-compute/gpu_exec_context.hh
| 54 +
src/gpu-compute/gpu_static_inst.cc
| 42 +
src/gpu-compute/gpu_static_inst.hh
| 166 +
src/gpu-compute/gpu_tlb.cc
| 1801 +++
src/gpu-compute/gpu_tlb.hh
| 465 +
src/gpu-compute/hsa_code.hh
| 101 +
src/gpu-compute/hsa_kernel_info.hh
| 79 +
src/gpu-compute/hsa_object.cc
| 76 +
src/gpu-compute/hsa_object.hh
| 74 +
src/gpu-compute/hsail_code.cc
| 453 +
src/gpu-compute/hsail_code.hh
| 447 +
src/gpu-compute/kernel_cfg.cc
| 296 +
src/gpu-compute/kernel_cfg.hh
| 133 +
src/gpu-compute/lds_state.cc
| 341 +
src/gpu-compute/lds_state.hh
| 512 +
src/gpu-compute/local_memory_pipeline.cc
| 200 +
src/gpu-compute/local_memory_pipeline.hh
| 98 +
src/gpu-compute/misc.hh
| 162 +
src/gpu-compute/ndrange.hh
| 70 +
src/gpu-compute/of_scheduling_policy.cc
| 76 +
src/gpu-compute/of_scheduling_policy.hh
| 61 +
src/gpu-compute/pool_manager.cc
| 42 +
src/gpu-compute/pool_manager.hh
| 66 +
src/gpu-compute/qstruct.hh
| 201 +
src/gpu-compute/rr_scheduling_policy.cc
| 67 +
src/gpu-compute/rr_scheduling_policy.hh
| 65 +
src/gpu-compute/schedule_stage.cc
| 151 +
src/gpu-compute/schedule_stage.hh
| 95 +
src/gpu-compute/scheduler.cc
| 71 +
src/gpu-compute/scheduler.hh
| 63 +
src/gpu-compute/scheduling_policy.hh
| 57 +
src/gpu-compute/scoreboard_check_stage.cc
| 173 +
src/gpu-compute/scoreboard_check_stage.hh
| 106 +
src/gpu-compute/shader.cc
| 412 +
src/gpu-compute/shader.hh
| 212 +
src/gpu-compute/simple_pool_manager.cc
| 108 +
src/gpu-compute/simple_pool_manager.hh
| 72 +
src/gpu-compute/tlb_coalescer.cc
| 583 +
src/gpu-compute/tlb_coalescer.hh
| 252 +
src/gpu-compute/vector_register_file.cc
| 251 +
src/gpu-compute/vector_register_file.hh
| 142 +
src/gpu-compute/vector_register_state.cc
| 58 +
src/gpu-compute/vector_register_state.hh
| 101 +
src/gpu-compute/wavefront.cc
| 925 +
src/gpu-compute/wavefront.hh
| 368 +
src/mem/protocol/GPU_RfO-SQC.sm
| 667 +
src/mem/protocol/GPU_RfO-TCC.sm
| 1199 ++
src/mem/protocol/GPU_RfO-TCCdir.sm
| 2672 ++++
src/mem/protocol/GPU_RfO-TCP.sm
| 1009 +
src/mem/protocol/GPU_RfO.slicc
| 11 +
src/mem/protocol/GPU_VIPER-SQC.sm
| 322 +
src/mem/protocol/GPU_VIPER-TCC.sm
| 739 +
src/mem/protocol/GPU_VIPER-TCP.sm
| 747 +
src/mem/protocol/GPU_VIPER.slicc
| 9 +
src/mem/protocol/GPU_VIPER_Baseline.slicc
| 9 +
src/mem/protocol/GPU_VIPER_Region-TCC.sm
| 773 +
src/mem/protocol/GPU_VIPER_Region.slicc
| 11 +
src/mem/protocol/MOESI_AMD_Base-CorePair.sm
| 2904 ++++
src/mem/protocol/MOESI_AMD_Base-L3cache.sm
| 1130 +
src/mem/protocol/MOESI_AMD_Base-Region-CorePair.sm
| 3009 +++++
src/mem/protocol/MOESI_AMD_Base-Region-dir.sm
| 2038 +++
src/mem/protocol/MOESI_AMD_Base-Region-msg.sm
| 291 +
src/mem/protocol/MOESI_AMD_Base-RegionBuffer.sm
| 1368 ++
src/mem/protocol/MOESI_AMD_Base-RegionDir.sm
| 1187 ++
src/mem/protocol/MOESI_AMD_Base-dir.sm
| 1137 +
src/mem/protocol/MOESI_AMD_Base-msg.sm
| 362 +
src/mem/protocol/MOESI_AMD_Base-probeFilter.sm
| 1408 ++
src/mem/protocol/MOESI_AMD_Base.slicc
| 6 +
src/mem/protocol/RubySlicc_ComponentMapping.sm
| 3 +
src/mem/protocol/RubySlicc_Exports.sm
| 11 +-
src/mem/protocol/RubySlicc_Types.sm
| 45 +-
src/mem/protocol/SConsopts
| 5 +
src/mem/ruby/SConscript
| 15 +-
src/mem/ruby/profiler/Profiler.cc
| 4 +-
src/mem/ruby/slicc_interface/AbstractCacheEntry.hh
| 6 +
src/mem/ruby/slicc_interface/AbstractController.cc
| 6 +
src/mem/ruby/slicc_interface/AbstractController.hh
| 3 +-
src/mem/ruby/slicc_interface/RubySlicc_ComponentMapping.hh
| 29 +
src/mem/ruby/structures/CacheMemory.cc
| 50 +-
src/mem/ruby/structures/CacheMemory.hh
| 5 +-
src/mem/ruby/structures/RubyCache.py
| 1 +
src/mem/ruby/system/GPUCoalescer.cc
| 1397 ++
src/mem/ruby/system/GPUCoalescer.hh
| 368 +
src/mem/ruby/system/GPUCoalescer.py
| 48 +
src/mem/ruby/system/RubyPort.cc
| 3 +-
src/mem/ruby/system/RubyPort.hh
| 4 +
src/mem/ruby/system/RubySystem.cc
| 2 +-
src/mem/ruby/system/SConscript
| 10 +
src/mem/ruby/system/Sequencer.cc
| 5 +-
src/mem/ruby/system/Sequencer.hh
| 3 +
src/mem/ruby/system/Sequencer.py
| 86 +-
src/mem/ruby/system/VIPERCoalescer.cc
| 287 +
src/mem/ruby/system/VIPERCoalescer.hh
| 75 +
src/mem/ruby/system/VIPERCoalescer.py
| 45 +
src/mem/ruby/system/WeightedLRUPolicy.cc
| 113 +
src/mem/ruby/system/WeightedLRUPolicy.hh
| 62 +
src/mem/ruby/system/WeightedLRUReplacementPolicy.py
| 45 +
src/mem/slicc/symbols/StateMachine.py
| 44 +-
tests/SConscript
| 22 +-
tests/configs/gpu-randomtest-ruby.py
| 151 +
tests/configs/gpu-ruby.py
| 353 +
tests/quick/se/04.gpu/ref/x86/linux/gpu-ruby-GPU_RfO/config.ini
| 4423 +++++++
tests/quick/se/04.gpu/ref/x86/linux/gpu-ruby-GPU_RfO/simerr
| 5 +
tests/quick/se/04.gpu/ref/x86/linux/gpu-ruby-GPU_RfO/simout
| 21 +
tests/quick/se/04.gpu/ref/x86/linux/gpu-ruby-GPU_RfO/stats.txt
| 3202 +++++
tests/quick/se/04.gpu/ref/x86/linux/gpu-ruby-GPU_VIPER/config.ini
| 4063 ++++++
tests/quick/se/04.gpu/ref/x86/linux/gpu-ruby-GPU_VIPER/simerr
| 5 +
tests/quick/se/04.gpu/ref/x86/linux/gpu-ruby-GPU_VIPER/simout
| 21 +
tests/quick/se/04.gpu/ref/x86/linux/gpu-ruby-GPU_VIPER/stats.txt
| 3201 +++++
tests/quick/se/04.gpu/ref/x86/linux/gpu-ruby-GPU_VIPER_Baseline/config.ini
| 4089 ++++++
tests/quick/se/04.gpu/ref/x86/linux/gpu-ruby-GPU_VIPER_Baseline/simerr
| 5 +
tests/quick/se/04.gpu/ref/x86/linux/gpu-ruby-GPU_VIPER_Baseline/simout
| 21 +
tests/quick/se/04.gpu/ref/x86/linux/gpu-ruby-GPU_VIPER_Baseline/stats.txt
| 3200 +++++
tests/quick/se/04.gpu/ref/x86/linux/gpu-ruby-GPU_VIPER_Region/config.ini
| 5094 ++++++++
tests/quick/se/04.gpu/ref/x86/linux/gpu-ruby-GPU_VIPER_Region/simerr
| 5 +
tests/quick/se/04.gpu/ref/x86/linux/gpu-ruby-GPU_VIPER_Region/simout
| 21 +
tests/quick/se/04.gpu/ref/x86/linux/gpu-ruby-GPU_VIPER_Region/stats.txt
| 3418 +++++
tests/quick/se/04.gpu/test.py
| 48 +
tests/quick/se/60.gpu-randomtest/ref/x86/linux/gpu-randomtest-ruby-GPU_RfO/config.ini
| 5862 ++++++++++
tests/quick/se/60.gpu-randomtest/ref/x86/linux/gpu-randomtest-ruby-GPU_RfO/simerr
| 10 +
tests/quick/se/60.gpu-randomtest/ref/x86/linux/gpu-randomtest-ruby-GPU_RfO/simout
| 11 +
tests/quick/se/60.gpu-randomtest/ref/x86/linux/gpu-randomtest-ruby-GPU_RfO/stats.txt
| 1072 +
tests/quick/se/60.gpu-randomtest/test.py
| 35 +
tests/test-progs/gpu-hello/bin/x86/linux/gpu-hello
| Bin
tests/test-progs/gpu-hello/bin/x86/linux/gpu-hello-kernel.asm
| Bin
tests/test-progs/gpu-hello/src/gpu-hello-kernel.cl
| 78 +
tests/test-progs/gpu-hello/src/gpu-hello.cpp
| 332 +
util/regress
| 3 +-
191 files changed, 95286 insertions(+), 92 deletions(-)
diffs (truncated from 96551 to 300 lines):
diff -r bd7d06ea90f5 -r 7d8836fd043d SConstruct
--- a/SConstruct Tue Jan 19 14:05:03 2016 -0500
+++ b/SConstruct Tue Jan 19 14:28:22 2016 -0500
@@ -1065,7 +1065,9 @@
# Define the universe of supported ISAs
all_isa_list = [ ]
+all_gpu_isa_list = [ ]
Export('all_isa_list')
+Export('all_gpu_isa_list')
class CpuModel(object):
'''The CpuModel class encapsulates everything the ISA parser needs to
@@ -1121,9 +1123,11 @@
SConscript(joinpath(root, 'SConsopts'))
all_isa_list.sort()
+all_gpu_isa_list.sort()
sticky_vars.AddVariables(
EnumVariable('TARGET_ISA', 'Target ISA', 'alpha', all_isa_list),
+ EnumVariable('TARGET_GPU_ISA', 'Target GPU ISA', 'hsail',
all_gpu_isa_list),
ListVariable('CPU_MODELS', 'CPU models',
sorted(n for n,m in CpuModel.dict.iteritems() if m.default),
sorted(CpuModel.dict.keys())),
@@ -1139,6 +1143,7 @@
BoolVariable('USE_FENV', 'Use <fenv.h> IEEE mode control', have_fenv),
BoolVariable('CP_ANNOTATE', 'Enable critical path annotation capability',
False),
BoolVariable('USE_KVM', 'Enable hardware virtualized (KVM) CPU models',
have_kvm),
+ BoolVariable('BUILD_GPU', 'Build the compute-GPU model', False),
EnumVariable('PROTOCOL', 'Coherence protocol for Ruby', 'None',
all_protocols),
EnumVariable('BACKTRACE_IMPL', 'Post-mortem dump implementation',
@@ -1146,9 +1151,9 @@
)
# These variables get exported to #defines in config/*.hh (see src/SConscript).
-export_vars += ['USE_FENV', 'SS_COMPATIBLE_FP', 'TARGET_ISA', 'CP_ANNOTATE',
- 'USE_POSIX_CLOCK', 'USE_KVM', 'PROTOCOL', 'HAVE_PROTOBUF',
- 'HAVE_PERF_ATTR_EXCLUDE_HOST']
+export_vars += ['USE_FENV', 'SS_COMPATIBLE_FP', 'TARGET_ISA', 'TARGET_GPU_ISA',
+ 'CP_ANNOTATE', 'USE_POSIX_CLOCK', 'USE_KVM', 'PROTOCOL',
+ 'HAVE_PROTOBUF', 'HAVE_PERF_ATTR_EXCLUDE_HOST']
###################################################
#
@@ -1226,6 +1231,7 @@
###################################################
main['ALL_ISA_LIST'] = all_isa_list
+main['ALL_GPU_ISA_LIST'] = all_gpu_isa_list
all_isa_deps = {}
def make_switching_dir(dname, switch_headers, env):
# Generate the header. target[0] is the full path of the output
@@ -1258,6 +1264,35 @@
Export('make_switching_dir')
+def make_gpu_switching_dir(dname, switch_headers, env):
+ # Generate the header. target[0] is the full path of the output
+ # header to generate. 'source' is a dummy variable, since we get the
+ # list of ISAs from env['ALL_GPU_ISA_LIST'].
+ def gen_switch_hdr(target, source, env):
+ fname = str(target[0])
+
+ isa = env['TARGET_GPU_ISA'].lower()
+
+ try:
+ f = open(fname, 'w')
+ print >>f, '#include "%s/%s/%s"' % (dname, isa, basename(fname))
+ f.close()
+ except IOError:
+ print "Failed to create %s" % fname
+ raise
+
+ # Build SCons Action object. 'varlist' specifies env vars that this
+ # action depends on; when env['ALL_GPU_ISA_LIST'] changes these actions
+ # should get re-executed.
+ switch_hdr_action = MakeAction(gen_switch_hdr,
+ Transform("GENERATE"), varlist=['ALL_ISA_GPU_LIST'])
+
+ # Instantiate actions for each header
+ for hdr in switch_headers:
+ env.Command(hdr, [], switch_hdr_action)
+
+Export('make_gpu_switching_dir')
+
# all-isas -> all-deps -> all-environs -> all_targets
main.Alias('#all-isas', [])
main.Alias('#all-deps', '#all-isas')
diff -r bd7d06ea90f5 -r 7d8836fd043d build_opts/HSAIL_X86
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/build_opts/HSAIL_X86 Tue Jan 19 14:28:22 2016 -0500
@@ -0,0 +1,5 @@
+PROTOCOL = 'GPU_RfO'
+TARGET_ISA = 'x86'
+TARGET_GPU_ISA = 'hsail'
+BUILD_GPU = True
+CPU_MODELS = 'AtomicSimpleCPU,O3CPU,TimingSimpleCPU'
diff -r bd7d06ea90f5 -r 7d8836fd043d build_opts/X86_MOESI_AMD_Base
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/build_opts/X86_MOESI_AMD_Base Tue Jan 19 14:28:22 2016 -0500
@@ -0,0 +1,3 @@
+PROTOCOL = 'MOESI_AMD_Base'
+TARGET_ISA = 'x86'
+CPU_MODELS = 'AtomicSimpleCPU,O3CPU,TimingSimpleCPU'
\ No newline at end of file
diff -r bd7d06ea90f5 -r 7d8836fd043d configs/common/GPUTLBConfig.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/configs/common/GPUTLBConfig.py Tue Jan 19 14:28:22 2016 -0500
@@ -0,0 +1,203 @@
+#
+# Copyright (c) 2011-2015 Advanced Micro Devices, Inc.
+# All rights reserved.
+#
+# For use for simulation and test purposes only
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are met:
+#
+# 1. Redistributions of source code must retain the above copyright notice,
+# this list of conditions and the following disclaimer.
+#
+# 2. Redistributions in binary form must reproduce the above copyright notice,
+# this list of conditions and the following disclaimer in the documentation
+# and/or other materials provided with the distribution.
+#
+# 3. Neither the name of the copyright holder nor the names of its
contributors
+# may be used to endorse or promote products derived from this software
+# without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+# ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
+# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+# POSSIBILITY OF SUCH DAMAGE.
+#
+# Author: Lisa Hsu
+#
+
+# Configure the TLB hierarchy
+# Places which would probably need to be modified if you
+# want a different hierarchy are specified by a <Modify here .. >'
+# comment
+import m5
+from m5.objects import *
+
+def TLB_constructor(level):
+
+ constructor_call = "X86GPUTLB(size = options.L%(level)dTLBentries, \
+ assoc = options.L%(level)dTLBassoc, \
+ hitLatency = options.L%(level)dAccessLatency,\
+ missLatency2 = options.L%(level)dMissLatency,\
+ maxOutstandingReqs = options.L%(level)dMaxOutstandingReqs,\
+ accessDistance = options.L%(level)dAccessDistanceStat,\
+ clk_domain = SrcClockDomain(\
+ clock = options.GPUClock,\
+ voltage_domain = VoltageDomain(\
+ voltage = options.gpu_voltage)))" % locals()
+ return constructor_call
+
+def Coalescer_constructor(level):
+
+ constructor_call = "TLBCoalescer(probesPerCycle = \
+ options.L%(level)dProbesPerCycle, \
+ coalescingWindow = options.L%(level)dCoalescingWindow,\
+ disableCoalescing = options.L%(level)dDisableCoalescing,\
+ clk_domain = SrcClockDomain(\
+ clock = options.GPUClock,\
+ voltage_domain = VoltageDomain(\
+ voltage = options.gpu_voltage)))" % locals()
+ return constructor_call
+
+def create_TLB_Coalescer(options, my_level, my_index, TLB_name,
Coalescer_name):
+ # arguments: options, TLB level, number of private structures for this
Level,
+ # TLB name and Coalescer name
+ for i in xrange(my_index):
+ TLB_name.append(eval(TLB_constructor(my_level)))
+ Coalescer_name.append(eval(Coalescer_constructor(my_level)))
+
+def config_tlb_hierarchy(options, system, shader_idx):
+ n_cu = options.num_compute_units
+ # Make this configurable now, instead of the hard coded val. The
dispatcher
+ # is always the last item in the system.cpu list.
+ dispatcher_idx = len(system.cpu) - 1
+
+ if options.TLB_config == "perLane":
+ num_TLBs = 64 * n_cu
+ elif options.TLB_config == "mono":
+ num_TLBs = 1
+ elif options.TLB_config == "perCU":
+ num_TLBs = n_cu
+ elif options.TLB_config == "2CU":
+ num_TLBs = n_cu >> 1
+ else:
+ print "Bad option for TLB Configuration."
+ sys.exit(1)
+
+
#----------------------------------------------------------------------------------------
+ # A visual representation of the TLB hierarchy
+ # for ease of configuration
+ # < Modify here the width and the number of levels if you want a different
configuration >
+ # width is the number of TLBs of the given type (i.e., D-TLB, I-TLB etc)
for this level
+ L1 = [{'name': 'sqc', 'width': options.num_sqc, 'TLBarray': [],
'CoalescerArray': []},
+ {'name': 'dispatcher', 'width': 1, 'TLBarray': [], 'CoalescerArray':
[]},
+ {'name': 'l1', 'width': num_TLBs, 'TLBarray': [], 'CoalescerArray':
[]}]
+
+ L2 = [{'name': 'l2', 'width': 1, 'TLBarray': [], 'CoalescerArray': []}]
+ L3 = [{'name': 'l3', 'width': 1, 'TLBarray': [], 'CoalescerArray': []}]
+
+ TLB_hierarchy = [L1, L2, L3]
+
+
#----------------------------------------------------------------------------------------
+ # Create the hierarchy
+ # Call the appropriate constructors and add objects to the system
+
+ for i in xrange(len(TLB_hierarchy)):
+ hierarchy_level = TLB_hierarchy[i]
+ level = i+1
+ for TLB_type in hierarchy_level:
+ TLB_index = TLB_type['width']
+ TLB_array = TLB_type['TLBarray']
+ Coalescer_array = TLB_type['CoalescerArray']
+ # If the sim calls for a fixed L1 TLB size across CUs,
+ # override the TLB entries option
+ if options.tot_L1TLB_size:
+ options.L1TLBentries = options.tot_L1TLB_size / num_TLBs
+ if options.L1TLBassoc > options.L1TLBentries:
+ options.L1TLBassoc = options.L1TLBentries
+ # call the constructors for the TLB and the Coalescer
+ create_TLB_Coalescer(options, level, TLB_index,\
+ TLB_array, Coalescer_array)
+
+ system_TLB_name = TLB_type['name'] + '_tlb'
+ system_Coalescer_name = TLB_type['name'] + '_coalescer'
+
+ # add the different TLB levels to the system
+ # Modify here if you want to make the TLB hierarchy a child of
+ # the shader.
+ exec('system.%s = TLB_array' % system_TLB_name)
+ exec('system.%s = Coalescer_array' % system_Coalescer_name)
+
+ #===========================================================
+ # Specify the TLB hierarchy (i.e., port connections)
+ # All TLBs but the last level TLB need to have a memSidePort (master)
+ #===========================================================
+
+ # Each TLB is connected with its Coalescer through a single port.
+ # There is a one-to-one mapping of TLBs to Coalescers at a given level
+ # This won't be modified no matter what the hierarchy looks like.
+ for i in xrange(len(TLB_hierarchy)):
+ hierarchy_level = TLB_hierarchy[i]
+ level = i+1
+ for TLB_type in hierarchy_level:
+ name = TLB_type['name']
+ for index in range(TLB_type['width']):
+ exec('system.%s_coalescer[%d].master[0] = \
+ system.%s_tlb[%d].slave[0]' % \
+ (name, index, name, index))
+
+ # Connect the cpuSidePort (slave) of all the coalescers in level 1
+ # < Modify here if you want a different configuration >
+ for TLB_type in L1:
+ name = TLB_type['name']
+ num_TLBs = TLB_type['width']
+ if name == 'l1': # L1 D-TLBs
+ tlb_per_cu = num_TLBs / n_cu
+ for cu_idx in range(n_cu):
+ if tlb_per_cu:
+ for tlb in range(tlb_per_cu):
+ exec('system.cpu[%d].CUs[%d].translation_port[%d] = \
+ system.l1_coalescer[%d].slave[%d]' % \
+ (shader_idx, cu_idx, tlb,
cu_idx*tlb_per_cu+tlb, 0))
+ else:
+ exec('system.cpu[%d].CUs[%d].translation_port[%d] = \
+ system.l1_coalescer[%d].slave[%d]' % \
+ (shader_idx, cu_idx, tlb_per_cu, cu_idx / (n_cu /
num_TLBs), cu_idx % (n_cu / num_TLBs)))
+
+ elif name == 'dispatcher': # Dispatcher TLB
+ for index in range(TLB_type['width']):
+ exec('system.cpu[%d].translation_port = \
+ system.dispatcher_coalescer[%d].slave[0]' % \
+ (dispatcher_idx, index))
+ elif name == 'sqc': # I-TLB
+ for index in range(n_cu):
+ sqc_tlb_index = index / options.cu_per_sqc
+ sqc_tlb_port_id = index % options.cu_per_sqc
+ exec('system.cpu[%d].CUs[%d].sqc_tlb_port = \
+ system.sqc_coalescer[%d].slave[%d]' % \
+ (shader_idx, index, sqc_tlb_index, sqc_tlb_port_id))
+
+
+ # Connect the memSidePorts (masters) of all the TLBs with the
+ # cpuSidePorts (slaves) of the Coalescers of the next level
_______________________________________________
gem5-dev mailing list
[email protected]
http://m5sim.org/mailman/listinfo/gem5-dev