changeset 7d8836fd043d in /z/repo/gem5
details: http://repo.gem5.org/gem5?cmd=changeset;node=7d8836fd043d
description:
gpu-compute: AMD's baseline GPU model
diffstat:
SConstruct
| 41 +-
build_opts/HSAIL_X86
| 5 +
build_opts/X86_MOESI_AMD_Base
| 3 +
configs/common/GPUTLBConfig.py
| 203 +
configs/common/GPUTLBOptions.py
| 109 +
configs/example/apu_se.py
| 499 +
configs/example/ruby_gpu_random_test.py
| 187 +
configs/ruby/AMD_Base_Constructor.py
| 134 +
configs/ruby/GPU_RfO.py
| 751 +
configs/ruby/GPU_VIPER.py
| 674 +
configs/ruby/GPU_VIPER_Baseline.py
| 588 +
configs/ruby/GPU_VIPER_Region.py
| 758 +
configs/ruby/MOESI_AMD_Base.py
| 326 +
src/SConscript
| 55 +-
src/arch/SConscript
| 8 +
src/arch/hsail/Brig.h
| 67 +
src/arch/hsail/Brig_new.hpp
| 1587 ++
src/arch/hsail/SConscript
| 54 +
src/arch/hsail/SConsopts
| 40 +
src/arch/hsail/gen.py
| 806 +
src/arch/hsail/generic_types.cc
| 47 +
src/arch/hsail/generic_types.hh
| 16 +
src/arch/hsail/gpu_decoder.hh
| 77 +
src/arch/hsail/gpu_types.hh
| 69 +
src/arch/hsail/insts/branch.cc
| 86 +
src/arch/hsail/insts/branch.hh
| 442 +
src/arch/hsail/insts/decl.hh
| 1106 +
src/arch/hsail/insts/gpu_static_inst.cc
| 64 +
src/arch/hsail/insts/gpu_static_inst.hh
| 65 +
src/arch/hsail/insts/main.cc
| 208 +
src/arch/hsail/insts/mem.cc
| 139 +
src/arch/hsail/insts/mem.hh
| 1629 ++
src/arch/hsail/insts/mem_impl.hh
| 660 +
src/arch/hsail/insts/pseudo_inst.cc
| 787 +
src/arch/hsail/operand.cc
| 449 +
src/arch/hsail/operand.hh
| 768 +
src/gpu-compute/GPU.py
| 310 +
src/gpu-compute/LdsState.py
| 51 +
src/gpu-compute/SConscript
| 99 +
src/gpu-compute/X86GPUTLB.py
| 77 +
src/gpu-compute/brig_object.cc
| 474 +
src/gpu-compute/brig_object.hh
| 134 +
src/gpu-compute/cl_driver.cc
| 272 +
src/gpu-compute/cl_driver.hh
| 77 +
src/gpu-compute/cl_event.hh
| 51 +
src/gpu-compute/code_enums.hh
| 116 +
src/gpu-compute/compute_unit.cc
| 1817 +++
src/gpu-compute/compute_unit.hh
| 767 +
src/gpu-compute/condition_register_state.cc
| 83 +
src/gpu-compute/condition_register_state.hh
| 101 +
src/gpu-compute/dispatcher.cc
| 394 +
src/gpu-compute/dispatcher.hh
| 163 +
src/gpu-compute/exec_stage.cc
| 203 +
src/gpu-compute/exec_stage.hh
| 129 +
src/gpu-compute/fetch_stage.cc
| 106 +
src/gpu-compute/fetch_stage.hh
| 78 +
src/gpu-compute/fetch_unit.cc
| 293 +
src/gpu-compute/fetch_unit.hh
| 89 +
src/gpu-compute/global_memory_pipeline.cc
| 242 +
src/gpu-compute/global_memory_pipeline.hh
| 123 +
src/gpu-compute/gpu_dyn_inst.cc
| 198 +
src/gpu-compute/gpu_dyn_inst.hh
| 464 +
src/gpu-compute/gpu_exec_context.cc
| 53 +
src/gpu-compute/gpu_exec_context.hh
| 54 +
src/gpu-compute/gpu_static_inst.cc
| 42 +
src/gpu-compute/gpu_static_inst.hh
| 166 +
src/gpu-compute/gpu_tlb.cc
| 1801 +++
src/gpu-compute/gpu_tlb.hh
| 465 +
src/gpu-compute/hsa_code.hh
| 101 +
src/gpu-compute/hsa_kernel_info.hh
| 79 +
src/gpu-compute/hsa_object.cc
| 76 +
src/gpu-compute/hsa_object.hh
| 74 +
src/gpu-compute/hsail_code.cc
| 453 +
src/gpu-compute/hsail_code.hh
| 447 +
src/gpu-compute/kernel_cfg.cc
| 296 +
src/gpu-compute/kernel_cfg.hh
| 133 +
src/gpu-compute/lds_state.cc
| 341 +
src/gpu-compute/lds_state.hh
| 512 +
src/gpu-compute/local_memory_pipeline.cc
| 200 +
src/gpu-compute/local_memory_pipeline.hh
| 98 +
src/gpu-compute/misc.hh
| 162 +
src/gpu-compute/ndrange.hh
| 70 +
src/gpu-compute/of_scheduling_policy.cc
| 76 +
src/gpu-compute/of_scheduling_policy.hh
| 61 +
src/gpu-compute/pool_manager.cc
| 42 +
src/gpu-compute/pool_manager.hh
| 66 +
src/gpu-compute/qstruct.hh
| 201 +
src/gpu-compute/rr_scheduling_policy.cc
| 67 +
src/gpu-compute/rr_scheduling_policy.hh
| 65 +
src/gpu-compute/schedule_stage.cc
| 151 +
src/gpu-compute/schedule_stage.hh
| 95 +
src/gpu-compute/scheduler.cc
| 71 +
src/gpu-compute/scheduler.hh
| 63 +
src/gpu-compute/scheduling_policy.hh
| 57 +
src/gpu-compute/scoreboard_check_stage.cc
| 173 +
src/gpu-compute/scoreboard_check_stage.hh
| 106 +
src/gpu-compute/shader.cc
| 412 +
src/gpu-compute/shader.hh
| 212 +
src/gpu-compute/simple_pool_manager.cc
| 108 +
src/gpu-compute/simple_pool_manager.hh
| 72 +
src/gpu-compute/tlb_coalescer.cc
| 583 +
src/gpu-compute/tlb_coalescer.hh
| 252 +
src/gpu-compute/vector_register_file.cc
| 251 +
src/gpu-compute/vector_register_file.hh
| 142 +
src/gpu-compute/vector_register_state.cc
| 58 +
src/gpu-compute/vector_register_state.hh
| 101 +
src/gpu-compute/wavefront.cc
| 925 +
src/gpu-compute/wavefront.hh
| 368 +
src/mem/protocol/GPU_RfO-SQC.sm
| 667 +
src/mem/protocol/GPU_RfO-TCC.sm
| 1199 ++
src/mem/protocol/GPU_RfO-TCCdir.sm
| 2672 ++++
src/mem/protocol/GPU_RfO-TCP.sm
| 1009 +
src/mem/protocol/GPU_RfO.slicc
| 11 +
src/mem/protocol/GPU_VIPER-SQC.sm
| 322 +
src/mem/protocol/GPU_VIPER-TCC.sm
| 739 +
src/mem/protocol/GPU_VIPER-TCP.sm
| 747 +
src/mem/protocol/GPU_VIPER.slicc
| 9 +
src/mem/protocol/GPU_VIPER_Baseline.slicc
| 9 +
src/mem/protocol/GPU_VIPER_Region-TCC.sm
| 773 +
src/mem/protocol/GPU_VIPER_Region.slicc
| 11 +
src/mem/protocol/MOESI_AMD_Base-CorePair.sm
| 2904 ++++
src/mem/protocol/MOESI_AMD_Base-L3cache.sm
| 1130 +
src/mem/protocol/MOESI_AMD_Base-Region-CorePair.sm
| 3009 +++++
src/mem/protocol/MOESI_AMD_Base-Region-dir.sm
| 2038 +++
src/mem/protocol/MOESI_AMD_Base-Region-msg.sm
| 291 +
src/mem/protocol/MOESI_AMD_Base-RegionBuffer.sm
| 1368 ++
src/mem/protocol/MOESI_AMD_Base-RegionDir.sm
| 1187 ++
src/mem/protocol/MOESI_AMD_Base-dir.sm
| 1137 +
src/mem/protocol/MOESI_AMD_Base-msg.sm
| 362 +
src/mem/protocol/MOESI_AMD_Base-probeFilter.sm
| 1408 ++
src/mem/protocol/MOESI_AMD_Base.slicc
| 6 +
src/mem/protocol/RubySlicc_ComponentMapping.sm
| 3 +
src/mem/protocol/RubySlicc_Exports.sm
| 11 +-
src/mem/protocol/RubySlicc_Types.sm
| 45 +-
src/mem/protocol/SConsopts
| 5 +
src/mem/ruby/SConscript
| 15 +-
src/mem/ruby/profiler/Profiler.cc
| 4 +-
src/mem/ruby/slicc_interface/AbstractCacheEntry.hh
| 6 +
src/mem/ruby/slicc_interface/AbstractController.cc
| 6 +
src/mem/ruby/slicc_interface/AbstractController.hh
| 3 +-
src/mem/ruby/slicc_interface/RubySlicc_ComponentMapping.hh
| 29 +
src/mem/ruby/structures/CacheMemory.cc
| 50 +-
src/mem/ruby/structures/CacheMemory.hh
| 5 +-
src/mem/ruby/structures/RubyCache.py
| 1 +
src/mem/ruby/system/GPUCoalescer.cc
| 1397 ++
src/mem/ruby/system/GPUCoalescer.hh
| 368 +
src/mem/ruby/system/GPUCoalescer.py
| 48 +
src/mem/ruby/system/RubyPort.cc
| 3 +-
src/mem/ruby/system/RubyPort.hh
| 4 +
src/mem/ruby/system/RubySystem.cc
| 2 +-
src/mem/ruby/system/SConscript
| 10 +
src/mem/ruby/system/Sequencer.cc
| 5 +-
src/mem/ruby/system/Sequencer.hh
| 3 +
src/mem/ruby/system/Sequencer.py
| 86 +-
src/mem/ruby/system/VIPERCoalescer.cc
| 287 +
src/mem/ruby/system/VIPERCoalescer.hh
| 75 +
src/mem/ruby/system/VIPERCoalescer.py
| 45 +
src/mem/ruby/system/WeightedLRUPolicy.cc
| 113 +
src/mem/ruby/system/WeightedLRUPolicy.hh
| 62 +
src/mem/ruby/system/WeightedLRUReplacementPolicy.py
| 45 +
src/mem/slicc/symbols/StateMachine.py
| 44 +-
tests/SConscript
| 22 +-
tests/configs/gpu-randomtest-ruby.py
| 151 +
tests/configs/gpu-ruby.py
| 353 +
tests/quick/se/04.gpu/ref/x86/linux/gpu-ruby-GPU_RfO/config.ini
| 4423 +++++++
tests/quick/se/04.gpu/ref/x86/linux/gpu-ruby-GPU_RfO/simerr
| 5 +
tests/quick/se/04.gpu/ref/x86/linux/gpu-ruby-GPU_RfO/simout
| 21 +
tests/quick/se/04.gpu/ref/x86/linux/gpu-ruby-GPU_RfO/stats.txt
| 3202 +++++
tests/quick/se/04.gpu/ref/x86/linux/gpu-ruby-GPU_VIPER/config.ini
| 4063 ++++++
tests/quick/se/04.gpu/ref/x86/linux/gpu-ruby-GPU_VIPER/simerr
| 5 +
tests/quick/se/04.gpu/ref/x86/linux/gpu-ruby-GPU_VIPER/simout
| 21 +
tests/quick/se/04.gpu/ref/x86/linux/gpu-ruby-GPU_VIPER/stats.txt
| 3201 +++++
tests/quick/se/04.gpu/ref/x86/linux/gpu-ruby-GPU_VIPER_Baseline/config.ini
| 4089 ++++++
tests/quick/se/04.gpu/ref/x86/linux/gpu-ruby-GPU_VIPER_Baseline/simerr
| 5 +
tests/quick/se/04.gpu/ref/x86/linux/gpu-ruby-GPU_VIPER_Baseline/simout
| 21 +
tests/quick/se/04.gpu/ref/x86/linux/gpu-ruby-GPU_VIPER_Baseline/stats.txt
| 3200 +++++
tests/quick/se/04.gpu/ref/x86/linux/gpu-ruby-GPU_VIPER_Region/config.ini
| 5094 ++++++++
tests/quick/se/04.gpu/ref/x86/linux/gpu-ruby-GPU_VIPER_Region/simerr
| 5 +
tests/quick/se/04.gpu/ref/x86/linux/gpu-ruby-GPU_VIPER_Region/simout
| 21 +
tests/quick/se/04.gpu/ref/x86/linux/gpu-ruby-GPU_VIPER_Region/stats.txt
| 3418 +++++
tests/quick/se/04.gpu/test.py
| 48 +
tests/quick/se/60.gpu-randomtest/ref/x86/linux/gpu-randomtest-ruby-GPU_RfO/config.ini
| 5862 ++++++++++
tests/quick/se/60.gpu-randomtest/ref/x86/linux/gpu-randomtest-ruby-GPU_RfO/simerr
| 10 +
tests/quick/se/60.gpu-randomtest/ref/x86/linux/gpu-randomtest-ruby-GPU_RfO/simout
| 11 +
tests/quick/se/60.gpu-randomtest/ref/x86/linux/gpu-randomtest-ruby-GPU_RfO/stats.txt
| 1072 +
tests/quick/se/60.gpu-randomtest/test.py
| 35 +
tests/test-progs/gpu-hello/bin/x86/linux/gpu-hello
| Bin
tests/test-progs/gpu-hello/bin/x86/linux/gpu-hello-kernel.asm
| Bin
tests/test-progs/gpu-hello/src/gpu-hello-kernel.cl
| 78 +
tests/test-progs/gpu-hello/src/gpu-hello.cpp
| 332 +
util/regress
| 3 +-
191 files changed, 95286 insertions(+), 92 deletions(-)
diffs (truncated from 96551 to 300 lines):
diff -r bd7d06ea90f5 -r 7d8836fd043d SConstruct
--- a/SConstruct Tue Jan 19 14:05:03 2016 -0500
+++ b/SConstruct Tue Jan 19 14:28:22 2016 -0500
@@ -1065,7 +1065,9 @@
# Define the universe of supported ISAs
all_isa_list = [ ]
+all_gpu_isa_list = [ ]
Export('all_isa_list')
+Export('all_gpu_isa_list')
class CpuModel(object):
'''The CpuModel class encapsulates everything the ISA parser needs to
@@ -1121,9 +1123,11 @@
SConscript(joinpath(root, 'SConsopts'))
all_isa_list.sort()
+all_gpu_isa_list.sort()
sticky_vars.AddVariables(
EnumVariable('TARGET_ISA', 'Target ISA', 'alpha', all_isa_list),
+ EnumVariable('TARGET_GPU_ISA', 'Target GPU ISA', 'hsail',
all_gpu_isa_list),
ListVariable('CPU_MODELS', 'CPU models',
sorted(n for n,m in CpuModel.dict.iteritems() if m.default),
sorted(CpuModel.dict.keys())),
@@ -1139,6 +1143,7 @@
BoolVariable('USE_FENV', 'Use <fenv.h> IEEE mode control', have_fenv),
BoolVariable('CP_ANNOTATE', 'Enable critical path annotation capability',
False),
BoolVariable('USE_KVM', 'Enable hardware virtualized (KVM) CPU models',
have_kvm),
+ BoolVariable('BUILD_GPU', 'Build the compute-GPU model', False),
EnumVariable('PROTOCOL', 'Coherence protocol for Ruby', 'None',
all_protocols),
EnumVariable('BACKTRACE_IMPL', 'Post-mortem dump implementation',
@@ -1146,9 +1151,9 @@
)
# These variables get exported to #defines in config/*.hh (see src/SConscript).
-export_vars += ['USE_FENV', 'SS_COMPATIBLE_FP', 'TARGET_ISA', 'CP_ANNOTATE',
- 'USE_POSIX_CLOCK', 'USE_KVM', 'PROTOCOL', 'HAVE_PROTOBUF',
- 'HAVE_PERF_ATTR_EXCLUDE_HOST']
+export_vars += ['USE_FENV', 'SS_COMPATIBLE_FP', 'TARGET_ISA', 'TARGET_GPU_ISA',
+ 'CP_ANNOTATE', 'USE_POSIX_CLOCK', 'USE_KVM', 'PROTOCOL',
+ 'HAVE_PROTOBUF', 'HAVE_PERF_ATTR_EXCLUDE_HOST']
###################################################
#
@@ -1226,6 +1231,7 @@
###################################################
main['ALL_ISA_LIST'] = all_isa_list
+main['ALL_GPU_ISA_LIST'] = all_gpu_isa_list
all_isa_deps = {}
def make_switching_dir(dname, switch_headers, env):
# Generate the header. target[0] is the full path of the output
@@ -1258,6 +1264,35 @@
Export('make_switching_dir')
+def make_gpu_switching_dir(dname, switch_headers, env):
+ # Generate the header. target[0] is the full path of the output
+ # header to generate. 'source' is a dummy variable, since we get the
+ # list of ISAs from env['ALL_GPU_ISA_LIST'].
+ def gen_switch_hdr(target, source, env):
+ fname = str(target[0])
+
+ isa = env['TARGET_GPU_ISA'].lower()
+
+ try:
+ f = open(fname, 'w')
+ print >>f, '#include "%s/%s/%s"' % (dname, isa, basename(fname))
+ f.close()
+ except IOError:
+ print "Failed to create %s" % fname
+ raise
+
+ # Build SCons Action object. 'varlist' specifies env vars that this
+ # action depends on; when env['ALL_GPU_ISA_LIST'] changes these actions
+ # should get re-executed.
+ switch_hdr_action = MakeAction(gen_switch_hdr,
+ Transform("GENERATE"), varlist=['ALL_ISA_GPU_LIST'])
+
+ # Instantiate actions for each header
+ for hdr in switch_headers:
+ env.Command(hdr, [], switch_hdr_action)
+
+Export('make_gpu_switching_dir')
+
# all-isas -> all-deps -> all-environs -> all_targets
main.Alias('#all-isas', [])
main.Alias('#all-deps', '#all-isas')
diff -r bd7d06ea90f5 -r 7d8836fd043d build_opts/HSAIL_X86
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/build_opts/HSAIL_X86 Tue Jan 19 14:28:22 2016 -0500
@@ -0,0 +1,5 @@
+PROTOCOL = 'GPU_RfO'
+TARGET_ISA = 'x86'
+TARGET_GPU_ISA = 'hsail'
+BUILD_GPU = True
+CPU_MODELS = 'AtomicSimpleCPU,O3CPU,TimingSimpleCPU'
diff -r bd7d06ea90f5 -r 7d8836fd043d build_opts/X86_MOESI_AMD_Base
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/build_opts/X86_MOESI_AMD_Base Tue Jan 19 14:28:22 2016 -0500
@@ -0,0 +1,3 @@
+PROTOCOL = 'MOESI_AMD_Base'
+TARGET_ISA = 'x86'
+CPU_MODELS = 'AtomicSimpleCPU,O3CPU,TimingSimpleCPU'
\ No newline at end of file
diff -r bd7d06ea90f5 -r 7d8836fd043d configs/common/GPUTLBConfig.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/configs/common/GPUTLBConfig.py Tue Jan 19 14:28:22 2016 -0500
@@ -0,0 +1,203 @@
+#
+# Copyright (c) 2011-2015 Advanced Micro Devices, Inc.
+# All rights reserved.
+#
+# For use for simulation and test purposes only
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are met:
+#
+# 1. Redistributions of source code must retain the above copyright notice,
+# this list of conditions and the following disclaimer.
+#
+# 2. Redistributions in binary form must reproduce the above copyright notice,
+# this list of conditions and the following disclaimer in the documentation
+# and/or other materials provided with the distribution.
+#
+# 3. Neither the name of the copyright holder nor the names of its
contributors
+# may be used to endorse or promote products derived from this software
+# without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+# ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
+# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+# POSSIBILITY OF SUCH DAMAGE.
+#
+# Author: Lisa Hsu
+#
+
+# Configure the TLB hierarchy
+# Places which would probably need to be modified if you
+# want a different hierarchy are specified by a <Modify here .. >'
+# comment
+import m5
+from m5.objects import *
+
+def TLB_constructor(level):
+
+ constructor_call = "X86GPUTLB(size = options.L%(level)dTLBentries, \
+ assoc = options.L%(level)dTLBassoc, \
+ hitLatency = options.L%(level)dAccessLatency,\
+ missLatency2 = options.L%(level)dMissLatency,\
+ maxOutstandingReqs = options.L%(level)dMaxOutstandingReqs,\
+ accessDistance = options.L%(level)dAccessDistanceStat,\
+ clk_domain = SrcClockDomain(\
+ clock = options.GPUClock,\
+ voltage_domain = VoltageDomain(\
+ voltage = options.gpu_voltage)))" % locals()
+ return constructor_call
+
+def Coalescer_constructor(level):
+
+ constructor_call = "TLBCoalescer(probesPerCycle = \
+ options.L%(level)dProbesPerCycle, \
+ coalescingWindow = options.L%(level)dCoalescingWindow,\
+ disableCoalescing = options.L%(level)dDisableCoalescing,\
+ clk_domain = SrcClockDomain(\
+ clock = options.GPUClock,\
+ voltage_domain = VoltageDomain(\
+ voltage = options.gpu_voltage)))" % locals()
+ return constructor_call
+
+def create_TLB_Coalescer(options, my_level, my_index, TLB_name,
Coalescer_name):
+ # arguments: options, TLB level, number of private structures for this
Level,
+ # TLB name and Coalescer name
+ for i in xrange(my_index):
+ TLB_name.append(eval(TLB_constructor(my_level)))
+ Coalescer_name.append(eval(Coalescer_constructor(my_level)))
+
+def config_tlb_hierarchy(options, system, shader_idx):
+ n_cu = options.num_compute_units
+ # Make this configurable now, instead of the hard coded val. The
dispatcher
+ # is always the last item in the system.cpu list.
+ dispatcher_idx = len(system.cpu) - 1
+
+ if options.TLB_config == "perLane":
+ num_TLBs = 64 * n_cu
+ elif options.TLB_config == "mono":
+ num_TLBs = 1
+ elif options.TLB_config == "perCU":
+ num_TLBs = n_cu
+ elif options.TLB_config == "2CU":
+ num_TLBs = n_cu >> 1
+ else:
+ print "Bad option for TLB Configuration."
+ sys.exit(1)
+
+
#----------------------------------------------------------------------------------------
+ # A visual representation of the TLB hierarchy
+ # for ease of configuration
+ # < Modify here the width and the number of levels if you want a different
configuration >
+ # width is the number of TLBs of the given type (i.e., D-TLB, I-TLB etc)
for this level
+ L1 = [{'name': 'sqc', 'width': options.num_sqc, 'TLBarray': [],
'CoalescerArray': []},
+ {'name': 'dispatcher', 'width': 1, 'TLBarray': [], 'CoalescerArray':
[]},
+ {'name': 'l1', 'width': num_TLBs, 'TLBarray': [], 'CoalescerArray':
[]}]
+
+ L2 = [{'name': 'l2', 'width': 1, 'TLBarray': [], 'CoalescerArray': []}]
+ L3 = [{'name': 'l3', 'width': 1, 'TLBarray': [], 'CoalescerArray': []}]
+
+ TLB_hierarchy = [L1, L2, L3]
+
+
#----------------------------------------------------------------------------------------
+ # Create the hierarchy
+ # Call the appropriate constructors and add objects to the system
+
+ for i in xrange(len(TLB_hierarchy)):
+ hierarchy_level = TLB_hierarchy[i]
+ level = i+1
+ for TLB_type in hierarchy_level:
+ TLB_index = TLB_type['width']
+ TLB_array = TLB_type['TLBarray']
+ Coalescer_array = TLB_type['CoalescerArray']
+ # If the sim calls for a fixed L1 TLB size across CUs,
+ # override the TLB entries option
+ if options.tot_L1TLB_size:
+ options.L1TLBentries = options.tot_L1TLB_size / num_TLBs
+ if options.L1TLBassoc > options.L1TLBentries:
+ options.L1TLBassoc = options.L1TLBentries
+ # call the constructors for the TLB and the Coalescer
+ create_TLB_Coalescer(options, level, TLB_index,\
+ TLB_array, Coalescer_array)
+
+ system_TLB_name = TLB_type['name'] + '_tlb'
+ system_Coalescer_name = TLB_type['name'] + '_coalescer'
+
+ # add the different TLB levels to the system
+ # Modify here if you want to make the TLB hierarchy a child of
+ # the shader.
+ exec('system.%s = TLB_array' % system_TLB_name)
+ exec('system.%s = Coalescer_array' % system_Coalescer_name)
+
+ #===========================================================
+ # Specify the TLB hierarchy (i.e., port connections)
+ # All TLBs but the last level TLB need to have a memSidePort (master)
+ #===========================================================
+
+ # Each TLB is connected with its Coalescer through a single port.
+ # There is a one-to-one mapping of TLBs to Coalescers at a given level
+ # This won't be modified no matter what the hierarchy looks like.
+ for i in xrange(len(TLB_hierarchy)):
+ hierarchy_level = TLB_hierarchy[i]
+ level = i+1
+ for TLB_type in hierarchy_level:
+ name = TLB_type['name']
+ for index in range(TLB_type['width']):
+ exec('system.%s_coalescer[%d].master[0] = \
+ system.%s_tlb[%d].slave[0]' % \
+ (name, index, name, index))
+
+ # Connect the cpuSidePort (slave) of all the coalescers in level 1
+ # < Modify here if you want a different configuration >
+ for TLB_type in L1:
+ name = TLB_type['name']
+ num_TLBs = TLB_type['width']
+ if name == 'l1': # L1 D-TLBs
+ tlb_per_cu = num_TLBs / n_cu
+ for cu_idx in range(n_cu):
+ if tlb_per_cu:
+ for tlb in range(tlb_per_cu):
+ exec('system.cpu[%d].CUs[%d].translation_port[%d] = \
+ system.l1_coalescer[%d].slave[%d]' % \
+ (shader_idx, cu_idx, tlb,
cu_idx*tlb_per_cu+tlb, 0))
+ else:
+ exec('system.cpu[%d].CUs[%d].translation_port[%d] = \
+ system.l1_coalescer[%d].slave[%d]' % \
+ (shader_idx, cu_idx, tlb_per_cu, cu_idx / (n_cu /
num_TLBs), cu_idx % (n_cu / num_TLBs)))
+
+ elif name == 'dispatcher': # Dispatcher TLB
+ for index in range(TLB_type['width']):
+ exec('system.cpu[%d].translation_port = \
+ system.dispatcher_coalescer[%d].slave[0]' % \
+ (dispatcher_idx, index))
+ elif name == 'sqc': # I-TLB
+ for index in range(n_cu):
+ sqc_tlb_index = index / options.cu_per_sqc
+ sqc_tlb_port_id = index % options.cu_per_sqc
+ exec('system.cpu[%d].CUs[%d].sqc_tlb_port = \
+ system.sqc_coalescer[%d].slave[%d]' % \
+ (shader_idx, index, sqc_tlb_index, sqc_tlb_port_id))
+
+
+ # Connect the memSidePorts (masters) of all the TLBs with the
+ # cpuSidePorts (slaves) of the Coalescers of the next level
_______________________________________________
gem5-dev mailing list
[email protected]
http://m5sim.org/mailman/listinfo/gem5-dev