[gem5-dev] Change in gem5/gem5[develop]: configs: Update Vega properties file to use gem5 parameters
Kyle Roarty has uploaded this change for review. ( https://gem5-review.googlesource.com/c/public/gem5/+/57309 ) Change subject: configs: Update Vega properties file to use gem5 parameters .. configs: Update Vega properties file to use gem5 parameters Previously, some values were hardcoded which resulted in Vega and the underlying ROCm runtime assuming a 64 CU config, even when run with a lower number of CUs in gem5. This caused an error in DNNMark, where it looked for the wrong cachefile database Change-Id: I76cd1d04fd00df2b66c9ecfae9b364f553f5e5e4 --- M configs/example/hsaTopology.py 1 file changed, 49 insertions(+), 30 deletions(-) diff --git a/configs/example/hsaTopology.py b/configs/example/hsaTopology.py index b11a8df..b462b95 100644 --- a/configs/example/hsaTopology.py +++ b/configs/example/hsaTopology.py @@ -184,36 +184,39 @@ # Populate GPU node properties cu_scratch = options.simds_per_cu * options.wfs_per_simd -node_prop = 'cpu_cores_count 0\n' + \ -'simd_count 256\n' + \ -'mem_banks_count 1\n' + \ -'caches_count %s\n' % caches+ \ -'io_links_count %s\n' % io_links+ \ -'cpu_core_id_base 0\n' + \ -'simd_id_base 2147487744\n' + \ -'max_waves_per_simd 10\n' + \ -'lds_size_in_kb 64\n' + \ -'gds_size_in_kb 0\n'+ \ -'wave_front_size 64\n' + \ -'array_count 4\n' + \ -'simd_arrays_per_engine 1\n'+ \ -'cu_per_simd_array 16\n'+ \ -'simd_per_cu 4\n' + \ -'max_slots_scratch_cu %s\n' % cu_scratch+ \ -'vendor_id 4098\n' + \ -'device_id 26720\n' + \ -'location_id 1024\n'+ \ -'drm_render_minor %s\n' % drm_num + \ -'hive_id 0\n' + \ -'num_sdma_engines 2\n' + \ -'num_sdma_xgmi_engines 0\n' + \ -'max_engine_clk_fcompute 1500\n'+ \ -'local_mem_size 17163091968\n' + \ -'fw_version 421\n' + \ -'capability 238208\n' + \ -'debug_prop 32768\n'+ \ -'sdma_fw_version 430\n' + \ -'max_engine_clk_ccompute 3400\n' +node_prop = 'cpu_cores_count 0\n' + \ +'simd_count %d\n' \ +% (options.num_compute_units * options.simds_per_cu)+ \ +'mem_banks_count 1\n' + \ +'caches_count %s\n' % caches+ \ 
+'io_links_count %s\n' % io_links+ \ +'cpu_core_id_base 0\n' + \ +'simd_id_base 2147487744\n' + \ +'max_waves_per_simd %s\n' % options.wfs_per_simd+ \ +'lds_size_in_kb %d\n' % int(options.lds_size / 1024)+ \ +'gds_size_in_kb 0\n'+ \ +'wave_front_size %s\n'% options.wf_size + \ +'array_count 4\n' + \ +'simd_arrays_per_engine %s\n' % options.sa_per_complex + \ +'cu_per_simd_array %s\n' % options.cu_per_sa+ \ +'simd_per_cu %s\n' % options.simds_per_cu + \ +'max_slots_scratch_cu %s\n' % cu_scratch+ \ +'vendor_id 4098\n' + \ +'device_id 26720\n' + \ +'location_id 1024\n'+ \ +'drm_render_minor %s\n' % drm_num + \ +'hive_id 0\n' + \ +'num_sdma_engines 2\n' + \ +'num_sdma_xgmi_engines 0\n' + \ +'max_engine_clk_fcompute %s\n'
[gem5-dev] Change in gem5/gem5[develop]: arch-vega: Handle signed offsets in Global/Scratch instructions
Kyle Roarty has submitted this change. ( https://gem5-review.googlesource.com/c/public/gem5/+/57209 ) Change subject: arch-vega: Handle signed offsets in Global/Scratch instructions .. arch-vega: Handle signed offsets in Global/Scratch instructions The offset field in Flat-style instructions is treated differently based on if the instruction is Flat or Global/Scratch. In Flat insts, the offset is treated as a 12-bit unsigned number. In Global/Scratch insts, the offset is treated as a 13-bit signed number. This patch updates the calcAddr function for Flat-style instructions to properly sign-extend the offset on Global/Scratch instructions Change-Id: I57f10258c23d900da9bf6ded6717c6e8abd177b7 Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/57209 Reviewed-by: Matt Sinclair Maintainer: Matt Sinclair Tested-by: kokoro Reviewed-by: Matthew Poremba --- M src/arch/amdgpu/vega/insts/op_encodings.hh 1 file changed, 35 insertions(+), 3 deletions(-) Approvals: Matthew Poremba: Looks good to me, approved Matt Sinclair: Looks good to me, approved; Looks good to me, approved kokoro: Regressions pass diff --git a/src/arch/amdgpu/vega/insts/op_encodings.hh b/src/arch/amdgpu/vega/insts/op_encodings.hh index 2642cd7..e9ce4cc 100644 --- a/src/arch/amdgpu/vega/insts/op_encodings.hh +++ b/src/arch/amdgpu/vega/insts/op_encodings.hh @@ -905,8 +905,16 @@ void calcAddr(GPUDynInstPtr gpuDynInst, ConstVecOperandU64 &vaddr, - ScalarRegU32 saddr, ScalarRegU32 offset) + ScalarRegU32 saddr, ScalarRegI32 offset) { +// Offset is a 13-bit field w/the following meanings: +// In Flat instructions, offset is a 12-bit unsigned number +// In Global/Scratch instructions, offset is a 13-bit signed number +if (isFlat()) { +offset = offset & 0xfff; +} else { +offset = (ScalarRegI32)sext<13>(offset); +} // If saddr = 0x7f there is no scalar reg to read and address will // be a 64-bit address. Otherwise, saddr is the reg index for a // scalar reg used as the base address for a 32-bit address. 
@@ -956,7 +964,7 @@ void calcAddrSgpr(GPUDynInstPtr gpuDynInst, ConstVecOperandU64 &vaddr, - ConstScalarOperandU64 &saddr, ScalarRegU32 offset) + ConstScalarOperandU64 &saddr, ScalarRegI32 offset) { // Use SGPR pair as a base address and add VGPR-offset and // instruction offset. The VGPR-offset is always 32-bits so we @@ -971,7 +979,7 @@ void calcAddrVgpr(GPUDynInstPtr gpuDynInst, ConstVecOperandU64 &addr, - ScalarRegU32 offset) + ScalarRegI32 offset) { for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { if (gpuDynInst->exec_mask[lane]) { -- To view, visit https://gem5-review.googlesource.com/c/public/gem5/+/57209 To unsubscribe, or for help writing mail filters, visit https://gem5-review.googlesource.com/settings Gerrit-Project: public/gem5 Gerrit-Branch: develop Gerrit-Change-Id: I57f10258c23d900da9bf6ded6717c6e8abd177b7 Gerrit-Change-Number: 57209 Gerrit-PatchSet: 2 Gerrit-Owner: Kyle Roarty Gerrit-Reviewer: Kyle Roarty Gerrit-Reviewer: Matt Sinclair Gerrit-Reviewer: Matthew Poremba Gerrit-Reviewer: kokoro Gerrit-MessageType: merged ___ gem5-dev mailing list -- gem5-dev@gem5.org To unsubscribe send an email to gem5-dev-le...@gem5.org %(web_page_url)slistinfo%(cgiext)s/%(_internal_name)s
[gem5-dev] Change in gem5/gem5[develop]: arch-vega: Handle signed offsets in Global/Scratch instructions
Kyle Roarty has uploaded this change for review. ( https://gem5-review.googlesource.com/c/public/gem5/+/57209 ) Change subject: arch-vega: Handle signed offsets in Global/Scratch instructions .. arch-vega: Handle signed offsets in Global/Scratch instructions The offset field in Flat-style instructions is treated differently based on if the instruction is Flat or Global/Scratch. In Flat insts, the offset is treated as a 12-bit unsigned number. In Global/Scratch insts, the offset is treated as a 13-bit signed number. This patch updates the calcAddr function for Flat-style instructions to properly sign-extend the offset on Global/Scratch instructions Change-Id: I57f10258c23d900da9bf6ded6717c6e8abd177b7 --- M src/arch/amdgpu/vega/insts/op_encodings.hh 1 file changed, 30 insertions(+), 3 deletions(-) diff --git a/src/arch/amdgpu/vega/insts/op_encodings.hh b/src/arch/amdgpu/vega/insts/op_encodings.hh index 2642cd7..e9ce4cc 100644 --- a/src/arch/amdgpu/vega/insts/op_encodings.hh +++ b/src/arch/amdgpu/vega/insts/op_encodings.hh @@ -905,8 +905,16 @@ void calcAddr(GPUDynInstPtr gpuDynInst, ConstVecOperandU64 &vaddr, - ScalarRegU32 saddr, ScalarRegU32 offset) + ScalarRegU32 saddr, ScalarRegI32 offset) { +// Offset is a 13-bit field w/the following meanings: +// In Flat instructions, offset is a 12-bit unsigned number +// In Global/Scratch instructions, offset is a 13-bit signed number +if (isFlat()) { +offset = offset & 0xfff; +} else { +offset = (ScalarRegI32)sext<13>(offset); +} // If saddr = 0x7f there is no scalar reg to read and address will // be a 64-bit address. Otherwise, saddr is the reg index for a // scalar reg used as the base address for a 32-bit address. @@ -956,7 +964,7 @@ void calcAddrSgpr(GPUDynInstPtr gpuDynInst, ConstVecOperandU64 &vaddr, - ConstScalarOperandU64 &saddr, ScalarRegU32 offset) + ConstScalarOperandU64 &saddr, ScalarRegI32 offset) { // Use SGPR pair as a base address and add VGPR-offset and // instruction offset. 
The VGPR-offset is always 32-bits so we @@ -971,7 +979,7 @@ void calcAddrVgpr(GPUDynInstPtr gpuDynInst, ConstVecOperandU64 &addr, - ScalarRegU32 offset) + ScalarRegI32 offset) { for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { if (gpuDynInst->exec_mask[lane]) { -- To view, visit https://gem5-review.googlesource.com/c/public/gem5/+/57209 To unsubscribe, or for help writing mail filters, visit https://gem5-review.googlesource.com/settings Gerrit-Project: public/gem5 Gerrit-Branch: develop Gerrit-Change-Id: I57f10258c23d900da9bf6ded6717c6e8abd177b7 Gerrit-Change-Number: 57209 Gerrit-PatchSet: 1 Gerrit-Owner: Kyle Roarty Gerrit-MessageType: newchange ___ gem5-dev mailing list -- gem5-dev@gem5.org To unsubscribe send an email to gem5-dev-le...@gem5.org %(web_page_url)slistinfo%(cgiext)s/%(_internal_name)s
[gem5-dev] Change in gem5/gem5[develop]: gpu-compute: Fix register checking and allocation in dyn manager
Kyle Roarty has uploaded this change for review. ( https://gem5-review.googlesource.com/c/public/gem5/+/56909 ) Change subject: gpu-compute: Fix register checking and allocation in dyn manager .. gpu-compute: Fix register checking and allocation in dyn manager This patch updates the canAllocate function to account both for the number of regions of registers that need to be allocated, and for the fact that the registers aren't one continuous chunk. The patch also consolidates the registers as much as possible when a register chunk is freed. This prevents fragmentation from making it impossible to allocate enough registers Change-Id: Ic95cfe614d247add475f7139d3703991042f8149 --- M src/gpu-compute/dyn_pool_manager.cc 1 file changed, 64 insertions(+), 6 deletions(-) diff --git a/src/gpu-compute/dyn_pool_manager.cc b/src/gpu-compute/dyn_pool_manager.cc index 62a39a9..3db5e7f 100644 --- a/src/gpu-compute/dyn_pool_manager.cc +++ b/src/gpu-compute/dyn_pool_manager.cc @@ -93,8 +93,24 @@ DynPoolManager::canAllocate(uint32_t numRegions, uint32_t size) { uint32_t actualSize = minAllocatedElements(size); -DPRINTF(GPUVRF,"Can Allocate %d\n",actualSize); -return (_totRegSpaceAvailable >= actualSize); +uint32_t numAvailChunks = 0; +DPRINTF(GPUVRF, "Checking if we can allocate %d regions of size %d " +"registers\n", numRegions, actualSize); +for (auto it : freeSpaceRecord) { +numAvailChunks += (it.second - it.first)/actualSize; +} + +if (numAvailChunks >= numRegions) { +DPRINTF(GPUVRF, "Able to allocate %d regions of size %d; " +"number of available regions: %d\n", +numRegions, actualSize, numAvailChunks); +return true; +} else { +DPRINTF(GPUVRF, "Unable to allocate %d regions of size %d; " +"number of available regions: %d\n", +numRegions, actualSize, numAvailChunks); +return false; +} } uint32_t @@ -105,7 +121,8 @@ uint32_t actualSize = minAllocatedElements(size); auto it = freeSpaceRecord.begin(); while (it != freeSpaceRecord.end()) { -if (it->second >= actualSize) { +uint32_t 
curChunkSize = it->second - it->first; +if (curChunkSize >= actualSize) { // assign the next block starting from here startIdx = it->first; _regionSize = actualSize; @@ -115,14 +132,13 @@ // This case sees if this chunk size is exactly equal to // the size of the requested chunk. If yes, then this can't // contribute to future requests and hence, should be removed -if (it->second == actualSize) { +if (curChunkSize == actualSize) { it = freeSpaceRecord.erase(it); // once entire freeSpaceRecord allocated, increment // reservedSpaceRecord count ++reservedSpaceRecord; } else { it->first += actualSize; -it->second -= actualSize; } break; } @@ -144,7 +160,32 @@ // Current dynamic register allocation does not handle wraparound assert(firstIdx < lastIdx); _totRegSpaceAvailable += lastIdx-firstIdx; -freeSpaceRecord.push_back(std::make_pair(firstIdx,lastIdx-firstIdx)); + +// Consolidate with other regions. Need to check if firstIdx or lastIdx +// already exist +auto firstIt = std::find_if( +freeSpaceRecord.begin(), +freeSpaceRecord.end(), +[&](const std::pair& element){ +return element.second == firstIdx;} ); + +auto lastIt = std::find_if( +freeSpaceRecord.begin(), +freeSpaceRecord.end(), +[&](const std::pair& element){ +return element.first == lastIdx;} ); + +if (firstIt != freeSpaceRecord.end() && lastIt != freeSpaceRecord.end()) { +firstIt->second = lastIt->second; +freeSpaceRecord.erase(lastIt); +} else if (firstIt != freeSpaceRecord.end()) { +firstIt->second = lastIdx; +} else if (lastIt != freeSpaceRecord.end()) { +lastIt->first = firstIdx; +} else { +freeSpaceRecord.push_back(std::make_pair(firstIdx, lastIdx)); +} + // remove corresponding entry from reservedSpaceRecord too --reservedSpaceRecord; } -- To view, visit https://gem5-review.googlesource.com/c/public/gem5/+/56909 To unsubscribe, or for help writing mail filters, visit https://gem5-review.googlesource.com/settings Gerrit-Project: public/gem5 Gerrit-Branch: develop Gerrit-Change-Id: 
Ic95cfe614d247add475f7139d3703991042f8149 Gerrit-Change-Number: 56909 Gerrit-PatchSet: 1 Gerrit-Owner: Kyle Roarty Gerrit-MessageType: newchange ___ gem5-dev mailing list -- gem5-dev@gem5
[gem5-dev] Change in gem5/gem5[develop]: gpu-compute: Set scratch_base, lds_base for gfx902
Kyle Roarty has uploaded this change for review. ( https://gem5-review.googlesource.com/c/public/gem5/+/54663 ) Change subject: gpu-compute: Set scratch_base, lds_base for gfx902 .. gpu-compute: Set scratch_base, lds_base for gfx902 When updating how scratch_base and lds_base were set, gfx902 was left out. This adds in gfx902 to the case statement, allowing the apertures to be set and for simulations using gfx902 to not error out Change-Id: I0e1adbdf63f7c129186fb835e30adac9cd4b72d0 --- M src/gpu-compute/gpu_compute_driver.cc 1 file changed, 15 insertions(+), 0 deletions(-) diff --git a/src/gpu-compute/gpu_compute_driver.cc b/src/gpu-compute/gpu_compute_driver.cc index e908f4e..d98f4c6 100644 --- a/src/gpu-compute/gpu_compute_driver.cc +++ b/src/gpu-compute/gpu_compute_driver.cc @@ -331,6 +331,7 @@ ldsApeBase(i + 1); break; case GfxVersion::gfx900: + case GfxVersion::gfx902: args->process_apertures[i].scratch_base = scratchApeBaseV9(); args->process_apertures[i].lds_base = @@ -631,6 +632,7 @@ ape_args->lds_base = ldsApeBase(i + 1); break; case GfxVersion::gfx900: + case GfxVersion::gfx902: ape_args->scratch_base = scratchApeBaseV9(); ape_args->lds_base = ldsApeBaseV9(); break; -- To view, visit https://gem5-review.googlesource.com/c/public/gem5/+/54663 To unsubscribe, or for help writing mail filters, visit https://gem5-review.googlesource.com/settings Gerrit-Project: public/gem5 Gerrit-Branch: develop Gerrit-Change-Id: I0e1adbdf63f7c129186fb835e30adac9cd4b72d0 Gerrit-Change-Number: 54663 Gerrit-PatchSet: 1 Gerrit-Owner: Kyle Roarty Gerrit-MessageType: newchange ___ gem5-dev mailing list -- gem5-dev@gem5.org To unsubscribe send an email to gem5-dev-le...@gem5.org %(web_page_url)slistinfo%(cgiext)s/%(_internal_name)s
[gem5-dev] Change in gem5/gem5[release-staging-v21-2]: arch-gcn3,arch-vega: Select proper data on misaligned access
Kyle Roarty has submitted this change. ( https://gem5-review.googlesource.com/c/public/gem5/+/54503 ) Change subject: arch-gcn3,arch-vega: Select proper data on misaligned access .. arch-gcn3,arch-vega: Select proper data on misaligned access req1->getSize() returns the size in bytes, but because we're using it in an array index, we need to scale it by the size of the data type. This ensures we give the second request the proper data. Change-Id: I578665406762d5d0c95f2ea8297c362e1cc0620b Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/54503 Reviewed-by: Matt Sinclair Maintainer: Matt Sinclair Tested-by: kokoro Reviewed-by: Matthew Poremba --- M src/arch/amdgpu/vega/gpu_mem_helpers.hh M src/arch/amdgpu/gcn3/gpu_mem_helpers.hh 2 files changed, 23 insertions(+), 2 deletions(-) Approvals: Matthew Poremba: Looks good to me, approved Matt Sinclair: Looks good to me, approved; Looks good to me, approved kokoro: Regressions pass diff --git a/src/arch/amdgpu/gcn3/gpu_mem_helpers.hh b/src/arch/amdgpu/gcn3/gpu_mem_helpers.hh index 009bb7c..05299e1 100644 --- a/src/arch/amdgpu/gcn3/gpu_mem_helpers.hh +++ b/src/arch/amdgpu/gcn3/gpu_mem_helpers.hh @@ -107,7 +107,8 @@ pkt1->dataStatic(&(reinterpret_cast( gpuDynInst->d_data))[lane * N]); pkt2->dataStatic(&(reinterpret_cast( -gpuDynInst->d_data))[lane * N + req1->getSize()]); +gpuDynInst->d_data))[lane * N + + req1->getSize()/sizeof(T)]); DPRINTF(GPUMem, "CU%d: WF[%d][%d]: index: %d unaligned memory " "request for %#x\n", gpuDynInst->cu_id, gpuDynInst->simdId, gpuDynInst->wfSlotId, lane, diff --git a/src/arch/amdgpu/vega/gpu_mem_helpers.hh b/src/arch/amdgpu/vega/gpu_mem_helpers.hh index c60325d..a5a9ec9 100644 --- a/src/arch/amdgpu/vega/gpu_mem_helpers.hh +++ b/src/arch/amdgpu/vega/gpu_mem_helpers.hh @@ -107,7 +107,8 @@ pkt1->dataStatic(&(reinterpret_cast( gpuDynInst->d_data))[lane * N]); pkt2->dataStatic(&(reinterpret_cast( -gpuDynInst->d_data))[lane * N + req1->getSize()]); +gpuDynInst->d_data))[lane * N + + 
req1->getSize()/sizeof(T)]); DPRINTF(GPUMem, "CU%d: WF[%d][%d]: index: %d unaligned memory " "request for %#x\n", gpuDynInst->cu_id, gpuDynInst->simdId, gpuDynInst->wfSlotId, lane, -- To view, visit https://gem5-review.googlesource.com/c/public/gem5/+/54503 To unsubscribe, or for help writing mail filters, visit https://gem5-review.googlesource.com/settings Gerrit-Project: public/gem5 Gerrit-Branch: release-staging-v21-2 Gerrit-Change-Id: I578665406762d5d0c95f2ea8297c362e1cc0620b Gerrit-Change-Number: 54503 Gerrit-PatchSet: 3 Gerrit-Owner: Kyle Roarty Gerrit-Reviewer: Kyle Roarty Gerrit-Reviewer: Matt Sinclair Gerrit-Reviewer: Matthew Poremba Gerrit-Reviewer: kokoro Gerrit-CC: Bobby Bruce Gerrit-CC: Jason Lowe-Power Gerrit-MessageType: merged ___ gem5-dev mailing list -- gem5-dev@gem5.org To unsubscribe send an email to gem5-dev-le...@gem5.org %(web_page_url)slistinfo%(cgiext)s/%(_internal_name)s
[gem5-dev] Change in gem5/gem5[release-staging-v21-2]: arch-gcn3,arch-vega: Select proper data on misaligned access
Kyle Roarty has uploaded this change for review. ( https://gem5-review.googlesource.com/c/public/gem5/+/54503 ) Change subject: arch-gcn3,arch-vega: Select proper data on misaligned access .. arch-gcn3,arch-vega: Select proper data on misaligned access req1->getSize() returns the size in bytes, but because we're using it in an array index, we need to scale it by the size of the data type. This ensures we give the second request the proper data. Change-Id: I578665406762d5d0c95f2ea8297c362e1cc0620b --- M src/arch/amdgpu/vega/gpu_mem_helpers.hh M src/arch/amdgpu/gcn3/gpu_mem_helpers.hh 2 files changed, 18 insertions(+), 2 deletions(-) diff --git a/src/arch/amdgpu/gcn3/gpu_mem_helpers.hh b/src/arch/amdgpu/gcn3/gpu_mem_helpers.hh index 009bb7c..05299e1 100644 --- a/src/arch/amdgpu/gcn3/gpu_mem_helpers.hh +++ b/src/arch/amdgpu/gcn3/gpu_mem_helpers.hh @@ -107,7 +107,8 @@ pkt1->dataStatic(&(reinterpret_cast( gpuDynInst->d_data))[lane * N]); pkt2->dataStatic(&(reinterpret_cast( -gpuDynInst->d_data))[lane * N + req1->getSize()]); +gpuDynInst->d_data))[lane * N + + req1->getSize()/sizeof(T)]); DPRINTF(GPUMem, "CU%d: WF[%d][%d]: index: %d unaligned memory " "request for %#x\n", gpuDynInst->cu_id, gpuDynInst->simdId, gpuDynInst->wfSlotId, lane, diff --git a/src/arch/amdgpu/vega/gpu_mem_helpers.hh b/src/arch/amdgpu/vega/gpu_mem_helpers.hh index c60325d..a5a9ec9 100644 --- a/src/arch/amdgpu/vega/gpu_mem_helpers.hh +++ b/src/arch/amdgpu/vega/gpu_mem_helpers.hh @@ -107,7 +107,8 @@ pkt1->dataStatic(&(reinterpret_cast( gpuDynInst->d_data))[lane * N]); pkt2->dataStatic(&(reinterpret_cast( -gpuDynInst->d_data))[lane * N + req1->getSize()]); +gpuDynInst->d_data))[lane * N + + req1->getSize()/sizeof(T)]); DPRINTF(GPUMem, "CU%d: WF[%d][%d]: index: %d unaligned memory " "request for %#x\n", gpuDynInst->cu_id, gpuDynInst->simdId, gpuDynInst->wfSlotId, lane, -- To view, visit https://gem5-review.googlesource.com/c/public/gem5/+/54503 To unsubscribe, or for help writing mail filters, visit 
https://gem5-review.googlesource.com/settings Gerrit-Project: public/gem5 Gerrit-Branch: release-staging-v21-2 Gerrit-Change-Id: I578665406762d5d0c95f2ea8297c362e1cc0620b Gerrit-Change-Number: 54503 Gerrit-PatchSet: 1 Gerrit-Owner: Kyle Roarty Gerrit-MessageType: newchange ___ gem5-dev mailing list -- gem5-dev@gem5.org To unsubscribe send an email to gem5-dev-le...@gem5.org %(web_page_url)slistinfo%(cgiext)s/%(_internal_name)s
[gem5-dev] Change in gem5/gem5[develop]: arch-gcn3,arch-vega: Don't write exec in v_cmp_f_i32
Kyle Roarty has submitted this change. ( https://gem5-review.googlesource.com/c/public/gem5/+/52445 ) Change subject: arch-gcn3,arch-vega: Don't write exec in v_cmp_f_i32 .. arch-gcn3,arch-vega: Don't write exec in v_cmp_f_i32 Per the GCN3 and VEGA ISAs, v_cmpx_* writes exec, while v_cmp_* doesn't. This removes the erroneous exec write in the VOP3 implementation of v_cmp_f_i32. Change-Id: I048e35917163c45b879f38d31a88f3f3d56c0baf Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/52445 Maintainer: Matt Sinclair Reviewed-by: Matt Sinclair Reviewed-by: Matthew Poremba Tested-by: kokoro --- M src/arch/amdgpu/vega/insts/instructions.cc M src/arch/amdgpu/gcn3/insts/instructions.cc 2 files changed, 19 insertions(+), 2 deletions(-) Approvals: Matthew Poremba: Looks good to me, approved Matt Sinclair: Looks good to me, approved; Looks good to me, approved kokoro: Regressions pass diff --git a/src/arch/amdgpu/gcn3/insts/instructions.cc b/src/arch/amdgpu/gcn3/insts/instructions.cc index 65d008b..bb15957 100644 --- a/src/arch/amdgpu/gcn3/insts/instructions.cc +++ b/src/arch/amdgpu/gcn3/insts/instructions.cc @@ -20601,7 +20601,6 @@ } } -wf->execMask() = sdst.rawData(); sdst.write(); } diff --git a/src/arch/amdgpu/vega/insts/instructions.cc b/src/arch/amdgpu/vega/insts/instructions.cc index 757bfa8..1e07f0b 100644 --- a/src/arch/amdgpu/vega/insts/instructions.cc +++ b/src/arch/amdgpu/vega/insts/instructions.cc @@ -22454,7 +22454,6 @@ } } -wf->execMask() = sdst.rawData(); sdst.write(); } // execute // --- Inst_VOP3__V_CMP_LT_I32 class methods --- -- To view, visit https://gem5-review.googlesource.com/c/public/gem5/+/52445 To unsubscribe, or for help writing mail filters, visit https://gem5-review.googlesource.com/settings Gerrit-Project: public/gem5 Gerrit-Branch: develop Gerrit-Change-Id: I048e35917163c45b879f38d31a88f3f3d56c0baf Gerrit-Change-Number: 52445 Gerrit-PatchSet: 2 Gerrit-Owner: Kyle Roarty Gerrit-Reviewer: Kyle Roarty Gerrit-Reviewer: Matt Sinclair 
Gerrit-Reviewer: Matthew Poremba Gerrit-Reviewer: kokoro Gerrit-MessageType: merged ___ gem5-dev mailing list -- gem5-dev@gem5.org To unsubscribe send an email to gem5-dev-le...@gem5.org %(web_page_url)slistinfo%(cgiext)s/%(_internal_name)s
[gem5-dev] Change in gem5/gem5[develop]: arch-gcn3,arch-vega: Don't write exec in v_cmp_f_i32
Kyle Roarty has uploaded this change for review. ( https://gem5-review.googlesource.com/c/public/gem5/+/52445 ) Change subject: arch-gcn3,arch-vega: Don't write exec in v_cmp_f_i32 .. arch-gcn3,arch-vega: Don't write exec in v_cmp_f_i32 Per the GCN3 and VEGA ISAs, v_cmpx_* writes exec, while v_cmp_* doesn't. This removes the erroneous exec write in the VOP3 implementation of v_cmp_f_i32. Change-Id: I048e35917163c45b879f38d31a88f3f3d56c0baf --- M src/arch/amdgpu/vega/insts/instructions.cc M src/arch/amdgpu/gcn3/insts/instructions.cc 2 files changed, 14 insertions(+), 2 deletions(-) diff --git a/src/arch/amdgpu/gcn3/insts/instructions.cc b/src/arch/amdgpu/gcn3/insts/instructions.cc index 65d008b..bb15957 100644 --- a/src/arch/amdgpu/gcn3/insts/instructions.cc +++ b/src/arch/amdgpu/gcn3/insts/instructions.cc @@ -20601,7 +20601,6 @@ } } -wf->execMask() = sdst.rawData(); sdst.write(); } diff --git a/src/arch/amdgpu/vega/insts/instructions.cc b/src/arch/amdgpu/vega/insts/instructions.cc index 757bfa8..1e07f0b 100644 --- a/src/arch/amdgpu/vega/insts/instructions.cc +++ b/src/arch/amdgpu/vega/insts/instructions.cc @@ -22454,7 +22454,6 @@ } } -wf->execMask() = sdst.rawData(); sdst.write(); } // execute // --- Inst_VOP3__V_CMP_LT_I32 class methods --- -- To view, visit https://gem5-review.googlesource.com/c/public/gem5/+/52445 To unsubscribe, or for help writing mail filters, visit https://gem5-review.googlesource.com/settings Gerrit-Project: public/gem5 Gerrit-Branch: develop Gerrit-Change-Id: I048e35917163c45b879f38d31a88f3f3d56c0baf Gerrit-Change-Number: 52445 Gerrit-PatchSet: 1 Gerrit-Owner: Kyle Roarty Gerrit-MessageType: newchange ___ gem5-dev mailing list -- gem5-dev@gem5.org To unsubscribe send an email to gem5-dev-le...@gem5.org %(web_page_url)slistinfo%(cgiext)s/%(_internal_name)s
[gem5-dev] Change in gem5/gem5[develop]: arch-gcn3: Fix MUBUF out-of-bounds case 1
Kyle Roarty has uploaded this change for review. ( https://gem5-review.googlesource.com/c/public/gem5/+/51127 ) Change subject: arch-gcn3: Fix MUBUF out-of-bounds case 1 .. arch-gcn3: Fix MUBUF out-of-bounds case 1 This patch updates the out-of-bounds check to properly check against the correct buffer_offset, which is different depending on if the const_swizzle_enable is true or false. Change-Id: I5c687c09ee7f8e446618084b8545b74a84211d4d --- M src/arch/amdgpu/gcn3/insts/op_encodings.hh 1 file changed, 36 insertions(+), 20 deletions(-) diff --git a/src/arch/amdgpu/gcn3/insts/op_encodings.hh b/src/arch/amdgpu/gcn3/insts/op_encodings.hh index 24edfa7..be96924 100644 --- a/src/arch/amdgpu/gcn3/insts/op_encodings.hh +++ b/src/arch/amdgpu/gcn3/insts/op_encodings.hh @@ -634,6 +634,7 @@ Addr stride = 0; Addr buf_idx = 0; Addr buf_off = 0; +Addr buffer_offset = 0; BufferRsrcDescriptor rsrc_desc; std::memcpy((void*)&rsrc_desc, s_rsrc_desc.rawDataPtr(), @@ -656,6 +657,26 @@ buf_off = v_off[lane] + inst_offset; +if (rsrc_desc.swizzleEn) { +Addr idx_stride = 8 << rsrc_desc.idxStride; +Addr elem_size = 2 << rsrc_desc.elemSize; +Addr idx_msb = buf_idx / idx_stride; +Addr idx_lsb = buf_idx % idx_stride; +Addr off_msb = buf_off / elem_size; +Addr off_lsb = buf_off % elem_size; +DPRINTF(GCN3, "mubuf swizzled lane %d: " +"idx_stride = %llx, elem_size = %llx, " +"idx_msb = %llx, idx_lsb = %llx, " +"off_msb = %llx, off_lsb = %llx\n", +lane, idx_stride, elem_size, idx_msb, idx_lsb, +off_msb, off_lsb); + +buffer_offset =(idx_msb * stride + off_msb * elem_size) +* idx_stride + idx_lsb * elem_size + off_lsb; +} else { +buffer_offset = buf_off + stride * buf_idx; +} + /** * Range check behavior causes out of range accesses to @@ -665,7 +686,7 @@ * basis. 
*/ if (rsrc_desc.stride == 0 || !rsrc_desc.swizzleEn) { -if (buf_off + stride * buf_idx >= +if (buffer_offset >= rsrc_desc.numRecords - s_offset.rawData()) { DPRINTF(GCN3, "mubuf out-of-bounds condition 1: " "lane = %d, buffer_offset = %llx, " @@ -692,25 +713,7 @@ } } -if (rsrc_desc.swizzleEn) { -Addr idx_stride = 8 << rsrc_desc.idxStride; -Addr elem_size = 2 << rsrc_desc.elemSize; -Addr idx_msb = buf_idx / idx_stride; -Addr idx_lsb = buf_idx % idx_stride; -Addr off_msb = buf_off / elem_size; -Addr off_lsb = buf_off % elem_size; -DPRINTF(GCN3, "mubuf swizzled lane %d: " -"idx_stride = %llx, elem_size = %llx, " -"idx_msb = %llx, idx_lsb = %llx, " -"off_msb = %llx, off_lsb = %llx\n", -lane, idx_stride, elem_size, idx_msb, idx_lsb, -off_msb, off_lsb); - -vaddr += ((idx_msb * stride + off_msb * elem_size) -* idx_stride + idx_lsb * elem_size + off_lsb); -} else { -vaddr += buf_off + stride * buf_idx; -} +vaddr += buffer_offset; DPRINTF(GCN3, "Calculating mubuf address for lane %d: " "vaddr = %llx, base_addr = %llx, " -- To view, visit https://gem5-review.googlesource.com/c/public/gem5/+/51127 To unsubscribe, or for help writing mail filters, visit https://gem5-review.googlesource.com/settings Gerrit-Project: public/gem5 Gerrit-Branch: develop Gerrit-Change-Id: I5c687c09ee7f8e446618084b8545b74a84211d4d Gerrit-Change-Number: 51127 Gerrit-PatchSet: 1 Gerrit-Owner: Kyle Roarty Gerrit-MessageType: newchange ___ gem5-dev mailing list -- gem5-dev@gem5.org To unsubscribe send an email to gem5-dev-le...@gem5.org %(web_page_url)slistinfo%(cgiext)s/%(_internal_name)s
[gem5-dev] Change in gem5/gem5[develop]: util-docker: Fix building gcn-gpu image
Kyle Roarty has submitted this change. ( https://gem5-review.googlesource.com/c/public/gem5/+/50847 ) Change subject: util-docker: Fix building gcn-gpu image .. util-docker: Fix building gcn-gpu image In the gcn-gpu image, rocBLAS wasn't able to be installed. This was due to us not installing rocm-cmake, as rocBLAS is dependent on it and will download the most recent version of rocm-cmake if it isn't installed. The most recent version of rocm-cmake wasn't compatible with the version of ROCm we're using. This patch installs rocm-cmake before building and installing rocBLAS instead of after. Change-Id: Iaaa34d5e0d6594fddd0d1a7d147f43405163ca89 Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/50847 Reviewed-by: Matt Sinclair Reviewed-by: Bobby R. Bruce Maintainer: Matt Sinclair Maintainer: Bobby R. Bruce Tested-by: kokoro --- M util/dockerfiles/gcn-gpu/Dockerfile 1 file changed, 5 insertions(+), 1 deletion(-) Approvals: Matt Sinclair: Looks good to me, approved; Looks good to me, approved Bobby R. 
Bruce: Looks good to me, approved; Looks good to me, approved kokoro: Regressions pass diff --git a/util/dockerfiles/gcn-gpu/Dockerfile b/util/dockerfiles/gcn-gpu/Dockerfile index 360ab1f..dee02b0 100644 --- a/util/dockerfiles/gcn-gpu/Dockerfile +++ b/util/dockerfiles/gcn-gpu/Dockerfile @@ -98,6 +98,10 @@ RUN ln -s /HIP/build/rocclr/CMakeFiles/Export/_opt/rocm/hip/lib/cmake/hip/* /opt/rocm/hip/lib/cmake/hip/ WORKDIR / +# rocBLAS downloads the most recent rocm-cmake if it isn't installed before +# building +RUN apt install rocm-cmake + RUN git clone -b rocm-4.0.0 \ https://github.com/ROCmSoftwarePlatform/rocBLAS.git && mkdir rocBLAS/build @@ -109,7 +113,7 @@ WORKDIR / # MIOpen dependencies + MIOpen -RUN apt install rocm-cmake rocm-clang-ocl miopen-hip +RUN apt install rocm-clang-ocl miopen-hip # Clone MIOpen repo so that we have the kernel sources available RUN git clone -b rocm-4.0.1 https://github.com/ROCmSoftwarePlatform/MIOpen.git -- To view, visit https://gem5-review.googlesource.com/c/public/gem5/+/50847 To unsubscribe, or for help writing mail filters, visit https://gem5-review.googlesource.com/settings Gerrit-Project: public/gem5 Gerrit-Branch: develop Gerrit-Change-Id: Iaaa34d5e0d6594fddd0d1a7d147f43405163ca89 Gerrit-Change-Number: 50847 Gerrit-PatchSet: 2 Gerrit-Owner: Kyle Roarty Gerrit-Reviewer: Bobby R. Bruce Gerrit-Reviewer: Kyle Roarty Gerrit-Reviewer: Matt Sinclair Gerrit-Reviewer: Matthew Poremba Gerrit-Reviewer: kokoro Gerrit-MessageType: merged ___ gem5-dev mailing list -- gem5-dev@gem5.org To unsubscribe send an email to gem5-dev-le...@gem5.org %(web_page_url)slistinfo%(cgiext)s/%(_internal_name)s
[gem5-dev] Change in gem5/gem5[develop]: util-docker: Fix building gcn-gpu image
Kyle Roarty has uploaded this change for review. ( https://gem5-review.googlesource.com/c/public/gem5/+/50847 ) Change subject: util-docker: Fix building gcn-gpu image .. util-docker: Fix building gcn-gpu image In the gcn-gpu image, rocBLAS wasn't able to be installed. This was due to us not installing rocm-cmake, as rocBLAS is dependent on it and will download the most recent version of rocm-cmake if it isn't installed. The most recent version of rocm-cmake wasn't compatible with the version of ROCm we're using. This patch installs rocm-cmake before building and installing rocBLAS instead of after. Change-Id: Iaaa34d5e0d6594fddd0d1a7d147f43405163ca89 --- M util/dockerfiles/gcn-gpu/Dockerfile 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/util/dockerfiles/gcn-gpu/Dockerfile b/util/dockerfiles/gcn-gpu/Dockerfile index 360ab1f..dee02b0 100644 --- a/util/dockerfiles/gcn-gpu/Dockerfile +++ b/util/dockerfiles/gcn-gpu/Dockerfile @@ -98,6 +98,10 @@ RUN ln -s /HIP/build/rocclr/CMakeFiles/Export/_opt/rocm/hip/lib/cmake/hip/* /opt/rocm/hip/lib/cmake/hip/ WORKDIR / +# rocBLAS downloads the most recent rocm-cmake if it isn't installed before +# building +RUN apt install rocm-cmake + RUN git clone -b rocm-4.0.0 \ https://github.com/ROCmSoftwarePlatform/rocBLAS.git && mkdir rocBLAS/build @@ -109,7 +113,7 @@ WORKDIR / # MIOpen dependencies + MIOpen -RUN apt install rocm-cmake rocm-clang-ocl miopen-hip +RUN apt install rocm-clang-ocl miopen-hip # Clone MIOpen repo so that we have the kernel sources available RUN git clone -b rocm-4.0.1 https://github.com/ROCmSoftwarePlatform/MIOpen.git -- To view, visit https://gem5-review.googlesource.com/c/public/gem5/+/50847 To unsubscribe, or for help writing mail filters, visit https://gem5-review.googlesource.com/settings Gerrit-Project: public/gem5 Gerrit-Branch: develop Gerrit-Change-Id: Iaaa34d5e0d6594fddd0d1a7d147f43405163ca89 Gerrit-Change-Number: 50847 Gerrit-PatchSet: 1 Gerrit-Owner: Kyle Roarty Gerrit-MessageType: 
newchange ___ gem5-dev mailing list -- gem5-dev@gem5.org To unsubscribe send an email to gem5-dev-le...@gem5.org %(web_page_url)slistinfo%(cgiext)s/%(_internal_name)s
[gem5-dev] Change in gem5/gem5[release-staging-v21-1]: gpu-compute: Use sorted map for coalescerFIFO
Kyle Roarty has uploaded this change for review. ( https://gem5-review.googlesource.com/c/public/gem5/+/48340 ) Change subject: gpu-compute: Use sorted map for coalescerFIFO .. gpu-compute: Use sorted map for coalescerFIFO coalescerFIFO, being a FIFO, should have a consistent ordering. unordered_map is not ordered, which led to a scenario where the first thing placed in the FIFO never got processed. This patch changes the unordered_map to a regular map, which is ordered. Change-Id: I9c7ab32c038d5e60f6b55236266a27b0cae8bfb0 --- M src/gpu-compute/tlb_coalescer.hh 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/gpu-compute/tlb_coalescer.hh b/src/gpu-compute/tlb_coalescer.hh index b97801b..fce8740 100644 --- a/src/gpu-compute/tlb_coalescer.hh +++ b/src/gpu-compute/tlb_coalescer.hh @@ -100,7 +100,7 @@ * option is to change it to curTick(), so we coalesce based * on the receive time. */ -typedef std::unordered_map> +typedef std::map> CoalescingFIFO; CoalescingFIFO coalescerFIFO; -- To view, visit https://gem5-review.googlesource.com/c/public/gem5/+/48340 To unsubscribe, or for help writing mail filters, visit https://gem5-review.googlesource.com/settings Gerrit-Project: public/gem5 Gerrit-Branch: release-staging-v21-1 Gerrit-Change-Id: I9c7ab32c038d5e60f6b55236266a27b0cae8bfb0 Gerrit-Change-Number: 48340 Gerrit-PatchSet: 1 Gerrit-Owner: Kyle Roarty Gerrit-MessageType: newchange ___ gem5-dev mailing list -- gem5-dev@gem5.org To unsubscribe send an email to gem5-dev-le...@gem5.org %(web_page_url)slistinfo%(cgiext)s/%(_internal_name)s
[gem5-dev] Change in gem5/gem5[release-staging-v21-1]: arch-gcn3: Implement large ds_read/write instructions
Kyle Roarty has uploaded this change for review. ( https://gem5-review.googlesource.com/c/public/gem5/+/48342 ) Change subject: arch-gcn3: Implement large ds_read/write instructions .. arch-gcn3: Implement large ds_read/write instructions This implements the 96 and 128b ds_read/write instructions in a similar fashion to the 3 and 4 dword flat_load/store instructions. These instructions are treated as reads/writes of 3 or 4 dwords, instead of as a single 96b/128b memory transaction, due to the limitations of the VecOperand class used in the amdgpu code. In order to handle treating the memory transaction as multiple dwords, the patch also adds in new initMemRead/initMemWrite functions for ds instructions. These are similar to the functions used in flat instructions for the same purpose. Change-Id: I0f2ba3cb7cf040abb876e6eae55a6d38149ee960 --- M src/arch/amdgpu/gcn3/insts/instructions.cc M src/arch/amdgpu/gcn3/insts/instructions.hh M src/arch/amdgpu/gcn3/insts/op_encodings.hh 3 files changed, 232 insertions(+), 4 deletions(-) diff --git a/src/arch/amdgpu/gcn3/insts/instructions.cc b/src/arch/amdgpu/gcn3/insts/instructions.cc index 21ab58d..79af7ac 100644 --- a/src/arch/amdgpu/gcn3/insts/instructions.cc +++ b/src/arch/amdgpu/gcn3/insts/instructions.cc @@ -34335,9 +34335,52 @@ void Inst_DS__DS_WRITE_B96::execute(GPUDynInstPtr gpuDynInst) { -panicUnimplemented(); +Wavefront *wf = gpuDynInst->wavefront(); +gpuDynInst->execUnitId = wf->execUnitId; +gpuDynInst->latency.init(gpuDynInst->computeUnit()); +gpuDynInst->latency.set( +gpuDynInst->computeUnit()->cyclesToTicks(Cycles(24))); +ConstVecOperandU32 addr(gpuDynInst, extData.ADDR); +ConstVecOperandU32 data0(gpuDynInst, extData.DATA0); +ConstVecOperandU32 data1(gpuDynInst, extData.DATA0 + 1); +ConstVecOperandU32 data2(gpuDynInst, extData.DATA0 + 2); + +addr.read(); +data0.read(); +data1.read(); +data2.read(); + +calcAddr(gpuDynInst, addr); + +for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { +if 
(gpuDynInst->exec_mask[lane]) { +(reinterpret_cast( +gpuDynInst->d_data))[lane * 4] = data0[lane]; +(reinterpret_cast( +gpuDynInst->d_data))[lane * 4 + 1] = data1[lane]; +(reinterpret_cast( +gpuDynInst->d_data))[lane * 4 + 2] = data2[lane]; +} +} + + gpuDynInst->computeUnit()->localMemoryPipe.issueRequest(gpuDynInst); } +void +Inst_DS__DS_WRITE_B96::initiateAcc(GPUDynInstPtr gpuDynInst) +{ +Addr offset0 = instData.OFFSET0; +Addr offset1 = instData.OFFSET1; +Addr offset = (offset1 << 8) | offset0; + +initMemWrite<3>(gpuDynInst, offset); +} // initiateAcc + +void +Inst_DS__DS_WRITE_B96::completeAcc(GPUDynInstPtr gpuDynInst) +{ +} // completeAcc + Inst_DS__DS_WRITE_B128::Inst_DS__DS_WRITE_B128(InFmt_DS *iFmt) : Inst_DS(iFmt, "ds_write_b128") { @@ -34354,9 +34397,56 @@ void Inst_DS__DS_WRITE_B128::execute(GPUDynInstPtr gpuDynInst) { -panicUnimplemented(); +Wavefront *wf = gpuDynInst->wavefront(); +gpuDynInst->execUnitId = wf->execUnitId; +gpuDynInst->latency.init(gpuDynInst->computeUnit()); +gpuDynInst->latency.set( +gpuDynInst->computeUnit()->cyclesToTicks(Cycles(24))); +ConstVecOperandU32 addr(gpuDynInst, extData.ADDR); +ConstVecOperandU32 data0(gpuDynInst, extData.DATA0); +ConstVecOperandU32 data1(gpuDynInst, extData.DATA0 + 1); +ConstVecOperandU32 data2(gpuDynInst, extData.DATA0 + 2); +ConstVecOperandU32 data3(gpuDynInst, extData.DATA0 + 3); + +addr.read(); +data0.read(); +data1.read(); +data2.read(); +data3.read(); + +calcAddr(gpuDynInst, addr); + +for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { +if (gpuDynInst->exec_mask[lane]) { +(reinterpret_cast( +gpuDynInst->d_data))[lane * 4] = data0[lane]; +(reinterpret_cast( +gpuDynInst->d_data))[lane * 4 + 1] = data1[lane]; +(reinterpret_cast( +gpuDynInst->d_data))[lane * 4 + 2] = data2[lane]; +(reinterpret_cast( +gpuDynInst->d_data))[lane * 4 + 3] = data3[lane]; +} +} + + gpuDynInst->computeUnit()->localMemoryPipe.issueRequest(gpuDynInst); } +void +Inst_DS__DS_WRITE_B128::initiateAcc(GPUDynInstPtr gpuDynInst) +{ 
+Addr offset0 = instData.OFFSET0; +Addr offset1 = instData.OFFSET1; +Addr offset = (offset1 << 8) | offset0; + +initMemWrite<4>(gpuDynInst, offs
[gem5-dev] Change in gem5/gem5[release-staging-v21-1]: mem-ruby: Account for misaligned accesses in GPUCoalescer
Kyle Roarty has uploaded this change for review. ( https://gem5-review.googlesource.com/c/public/gem5/+/48341 ) Change subject: mem-ruby: Account for misaligned accesses in GPUCoalescer .. mem-ruby: Account for misaligned accesses in GPUCoalescer Previously, we assumed that the maximum number of requests that would be issued by an instruction was equal to the number of threads that were active for that instruction. However, if a thread has an access that crosses a cache line, that thread has a misaligned access, and needs to request both cache lines. This patch takes that into account by checking the status vector for each thread in that instruction to determine the number of requests. Change-Id: I1994962c46d504b48654dbd22bcd786c9f382fd9 --- M src/mem/ruby/system/GPUCoalescer.cc 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/src/mem/ruby/system/GPUCoalescer.cc b/src/mem/ruby/system/GPUCoalescer.cc index c00e7c0..2390ba6 100644 --- a/src/mem/ruby/system/GPUCoalescer.cc +++ b/src/mem/ruby/system/GPUCoalescer.cc @@ -645,7 +645,10 @@ // of the exec_mask. int num_packets = 1; if (!m_usingRubyTester) { -num_packets = getDynInst(pkt)->exec_mask.count(); +num_packets = 0; +for (int i = 0; i < TheGpuISA::NumVecElemPerVecReg; i++) { +num_packets += getDynInst(pkt)->getLaneStatus(i); +} } // the pkt is temporarily stored in the uncoalesced table until -- To view, visit https://gem5-review.googlesource.com/c/public/gem5/+/48341 To unsubscribe, or for help writing mail filters, visit https://gem5-review.googlesource.com/settings Gerrit-Project: public/gem5 Gerrit-Branch: release-staging-v21-1 Gerrit-Change-Id: I1994962c46d504b48654dbd22bcd786c9f382fd9 Gerrit-Change-Number: 48341 Gerrit-PatchSet: 1 Gerrit-Owner: Kyle Roarty Gerrit-MessageType: newchange ___ gem5-dev mailing list -- gem5-dev@gem5.org To unsubscribe send an email to gem5-dev-le...@gem5.org %(web_page_url)slistinfo%(cgiext)s/%(_internal_name)s
[gem5-dev] Change in gem5/gem5[release-staging-v21-1]: arch-gcn3: Validate if scalar sources are scalar gprs
Kyle Roarty has uploaded this change for review. ( https://gem5-review.googlesource.com/c/public/gem5/+/48344 ) Change subject: arch-gcn3: Validate if scalar sources are scalar gprs .. arch-gcn3: Validate if scalar sources are scalar gprs Scalar sources can either be a general-purpose register or a constant register that holds a single value. If we don't check for if the register is a general-purpose register, it's possible that we get a constant register, which then causes all of the register mapping code to break, as the constant registers aren't supposed to be mapped like the general-purpose registers are. This fix adds an isScalarReg check to the instruction encodings that were missing it. Change-Id: I3d7d5393aa324737301c3269cc227b60e8a159e4 --- M src/arch/amdgpu/gcn3/insts/op_encodings.cc 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/src/arch/amdgpu/gcn3/insts/op_encodings.cc b/src/arch/amdgpu/gcn3/insts/op_encodings.cc index cbbb767..cf20a2e 100644 --- a/src/arch/amdgpu/gcn3/insts/op_encodings.cc +++ b/src/arch/amdgpu/gcn3/insts/op_encodings.cc @@ -1277,12 +1277,12 @@ reg = extData.SRSRC; srcOps.emplace_back(reg, getOperandSize(opNum), true, - true, false, false); + isScalarReg(reg), false, false); opNum++; reg = extData.SOFFSET; srcOps.emplace_back(reg, getOperandSize(opNum), true, - true, false, false); + isScalarReg(reg), false, false); opNum++; } @@ -1368,12 +1368,12 @@ reg = extData.SRSRC; srcOps.emplace_back(reg, getOperandSize(opNum), true, - true, false, false); + isScalarReg(reg), false, false); opNum++; reg = extData.SOFFSET; srcOps.emplace_back(reg, getOperandSize(opNum), true, - true, false, false); + isScalarReg(reg), false, false); opNum++; // extData.VDATA moves in the reg list depending on the instruction @@ -1441,13 +1441,13 @@ reg = extData.SRSRC; srcOps.emplace_back(reg, getOperandSize(opNum), true, - true, false, false); + isScalarReg(reg), false, false); opNum++; if (getNumOperands() == 4) { reg = extData.SSAMP; 
srcOps.emplace_back(reg, getOperandSize(opNum), true, - true, false, false); + isScalarReg(reg), false, false); opNum++; } -- To view, visit https://gem5-review.googlesource.com/c/public/gem5/+/48344 To unsubscribe, or for help writing mail filters, visit https://gem5-review.googlesource.com/settings Gerrit-Project: public/gem5 Gerrit-Branch: release-staging-v21-1 Gerrit-Change-Id: I3d7d5393aa324737301c3269cc227b60e8a159e4 Gerrit-Change-Number: 48344 Gerrit-PatchSet: 1 Gerrit-Owner: Kyle Roarty Gerrit-MessageType: newchange ___ gem5-dev mailing list -- gem5-dev@gem5.org To unsubscribe send an email to gem5-dev-le...@gem5.org %(web_page_url)slistinfo%(cgiext)s/%(_internal_name)s
[gem5-dev] Change in gem5/gem5[release-staging-v21-1]: arch-gcn3: Implement LDS accesses in Flat instructions
Kyle Roarty has uploaded this change for review. ( https://gem5-review.googlesource.com/c/public/gem5/+/48343 ) Change subject: arch-gcn3: Implement LDS accesses in Flat instructions .. arch-gcn3: Implement LDS accesses in Flat instructions Add support for LDS accesses by allowing Flat instructions to dispatch into the local memory pipeline if the requested address is in the group aperture. This requires implementing LDS accesses in the Flat initMemRead/Write functions, in a similar fashion to the DS functions of the same name. Because we now can potentially dispatch to the local memory pipeline, this change also adds a check to regain any tokens we requested as a flat instruction. Change-Id: Id26191f7ee43291a5e5ca5f39af06af981ec23ab --- M src/arch/amdgpu/gcn3/insts/instructions.cc M src/arch/amdgpu/gcn3/insts/op_encodings.hh M src/gpu-compute/gpu_dyn_inst.cc M src/gpu-compute/local_memory_pipeline.cc 4 files changed, 156 insertions(+), 6 deletions(-) diff --git a/src/arch/amdgpu/gcn3/insts/instructions.cc b/src/arch/amdgpu/gcn3/insts/instructions.cc index 79af7ac..95af790 100644 --- a/src/arch/amdgpu/gcn3/insts/instructions.cc +++ b/src/arch/amdgpu/gcn3/insts/instructions.cc @@ -39384,6 +39384,9 @@ if (gpuDynInst->executedAs() == enums::SC_GLOBAL) { gpuDynInst->computeUnit()->globalMemoryPipe .issueRequest(gpuDynInst); +} else if (gpuDynInst->executedAs() == enums::SC_GROUP) { +gpuDynInst->computeUnit()->localMemoryPipe +.issueRequest(gpuDynInst); } else { fatal("Non global flat instructions not implemented yet.\n"); } @@ -39448,6 +39451,9 @@ if (gpuDynInst->executedAs() == enums::SC_GLOBAL) { gpuDynInst->computeUnit()->globalMemoryPipe .issueRequest(gpuDynInst); +} else if (gpuDynInst->executedAs() == enums::SC_GROUP) { +gpuDynInst->computeUnit()->localMemoryPipe +.issueRequest(gpuDynInst); } else { fatal("Non global flat instructions not implemented yet.\n"); } @@ -39511,6 +39517,9 @@ if (gpuDynInst->executedAs() == enums::SC_GLOBAL) { 
gpuDynInst->computeUnit()->globalMemoryPipe .issueRequest(gpuDynInst); +} else if (gpuDynInst->executedAs() == enums::SC_GROUP) { +gpuDynInst->computeUnit()->localMemoryPipe +.issueRequest(gpuDynInst); } else { fatal("Non global flat instructions not implemented yet.\n"); } @@ -39603,6 +39612,9 @@ if (gpuDynInst->executedAs() == enums::SC_GLOBAL) { gpuDynInst->computeUnit()->globalMemoryPipe .issueRequest(gpuDynInst); +} else if (gpuDynInst->executedAs() == enums::SC_GROUP) { +gpuDynInst->computeUnit()->localMemoryPipe +.issueRequest(gpuDynInst); } else { fatal("Non global flat instructions not implemented yet.\n"); } @@ -39667,6 +39679,9 @@ if (gpuDynInst->executedAs() == enums::SC_GLOBAL) { gpuDynInst->computeUnit()->globalMemoryPipe .issueRequest(gpuDynInst); +} else if (gpuDynInst->executedAs() == enums::SC_GROUP) { +gpuDynInst->computeUnit()->localMemoryPipe +.issueRequest(gpuDynInst); } else { fatal("Non global flat instructions not implemented yet.\n"); } @@ -39731,6 +39746,9 @@ if (gpuDynInst->executedAs() == enums::SC_GLOBAL) { gpuDynInst->computeUnit()->globalMemoryPipe .issueRequest(gpuDynInst); +} else if (gpuDynInst->executedAs() == enums::SC_GROUP) { +gpuDynInst->computeUnit()->localMemoryPipe +.issueRequest(gpuDynInst); } else { fatal("Non global flat instructions not implemented yet.\n"); } @@ -39804,6 +39822,9 @@ if (gpuDynInst->executedAs() == enums::SC_GLOBAL) { gpuDynInst->computeUnit()->globalMemoryPipe .issueRequest(gpuDynInst); +} else if (gpuDynInst->executedAs() == enums::SC_GROUP) { +gpuDynInst->computeUnit()->localMemoryPipe +.issueRequest(gpuDynInst); } else { fatal("Non global flat instructions not implemented yet.\n"); } @@ -39889,6 +39910,9 @@ if (gpuDynInst->executedAs() == enums::SC_GLOBAL) { gpuDynInst->computeUnit()->globalMemoryPipe .issueRequest(gpuDynInst); +} else if (gpuDynInst->executedAs() == enums::SC_GROUP) { +gpuDynInst->computeUnit()->localMemoryPipe +.issueRequest(gpuDynInst); } else { fatal("Non global flat 
instructions not implemented yet.\n"); } @@ -39952,6 +39976,9 @@ if (gpuDynInst->executedAs() ==
[gem5-dev] Change in gem5/gem5[release-staging-v21-1]: sim-se: Fix execve syscall
Kyle Roarty has uploaded this change for review. ( https://gem5-review.googlesource.com/c/public/gem5/+/48345 ) Change subject: sim-se: Fix execve syscall .. sim-se: Fix execve syscall There were three things preventing execve from working. Firstly, the entrypoint for the new program wasn't correct. This was fixed by calling Process::init, which adds a bias to the entrypoint. Secondly, the uname string wasn't being copied over. This meant when the new executable tried to run, it would think the kernel was too old to run on, and would error out. This was fixed by copying over the uname string (the `release` string in Process) when creating the new process. Additionally, this patch also ensures we copy over the uname string in the clone implementation, as otherwise a cloned thread that called execve would crash. Finally, we choose to not delete the new ProcessParams or the old Process. This is done both because it matches what is done in cloneFunc, but also because deleting the old process results in a segfault later on. Change-Id: I4ca201da689e9e37671b4cb477dc76fa12eecf69 --- M src/sim/syscall_emul.hh 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/src/sim/syscall_emul.hh b/src/sim/syscall_emul.hh index aa02fd6..09be700 100644 --- a/src/sim/syscall_emul.hh +++ b/src/sim/syscall_emul.hh @@ -1452,6 +1452,7 @@ pp->euid = p->euid(); pp->gid = p->gid(); pp->egid = p->egid(); +pp->release = p->release; /* Find the first free PID that's less than the maximum */ std::set const& pids = p->system->PIDs; @@ -2017,6 +2018,7 @@ pp->errout.assign("cerr"); pp->cwd.assign(p->tgtCwd); pp->system = p->system; +pp->release = p->release; /** * Prevent process object creation with identical PIDs (which will trip * a fatal check in Process constructor). 
The execve call is supposed to @@ -2027,7 +2029,9 @@ */ p->system->PIDs.erase(p->pid()); Process *new_p = pp->create(); -delete pp; +// TODO: there is no way to know when the Process SimObject is done with +// the params pointer. Both the params pointer (pp) and the process +// pointer (p) are normally managed in python and are never cleaned up. /** * Work through the file descriptor array and close any files marked @@ -2042,10 +2046,10 @@ *new_p->sigchld = true; -delete p; tc->clearArchRegs(); tc->setProcessPtr(new_p); new_p->assignThreadContext(tc->contextId()); +new_p->init(); new_p->initState(); tc->activate(); TheISA::PCState pcState = tc->pcState(); -- To view, visit https://gem5-review.googlesource.com/c/public/gem5/+/48345 To unsubscribe, or for help writing mail filters, visit https://gem5-review.googlesource.com/settings Gerrit-Project: public/gem5 Gerrit-Branch: release-staging-v21-1 Gerrit-Change-Id: I4ca201da689e9e37671b4cb477dc76fa12eecf69 Gerrit-Change-Number: 48345 Gerrit-PatchSet: 1 Gerrit-Owner: Kyle Roarty Gerrit-MessageType: newchange ___ gem5-dev mailing list -- gem5-dev@gem5.org To unsubscribe send an email to gem5-dev-le...@gem5.org %(web_page_url)slistinfo%(cgiext)s/%(_internal_name)s
[gem5-dev] Change in gem5/gem5[release-staging-v21-1]: sim-se: Properly handle a clone with the VFORK flag
Kyle Roarty has uploaded this change for review. ( https://gem5-review.googlesource.com/c/public/gem5/+/48346 ) Change subject: sim-se: Properly handle a clone with the VFORK flag .. sim-se: Properly handle a clone with the VFORK flag When clone is called with the VFORK flag, the calling process is suspended until the child process either exits, or calls execve. This patch adds in a new variable to Process, which is used to store the context of the calling process if this process is created through a clone with VFORK set. This patch also adds the required support in clone to suspend the calling thread, and in exitImpl and execveFunc to wake up the calling thread when the child thread calls either of those functions Change-Id: I85af67544ea1d5df7102dcff1331b5a6f6f4fa7c --- M src/sim/process.cc M src/sim/process.hh M src/sim/syscall_emul.cc M src/sim/syscall_emul.hh 4 files changed, 34 insertions(+), 0 deletions(-) diff --git a/src/sim/process.cc b/src/sim/process.cc index 207c275..272fc9f 100644 --- a/src/sim/process.cc +++ b/src/sim/process.cc @@ -175,6 +175,9 @@ #ifndef CLONE_THREAD #define CLONE_THREAD 0 #endif +#ifndef CLONE_VFORK +#define CLONE_VFORK 0 +#endif if (CLONE_VM & flags) { /** * Share the process memory address space between the new process @@ -249,6 +252,10 @@ np->exitGroup = exitGroup; } +if (CLONE_VFORK & flags) { +np->vforkContexts.push_back(otc->contextId()); +} + np->argv.insert(np->argv.end(), argv.begin(), argv.end()); np->envp.insert(np->envp.end(), envp.begin(), envp.end()); } diff --git a/src/sim/process.hh b/src/sim/process.hh index 632ba90..34768a0 100644 --- a/src/sim/process.hh +++ b/src/sim/process.hh @@ -284,6 +284,9 @@ // Process was forked with SIGCHLD set. 
bool *sigchld; +// Contexts to wake up when this thread exits or calls execve +std::vector vforkContexts; + // Track how many system calls are executed statistics::Scalar numSyscalls; }; diff --git a/src/sim/syscall_emul.cc b/src/sim/syscall_emul.cc index 147cb39..713bec4 100644 --- a/src/sim/syscall_emul.cc +++ b/src/sim/syscall_emul.cc @@ -193,6 +193,16 @@ } } +/** + * If we were a thread created by a clone with vfork set, wake up + * the thread that created us + */ +if (!p->vforkContexts.empty()) { +ThreadContext *vtc = sys->threads[p->vforkContexts.front()]; +assert(vtc->status() == ThreadContext::Suspended); +vtc->activate(); +} + tc->halt(); /** diff --git a/src/sim/syscall_emul.hh b/src/sim/syscall_emul.hh index 09be700..8695638 100644 --- a/src/sim/syscall_emul.hh +++ b/src/sim/syscall_emul.hh @@ -1521,6 +1521,10 @@ ctc->pcState(cpc); ctc->activate(); +if (flags & OS::TGT_CLONE_VFORK) { +tc->suspend(); +} + return cp->pid(); } @@ -1998,6 +2002,16 @@ }; /** + * If we were a thread created by a clone with vfork set, wake up + * the thread that created us + */ +if (!p->vforkContexts.empty()) { +ThreadContext *vtc = p->system->threads[p->vforkContexts.front()]; +assert(vtc->status() == ThreadContext::Suspended); +vtc->activate(); +} + +/** * Note that ProcessParams is generated by swig and there are no other * examples of how to create anything but this default constructor. 
The * fields are manually initialized instead of passing parameters to the -- To view, visit https://gem5-review.googlesource.com/c/public/gem5/+/48346 To unsubscribe, or for help writing mail filters, visit https://gem5-review.googlesource.com/settings Gerrit-Project: public/gem5 Gerrit-Branch: release-staging-v21-1 Gerrit-Change-Id: I85af67544ea1d5df7102dcff1331b5a6f6f4fa7c Gerrit-Change-Number: 48346 Gerrit-PatchSet: 1 Gerrit-Owner: Kyle Roarty Gerrit-MessageType: newchange ___ gem5-dev mailing list -- gem5-dev@gem5.org To unsubscribe send an email to gem5-dev-le...@gem5.org %(web_page_url)slistinfo%(cgiext)s/%(_internal_name)s
[gem5-dev] Change in gem5/gem5[release-staging-v21-1]: arch-gcn3: Free dest registers in non-memory Load DS insts
Kyle Roarty has submitted this change. ( https://gem5-review.googlesource.com/c/public/gem5/+/48019 ) Change subject: arch-gcn3: Free dest registers in non-memory Load DS insts .. arch-gcn3: Free dest registers in non-memory Load DS insts Certain DS insts are classified as Loads, but don't actually go through the memory pipeline. However, any instruction classified as a load marks its destination registers as free in the memory pipeline. Because these instructions didn't use the memory pipeline, they never freed their destination registers, which led to a deadlock. This patch explicitly calls the function used to free the destination registers in the execute() method of those Load instructions that don't use the memory pipeline. Change-Id: Ic2ac2e232c8fbad63d0c62c1862f2bdaeaba4edf Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/48019 Reviewed-by: Matt Sinclair Reviewed-by: Bobby R. Bruce Maintainer: Matt Sinclair Tested-by: kokoro --- M src/arch/amdgpu/gcn3/insts/instructions.cc 1 file changed, 27 insertions(+), 0 deletions(-) Approvals: Matt Sinclair: Looks good to me, but someone else must approve; Looks good to me, approved Bobby R. Bruce: Looks good to me, approved kokoro: Regressions pass diff --git a/src/arch/amdgpu/gcn3/insts/instructions.cc b/src/arch/amdgpu/gcn3/insts/instructions.cc index a421454..21ab58d 100644 --- a/src/arch/amdgpu/gcn3/insts/instructions.cc +++ b/src/arch/amdgpu/gcn3/insts/instructions.cc @@ -32397,6 +32397,15 @@ } vdst.write(); + +/** + * This is needed because we treat this instruction as a load + * but it's not an actual memory request. 
+ * Without this, the destination register never gets marked as + * free, leading to a possible deadlock + */ +wf->computeUnit->vrf[wf->simdId]-> +scheduleWriteOperandsFromLoad(wf, gpuDynInst); } // execute // --- Inst_DS__DS_PERMUTE_B32 class methods --- @@ -32468,6 +32477,15 @@ wf->decLGKMInstsIssued(); wf->rdLmReqsInPipe--; wf->validateRequestCounters(); + +/** + * This is needed because we treat this instruction as a load + * but it's not an actual memory request. + * Without this, the destination register never gets marked as + * free, leading to a possible deadlock + */ +wf->computeUnit->vrf[wf->simdId]-> +scheduleWriteOperandsFromLoad(wf, gpuDynInst); } // execute // --- Inst_DS__DS_BPERMUTE_B32 class methods --- @@ -32539,6 +32557,15 @@ wf->decLGKMInstsIssued(); wf->rdLmReqsInPipe--; wf->validateRequestCounters(); + +/** + * This is needed because we treat this instruction as a load + * but it's not an actual memory request. + * Without this, the destination register never gets marked as + * free, leading to a possible deadlock + */ +wf->computeUnit->vrf[wf->simdId]-> +scheduleWriteOperandsFromLoad(wf, gpuDynInst); } // execute // --- Inst_DS__DS_ADD_U64 class methods --- -- To view, visit https://gem5-review.googlesource.com/c/public/gem5/+/48019 To unsubscribe, or for help writing mail filters, visit https://gem5-review.googlesource.com/settings Gerrit-Project: public/gem5 Gerrit-Branch: release-staging-v21-1 Gerrit-Change-Id: Ic2ac2e232c8fbad63d0c62c1862f2bdaeaba4edf Gerrit-Change-Number: 48019 Gerrit-PatchSet: 2 Gerrit-Owner: Kyle Roarty Gerrit-Reviewer: Alex Dutu Gerrit-Reviewer: Bobby R. Bruce Gerrit-Reviewer: Kyle Roarty Gerrit-Reviewer: Matt Sinclair Gerrit-Reviewer: Matthew Poremba Gerrit-Reviewer: kokoro Gerrit-MessageType: merged ___ gem5-dev mailing list -- gem5-dev@gem5.org To unsubscribe send an email to gem5-dev-le...@gem5.org %(web_page_url)slistinfo%(cgiext)s/%(_internal_name)s
[gem5-dev] Change in gem5/gem5[release-staging-v21-1]: gpu-compute: Fix off-by-one when creating an AddrRange
Kyle Roarty has submitted this change. ( https://gem5-review.googlesource.com/c/public/gem5/+/48020 ) Change subject: gpu-compute: Fix off-by-one when creating an AddrRange .. gpu-compute: Fix off-by-one when creating an AddrRange The end value of an AddrRange is already not included in the range, so subtracting one from the end creates an off-by-one error. This patch removes the extra -1 that was used when determining the end of an AddrRange in allocateGpuVma Change-Id: I75659e9a7fabd991bb37be9aa40f8e409eb21154 Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/48020 Reviewed-by: Matt Sinclair Reviewed-by: Bobby R. Bruce Maintainer: Matt Sinclair Tested-by: kokoro --- M src/gpu-compute/gpu_compute_driver.cc 1 file changed, 1 insertion(+), 1 deletion(-) Approvals: Matt Sinclair: Looks good to me, but someone else must approve; Looks good to me, approved Bobby R. Bruce: Looks good to me, approved kokoro: Regressions pass diff --git a/src/gpu-compute/gpu_compute_driver.cc b/src/gpu-compute/gpu_compute_driver.cc index 92ac641..f794b43 100644 --- a/src/gpu-compute/gpu_compute_driver.cc +++ b/src/gpu-compute/gpu_compute_driver.cc @@ -985,7 +985,7 @@ GPUComputeDriver::allocateGpuVma(Request::CacheCoherenceFlags mtype, Addr start, Addr length) { -AddrRange range = AddrRange(start, start + length - 1); +AddrRange range = AddrRange(start, start + length); DPRINTF(GPUDriver, "Registering [%p - %p] with MTYPE %d\n", range.start(), range.end(), mtype); fatal_if(gpuVmas.insert(range, mtype) == gpuVmas.end(), -- To view, visit https://gem5-review.googlesource.com/c/public/gem5/+/48020 To unsubscribe, or for help writing mail filters, visit https://gem5-review.googlesource.com/settings Gerrit-Project: public/gem5 Gerrit-Branch: release-staging-v21-1 Gerrit-Change-Id: I75659e9a7fabd991bb37be9aa40f8e409eb21154 Gerrit-Change-Number: 48020 Gerrit-PatchSet: 2 Gerrit-Owner: Kyle Roarty Gerrit-Reviewer: Alex Dutu Gerrit-Reviewer: Bobby R. 
Bruce Gerrit-Reviewer: Kyle Roarty Gerrit-Reviewer: Matt Sinclair Gerrit-Reviewer: Matthew Poremba Gerrit-Reviewer: kokoro Gerrit-MessageType: merged ___ gem5-dev mailing list -- gem5-dev@gem5.org To unsubscribe send an email to gem5-dev-le...@gem5.org %(web_page_url)slistinfo%(cgiext)s/%(_internal_name)s
[gem5-dev] Change in gem5/gem5[release-staging-v21-1]: gpu-compute: Fix off-by-one when creating an AddrRange
Kyle Roarty has uploaded this change for review. ( https://gem5-review.googlesource.com/c/public/gem5/+/48020 ) Change subject: gpu-compute: Fix off-by-one when creating an AddrRange .. gpu-compute: Fix off-by-one when creating an AddrRange The end value of an AddrRange is already not included in the range, so subtracting one from the end creates an off-by-one error. This patch removes the extra -1 that was used when determining the end of an AddrRange in allocateGpuVma Change-Id: I75659e9a7fabd991bb37be9aa40f8e409eb21154 --- M src/gpu-compute/gpu_compute_driver.cc 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/gpu-compute/gpu_compute_driver.cc b/src/gpu-compute/gpu_compute_driver.cc index 92ac641..f794b43 100644 --- a/src/gpu-compute/gpu_compute_driver.cc +++ b/src/gpu-compute/gpu_compute_driver.cc @@ -985,7 +985,7 @@ GPUComputeDriver::allocateGpuVma(Request::CacheCoherenceFlags mtype, Addr start, Addr length) { -AddrRange range = AddrRange(start, start + length - 1); +AddrRange range = AddrRange(start, start + length); DPRINTF(GPUDriver, "Registering [%p - %p] with MTYPE %d\n", range.start(), range.end(), mtype); fatal_if(gpuVmas.insert(range, mtype) == gpuVmas.end(), -- To view, visit https://gem5-review.googlesource.com/c/public/gem5/+/48020 To unsubscribe, or for help writing mail filters, visit https://gem5-review.googlesource.com/settings Gerrit-Project: public/gem5 Gerrit-Branch: release-staging-v21-1 Gerrit-Change-Id: I75659e9a7fabd991bb37be9aa40f8e409eb21154 Gerrit-Change-Number: 48020 Gerrit-PatchSet: 1 Gerrit-Owner: Kyle Roarty Gerrit-MessageType: newchange ___ gem5-dev mailing list -- gem5-dev@gem5.org To unsubscribe send an email to gem5-dev-le...@gem5.org %(web_page_url)slistinfo%(cgiext)s/%(_internal_name)s
[gem5-dev] Change in gem5/gem5[release-staging-v21-1]: arch-gcn3: Free dest registers in non-memory Load DS insts
Kyle Roarty has uploaded this change for review. ( https://gem5-review.googlesource.com/c/public/gem5/+/48019 ) Change subject: arch-gcn3: Free dest registers in non-memory Load DS insts .. arch-gcn3: Free dest registers in non-memory Load DS insts Certain DS insts are classified as Loads, but don't actually go through the memory pipeline. However, any instruction classified as a load marks its destination registers as free in the memory pipeline. Because these instructions didn't use the memory pipeline, they never freed their destination registers, which led to a deadlock. This patch explicitly calls the function used to free the destination registers in the execute() method of those Load instructions that don't use the memory pipeline. Change-Id: Ic2ac2e232c8fbad63d0c62c1862f2bdaeaba4edf --- M src/arch/amdgpu/gcn3/insts/instructions.cc 1 file changed, 27 insertions(+), 0 deletions(-) diff --git a/src/arch/amdgpu/gcn3/insts/instructions.cc b/src/arch/amdgpu/gcn3/insts/instructions.cc index a421454..21ab58d 100644 --- a/src/arch/amdgpu/gcn3/insts/instructions.cc +++ b/src/arch/amdgpu/gcn3/insts/instructions.cc @@ -32397,6 +32397,15 @@ } vdst.write(); + +/** + * This is needed because we treat this instruction as a load + * but it's not an actual memory request. 
+ * Without this, the destination register never gets marked as + * free, leading to a possible deadlock + */ +wf->computeUnit->vrf[wf->simdId]-> +scheduleWriteOperandsFromLoad(wf, gpuDynInst); } // execute // --- Inst_DS__DS_BPERMUTE_B32 class methods --- @@ -32539,6 +32557,15 @@ wf->decLGKMInstsIssued(); wf->rdLmReqsInPipe--; wf->validateRequestCounters(); + +/** + * This is needed because we treat this instruction as a load + * but it's not an actual memory request. + * Without this, the destination register never gets marked as + * free, leading to a possible deadlock + */ +wf->computeUnit->vrf[wf->simdId]-> +scheduleWriteOperandsFromLoad(wf, gpuDynInst); } // execute // --- Inst_DS__DS_ADD_U64 class methods --- -- To view, visit https://gem5-review.googlesource.com/c/public/gem5/+/48019 To unsubscribe, or for help writing mail filters, visit https://gem5-review.googlesource.com/settings Gerrit-Project: public/gem5 Gerrit-Branch: release-staging-v21-1 Gerrit-Change-Id: Ic2ac2e232c8fbad63d0c62c1862f2bdaeaba4edf Gerrit-Change-Number: 48019 Gerrit-PatchSet: 1 Gerrit-Owner: Kyle Roarty Gerrit-MessageType: newchange ___ gem5-dev mailing list -- gem5-dev@gem5.org To unsubscribe send an email to gem5-dev-le...@gem5.org %(web_page_url)slistinfo%(cgiext)s/%(_internal_name)s
[gem5-dev] Change in gem5/gem5[develop]: gpu-compute: Check for WAX dependences
Kyle Roarty has uploaded this change for review. ( https://gem5-review.googlesource.com/c/public/gem5/+/47539 ) Change subject: gpu-compute: Check for WAX dependences .. gpu-compute: Check for WAX dependences This adds checking if the destination registers are free or busy in the operandsReady() function for both scalar and vector registers. This allows us to catch WAX dependences between instructions. Change-Id: I0fb0b29e9608fca0d90c059422d4d9500d5b2a7d --- M src/gpu-compute/scalar_register_file.cc M src/gpu-compute/vector_register_file.cc 2 files changed, 22 insertions(+), 0 deletions(-) diff --git a/src/gpu-compute/scalar_register_file.cc b/src/gpu-compute/scalar_register_file.cc index 52e0a2f..3a00093 100644 --- a/src/gpu-compute/scalar_register_file.cc +++ b/src/gpu-compute/scalar_register_file.cc @@ -64,6 +64,17 @@ } } +for (const auto& dstScalarOp : ii->dstScalarRegOperands()) { +for (const auto& physIdx : dstScalarOp.physIndices()) { +if (regBusy(physIdx)) { +DPRINTF(GPUSRF, "WAX stall: WV[%d]: %s: physReg[%d]\n", +w->wfDynId, ii->disassemble(), physIdx); +w->stats.numTimesBlockedDueWAXDependencies++; +return false; +} +} +} + return true; } diff --git a/src/gpu-compute/vector_register_file.cc b/src/gpu-compute/vector_register_file.cc index dc5434d..2355643 100644 --- a/src/gpu-compute/vector_register_file.cc +++ b/src/gpu-compute/vector_register_file.cc @@ -71,6 +71,17 @@ } } +for (const auto& dstVecOp : ii->dstVecRegOperands()) { +for (const auto& physIdx : dstVecOp.physIndices()) { +if (regBusy(physIdx)) { +DPRINTF(GPUVRF, "WAX stall: WV[%d]: %s: physReg[%d]\n", +w->wfDynId, ii->disassemble(), physIdx); +w->stats.numTimesBlockedDueWAXDependencies++; +return false; +} +} +} + return true; } -- To view, visit https://gem5-review.googlesource.com/c/public/gem5/+/47539 To unsubscribe, or for help writing mail filters, visit https://gem5-review.googlesource.com/settings Gerrit-Project: public/gem5 Gerrit-Branch: develop Gerrit-Change-Id: 
I0fb0b29e9608fca0d90c059422d4d9500d5b2a7d Gerrit-Change-Number: 47539 Gerrit-PatchSet: 1 Gerrit-Owner: Kyle Roarty Gerrit-MessageType: newchange ___ gem5-dev mailing list -- gem5-dev@gem5.org To unsubscribe send an email to gem5-dev-le...@gem5.org %(web_page_url)slistinfo%(cgiext)s/%(_internal_name)s
[gem5-dev] Change in gem5/gem5[develop]: arch-vega: Add fatal when decoding missing insts
Kyle Roarty has uploaded this change for review. ( https://gem5-review.googlesource.com/c/public/gem5/+/47522 ) Change subject: arch-vega: Add fatal when decoding missing insts .. arch-vega: Add fatal when decoding missing insts Certain instructions don't have implementations in instructions.cc, and get decoded as a nullptr. This adds a fatal when decoding a missing instruction, as we aren't able to properly run a program if all its instructions aren't implemented, and it allows us to figure out which instruction is missing due to fatals printing the line they were called from. Change-Id: I7e3690f079b790dceee102063773d5fbbc8619f1 --- M src/arch/amdgpu/vega/decoder.cc 1 file changed, 229 insertions(+), 0 deletions(-) diff --git a/src/arch/amdgpu/vega/decoder.cc b/src/arch/amdgpu/vega/decoder.cc index 480d326..94035f6 100644 --- a/src/arch/amdgpu/vega/decoder.cc +++ b/src/arch/amdgpu/vega/decoder.cc @@ -4437,6 +4437,7 @@ GPUStaticInst* Decoder::decode_OP_SOP2__S_MUL_HI_U32(MachInst iFmt) { +fatal("Trying to decode instruction without a class\n"); return nullptr; } @@ -4449,42 +4450,49 @@ GPUStaticInst* Decoder::decode_OP_SOP2__S_LSHL1_ADD_U32(MachInst iFmt) { +fatal("Trying to decode instruction without a class\n"); return nullptr; } GPUStaticInst* Decoder::decode_OP_SOP2__S_LSHL2_ADD_U32(MachInst iFmt) { +fatal("Trying to decode instruction without a class\n"); return nullptr; } GPUStaticInst* Decoder::decode_OP_SOP2__S_LSHL3_ADD_U32(MachInst iFmt) { +fatal("Trying to decode instruction without a class\n"); return nullptr; } GPUStaticInst* Decoder::decode_OP_SOP2__S_LSHL4_ADD_U32(MachInst iFmt) { +fatal("Trying to decode instruction without a class\n"); return nullptr; } GPUStaticInst* Decoder::decode_OP_SOP2__S_PACK_LL_B32_B16(MachInst iFmt) { +fatal("Trying to decode instruction without a class\n"); return nullptr; } GPUStaticInst* Decoder::decode_OP_SOP2__S_PACK_LH_B32_B16(MachInst iFmt) { +fatal("Trying to decode instruction without a class\n"); return nullptr; } 
GPUStaticInst* Decoder::decode_OP_SOP2__S_HH_B32_B16(MachInst iFmt) { +fatal("Trying to decode instruction without a class\n"); return nullptr; } @@ -4611,6 +4619,7 @@ GPUStaticInst* Decoder::decode_OP_SOPK__S_CALL_B64(MachInst iFmt) { +fatal("Trying to decode instruction without a class\n"); return nullptr; } @@ -6831,108 +6840,126 @@ GPUStaticInst* Decoder::decode_OPU_VOP3__V_MAD_U32_U16(MachInst iFmt) { +fatal("Trying to decode instruction without a class\n"); return nullptr; } GPUStaticInst* Decoder::decode_OPU_VOP3__V_MAD_I32_I16(MachInst iFmt) { +fatal("Trying to decode instruction without a class\n"); return nullptr; } GPUStaticInst* Decoder::decode_OPU_VOP3__V_XAD_U32(MachInst iFmt) { +fatal("Trying to decode instruction without a class\n"); return nullptr; } GPUStaticInst* Decoder::decode_OPU_VOP3__V_MIN3_F16(MachInst iFmt) { +fatal("Trying to decode instruction without a class\n"); return nullptr; } GPUStaticInst* Decoder::decode_OPU_VOP3__V_MIN3_I16(MachInst iFmt) { +fatal("Trying to decode instruction without a class\n"); return nullptr; } GPUStaticInst* Decoder::decode_OPU_VOP3__V_MIN3_U16(MachInst iFmt) { +fatal("Trying to decode instruction without a class\n"); return nullptr; } GPUStaticInst* Decoder::decode_OPU_VOP3__V_MAX3_F16(MachInst iFmt) { +fatal("Trying to decode instruction without a class\n"); return nullptr; } GPUStaticInst* Decoder::decode_OPU_VOP3__V_MAX3_I16(MachInst iFmt) { +fatal("Trying to decode instruction without a class\n"); return nullptr; } GPUStaticInst* Decoder::decode_OPU_VOP3__V_MAX3_U16(MachInst iFmt) { +fatal("Trying to decode instruction without a class\n"); return nullptr; } GPUStaticInst* Decoder::decode_OPU_VOP3__V_MED3_F16(MachInst iFmt) { +fatal("Trying to decode instruction without a class\n"); return nullptr; } GPUStaticInst* Decoder::decode_OPU_VOP3__V_MED3_I16(MachInst iFmt) { +fatal("Trying to decode instruction without a class\n"); return nullptr; } GPUStaticInst* Decoder::decode_OPU_VOP3__V_MED3_U16(MachInst 
iFmt) { +fatal("Trying to decode instruction without a class\n"); return nullptr; } GPUStaticInst* Decoder::decode
[gem5-dev] Change in gem5/gem5[develop]: arch-x86: Ignore mbind syscall
Kyle Roarty has uploaded this change for review. ( https://gem5-review.googlesource.com/c/public/gem5/+/47526 ) Change subject: arch-x86: Ignore mbind syscall .. arch-x86: Ignore mbind syscall mbind gets called when running with a dGPU in ROCm 4, but we are able to ignore it without breaking anything Change-Id: I7c1ba47656122a5eb856981dca2a05359098e3b2 --- M src/arch/x86/linux/syscall_tbl64.cc 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/arch/x86/linux/syscall_tbl64.cc b/src/arch/x86/linux/syscall_tbl64.cc index 7231595..6f2fad5 100644 --- a/src/arch/x86/linux/syscall_tbl64.cc +++ b/src/arch/x86/linux/syscall_tbl64.cc @@ -281,7 +281,7 @@ { 234, "tgkill", tgkillFunc }, { 235, "utimes" }, { 236, "vserver" }, -{ 237, "mbind" }, +{ 237, "mbind", ignoreFunc }, { 238, "set_mempolicy" }, { 239, "get_mempolicy", ignoreFunc }, { 240, "mq_open" }, -- To view, visit https://gem5-review.googlesource.com/c/public/gem5/+/47526 To unsubscribe, or for help writing mail filters, visit https://gem5-review.googlesource.com/settings Gerrit-Project: public/gem5 Gerrit-Branch: develop Gerrit-Change-Id: I7c1ba47656122a5eb856981dca2a05359098e3b2 Gerrit-Change-Number: 47526 Gerrit-PatchSet: 1 Gerrit-Owner: Kyle Roarty Gerrit-MessageType: newchange ___ gem5-dev mailing list -- gem5-dev@gem5.org To unsubscribe send an email to gem5-dev-le...@gem5.org %(web_page_url)slistinfo%(cgiext)s/%(_internal_name)s
[gem5-dev] Change in gem5/gem5[develop]: gpu-compute: Add mmap functionality to GPURenderDriver
Kyle Roarty has uploaded this change for review. ( https://gem5-review.googlesource.com/c/public/gem5/+/47523 ) Change subject: gpu-compute: Add mmap functionality to GPURenderDriver .. gpu-compute: Add mmap functionality to GPURenderDriver dGPUs mmap the GPURenderDriver, however it doesn't appear that they do anything with it. This patch implements the mmap function by just returning the address provided, while not doing anything else Change-Id: Ia010a2aebcf7e2c75e22d93dfb440937d1bef3b1 --- M src/gpu-compute/gpu_render_driver.cc M src/gpu-compute/gpu_render_driver.hh 2 files changed, 13 insertions(+), 1 deletion(-) diff --git a/src/gpu-compute/gpu_render_driver.cc b/src/gpu-compute/gpu_render_driver.cc index 1af83cb..41730d2 100644 --- a/src/gpu-compute/gpu_render_driver.cc +++ b/src/gpu-compute/gpu_render_driver.cc @@ -38,7 +38,7 @@ /* ROCm 4 utilizes the render driver located at /dev/dri/renderDXXX. This * patch implements a very simple driver that just returns a file - * descriptor when opened, as testing has shown that's all that's needed + * descriptor when opened. */ int GPURenderDriver::open(ThreadContext *tc, int mode, int flags) @@ -48,3 +48,12 @@ int tgt_fd = process->fds->allocFD(device_fd_entry); return tgt_fd; } + +/* DGPUs try to mmap the driver file. 
It doesn't appear they do anything + * with it, so we just return the address that's provided + */ +Addr GPURenderDriver::mmap(ThreadContext *tc, Addr start, uint64_t length, + int prot, int tgt_flags, int tgt_fd, off_t offset) +{ +return start; +} diff --git a/src/gpu-compute/gpu_render_driver.hh b/src/gpu-compute/gpu_render_driver.hh index d070668..a992976 100644 --- a/src/gpu-compute/gpu_render_driver.hh +++ b/src/gpu-compute/gpu_render_driver.hh @@ -47,6 +47,9 @@ { return -EBADF; } + +Addr mmap(ThreadContext *tc, Addr start, uint64_t length, + int prot, int tgt_flags, int tgt_fd, off_t offset) override; }; #endif -- To view, visit https://gem5-review.googlesource.com/c/public/gem5/+/47523 To unsubscribe, or for help writing mail filters, visit https://gem5-review.googlesource.com/settings Gerrit-Project: public/gem5 Gerrit-Branch: develop Gerrit-Change-Id: Ia010a2aebcf7e2c75e22d93dfb440937d1bef3b1 Gerrit-Change-Number: 47523 Gerrit-PatchSet: 1 Gerrit-Owner: Kyle Roarty Gerrit-MessageType: newchange ___ gem5-dev mailing list -- gem5-dev@gem5.org To unsubscribe send an email to gem5-dev-le...@gem5.org %(web_page_url)slistinfo%(cgiext)s/%(_internal_name)s
[gem5-dev] Change in gem5/gem5[develop]: configs: Don't report CPU cores on Fiji properties
Kyle Roarty has uploaded this change for review. ( https://gem5-review.googlesource.com/c/public/gem5/+/47525 ) Change subject: configs: Don't report CPU cores on Fiji properties .. configs: Don't report CPU cores on Fiji properties ROCm determines if a device is a dGPU in two ways. The first is by looking at the device ID. The second is through a flag that gets set only if the reported cpu_cores_count is 0. If these don't agree, ROCm breaks when doing memory operations. Previously, cpu_cores_count was non-zero on the Fiji config. This patch sets it to 0 to appease ROCm Change-Id: I0fd0ce724f491ed6a4598188b3799468668585f4 --- M configs/example/hsaTopology.py 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/configs/example/hsaTopology.py b/configs/example/hsaTopology.py index 78193e0..28060cc 100644 --- a/configs/example/hsaTopology.py +++ b/configs/example/hsaTopology.py @@ -359,7 +359,7 @@ file_append((io_dir, 'properties'), io_prop) # Populate GPU node properties -node_prop = 'cpu_cores_count %s\n' % options.num_cpus + \ +node_prop = 'cpu_cores_count 0\n' + \ 'simd_count %s\n' \ % (options.num_compute_units * options.simds_per_cu)+ \ 'mem_banks_count 1\n' + \ -- To view, visit https://gem5-review.googlesource.com/c/public/gem5/+/47525 To unsubscribe, or for help writing mail filters, visit https://gem5-review.googlesource.com/settings Gerrit-Project: public/gem5 Gerrit-Branch: develop Gerrit-Change-Id: I0fd0ce724f491ed6a4598188b3799468668585f4 Gerrit-Change-Number: 47525 Gerrit-PatchSet: 1 Gerrit-Owner: Kyle Roarty Gerrit-MessageType: newchange ___ gem5-dev mailing list -- gem5-dev@gem5.org To unsubscribe send an email to gem5-dev-le...@gem5.org %(web_page_url)slistinfo%(cgiext)s/%(_internal_name)s
[gem5-dev] Change in gem5/gem5[develop]: configs: Add shared_cpu_list to cache directories
Kyle Roarty has uploaded this change for review. ( https://gem5-review.googlesource.com/c/public/gem5/+/47524 ) Change subject: configs: Add shared_cpu_list to cache directories .. configs: Add shared_cpu_list to cache directories The ROCm thunk uses this file instead of the shared_cpu_map file. Change-Id: I985512245c9f51106b8347412ed643f78b567b24 --- M configs/common/FileSystemConfig.py 1 file changed, 2 insertions(+), 0 deletions(-) diff --git a/configs/common/FileSystemConfig.py b/configs/common/FileSystemConfig.py index 0d9f221..66a6315 100644 --- a/configs/common/FileSystemConfig.py +++ b/configs/common/FileSystemConfig.py @@ -217,6 +217,8 @@ file_append((indexdir, 'number_of_sets'), num_sets) file_append((indexdir, 'physical_line_partition'), '1') file_append((indexdir, 'shared_cpu_map'), hex_mask(cpus)) +file_append((indexdir, 'shared_cpu_list'), +','.join(str(cpu) for cpu in cpus)) def _redirect_paths(options): # Redirect filesystem syscalls from src to the first matching dests -- To view, visit https://gem5-review.googlesource.com/c/public/gem5/+/47524 To unsubscribe, or for help writing mail filters, visit https://gem5-review.googlesource.com/settings Gerrit-Project: public/gem5 Gerrit-Branch: develop Gerrit-Change-Id: I985512245c9f51106b8347412ed643f78b567b24 Gerrit-Change-Number: 47524 Gerrit-PatchSet: 1 Gerrit-Owner: Kyle Roarty Gerrit-MessageType: newchange ___ gem5-dev mailing list -- gem5-dev@gem5.org To unsubscribe send an email to gem5-dev-le...@gem5.org %(web_page_url)slistinfo%(cgiext)s/%(_internal_name)s
[gem5-dev] Change in gem5/gem5[develop]: gpu-compute: Update GET_PROCESS_APERTURES IOCTLs
Kyle Roarty has uploaded this change for review. ( https://gem5-review.googlesource.com/c/public/gem5/+/47529 ) Change subject: gpu-compute: Update GET_PROCESS_APERTURES IOCTLs .. gpu-compute: Update GET_PROCESS_APERTURES IOCTLs The apertures for non-gfx801 GPUs are set differently. If the apertures aren't set properly, ROCm will error out. This updates the GPUVM apertures, as it is the one that ROCm explicitly checks (WIP) Change-Id: I1fa6f60bc20c7b6eb3896057841d96846460a9f8 --- M src/gpu-compute/gpu_compute_driver.cc 1 file changed, 15 insertions(+), 14 deletions(-) diff --git a/src/gpu-compute/gpu_compute_driver.cc b/src/gpu-compute/gpu_compute_driver.cc index 02f1de5..7edbbdb 100644 --- a/src/gpu-compute/gpu_compute_driver.cc +++ b/src/gpu-compute/gpu_compute_driver.cc @@ -322,9 +322,15 @@ args->process_apertures[i].lds_limit = ldsApeLimit(args->process_apertures[i].lds_base); +if (isdGPU) { +args->process_apertures[i].gpuvm_base = 0x100ull; +args->process_apertures[i].gpuvm_limit = +0x8000ULL - 1; +} else { args->process_apertures[i].gpuvm_base = gpuVmApeBase(i + 1); args->process_apertures[i].gpuvm_limit = gpuVmApeLimit(args->process_apertures[i].gpuvm_base); +} // NOTE: Must match ID populated by hsaTopology.py // @@ -393,14 +399,6 @@ 47) != 0x1); assert(bits(args->process_apertures[i].lds_limit, 63, 47) != 0); -assert(bits(args->process_apertures[i].gpuvm_base, 63, - 47) != 0x1); -assert(bits(args->process_apertures[i].gpuvm_base, 63, - 47) != 0); -assert(bits(args->process_apertures[i].gpuvm_limit, 63, - 47) != 0x1); -assert(bits(args->process_apertures[i].gpuvm_limit, 63, - 47) != 0); } args.copyOut(virt_proxy); @@ -595,8 +593,15 @@ scratchApeLimit(ape_args->scratch_base); ape_args->lds_base = ldsApeBase(i + 1); ape_args->lds_limit = ldsApeLimit(ape_args->lds_base); -ape_args->gpuvm_base = gpuVmApeBase(i + 1); -ape_args->gpuvm_limit = gpuVmApeLimit(ape_args->gpuvm_base); +if (isdGPU) { +ape_args->gpuvm_base = 0x100ull; +ape_args->gpuvm_limit = 0x8000ULL - 
1; +} else { +ape_args->gpuvm_base = gpuVmApeBase(i + 1); +ape_args->gpuvm_limit = +gpuVmApeLimit(ape_args->gpuvm_base); +} + // NOTE: Must match ID populated by hsaTopology.py if (isdGPU) { @@ -628,10 +633,6 @@ assert(bits(ape_args->lds_base, 63, 47) != 0); assert(bits(ape_args->lds_limit, 63, 47) != 0x1); assert(bits(ape_args->lds_limit, 63, 47) != 0); -assert(bits(ape_args->gpuvm_base, 63, 47) != 0x1); -assert(bits(ape_args->gpuvm_base, 63, 47) != 0); -assert(bits(ape_args->gpuvm_limit, 63, 47) != 0x1); -assert(bits(ape_args->gpuvm_limit, 63, 47) != 0); ape_args.copyOut(virt_proxy); } -- To view, visit https://gem5-review.googlesource.com/c/public/gem5/+/47529 To unsubscribe, or for help writing mail filters, visit https://gem5-review.googlesource.com/settings Gerrit-Project: public/gem5 Gerrit-Branch: develop Gerrit-Change-Id: I1fa6f60bc20c7b6eb3896057841d96846460a9f8 Gerrit-Change-Number: 47529 Gerrit-PatchSet: 1 Gerrit-Owner: Kyle Roarty Gerrit-MessageType: newchange ___ gem5-dev mailing list -- gem5-dev@gem5.org To unsubscribe send an email to gem5-dev-le...@gem5.org %(web_page_url)slistinfo%(cgiext)s/%(_internal_name)s
[gem5-dev] Change in gem5/gem5[develop]: configs,gpu-compute: Add support for gfx902/Raven
Kyle Roarty has uploaded this change for review. ( https://gem5-review.googlesource.com/c/public/gem5/+/47530 ) Change subject: configs,gpu-compute: Add support for gfx902/Raven .. configs,gpu-compute: Add support for gfx902/Raven This patch adds support for a gfx902 Vega APU, ripping the appropriate values for device_id from the ROCm Thunk (src/topology.c) Note: gfx902 isn't officially supported by ROCm. This means that it may not work for all programs. In particular, rocBLAS is incompatible with gfx902, so anything that uses rocBLAS won't be able to run with gfx902 Change-Id: I48893e7cc9c7e52275fdfd22314f371a9db8e90a --- M configs/example/apu_se.py M configs/example/hsaTopology.py M src/gpu-compute/GPU.py M src/gpu-compute/gpu_compute_driver.cc M src/gpu-compute/gpu_compute_driver.hh 5 files changed, 19 insertions(+), 7 deletions(-) diff --git a/configs/example/apu_se.py b/configs/example/apu_se.py index 8d49865..1e78f27 100644 --- a/configs/example/apu_se.py +++ b/configs/example/apu_se.py @@ -189,7 +189,7 @@ "between 0-7") parser.add_argument("--gfx-version", type=str, default='gfx801', -help="Gfx version for gpu: gfx801, gfx803, gfx900") +help="Gfx version for gpu: gfx801, gfx803, gfx900, gfx902") Ruby.define_options(parser) @@ -682,7 +682,7 @@ elif args.gfx_version == 'gfx900': hsaTopology.createVegaTopology(args) else: -assert (args.gfx_version in ['gfx801']),\ +assert (args.gfx_version in ['gfx801', 'gfx902']),\ "Incorrect gfx version for APU" hsaTopology.createCarrizoTopology(args) diff --git a/configs/example/hsaTopology.py b/configs/example/hsaTopology.py index da3bc57..7996a83 100644 --- a/configs/example/hsaTopology.py +++ b/configs/example/hsaTopology.py @@ -427,13 +427,21 @@ file_append((node_dir, 'gpu_id'), 2765) # must have marketing name -file_append((node_dir, 'name'), 'Carrizo\n') +if options.gfx_version == 'gfx801': +file_append((node_dir, 'name'), 'Carrizo\n') +elif options.gfx_version == 'gfx902': +file_append((node_dir, 'name'), 'Raven\n') 
mem_banks_cnt = 1 # Should be the same as the render driver filename (dri/renderD) drm_num = 128 +if options.gfx_version == 'gfx801': +device_id = 39028 +elif options.gfx_version == 'gfx902': +device_id = 5597 + # populate global node properties # NOTE: SIMD count triggers a valid GPU agent creation node_prop = 'cpu_cores_count %s\n' % options.num_cpus + \ @@ -454,7 +462,7 @@ 'simd_per_cu %s\n' % options.simds_per_cu + \ 'max_slots_scratch_cu 32\n' + \ 'vendor_id 4098\n' + \ -'device_id 39028\n' + \ +'device_id %s\n' % device_id+ \ 'location_id 8\n' + \ 'drm_render_minor %s\n' % drm_num + \ 'max_engine_clk_fcompute %s\n' \ diff --git a/src/gpu-compute/GPU.py b/src/gpu-compute/GPU.py index 107899e..e07f180 100644 --- a/src/gpu-compute/GPU.py +++ b/src/gpu-compute/GPU.py @@ -52,6 +52,7 @@ 'gfx801', 'gfx803', 'gfx900', +'gfx902', ] class PoolManager(SimObject): diff --git a/src/gpu-compute/gpu_compute_driver.cc b/src/gpu-compute/gpu_compute_driver.cc index 7edbbdb..f39576e 100644 --- a/src/gpu-compute/gpu_compute_driver.cc +++ b/src/gpu-compute/gpu_compute_driver.cc @@ -322,7 +322,7 @@ args->process_apertures[i].lds_limit = ldsApeLimit(args->process_apertures[i].lds_base); -if (isdGPU) { +if (isdGPU || gfxVersion == GfxVersion::gfx902) { args->process_apertures[i].gpuvm_base = 0x100ull; args->process_apertures[i].gpuvm_limit = 0x8000ULL - 1; @@ -355,6 +355,7 @@ } else { switch (gfxVersion) { case GfxVersion::gfx801: + case GfxVersion::gfx902: args->process_apertures[i].gpu_id = 2765; break; default: @@ -593,7 +594,7 @@ scratchApeLimit(ape_args->scratch_base); ape_args->lds_base = ldsApeBase(i + 1); ape_args->lds_limit = ldsApeLimit(ape_args->lds_base); -if (isdGPU) { +if (isdGPU || gfxVersion == GfxVersion::gfx902) { ape_args->gpuvm_base = 0x100ull; ape_args->gpuvm_limit = 0x8000ULL - 1
[gem5-dev] Change in gem5/gem5[develop]: configs,gpu-compute: Set proper dGPUPoolID defaults
Kyle Roarty has uploaded this change for review. ( https://gem5-review.googlesource.com/c/public/gem5/+/47527 ) Change subject: configs,gpu-compute: Set proper dGPUPoolID defaults .. configs,gpu-compute: Set proper dGPUPoolID defaults In GPU.py, dGPUPoolID is defined as an int, but was defaulted to False. Explicitly set it to 0, instead. In apu_se.py, dGPUPoolID was being set to 1, but that was resulting in crashes. Setting it to 0 avoids those crashes Change-Id: I0f1161588279a335bbd0d8ae7acda97fc23201b5 --- M configs/example/apu_se.py M src/gpu-compute/GPU.py 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/configs/example/apu_se.py b/configs/example/apu_se.py index 98a1e19..8d49865 100644 --- a/configs/example/apu_se.py +++ b/configs/example/apu_se.py @@ -434,7 +434,7 @@ # HSA kernel mode driver gpu_driver = GPUComputeDriver(filename = "kfd", isdGPU = args.dgpu, gfxVersion = args.gfx_version, - dGPUPoolID = 1, m_type = args.m_type) + dGPUPoolID = 0, m_type = args.m_type) renderDriNum = 128 render_driver = GPURenderDriver(filename = f'dri/renderD{renderDriNum}') diff --git a/src/gpu-compute/GPU.py b/src/gpu-compute/GPU.py index ace83a5..107899e 100644 --- a/src/gpu-compute/GPU.py +++ b/src/gpu-compute/GPU.py @@ -243,7 +243,7 @@ device = Param.GPUCommandProcessor('GPU controlled by this driver') isdGPU = Param.Bool(False, 'Driver is for a dGPU') gfxVersion = Param.GfxVersion('gfx801', 'ISA of gpu to model') -dGPUPoolID = Param.Int(False, 'Pool ID for dGPU.') +dGPUPoolID = Param.Int(0, 'Pool ID for dGPU.') # Default Mtype for caches #-- 1 1 1 C_RW_S (Cached-ReadWrite-Shared) #-- 1 1 0 C_RW_US (Cached-ReadWrite-Unshared) -- To view, visit https://gem5-review.googlesource.com/c/public/gem5/+/47527 To unsubscribe, or for help writing mail filters, visit https://gem5-review.googlesource.com/settings Gerrit-Project: public/gem5 Gerrit-Branch: develop Gerrit-Change-Id: I0f1161588279a335bbd0d8ae7acda97fc23201b5 Gerrit-Change-Number: 47527 Gerrit-PatchSet: 1 
Gerrit-Owner: Kyle Roarty Gerrit-MessageType: newchange ___ gem5-dev mailing list -- gem5-dev@gem5.org To unsubscribe send an email to gem5-dev-le...@gem5.org %(web_page_url)slistinfo%(cgiext)s/%(_internal_name)s
[gem5-dev] Change in gem5/gem5[develop]: configs: Set valid heap_type values
Kyle Roarty has uploaded this change for review. ( https://gem5-review.googlesource.com/c/public/gem5/+/47528 ) Change subject: configs: Set valid heap_type values .. configs: Set valid heap_type values The variables that were used to set heap_type don't exist. Explicitly set them to the proper values. Change-Id: I8df7fca7442f6640be1154ef147c4e302ea491bb --- M configs/example/hsaTopology.py 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/configs/example/hsaTopology.py b/configs/example/hsaTopology.py index 28060cc..da3bc57 100644 --- a/configs/example/hsaTopology.py +++ b/configs/example/hsaTopology.py @@ -140,7 +140,7 @@ # CPU memory reporting mem_dir = joinpath(node_dir, 'mem_banks/0') remake_dir(mem_dir) -mem_prop = 'heap_type %s\n' % HsaHeaptype.HSA_HEAPTYPE_SYSTEM.value + \ +mem_prop = 'heap_type 0\n' + \ 'size_in_bytes 33704329216\n'+ \ 'flags 0\n' + \ 'width 72\n' + \ @@ -221,7 +221,7 @@ # TODO: Extract size, clk, and width from sim paramters mem_dir = joinpath(node_dir, 'mem_banks/0') remake_dir(mem_dir) -mem_prop = 'heap_type %s\n' % heap_type.value + \ +mem_prop = 'heap_type 1\n' + \ 'size_in_bytes 17163091968\n'+ \ 'flags 0\n' + \ 'width 2048\n' + \ -- To view, visit https://gem5-review.googlesource.com/c/public/gem5/+/47528 To unsubscribe, or for help writing mail filters, visit https://gem5-review.googlesource.com/settings Gerrit-Project: public/gem5 Gerrit-Branch: develop Gerrit-Change-Id: I8df7fca7442f6640be1154ef147c4e302ea491bb Gerrit-Change-Number: 47528 Gerrit-PatchSet: 1 Gerrit-Owner: Kyle Roarty Gerrit-MessageType: newchange ___ gem5-dev mailing list -- gem5-dev@gem5.org To unsubscribe send an email to gem5-dev-le...@gem5.org %(web_page_url)slistinfo%(cgiext)s/%(_internal_name)s
[gem5-dev] Change in gem5/gem5[develop]: arch-vega: Add decoding for implemented insts
Kyle Roarty has uploaded this change for review. ( https://gem5-review.googlesource.com/c/public/gem5/+/47521 ) Change subject: arch-vega: Add decoding for implemented insts .. arch-vega: Add decoding for implemented insts Certain instructions were implemented in instructions.cc, but weren't actually being decoded by the decoder, causing the decoder to return nullptr for valid instructions. This patch fixes the decoder to return the proper instruction class for implemented instructions Change-Id: I8d8525a1c435147017cb38d9df8e1675986ef04b --- M src/arch/amdgpu/vega/decoder.cc 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/src/arch/amdgpu/vega/decoder.cc b/src/arch/amdgpu/vega/decoder.cc index 363f7e1..480d326 100644 --- a/src/arch/amdgpu/vega/decoder.cc +++ b/src/arch/amdgpu/vega/decoder.cc @@ -4155,19 +4155,19 @@ GPUStaticInst* Decoder::decode_OP_VOP2__V_ADD_U32(MachInst iFmt) { -return nullptr; +return new Inst_VOP2__V_ADD_U32(&iFmt->iFmt_VOP2); } GPUStaticInst* Decoder::decode_OP_VOP2__V_SUB_U32(MachInst iFmt) { -return nullptr; +return new Inst_VOP2__V_SUB_U32(&iFmt->iFmt_VOP2); } GPUStaticInst* Decoder::decode_OP_VOP2__V_SUBREV_U32(MachInst iFmt) { -return nullptr; +return new Inst_VOP2__V_SUBREV_U32(&iFmt->iFmt_VOP2); } GPUStaticInst* @@ -4443,7 +4443,7 @@ GPUStaticInst* Decoder::decode_OP_SOP2__S_MUL_HI_I32(MachInst iFmt) { -return nullptr; +return new Inst_SOP2__S_MUL_I32(&iFmt->iFmt_SOP2); } GPUStaticInst* @@ -6939,31 +6939,31 @@ GPUStaticInst* Decoder::decode_OPU_VOP3__V_MAD_F16(MachInst iFmt) { -return nullptr; +return new Inst_VOP3__V_MAD_F16(&iFmt->iFmt_VOP3A); } GPUStaticInst* Decoder::decode_OPU_VOP3__V_MAD_U16(MachInst iFmt) { -return nullptr; +return new Inst_VOP3__V_MAD_U16(&iFmt->iFmt_VOP3A); } GPUStaticInst* Decoder::decode_OPU_VOP3__V_MAD_I16(MachInst iFmt) { -return nullptr; +return new Inst_VOP3__V_MAD_I16(&iFmt->iFmt_VOP3A); } GPUStaticInst* Decoder::decode_OPU_VOP3__V_FMA_F16(MachInst iFmt) { -return nullptr; +return new 
Inst_VOP3__V_FMA_F16(&iFmt->iFmt_VOP3A); } GPUStaticInst* Decoder::decode_OPU_VOP3__V_DIV_FIXUP_F16(MachInst iFmt) { -return nullptr; +return new Inst_VOP3__V_DIV_FIXUP_F16(&iFmt->iFmt_VOP3A); } GPUStaticInst* -- To view, visit https://gem5-review.googlesource.com/c/public/gem5/+/47521 To unsubscribe, or for help writing mail filters, visit https://gem5-review.googlesource.com/settings Gerrit-Project: public/gem5 Gerrit-Branch: develop Gerrit-Change-Id: I8d8525a1c435147017cb38d9df8e1675986ef04b Gerrit-Change-Number: 47521 Gerrit-PatchSet: 1 Gerrit-Owner: Kyle Roarty Gerrit-MessageType: newchange ___ gem5-dev mailing list -- gem5-dev@gem5.org To unsubscribe send an email to gem5-dev-le...@gem5.org %(web_page_url)slistinfo%(cgiext)s/%(_internal_name)s
[gem5-dev] Change in gem5/gem5[develop]: arch-vega: Fix s_endpgm instruction
Kyle Roarty has uploaded this change for review. ( https://gem5-review.googlesource.com/c/public/gem5/+/47519 ) Change subject: arch-vega: Fix s_endpgm instruction .. arch-vega: Fix s_endpgm instruction Copy over changes that had been made to s_endpgm in GCN3 but weren't added to the Vega implementation Change-Id: I1063f83b1ce8f7c5e451c8c227265715c8f725b9 --- M src/arch/amdgpu/vega/insts/instructions.cc 1 file changed, 11 insertions(+), 2 deletions(-) diff --git a/src/arch/amdgpu/vega/insts/instructions.cc b/src/arch/amdgpu/vega/insts/instructions.cc index b0a6cb0..74be2cf 100644 --- a/src/arch/amdgpu/vega/insts/instructions.cc +++ b/src/arch/amdgpu/vega/insts/instructions.cc @@ -4134,7 +4134,12 @@ ComputeUnit *cu = gpuDynInst->computeUnit(); // delete extra instructions fetched for completed work-items -wf->instructionBuffer.clear(); +wf->instructionBuffer.erase(wf->instructionBuffer.begin() + 1, +wf->instructionBuffer.end()); + +if (wf->pendingFetch) { +wf->dropFetch = true; +} wf->computeUnit->fetchStage.fetchUnit(wf->simdId) .flushBuf(wf->wfSlotId); @@ -4212,8 +4217,11 @@ bool kernelEnd = wf->computeUnit->shader->dispatcher().isReachingKernelEnd(wf); +bool relNeeded = +wf->computeUnit->shader->impl_kern_end_rel; + //if it is not a kernel end, then retire the workgroup directly -if (!kernelEnd) { +if (!kernelEnd || !relNeeded) { wf->computeUnit->shader->dispatcher().notifyWgCompl(wf); wf->setStatus(Wavefront::S_STOPPED); wf->computeUnit->completedWGs++; @@ -4229,6 +4237,7 @@ * the complex */ setFlag(MemSync); +setFlag(GlobalSegment); // Notify Memory System of Kernel Completion // Kernel End = isKernel + isMemSync wf->setStatus(Wavefront::S_RETURNING); -- To view, visit https://gem5-review.googlesource.com/c/public/gem5/+/47519 To unsubscribe, or for help writing mail filters, visit https://gem5-review.googlesource.com/settings Gerrit-Project: public/gem5 Gerrit-Branch: develop Gerrit-Change-Id: I1063f83b1ce8f7c5e451c8c227265715c8f725b9 Gerrit-Change-Number: 
47519 Gerrit-PatchSet: 1 Gerrit-Owner: Kyle Roarty Gerrit-MessageType: newchange ___ gem5-dev mailing list -- gem5-dev@gem5.org To unsubscribe send an email to gem5-dev-le...@gem5.org %(web_page_url)slistinfo%(cgiext)s/%(_internal_name)s
[gem5-dev] Change in gem5/gem5[develop]: arch-vega: Add missing return to flat_load_dwordx4
Kyle Roarty has uploaded this change for review. ( https://gem5-review.googlesource.com/c/public/gem5/+/47520 ) Change subject: arch-vega: Add missing return to flat_load_dwordx4 .. arch-vega: Add missing return to flat_load_dwordx4 Change-Id: Ibf56c25a3d22d3c12ae2c1bb11f00f4a44b5919a --- M src/arch/amdgpu/vega/insts/instructions.cc 1 file changed, 1 insertion(+), 0 deletions(-) diff --git a/src/arch/amdgpu/vega/insts/instructions.cc b/src/arch/amdgpu/vega/insts/instructions.cc index 74be2cf..793bdce 100644 --- a/src/arch/amdgpu/vega/insts/instructions.cc +++ b/src/arch/amdgpu/vega/insts/instructions.cc @@ -42981,6 +42981,7 @@ if (gpuDynInst->exec_mask.none()) { wf->decVMemInstsIssued(); wf->decLGKMInstsIssued(); +return; } gpuDynInst->execUnitId = wf->execUnitId; -- To view, visit https://gem5-review.googlesource.com/c/public/gem5/+/47520 To unsubscribe, or for help writing mail filters, visit https://gem5-review.googlesource.com/settings Gerrit-Project: public/gem5 Gerrit-Branch: develop Gerrit-Change-Id: Ibf56c25a3d22d3c12ae2c1bb11f00f4a44b5919a Gerrit-Change-Number: 47520 Gerrit-PatchSet: 1 Gerrit-Owner: Kyle Roarty Gerrit-MessageType: newchange ___ gem5-dev mailing list -- gem5-dev@gem5.org To unsubscribe send an email to gem5-dev-le...@gem5.org %(web_page_url)slistinfo%(cgiext)s/%(_internal_name)s
[gem5-dev] Change in gem5/gem5[develop]: gpu-compute: Change certain IOCTL errors to warnings
Kyle Roarty has submitted this change. ( https://gem5-review.googlesource.com/c/public/gem5/+/46247 ) Change subject: gpu-compute: Change certain IOCTL errors to warnings .. gpu-compute: Change certain IOCTL errors to warnings There are certain IOCTL errors that were triggering with the change to ROCm 4, however they could be set to warnings without causing any errors in the program Change-Id: Ie0052267f3ccfbdbadb90249b6f19e6a1205f57e Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/46247 Tested-by: kokoro Reviewed-by: Matthew Poremba Reviewed-by: Matt Sinclair Maintainer: Matt Sinclair --- M src/gpu-compute/gpu_compute_driver.cc 1 file changed, 3 insertions(+), 3 deletions(-) Approvals: Matthew Poremba: Looks good to me, approved Matt Sinclair: Looks good to me, but someone else must approve; Looks good to me, approved kokoro: Regressions pass diff --git a/src/gpu-compute/gpu_compute_driver.cc b/src/gpu-compute/gpu_compute_driver.cc index 7f8cc16..12e537c 100644 --- a/src/gpu-compute/gpu_compute_driver.cc +++ b/src/gpu-compute/gpu_compute_driver.cc @@ -417,7 +417,7 @@ TypedBufferArg args(ioc_buf); args.copyIn(virt_proxy); if (args->event_type != KFD_IOC_EVENT_SIGNAL) { -fatal("Signal events are only supported currently\n"); +warn("Signal events are only supported currently\n"); } else if (eventSlotIndex == SLOTS_PER_PAGE) { fatal("Signal event wasn't created; signal limit reached\n"); } @@ -508,8 +508,8 @@ "\tamdkfd wait for event %d\n", EventData->event_id); panic_if(ETable.count(EventData->event_id) == 0, "Event ID invalid, cannot set this event\n"); -panic_if(ETable[EventData->event_id].threadWaiting, - "Multiple threads waiting on the same event\n"); +if (ETable[EventData->event_id].threadWaiting) + warn("Multiple threads waiting on the same event\n"); if (ETable[EventData->event_id].setEvent) { // If event is already set, the event has already happened. // Just unset the event and dont put this thread to sleep. 
3 is the latest approved patch-set. No files were changed between the latest approved patch-set and the submitted one. -- To view, visit https://gem5-review.googlesource.com/c/public/gem5/+/46247 To unsubscribe, or for help writing mail filters, visit https://gem5-review.googlesource.com/settings Gerrit-Project: public/gem5 Gerrit-Branch: develop Gerrit-Change-Id: Ie0052267f3ccfbdbadb90249b6f19e6a1205f57e Gerrit-Change-Number: 46247 Gerrit-PatchSet: 8 Gerrit-Owner: Kyle Roarty Gerrit-Reviewer: Alex Dutu Gerrit-Reviewer: Kyle Roarty Gerrit-Reviewer: Matt Sinclair Gerrit-Reviewer: Matthew Poremba Gerrit-Reviewer: kokoro Gerrit-MessageType: merged ___ gem5-dev mailing list -- gem5-dev@gem5.org To unsubscribe send an email to gem5-dev-le...@gem5.org %(web_page_url)slistinfo%(cgiext)s/%(_internal_name)s
[gem5-dev] Change in gem5/gem5[develop]: configs,gpu-compute: Add render driver needed for ROCm 4
Kyle Roarty has submitted this change. ( https://gem5-review.googlesource.com/c/public/gem5/+/46244 ) Change subject: configs,gpu-compute: Add render driver needed for ROCm 4 .. configs,gpu-compute: Add render driver needed for ROCm 4 ROCm 4 utilizes the render driver located at /dev/dri/renderDXXX. This patch implements a very simple driver that just returns a file descriptor when opened, as testing has shown that's all that's needed Change-Id: I65602346cbf17b2dc80e114046ebf5c9830a1507 Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/46244 Tested-by: kokoro Reviewed-by: Jason Lowe-Power Reviewed-by: Matt Sinclair Maintainer: Jason Lowe-Power Maintainer: Matt Sinclair --- M configs/example/apu_se.py M configs/example/hsaTopology.py M src/gpu-compute/GPU.py M src/gpu-compute/SConscript A src/gpu-compute/gpu_render_driver.cc A src/gpu-compute/gpu_render_driver.hh 6 files changed, 124 insertions(+), 2 deletions(-) Approvals: Jason Lowe-Power: Looks good to me, but someone else must approve; Looks good to me, approved Matt Sinclair: Looks good to me, approved; Looks good to me, approved kokoro: Regressions pass diff --git a/configs/example/apu_se.py b/configs/example/apu_se.py index f779df3..98a1e19 100644 --- a/configs/example/apu_se.py +++ b/configs/example/apu_se.py @@ -436,6 +436,9 @@ gfxVersion = args.gfx_version, dGPUPoolID = 1, m_type = args.m_type) +renderDriNum = 128 +render_driver = GPURenderDriver(filename = f'dri/renderD{renderDriNum}') + # Creating the GPU kernel launching components: that is the HSA # packet processor (HSAPP), GPU command processor (CP), and the # dispatcher. 
@@ -498,7 +501,8 @@ "HSA_ENABLE_SDMA=0"] process = Process(executable = executable, cmd = [args.cmd] - + args.options.split(), drivers = [gpu_driver], env = env) + + args.options.split(), + drivers = [gpu_driver, render_driver], env = env) for cpu in cpu_list: cpu.createThreads() diff --git a/configs/example/hsaTopology.py b/configs/example/hsaTopology.py index 78fe1f7..78193e0 100644 --- a/configs/example/hsaTopology.py +++ b/configs/example/hsaTopology.py @@ -156,6 +156,9 @@ file_append((node_dir, 'gpu_id'), 22124) file_append((node_dir, 'name'), 'Vega\n') +# Should be the same as the render driver filename (dri/renderD) +drm_num = 128 + # 96 in real Vega # Random comment for comparison purposes caches = 0 @@ -200,7 +203,7 @@ 'vendor_id 4098\n' + \ 'device_id 26720\n' + \ 'location_id 1024\n'+ \ -'drm_render_minor 128\n'+ \ +'drm_render_minor %s\n' % drm_num + \ 'hive_id 0\n' + \ 'num_sdma_engines 2\n' + \ 'num_sdma_xgmi_engines 0\n' + \ @@ -329,6 +332,9 @@ file_append((node_dir, 'gpu_id'), 50156) file_append((node_dir, 'name'), 'Fiji\n') +# Should be the same as the render driver filename (dri/renderD) +drm_num = 128 + # Real Fiji shows 96, but building that topology is complex and doesn't # appear to be required for anything. 
caches = 0 @@ -373,6 +379,7 @@ 'vendor_id 4098\n' + \ 'device_id 29440\n' + \ 'location_id 512\n' + \ +'drm_render_minor %s\n' % drm_num + \ 'max_engine_clk_fcompute %s\n' \ % int(toFrequency(options.gpu_clock) / 1e6) + \ 'local_mem_size 4294967296\n' + \ @@ -424,6 +431,9 @@ mem_banks_cnt = 1 +# Should be the same as the render driver filename (dri/renderD) +drm_num = 128 + # populate global node properties # NOTE: SIMD count triggers a valid GPU agent creation node_prop = 'cpu_cores_count %s\n' % options.num_cpus + \ @@ -446,6 +456,7 @@ 'vendor_id 4098\n' + \ 'device_id 39028\n' + \ 'location_id 8\n' + \ +'drm_render_minor %s\n' % drm_num + \ 'max_engine_clk_fcompute %s\n' \ % int(toFrequency(options.gpu_clock) / 1e6) + \ 'local_mem_size 0\n'
[gem5-dev] Change in gem5/gem5[develop]: dev-hsa,gpu-compute: IOCTL updates for ROCm 4
Kyle Roarty has submitted this change. ( https://gem5-review.googlesource.com/c/public/gem5/+/46246 ) Change subject: dev-hsa,gpu-compute: IOCTL updates for ROCm 4 .. dev-hsa,gpu-compute: IOCTL updates for ROCm 4 This change copies over the up-to-date kfd_ioctl.h file from the linux kernel, and updates the gpu_compute_driver to reflect the changes found in the new version of the kfd_ioctl.h file Change-Id: I51e8e7158762f4b7e06c0f84507e5889a17939a2 Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/46246 Reviewed-by: Matt Sinclair Maintainer: Matt Sinclair Tested-by: kokoro --- M src/dev/hsa/kfd_ioctl.h M src/gpu-compute/gpu_compute_driver.cc 2 files changed, 310 insertions(+), 275 deletions(-) Approvals: Matt Sinclair: Looks good to me, approved; Looks good to me, approved kokoro: Regressions pass diff --git a/src/dev/hsa/kfd_ioctl.h b/src/dev/hsa/kfd_ioctl.h index 504621c..7099851 100644 --- a/src/dev/hsa/kfd_ioctl.h +++ b/src/dev/hsa/kfd_ioctl.h @@ -23,13 +23,16 @@ #ifndef KFD_IOCTL_H_INCLUDED #define KFD_IOCTL_H_INCLUDED +#include #include #include -#include - +/* + * - 1.1 - initial version + * - 1.3 - Add SMI events support + */ #define KFD_IOCTL_MAJOR_VERSION 1 -#define KFD_IOCTL_MINOR_VERSION 2 +#define KFD_IOCTL_MINOR_VERSION 3 struct kfd_ioctl_get_version_args { @@ -41,6 +44,7 @@ #define KFD_IOC_QUEUE_TYPE_COMPUTE 0 #define KFD_IOC_QUEUE_TYPE_SDMA1 #define KFD_IOC_QUEUE_TYPE_COMPUTE_AQL 2 +#define KFD_IOC_QUEUE_TYPE_SDMA_XGMI3 #define KFD_MAX_QUEUE_PERCENTAGE 100 #define KFD_MAX_QUEUE_PRIORITY 15 @@ -89,6 +93,15 @@ uint64_t cu_mask_ptr; /* to KFD */ }; +struct kfd_ioctl_get_queue_wave_state_args +{ +uint64_t ctl_stack_address; /* to KFD */ +uint32_t ctl_stack_used_size; /* from KFD */ +uint32_t save_area_used_size; /* from KFD */ +uint32_t queue_id; /* to KFD */ +uint32_t pad; +}; + /* For kfd_ioctl_set_memory_policy_args.default_policy and alternate_policy */ #define KFD_IOC_CACHE_POLICY_COHERENT 0 #define KFD_IOC_CACHE_POLICY_NONCOHERENT 1 
@@ -104,14 +117,6 @@ uint32_t pad; }; -struct kfd_ioctl_set_trap_handler_args -{ - uint64_t tba_addr; - uint64_t tma_addr; - uint32_t gpu_id;/* to KFD */ - uint32_t pad; -}; - /* * All counters are monotonic. They are used for profiling of compute jobs. * The profiling is done by userspace. @@ -130,8 +135,6 @@ uint32_t pad; }; -#define NUM_OF_SUPPORTED_GPUS 7 - struct kfd_process_device_apertures { uint64_t lds_base; /* from KFD */ @@ -144,10 +147,12 @@ uint32_t pad; }; -/* This IOCTL and the limited NUM_OF_SUPPORTED_GPUS is deprecated. Use - * kfd_ioctl_get_process_apertures_new instead, which supports - * arbitrary numbers of GPUs. +/* + * AMDKFD_IOC_GET_PROCESS_APERTURES is deprecated. Use + * AMDKFD_IOC_GET_PROCESS_APERTURES_NEW instead, which supports an + * unlimited number of GPUs. */ +#define NUM_OF_SUPPORTED_GPUS 7 struct kfd_ioctl_get_process_apertures_args { struct kfd_process_device_apertures @@ -217,14 +222,21 @@ #define KFD_IOC_WAIT_RESULT_TIMEOUT1 #define KFD_IOC_WAIT_RESULT_FAIL 2 -/* - * The added 512 is because, currently, 8*(4096/256) signal events are - * reserved for debugger events, and we want to provide at least 4K signal - * events for EOP usage. - * We add 512 to make the allocated size (KFD_SIGNAL_EVENT_LIMIT * 8) be - * page aligned. - */ -#define KFD_SIGNAL_EVENT_LIMIT (4096 + 512) +#define KFD_SIGNAL_EVENT_LIMIT 4096 + +/* For kfd_event_data.hw_exception_data.reset_type. */ +#define KFD_HW_EXCEPTION_WHOLE_GPU_RESET0 +#define KFD_HW_EXCEPTION_PER_ENGINE_RESET 1 + +/* For kfd_event_data.hw_exception_data.reset_cause. 
*/ +#define KFD_HW_EXCEPTION_GPU_HANG 0 +#define KFD_HW_EXCEPTION_ECC1 + +/* For kfd_hsa_memory_exception_data.ErrorType */ +#define KFD_MEM_ERR_NO_RAS 0 +#define KFD_MEM_ERR_SRAM_ECC1 +#define KFD_MEM_ERR_POISON_CONSUMED 2 +#define KFD_MEM_ERR_GPU_HANG3 struct kfd_ioctl_create_event_args { @@ -267,22 +279,38 @@ /* memory exception data */ struct kfd_hsa_memory_exception_data { - struct kfd_memory_exception_failure failure; - uint64_t va; - uint32_t gpu_id; - uint32_t pad; +struct kfd_memory_exception_failure failure; +uint64_t va; +uint32_t gpu_id; +uint32_t ErrorType; /* 0 = no RAS error, + * 1 = ECC_SRAM, + * 2 = Link_SYNFLOOD (poison), + * 3 = GPU hang(not attributable to a specific cause), + * other values reserved + */ +}; + +/* hw exception data */ +struct kfd_hsa_hw_e
[gem5-dev] Change in gem5/gem5[develop]: gpu-compute: Ignore GPU kernel names
Kyle Roarty has submitted this change. ( https://gem5-review.googlesource.com/c/public/gem5/+/46245 ) Change subject: gpu-compute: Ignore GPU kernel names .. gpu-compute: Ignore GPU kernel names ROCm 4 seems to have updated the akc, and the only real issue that has occurred is that we're no longer able to read kernel names in the same way as we were in ROCm 1.6. This patch removes the prior method of reading kernel names and gives all kernels a temporary name Change-Id: I0040e0cf4cd35d6f56ded6a8acfb10c600bcc77a Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/46245 Tested-by: kokoro Reviewed-by: Matt Sinclair Reviewed-by: Matthew Poremba Maintainer: Matt Sinclair --- M src/gpu-compute/gpu_command_processor.cc 1 file changed, 1 insertion(+), 5 deletions(-) Approvals: Matthew Poremba: Looks good to me, approved Matt Sinclair: Looks good to me, approved; Looks good to me, approved kokoro: Regressions pass diff --git a/src/gpu-compute/gpu_command_processor.cc b/src/gpu-compute/gpu_command_processor.cc index 9bdd0b9..78b3235 100644 --- a/src/gpu-compute/gpu_command_processor.cc +++ b/src/gpu-compute/gpu_command_processor.cc @@ -171,7 +171,6 @@ DPRINTF(GPUCommandProc, "Machine code starts at addr: %#x\n", machine_code_addr); -Addr kern_name_addr(0); std::string kernel_name; /** @@ -184,10 +183,7 @@ * host memory. I have no idea what BLIT stands for. * */ if (akc.runtime_loader_kernel_symbol) { -virt_proxy.readBlob(akc.runtime_loader_kernel_symbol + 0x10, -(uint8_t*)&kern_name_addr, 0x8); - -virt_proxy.readString(kernel_name, kern_name_addr); +kernel_name = "Some kernel"; } else { kernel_name = "Blit kernel"; } 3 is the latest approved patch-set. No files were changed between the latest approved patch-set and the submitted one. 
-- To view, visit https://gem5-review.googlesource.com/c/public/gem5/+/46245 To unsubscribe, or for help writing mail filters, visit https://gem5-review.googlesource.com/settings Gerrit-Project: public/gem5 Gerrit-Branch: develop Gerrit-Change-Id: I0040e0cf4cd35d6f56ded6a8acfb10c600bcc77a Gerrit-Change-Number: 46245 Gerrit-PatchSet: 5 Gerrit-Owner: Kyle Roarty Gerrit-Reviewer: Alex Dutu Gerrit-Reviewer: Kyle Roarty Gerrit-Reviewer: Matt Sinclair Gerrit-Reviewer: Matthew Poremba Gerrit-Reviewer: kokoro Gerrit-MessageType: merged ___ gem5-dev mailing list -- gem5-dev@gem5.org To unsubscribe send an email to gem5-dev-le...@gem5.org %(web_page_url)slistinfo%(cgiext)s/%(_internal_name)s
[gem5-dev] Change in gem5/gem5[develop]: arch-x86: build with getdents64 if system supports it
Kyle Roarty has submitted this change. ( https://gem5-review.googlesource.com/c/public/gem5/+/46242 ) Change subject: arch-x86: build with getdents64 if system supports it .. arch-x86: build with getdents64 if system supports it This patch makes it so the getdents64 syscall is built in gem5 if the underlying host implements the syscall, similar to how the getdents syscall is implemented. The implementation for getdents64 already existed Change-Id: I73b22c8df8df994f3f720e848a7d4f8cd31d318e Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/46242 Tested-by: kokoro Reviewed-by: Matt Sinclair Reviewed-by: Matthew Poremba Reviewed-by: Alex Dutu Maintainer: Matt Sinclair --- M src/arch/x86/linux/syscall_tbl32.cc M src/arch/x86/linux/syscall_tbl64.cc 2 files changed, 8 insertions(+), 0 deletions(-) Approvals: Alex Dutu: Looks good to me, approved Matthew Poremba: Looks good to me, approved Matt Sinclair: Looks good to me, but someone else must approve; Looks good to me, approved kokoro: Regressions pass diff --git a/src/arch/x86/linux/syscall_tbl32.cc b/src/arch/x86/linux/syscall_tbl32.cc index 50d0969..db70151 100644 --- a/src/arch/x86/linux/syscall_tbl32.cc +++ b/src/arch/x86/linux/syscall_tbl32.cc @@ -261,7 +261,11 @@ { 218, "mincore" }, { 219, "madvise", ignoreFunc }, { 220, "madvise1" }, +#if defined(SYS_getdents64) +{ 221, "getdents64", getdents64Func }, +#else { 221, "getdents64" }, +#endif { 222, "fcntl64" }, { 223, "unused" }, { 224, "gettid", gettidFunc }, diff --git a/src/arch/x86/linux/syscall_tbl64.cc b/src/arch/x86/linux/syscall_tbl64.cc index be82437..94837cd 100644 --- a/src/arch/x86/linux/syscall_tbl64.cc +++ b/src/arch/x86/linux/syscall_tbl64.cc @@ -257,7 +257,11 @@ { 214, "epoll_ctl_old" }, { 215, "epoll_wait_old" }, { 216, "remap_file_pages" }, +#if defined(SYS_getdents64) +{ 217, "getdents64", getdents64Func }, +#else { 217, "getdents64" }, +#endif { 218, "set_tid_address", setTidAddressFunc }, { 219, "restart_syscall" }, { 220, 
"semtimedop" }, 1 is the latest approved patch-set. No files were changed between the latest approved patch-set and the submitted one. -- To view, visit https://gem5-review.googlesource.com/c/public/gem5/+/46242 To unsubscribe, or for help writing mail filters, visit https://gem5-review.googlesource.com/settings Gerrit-Project: public/gem5 Gerrit-Branch: develop Gerrit-Change-Id: I73b22c8df8df994f3f720e848a7d4f8cd31d318e Gerrit-Change-Number: 46242 Gerrit-PatchSet: 5 Gerrit-Owner: Kyle Roarty Gerrit-Reviewer: Alex Dutu Gerrit-Reviewer: Gabe Black Gerrit-Reviewer: Jason Lowe-Power Gerrit-Reviewer: Kyle Roarty Gerrit-Reviewer: Matt Sinclair Gerrit-Reviewer: Matthew Poremba Gerrit-Reviewer: kokoro Gerrit-MessageType: merged ___ gem5-dev mailing list -- gem5-dev@gem5.org To unsubscribe send an email to gem5-dev-le...@gem5.org %(web_page_url)slistinfo%(cgiext)s/%(_internal_name)s
[gem5-dev] Change in gem5/gem5[develop]: arch-x86,sim: Implement sched_getaffinity
Kyle Roarty has submitted this change. ( https://gem5-review.googlesource.com/c/public/gem5/+/46243 ) Change subject: arch-x86,sim: Implement sched_getaffinity .. arch-x86,sim: Implement sched_getaffinity sched_getaffinity is different from other syscalls in that the raw syscall returns the size of the cpumask being used to represent the CPU bit mask. Because of this, when a library (libnuma in this case) directly called sched_getaffinity and got a return value of 0, it errored out, thinking that there were no CPUs available. This implementation assumes that all CPUs are available, so it sets all simulated CPUs in the bitmask Change-Id: Id95c919986cc98a411877056256604f57a29f0f9 Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/46243 Tested-by: kokoro Reviewed-by: Matt Sinclair Reviewed-by: Jason Lowe-Power Maintainer: Jason Lowe-Power --- M src/arch/x86/linux/syscall_tbl64.cc M src/sim/syscall_emul.hh 2 files changed, 24 insertions(+), 1 deletion(-) Approvals: Jason Lowe-Power: Looks good to me, approved; Looks good to me, approved Matt Sinclair: Looks good to me, but someone else must approve kokoro: Regressions pass diff --git a/src/arch/x86/linux/syscall_tbl64.cc b/src/arch/x86/linux/syscall_tbl64.cc index 94837cd..7231595 100644 --- a/src/arch/x86/linux/syscall_tbl64.cc +++ b/src/arch/x86/linux/syscall_tbl64.cc @@ -244,7 +244,7 @@ { 201, "time", timeFunc }, { 202, "futex", futexFunc }, { 203, "sched_setaffinity", ignoreFunc }, -{ 204, "sched_getaffinity", ignoreFunc }, +{ 204, "sched_getaffinity", schedGetaffinityFunc }, { 205, "set_thread_area" }, { 206, "io_setup" }, { 207, "io_destroy" }, diff --git a/src/sim/syscall_emul.hh b/src/sim/syscall_emul.hh index cd2d8d1..3c1ad04 100644 --- a/src/sim/syscall_emul.hh +++ b/src/sim/syscall_emul.hh @@ -57,6 +57,7 @@ /// application on the host machine. 
#if defined(__linux__) +#include #include #include @@ -2603,4 +2604,26 @@ #endif } +/// Target sched_getaffinity +template +SyscallReturn +schedGetaffinityFunc(SyscallDesc *desc, ThreadContext *tc, + pid_t pid, size_t cpusetsize, VPtr<> cpu_set_mask) +{ +#if defined(__linux__) +if (cpusetsize < CPU_ALLOC_SIZE(tc->getSystemPtr()->threads.size())) +return -EINVAL; + +BufferArg maskBuf(cpu_set_mask, cpusetsize); +maskBuf.copyIn(tc->getVirtProxy()); +for (int i = 0; i < tc->getSystemPtr()->threads.size(); i++) { +CPU_SET(i, (cpu_set_t *)maskBuf.bufferPtr()); +} +maskBuf.copyOut(tc->getVirtProxy()); +return CPU_ALLOC_SIZE(tc->getSystemPtr()->threads.size()); +#else +warnUnsupportedOS("sched_getaffinity"); +return -1; +#endif +} #endif // __SIM_SYSCALL_EMUL_HH__ 3 is the latest approved patch-set. No files were changed between the latest approved patch-set and the submitted one. -- To view, visit https://gem5-review.googlesource.com/c/public/gem5/+/46243 To unsubscribe, or for help writing mail filters, visit https://gem5-review.googlesource.com/settings Gerrit-Project: public/gem5 Gerrit-Branch: develop Gerrit-Change-Id: Id95c919986cc98a411877056256604f57a29f0f9 Gerrit-Change-Number: 46243 Gerrit-PatchSet: 5 Gerrit-Owner: Kyle Roarty Gerrit-Reviewer: Gabe Black Gerrit-Reviewer: Jason Lowe-Power Gerrit-Reviewer: Jason Lowe-Power Gerrit-Reviewer: Kyle Roarty Gerrit-Reviewer: Matt Sinclair Gerrit-Reviewer: kokoro Gerrit-MessageType: merged ___ gem5-dev mailing list -- gem5-dev@gem5.org To unsubscribe send an email to gem5-dev-le...@gem5.org %(web_page_url)slistinfo%(cgiext)s/%(_internal_name)s
[gem5-dev] Change in gem5/gem5[develop]: configs: Add mem_banks to Carrizo topology
Kyle Roarty has submitted this change. ( https://gem5-review.googlesource.com/c/public/gem5/+/46240 ) Change subject: configs: Add mem_banks to Carrizo topology .. configs: Add mem_banks to Carrizo topology ROCm 4 iterates through the mem_banks to find an appropriate place to allocate memory. Previously, Carrizo didn't have any mem_banks, which resulted in the ROCm 4 runtime erroring out, as it didn't know where to allocate memory. The implementation is fairly similar to the implementation used for the Fiji or Vega configs Change-Id: I5bb4e89657d44c6cb690fd224ee1bf1d4d6cf2a5 Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/46240 Tested-by: kokoro Reviewed-by: Matthew Poremba Reviewed-by: Matt Sinclair Reviewed-by: Bobby R. Bruce Maintainer: Matt Sinclair --- M configs/example/hsaTopology.py 1 file changed, 15 insertions(+), 2 deletions(-) Approvals: Matthew Poremba: Looks good to me, approved Matt Sinclair: Looks good to me, approved; Looks good to me, approved Bobby R. 
Bruce: Looks good to me, but someone else must approve kokoro: Regressions pass diff --git a/configs/example/hsaTopology.py b/configs/example/hsaTopology.py index 51585de..78fe1f7 100644 --- a/configs/example/hsaTopology.py +++ b/configs/example/hsaTopology.py @@ -36,7 +36,7 @@ from os.path import join as joinpath from os.path import isdir from shutil import rmtree, copyfile -from m5.util.convert import toFrequency +from m5.util.convert import toFrequency, toMemorySize def file_append(path, contents): with open(joinpath(*path), 'a') as f: @@ -422,12 +422,14 @@ # must have marketing name file_append((node_dir, 'name'), 'Carrizo\n') +mem_banks_cnt = 1 + # populate global node properties # NOTE: SIMD count triggers a valid GPU agent creation node_prop = 'cpu_cores_count %s\n' % options.num_cpus + \ 'simd_count %s\n' \ % (options.num_compute_units * options.simds_per_cu)+ \ -'mem_banks_count 0\n' + \ +'mem_banks_count %s\n' % mem_banks_cnt + \ 'caches_count 0\n' + \ 'io_links_count 0\n'+ \ 'cpu_core_id_base 16\n' + \ @@ -453,3 +455,14 @@ % int(toFrequency(options.CPUClock) / 1e6) file_append((node_dir, 'properties'), node_prop) + +for i in range(mem_banks_cnt): +mem_dir = joinpath(node_dir, f'mem_banks/{i}') +remake_dir(mem_dir) + +mem_prop = f'heap_type 0\n' + \ + f'size_in_bytes {toMemorySize(options.mem_size)}'+ \ + f'flags 0\n' + \ + f'width 64\n'+ \ + f'mem_clk_max 1600\n' +file_append((mem_dir, 'properties'), mem_prop) 2 is the latest approved patch-set. No files were changed between the latest approved patch-set and the submitted one. -- To view, visit https://gem5-review.googlesource.com/c/public/gem5/+/46240 To unsubscribe, or for help writing mail filters, visit https://gem5-review.googlesource.com/settings Gerrit-Project: public/gem5 Gerrit-Branch: develop Gerrit-Change-Id: I5bb4e89657d44c6cb690fd224ee1bf1d4d6cf2a5 Gerrit-Change-Number: 46240 Gerrit-PatchSet: 4 Gerrit-Owner: Kyle Roarty Gerrit-Reviewer: Alex Dutu Gerrit-Reviewer: Bobby R. 
Bruce Gerrit-Reviewer: Jason Lowe-Power Gerrit-Reviewer: Kyle Roarty Gerrit-Reviewer: Matt Sinclair Gerrit-Reviewer: Matthew Poremba Gerrit-Reviewer: kokoro Gerrit-MessageType: merged ___ gem5-dev mailing list -- gem5-dev@gem5.org To unsubscribe send an email to gem5-dev-le...@gem5.org %(web_page_url)slistinfo%(cgiext)s/%(_internal_name)s
[gem5-dev] Change in gem5/gem5[develop]: arch-x86: Ignore certain syscalls called in ROCm 4
Kyle Roarty has submitted this change. ( https://gem5-review.googlesource.com/c/public/gem5/+/46241 ) Change subject: arch-x86: Ignore certain syscalls called in ROCm 4 .. arch-x86: Ignore certain syscalls called in ROCm 4 fdatasync, sigaltstack, and prctl are called by the ROCm 4 stack, but were unimplemented. Based on testing, we can change these to ignoreFunc without affecting program correctness. sched_yield gets changed to ignoreWarnOnceFunc, as it gets called significantly more in ROCm 4. Change-Id: I566b1d71d989c54bfc559d5b83790dff73a38b28 Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/46241 Tested-by: kokoro Maintainer: Matt Sinclair Reviewed-by: Matt Sinclair Reviewed-by: Matthew Poremba --- M src/arch/x86/linux/syscall_tbl64.cc 1 file changed, 4 insertions(+), 4 deletions(-) Approvals: Matthew Poremba: Looks good to me, approved Matt Sinclair: Looks good to me, approved; Looks good to me, approved kokoro: Regressions pass diff --git a/src/arch/x86/linux/syscall_tbl64.cc b/src/arch/x86/linux/syscall_tbl64.cc index 8630265..be82437 100644 --- a/src/arch/x86/linux/syscall_tbl64.cc +++ b/src/arch/x86/linux/syscall_tbl64.cc @@ -60,7 +60,7 @@ { 21, "access", ignoreFunc }, { 22, "pipe", pipeFunc }, { 23, "select", selectFunc }, -{ 24, "sched_yield", ignoreFunc }, +{ 24, "sched_yield", ignoreWarnOnceFunc }, { 25, "mremap", mremapFunc }, { 26, "msync" }, { 27, "mincore" }, @@ -111,7 +111,7 @@ { 72, "fcntl", fcntlFunc }, { 73, "flock" }, { 74, "fsync" }, -{ 75, "fdatasync" }, +{ 75, "fdatasync", ignoreFunc }, { 76, "truncate", truncateFunc }, { 77, "ftruncate", ftruncateFunc }, #if defined(SYS_getdents) @@ -171,7 +171,7 @@ { 128, "rt_sigtimedwait" }, { 129, "rt_sigqueueinfo" }, { 130, "rt_sigsuspend" }, -{ 131, "sigaltstack" }, +{ 131, "sigaltstack", ignoreFunc }, { 132, "utime" }, { 133, "mknod", mknodFunc }, { 134, "uselib" }, @@ -197,7 +197,7 @@ { 154, "modify_ldt" }, { 155, "pivot_root" }, { 156, "_sysctl" }, -{ 157, "prctl" }, +{ 157, 
"prctl", ignoreFunc }, { 158, "arch_prctl", archPrctlFunc }, { 159, "adjtimex" }, { 160, "setrlimit", ignoreFunc }, 3 is the latest approved patch-set. No files were changed between the latest approved patch-set and the submitted one. -- To view, visit https://gem5-review.googlesource.com/c/public/gem5/+/46241 To unsubscribe, or for help writing mail filters, visit https://gem5-review.googlesource.com/settings Gerrit-Project: public/gem5 Gerrit-Branch: develop Gerrit-Change-Id: I566b1d71d989c54bfc559d5b83790dff73a38b28 Gerrit-Change-Number: 46241 Gerrit-PatchSet: 5 Gerrit-Owner: Kyle Roarty Gerrit-Reviewer: Alex Dutu Gerrit-Reviewer: Gabe Black Gerrit-Reviewer: Jason Lowe-Power Gerrit-Reviewer: Kyle Roarty Gerrit-Reviewer: Matt Sinclair Gerrit-Reviewer: Matthew Poremba Gerrit-Reviewer: kokoro Gerrit-MessageType: merged ___ gem5-dev mailing list -- gem5-dev@gem5.org To unsubscribe send an email to gem5-dev-le...@gem5.org %(web_page_url)slistinfo%(cgiext)s/%(_internal_name)s
[gem5-dev] Change in gem5/gem5[develop]: gpu-compute: Initialize GPUDriver member variables before use
Kyle Roarty has submitted this change. ( https://gem5-review.googlesource.com/c/public/gem5/+/46248 ) Change subject: gpu-compute: Initialize GPUDriver member variables before use .. gpu-compute: Initialize GPUDriver member variables before use A few member variables weren't initialized, but we were assuming that they were 0 when first read. This explicitly sets those variables to 0. Change-Id: I2c840d361ed3a7d306e22dc7561a3870f1ef94a1 Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/46248 Tested-by: kokoro Reviewed-by: Matt Sinclair Maintainer: Matt Sinclair --- M src/gpu-compute/gpu_compute_driver.cc 1 file changed, 2 insertions(+), 1 deletion(-) Approvals: Matt Sinclair: Looks good to me, approved; Looks good to me, approved kokoro: Regressions pass diff --git a/src/gpu-compute/gpu_compute_driver.cc b/src/gpu-compute/gpu_compute_driver.cc index 12e537c..02f1de5 100644 --- a/src/gpu-compute/gpu_compute_driver.cc +++ b/src/gpu-compute/gpu_compute_driver.cc @@ -53,7 +53,8 @@ GPUComputeDriver::GPUComputeDriver(const Params &p) : EmulatedDriver(p), device(p.device), queueId(0), - isdGPU(p.isdGPU), gfxVersion(p.gfxVersion), dGPUPoolID(p.dGPUPoolID) + isdGPU(p.isdGPU), gfxVersion(p.gfxVersion), dGPUPoolID(p.dGPUPoolID), + eventPage(0), eventSlotIndex(0) { device->attachDriver(this); DPRINTF(GPUDriver, "Constructing KFD: device\n"); 1 is the latest approved patch-set. No files were changed between the latest approved patch-set and the submitted one. 
-- To view, visit https://gem5-review.googlesource.com/c/public/gem5/+/46248 To unsubscribe, or for help writing mail filters, visit https://gem5-review.googlesource.com/settings Gerrit-Project: public/gem5 Gerrit-Branch: develop Gerrit-Change-Id: I2c840d361ed3a7d306e22dc7561a3870f1ef94a1 Gerrit-Change-Number: 46248 Gerrit-PatchSet: 8 Gerrit-Owner: Kyle Roarty Gerrit-Reviewer: Alex Dutu Gerrit-Reviewer: Kyle Roarty Gerrit-Reviewer: Matt Sinclair Gerrit-Reviewer: Matthew Poremba Gerrit-Reviewer: kokoro Gerrit-MessageType: merged ___ gem5-dev mailing list -- gem5-dev@gem5.org To unsubscribe send an email to gem5-dev-le...@gem5.org %(web_page_url)slistinfo%(cgiext)s/%(_internal_name)s
[gem5-dev] Change in gem5/gem5[develop]: util: Update GCN Dockerfile for ROCm 4
Kyle Roarty has submitted this change. ( https://gem5-review.googlesource.com/c/public/gem5/+/46239 ) Change subject: util: Update GCN Dockerfile for ROCm 4 .. util: Update GCN Dockerfile for ROCm 4 This now installs ROCm 4 from source instead of ROCm 1.6. Change-Id: I380ca06e93d48475e93d18f69eb97756186772ab Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/46239 Reviewed-by: Matthew Poremba Reviewed-by: Matt Sinclair Maintainer: Matt Sinclair Tested-by: kokoro --- M util/dockerfiles/gcn-gpu/Dockerfile 1 file changed, 112 insertions(+), 158 deletions(-) Approvals: Matthew Poremba: Looks good to me, approved Matt Sinclair: Looks good to me, approved; Looks good to me, approved kokoro: Regressions pass diff --git a/util/dockerfiles/gcn-gpu/Dockerfile b/util/dockerfiles/gcn-gpu/Dockerfile index 2f5d1b4..360ab1f 100644 --- a/util/dockerfiles/gcn-gpu/Dockerfile +++ b/util/dockerfiles/gcn-gpu/Dockerfile @@ -1,166 +1,120 @@ -FROM ubuntu:16.04 +# Copyright (c) 2021 Kyle Roarty +# All Rights Reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are +# met: redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer; +# redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in the +# documentation and/or other materials provided with the distribution; +# neither the name of the copyright holders nor the names of its +# contributors may be used to endorse or promote products derived from +# this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT +# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +FROM ubuntu:20.04 +ENV DEBIAN_FRONTEND=noninteractive +RUN apt -y update +RUN apt -y upgrade +RUN apt -y install build-essential git m4 scons zlib1g zlib1g-dev \ +libprotobuf-dev protobuf-compiler libprotoc-dev libgoogle-perftools-dev \ +python3-dev python3-six python-is-python3 doxygen libboost-all-dev \ +libhdf5-serial-dev python3-pydot libpng-dev libelf-dev pkg-config -# Needed for add-apt-repository -RUN apt-get update && apt-get install -y --no-install-recommends \ -software-properties-common +# Requirements for ROCm +RUN apt -y install cmake mesa-common-dev libgflags-dev libgoogle-glog-dev -# Ubuntu 16.04 does not have a python package new enough for gem5, use a PPA -RUN add-apt-repository ppa:deadsnakes/ppa && apt-get update +# Needed to get ROCm repo, build packages +RUN apt -y install wget gnupg2 rpm -# Should be minimal needed packages -RUN apt-get update && apt-get install -y --no-install-recommends \ -findutils \ -file \ -libunwind8 \ -libunwind-dev \ -pkg-config \ -build-essential \ -gcc-multilib \ -g++-multilib \ -git \ -ca-certificates \ -m4 \ -zlib1g \ -zlib1g-dev \ -libprotobuf-dev \ -protobuf-compiler \ -libprotoc-dev \ -libgoogle-perftools-dev \ -python-yaml \ -python3.9 \ -python3.9-dev \ -python3.9-distutils \ -wget \ -libpci3 \ -libelf1 \ -libelf-dev \ -cmake \ -openssl \ -libssl-dev \ -libboost-filesystem-dev \ -libboost-system-dev \ -libboost-dev \ -libpng12-dev \ -gdb +RUN wget -q -O - 
https://repo.radeon.com/rocm/rocm.gpg.key | apt-key add - -# Use python 3.9 by default -RUN update-alternatives --install /usr/bin/python3 python3 /usr/bin/python3.9 1 +# ROCm webpage says to use debian main, but the individual versions +# only have xenial +RUN echo 'deb [arch=amd64] https://repo.radeon.com/rocm/apt/4.0.1/ xenial main' | tee /etc/apt/sources.list.d/rocm.list -# Setuptools is needed for cmake for ROCm build. Install using pip. -# Instructions to install PIP from https://pypi.org/project/pip/ -RUN wget https://bootstrap.pypa.io/get-pip.py -qO get-pip.py -RUN python3 get-pip.py -RUN pip install -U setuptools scons==3.1.2 six +RUN apt-get update && apt -y install hsakmt-roct hsakmt-roct-dev +RUN ln -s /opt/rocm-4.0.1 /opt/rocm -ARG gem5_dist=http://dist.gem5.org/dist/v21-0 +
[gem5-dev] Change in gem5/gem5[develop]: dev-hsa,gpu-compute: IOCTL updates for ROCm 4
Kyle Roarty has uploaded this change for review. ( https://gem5-review.googlesource.com/c/public/gem5/+/46246 ) Change subject: dev-hsa,gpu-compute: IOCTL updates for ROCm 4 .. dev-hsa,gpu-compute: IOCTL updates for ROCm 4 This change copies over the up-to-date kfd_ioctl.h file from the linux kernel, and updates the gpu_compute_driver to reflect the changes found in the new version of the kfd_ioctl.h file Change-Id: I51e8e7158762f4b7e06c0f84507e5889a17939a2 --- M src/dev/hsa/kfd_ioctl.h M src/gpu-compute/gpu_compute_driver.cc 2 files changed, 371 insertions(+), 335 deletions(-) diff --git a/src/dev/hsa/kfd_ioctl.h b/src/dev/hsa/kfd_ioctl.h index 504621c..5ba0a0c 100644 --- a/src/dev/hsa/kfd_ioctl.h +++ b/src/dev/hsa/kfd_ioctl.h @@ -23,13 +23,16 @@ #ifndef KFD_IOCTL_H_INCLUDED #define KFD_IOCTL_H_INCLUDED +#include #include #include -#include - +/* + * - 1.1 - initial version + * - 1.3 - Add SMI events support + */ #define KFD_IOCTL_MAJOR_VERSION 1 -#define KFD_IOCTL_MINOR_VERSION 2 +#define KFD_IOCTL_MINOR_VERSION 3 struct kfd_ioctl_get_version_args { @@ -41,6 +44,7 @@ #define KFD_IOC_QUEUE_TYPE_COMPUTE 0 #define KFD_IOC_QUEUE_TYPE_SDMA1 #define KFD_IOC_QUEUE_TYPE_COMPUTE_AQL 2 +#define KFD_IOC_QUEUE_TYPE_SDMA_XGMI 3 #define KFD_MAX_QUEUE_PERCENTAGE 100 #define KFD_MAX_QUEUE_PRIORITY 15 @@ -69,7 +73,7 @@ struct kfd_ioctl_destroy_queue_args { uint32_t queue_id; /* to KFD */ - uint32_t pad; +uint32_t pad; }; struct kfd_ioctl_update_queue_args @@ -78,15 +82,24 @@ uint32_t queue_id; /* to KFD */ uint32_t ring_size; /* to KFD */ - uint32_t queue_percentage; /* to KFD */ - uint32_t queue_priority;/* to KFD */ +uint32_t queue_percentage; /* to KFD */ +uint32_t queue_priority; /* to KFD */ }; struct kfd_ioctl_set_cu_mask_args { - uint32_t queue_id; /* to KFD */ - uint32_t num_cu_mask; /* to KFD */ - uint64_t cu_mask_ptr; /* to KFD */ +uint32_t queue_id; /* to KFD */ +uint32_t num_cu_mask; /* to KFD */ +uint64_t cu_mask_ptr; /* to KFD */ +}; + +struct 
kfd_ioctl_get_queue_wave_state_args +{ +uint64_t ctl_stack_address;/* to KFD */ +uint32_t ctl_stack_used_size; /* from KFD */ +uint32_t save_area_used_size; /* from KFD */ +uint32_t queue_id; /* to KFD */ +uint32_t pad; }; /* For kfd_ioctl_set_memory_policy_args.default_policy and alternate_policy */ @@ -104,14 +117,6 @@ uint32_t pad; }; -struct kfd_ioctl_set_trap_handler_args -{ - uint64_t tba_addr; - uint64_t tma_addr; - uint32_t gpu_id;/* to KFD */ - uint32_t pad; -}; - /* * All counters are monotonic. They are used for profiling of compute jobs. * The profiling is done by userspace. @@ -122,32 +127,32 @@ struct kfd_ioctl_get_clock_counters_args { uint64_t gpu_clock_counter; /* from KFD */ - uint64_t cpu_clock_counter; /* from KFD */ - uint64_t system_clock_counter; /* from KFD */ - uint64_t system_clock_freq; /* from KFD */ +uint64_t cpu_clock_counter;/* from KFD */ +uint64_t system_clock_counter; /* from KFD */ +uint64_t system_clock_freq;/* from KFD */ uint32_t gpu_id;/* to KFD */ uint32_t pad; }; -#define NUM_OF_SUPPORTED_GPUS 7 - struct kfd_process_device_apertures { uint64_t lds_base; /* from KFD */ - uint64_t lds_limit; /* from KFD */ - uint64_t scratch_base; /* from KFD */ - uint64_t scratch_limit; /* from KFD */ - uint64_t gpuvm_base;/* from KFD */ - uint64_t gpuvm_limit; /* from KFD */ - uint32_t gpu_id;/* from KFD */ - uint32_t pad; +uint64_t lds_limit;/* from KFD */ +uint64_t scratch_base; /* from KFD */ +uint64_t scratch_limit;/* from KFD */ +uint64_t gpuvm_base; /* from KFD */ +uint64_t gpuvm_limit; /* from KFD */ +uint32_t gpu_id; /* from KFD */ +uint32_t pad; }; -/* This IOCTL and the limited NUM_OF_SUPPORTED_GPUS is deprecated. Use - * kfd_ioctl_get_process_apertures_new instead, which supports - * arbitrary numbers of GPUs. +/* + * AMDKFD_IOC_GET_PROCESS_APERTURES is deprecated. Use + * AMDKFD_IOC_GET_PROCESS_APERTURES_NEW instead, which supports an + * unlimited number of GPUs. 
*/ +#define NUM_OF_SUPPORTED_GPUS 7 struct kfd_ioctl_get_process_apertures_args { struct kfd_process_device_apertures @@ -165,11 +170,11 @@ */ uint64_t kfd_process_device_apertures_
[gem5-dev] Change in gem5/gem5[develop]: gpu-compute: Ignore GPU kernel names
Kyle Roarty has uploaded this change for review. ( https://gem5-review.googlesource.com/c/public/gem5/+/46245 ) Change subject: gpu-compute: Ignore GPU kernel names .. gpu-compute: Ignore GPU kernel names ROCm 4 seems to have updated the akc, and the only real issue that has occurred is that we're no longer able to read kernel names in the same way as we were in ROCm 1.6. This patch removes the prior method of reading kernel names and gives all kernels a temporary name. Change-Id: I0040e0cf4cd35d6f56ded6a8acfb10c600bcc77a --- M src/gpu-compute/gpu_command_processor.cc 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/src/gpu-compute/gpu_command_processor.cc b/src/gpu-compute/gpu_command_processor.cc index 9bdd0b9..78b3235 100644 --- a/src/gpu-compute/gpu_command_processor.cc +++ b/src/gpu-compute/gpu_command_processor.cc @@ -171,7 +171,6 @@ DPRINTF(GPUCommandProc, "Machine code starts at addr: %#x\n", machine_code_addr); -Addr kern_name_addr(0); std::string kernel_name; /** @@ -184,10 +183,7 @@ * host memory. I have no idea what BLIT stands for. * */ if (akc.runtime_loader_kernel_symbol) { -virt_proxy.readBlob(akc.runtime_loader_kernel_symbol + 0x10, -(uint8_t*)&kern_name_addr, 0x8); - -virt_proxy.readString(kernel_name, kern_name_addr); +kernel_name = "Some kernel"; } else { kernel_name = "Blit kernel"; } -- To view, visit https://gem5-review.googlesource.com/c/public/gem5/+/46245 To unsubscribe, or for help writing mail filters, visit https://gem5-review.googlesource.com/settings Gerrit-Project: public/gem5 Gerrit-Branch: develop Gerrit-Change-Id: I0040e0cf4cd35d6f56ded6a8acfb10c600bcc77a Gerrit-Change-Number: 46245 Gerrit-PatchSet: 1 Gerrit-Owner: Kyle Roarty Gerrit-MessageType: newchange ___ gem5-dev mailing list -- gem5-dev@gem5.org To unsubscribe send an email to gem5-dev-le...@gem5.org %(web_page_url)slistinfo%(cgiext)s/%(_internal_name)s
[gem5-dev] Change in gem5/gem5[develop]: arch-x86,sim: (WIP) Workaround for sched_getaffinity
Kyle Roarty has uploaded this change for review. ( https://gem5-review.googlesource.com/c/public/gem5/+/46243 ) Change subject: arch-x86,sim: (WIP) Workaround for sched_getaffinity .. arch-x86,sim: (WIP) Workaround for sched_getaffinity sched_getaffinity is different from other syscalls in that the raw syscall returns the size of the cpumask being used to represent the CPU bit mask. Because of this, when a library (libnuma in this case) directly called sched_getaffinity and got a return value of 0, it errored out, thinking that there were no CPUs available. Currently the implementation just returns 1, and it's being used as a proof-of-concept for ROCm 4 support, as ROCm 4 support uses libnuma. Change-Id: Id95c919986cc98a411877056256604f57a29f0f9 --- M src/arch/x86/linux/syscall_tbl64.cc M src/sim/syscall_emul.cc M src/sim/syscall_emul.hh 3 files changed, 12 insertions(+), 1 deletion(-) diff --git a/src/arch/x86/linux/syscall_tbl64.cc b/src/arch/x86/linux/syscall_tbl64.cc index 94837cd..bb24f3d 100644 --- a/src/arch/x86/linux/syscall_tbl64.cc +++ b/src/arch/x86/linux/syscall_tbl64.cc @@ -244,7 +244,7 @@ { 201, "time", timeFunc }, { 202, "futex", futexFunc }, { 203, "sched_setaffinity", ignoreFunc }, -{ 204, "sched_getaffinity", ignoreFunc }, +{ 204, "sched_getaffinity", schedGetaffinityFunc }, { 205, "set_thread_area" }, { 206, "io_setup" }, { 207, "io_destroy" }, diff --git a/src/sim/syscall_emul.cc b/src/sim/syscall_emul.cc index bb8b42a..17a947a 100644 --- a/src/sim/syscall_emul.cc +++ b/src/sim/syscall_emul.cc @@ -1650,3 +1650,10 @@ return 0; } + +SyscallReturn +schedGetaffinityFunc(SyscallDesc *desc, ThreadContext *tc, + pid_t pid, size_t cpusetsize, Addr mask) +{ +return 1; +} diff --git a/src/sim/syscall_emul.hh b/src/sim/syscall_emul.hh index 54e92b2..83e11b2 100644 --- a/src/sim/syscall_emul.hh +++ b/src/sim/syscall_emul.hh @@ -367,6 +367,10 @@ SyscallReturn getsocknameFunc(SyscallDesc *desc, ThreadContext *tc, int tgt_fd, VPtr<> addrPtr, VPtr<> lenPtr); +// 
Target sched_getaffinity() handler +SyscallReturn schedGetaffinityFunc(SyscallDesc *desc, ThreadContext *tc, + pid_t pid, size_t cpusetsize, Addr mask); + /// Futex system call /// Implemented by Daniel Sanchez /// Used by printf's in multi-threaded apps -- To view, visit https://gem5-review.googlesource.com/c/public/gem5/+/46243 To unsubscribe, or for help writing mail filters, visit https://gem5-review.googlesource.com/settings Gerrit-Project: public/gem5 Gerrit-Branch: develop Gerrit-Change-Id: Id95c919986cc98a411877056256604f57a29f0f9 Gerrit-Change-Number: 46243 Gerrit-PatchSet: 1 Gerrit-Owner: Kyle Roarty Gerrit-MessageType: newchange ___ gem5-dev mailing list -- gem5-dev@gem5.org To unsubscribe send an email to gem5-dev-le...@gem5.org %(web_page_url)slistinfo%(cgiext)s/%(_internal_name)s
[gem5-dev] Change in gem5/gem5[develop]: arch-x86: build with getdents64 if system supports it
Kyle Roarty has uploaded this change for review. ( https://gem5-review.googlesource.com/c/public/gem5/+/46242 ) Change subject: arch-x86: build with getdents64 if system supports it .. arch-x86: build with getdents64 if system supports it This patch makes it so the getdents64 syscall is built in gem5 if the underlying host implements the syscall, similar to how the getdents syscall is implemented. The implementation for getdents64 already existed Change-Id: I73b22c8df8df994f3f720e848a7d4f8cd31d318e --- M src/arch/x86/linux/syscall_tbl32.cc M src/arch/x86/linux/syscall_tbl64.cc 2 files changed, 8 insertions(+), 0 deletions(-) diff --git a/src/arch/x86/linux/syscall_tbl32.cc b/src/arch/x86/linux/syscall_tbl32.cc index 50d0969..db70151 100644 --- a/src/arch/x86/linux/syscall_tbl32.cc +++ b/src/arch/x86/linux/syscall_tbl32.cc @@ -261,7 +261,11 @@ { 218, "mincore" }, { 219, "madvise", ignoreFunc }, { 220, "madvise1" }, +#if defined(SYS_getdents64) +{ 221, "getdents64", getdents64Func }, +#else { 221, "getdents64" }, +#endif { 222, "fcntl64" }, { 223, "unused" }, { 224, "gettid", gettidFunc }, diff --git a/src/arch/x86/linux/syscall_tbl64.cc b/src/arch/x86/linux/syscall_tbl64.cc index be82437..94837cd 100644 --- a/src/arch/x86/linux/syscall_tbl64.cc +++ b/src/arch/x86/linux/syscall_tbl64.cc @@ -257,7 +257,11 @@ { 214, "epoll_ctl_old" }, { 215, "epoll_wait_old" }, { 216, "remap_file_pages" }, +#if defined(SYS_getdents64) +{ 217, "getdents64", getdents64Func }, +#else { 217, "getdents64" }, +#endif { 218, "set_tid_address", setTidAddressFunc }, { 219, "restart_syscall" }, { 220, "semtimedop" }, -- To view, visit https://gem5-review.googlesource.com/c/public/gem5/+/46242 To unsubscribe, or for help writing mail filters, visit https://gem5-review.googlesource.com/settings Gerrit-Project: public/gem5 Gerrit-Branch: develop Gerrit-Change-Id: I73b22c8df8df994f3f720e848a7d4f8cd31d318e Gerrit-Change-Number: 46242 Gerrit-PatchSet: 1 Gerrit-Owner: Kyle Roarty Gerrit-MessageType: 
newchange ___ gem5-dev mailing list -- gem5-dev@gem5.org To unsubscribe send an email to gem5-dev-le...@gem5.org %(web_page_url)slistinfo%(cgiext)s/%(_internal_name)s
[gem5-dev] Change in gem5/gem5[develop]: gpu-compute: Change certain IOCTL errors to warnings
Kyle Roarty has uploaded this change for review. ( https://gem5-review.googlesource.com/c/public/gem5/+/46247 ) Change subject: gpu-compute: Change certain IOCTL errors to warnings .. gpu-compute: Change certain IOCTL errors to warnings There are certain IOCTL errors that were triggering with the change to ROCm 4, however they could be set to warnings without causing any errors in the program Change-Id: Ie0052267f3ccfbdbadb90249b6f19e6a1205f57e --- M src/gpu-compute/gpu_compute_driver.cc 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/gpu-compute/gpu_compute_driver.cc b/src/gpu-compute/gpu_compute_driver.cc index 7f8cc16..12e537c 100644 --- a/src/gpu-compute/gpu_compute_driver.cc +++ b/src/gpu-compute/gpu_compute_driver.cc @@ -417,7 +417,7 @@ TypedBufferArg args(ioc_buf); args.copyIn(virt_proxy); if (args->event_type != KFD_IOC_EVENT_SIGNAL) { -fatal("Signal events are only supported currently\n"); +warn("Signal events are only supported currently\n"); } else if (eventSlotIndex == SLOTS_PER_PAGE) { fatal("Signal event wasn't created; signal limit reached\n"); } @@ -508,8 +508,8 @@ "\tamdkfd wait for event %d\n", EventData->event_id); panic_if(ETable.count(EventData->event_id) == 0, "Event ID invalid, cannot set this event\n"); -panic_if(ETable[EventData->event_id].threadWaiting, - "Multiple threads waiting on the same event\n"); +if (ETable[EventData->event_id].threadWaiting) + warn("Multiple threads waiting on the same event\n"); if (ETable[EventData->event_id].setEvent) { // If event is already set, the event has already happened. // Just unset the event and dont put this thread to sleep. 
-- To view, visit https://gem5-review.googlesource.com/c/public/gem5/+/46247 To unsubscribe, or for help writing mail filters, visit https://gem5-review.googlesource.com/settings Gerrit-Project: public/gem5 Gerrit-Branch: develop Gerrit-Change-Id: Ie0052267f3ccfbdbadb90249b6f19e6a1205f57e Gerrit-Change-Number: 46247 Gerrit-PatchSet: 1 Gerrit-Owner: Kyle Roarty Gerrit-MessageType: newchange ___ gem5-dev mailing list -- gem5-dev@gem5.org To unsubscribe send an email to gem5-dev-le...@gem5.org %(web_page_url)slistinfo%(cgiext)s/%(_internal_name)s
[gem5-dev] Change in gem5/gem5[develop]: gpu-compute: Initialize GPUDriver member variables before use
Kyle Roarty has uploaded this change for review. ( https://gem5-review.googlesource.com/c/public/gem5/+/46248 ) Change subject: gpu-compute: Initialize GPUDriver member variables before use .. gpu-compute: Initialize GPUDriver member variables before use A few member variables weren't initialized, but we were assuming that they were 0 when first read. This explicitly sets those variables to 0. Change-Id: I2c840d361ed3a7d306e22dc7561a3870f1ef94a1 --- M src/gpu-compute/gpu_compute_driver.cc 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/gpu-compute/gpu_compute_driver.cc b/src/gpu-compute/gpu_compute_driver.cc index 12e537c..02f1de5 100644 --- a/src/gpu-compute/gpu_compute_driver.cc +++ b/src/gpu-compute/gpu_compute_driver.cc @@ -53,7 +53,8 @@ GPUComputeDriver::GPUComputeDriver(const Params &p) : EmulatedDriver(p), device(p.device), queueId(0), - isdGPU(p.isdGPU), gfxVersion(p.gfxVersion), dGPUPoolID(p.dGPUPoolID) + isdGPU(p.isdGPU), gfxVersion(p.gfxVersion), dGPUPoolID(p.dGPUPoolID), + eventPage(0), eventSlotIndex(0) { device->attachDriver(this); DPRINTF(GPUDriver, "Constructing KFD: device\n"); -- To view, visit https://gem5-review.googlesource.com/c/public/gem5/+/46248 To unsubscribe, or for help writing mail filters, visit https://gem5-review.googlesource.com/settings Gerrit-Project: public/gem5 Gerrit-Branch: develop Gerrit-Change-Id: I2c840d361ed3a7d306e22dc7561a3870f1ef94a1 Gerrit-Change-Number: 46248 Gerrit-PatchSet: 1 Gerrit-Owner: Kyle Roarty Gerrit-MessageType: newchange ___ gem5-dev mailing list -- gem5-dev@gem5.org To unsubscribe send an email to gem5-dev-le...@gem5.org %(web_page_url)slistinfo%(cgiext)s/%(_internal_name)s
[gem5-dev] Change in gem5/gem5[develop]: arch-x86: Ignore syscalls called in ROCm 4
Kyle Roarty has uploaded this change for review. ( https://gem5-review.googlesource.com/c/public/gem5/+/46241 ) Change subject: arch-x86: Ignore syscalls called in ROCm 4 .. arch-x86: Ignore syscalls called in ROCm 4 This patch ignores syscalls called by the ROCm 4 stack. Based on testing so far, these syscalls don't affect the correctness of programs that use ROCm 4. sched_yield gets changed to ignoreWarnOnceFunc, as it gets called significantly more in ROCm 4. Change-Id: I566b1d71d989c54bfc559d5b83790dff73a38b28 --- M src/arch/x86/linux/syscall_tbl64.cc 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/arch/x86/linux/syscall_tbl64.cc b/src/arch/x86/linux/syscall_tbl64.cc index 8630265..be82437 100644 --- a/src/arch/x86/linux/syscall_tbl64.cc +++ b/src/arch/x86/linux/syscall_tbl64.cc @@ -60,7 +60,7 @@ { 21, "access", ignoreFunc }, { 22, "pipe", pipeFunc }, { 23, "select", selectFunc }, -{ 24, "sched_yield", ignoreFunc }, +{ 24, "sched_yield", ignoreWarnOnceFunc }, { 25, "mremap", mremapFunc }, { 26, "msync" }, { 27, "mincore" }, @@ -111,7 +111,7 @@ { 72, "fcntl", fcntlFunc }, { 73, "flock" }, { 74, "fsync" }, -{ 75, "fdatasync" }, +{ 75, "fdatasync", ignoreFunc }, { 76, "truncate", truncateFunc }, { 77, "ftruncate", ftruncateFunc }, #if defined(SYS_getdents) @@ -171,7 +171,7 @@ { 128, "rt_sigtimedwait" }, { 129, "rt_sigqueueinfo" }, { 130, "rt_sigsuspend" }, -{ 131, "sigaltstack" }, +{ 131, "sigaltstack", ignoreFunc }, { 132, "utime" }, { 133, "mknod", mknodFunc }, { 134, "uselib" }, @@ -197,7 +197,7 @@ { 154, "modify_ldt" }, { 155, "pivot_root" }, { 156, "_sysctl" }, -{ 157, "prctl" }, +{ 157, "prctl", ignoreFunc }, { 158, "arch_prctl", archPrctlFunc }, { 159, "adjtimex" }, { 160, "setrlimit", ignoreFunc }, -- To view, visit https://gem5-review.googlesource.com/c/public/gem5/+/46241 To unsubscribe, or for help writing mail filters, visit https://gem5-review.googlesource.com/settings Gerrit-Project: public/gem5 Gerrit-Branch: develop 
Gerrit-Change-Id: I566b1d71d989c54bfc559d5b83790dff73a38b28 Gerrit-Change-Number: 46241 Gerrit-PatchSet: 1 Gerrit-Owner: Kyle Roarty Gerrit-MessageType: newchange ___ gem5-dev mailing list -- gem5-dev@gem5.org To unsubscribe send an email to gem5-dev-le...@gem5.org %(web_page_url)slistinfo%(cgiext)s/%(_internal_name)s
[gem5-dev] Change in gem5/gem5[develop]: util: Update GCN Dockerfile for ROCm 4
Kyle Roarty has uploaded this change for review. ( https://gem5-review.googlesource.com/c/public/gem5/+/46239 ) Change subject: util: Update GCN Dockerfile for ROCm 4 .. util: Update GCN Dockerfile for ROCm 4 This now installs ROCm 4 from source instead of ROCm 1.6. Change-Id: I380ca06e93d48475e93d18f69eb97756186772ab --- M util/dockerfiles/gcn-gpu/Dockerfile 1 file changed, 152 insertions(+), 144 deletions(-) diff --git a/util/dockerfiles/gcn-gpu/Dockerfile b/util/dockerfiles/gcn-gpu/Dockerfile index e5683ab..491c960 100644 --- a/util/dockerfiles/gcn-gpu/Dockerfile +++ b/util/dockerfiles/gcn-gpu/Dockerfile @@ -1,166 +1,174 @@ -FROM ubuntu:16.04 +# Copyright (c) 2020 The Regents of the University of California +# All Rights Reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are +# met: redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer; +# redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in the +# documentation and/or other materials provided with the distribution; +# neither the name of the copyright holders nor the names of its +# contributors may be used to endorse or promote products derived from +# this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT +# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +FROM ubuntu:20.04 +ENV DEBIAN_FRONTEND=noninteractive +RUN apt -y update +RUN apt -y upgrade +RUN apt -y install build-essential git m4 scons zlib1g zlib1g-dev \ +libprotobuf-dev protobuf-compiler libprotoc-dev libgoogle-perftools-dev \ +python3-dev python3-six python-is-python3 doxygen libboost-all-dev \ +libhdf5-serial-dev python3-pydot libpng-dev libelf-dev pkg-config -# Needed for add-apt-repository -RUN apt-get update && apt-get install -y --no-install-recommends \ -software-properties-common +# Requirements for ROCm +RUN apt -y install cmake mesa-common-dev libgflags-dev libgoogle-glog-dev -# Ubuntu 16.04 does not have a python package new enough for gem5, use a PPA -RUN add-apt-repository ppa:deadsnakes/ppa && apt-get update +RUN git clone https://github.com/RadeonOpenCompute/ROCT-Thunk-Interface.git &&\ +git -C /ROCT-Thunk-Interface/ checkout roc-4.0.x && \ +mkdir -p /ROCT-Thunk-Interface/build -# Should be minimal needed packages -RUN apt-get update && apt-get install -y --no-install-recommends \ -findutils \ -file \ -libunwind8 \ -libunwind-dev \ -pkg-config \ -build-essential \ -gcc-multilib \ -g++-multilib \ -git \ -ca-certificates \ -m4 \ -zlib1g \ -zlib1g-dev \ -libprotobuf-dev \ -protobuf-compiler \ -libprotoc-dev \ -libgoogle-perftools-dev \ -python-yaml \ -python3.9 \ -python3.9-dev \ -python3.9-distutils \ -wget \ -libpci3 \ -libelf1 \ -libelf-dev \ -cmake \ -openssl \ -libssl-dev \ -libboost-filesystem-dev \ 
-libboost-system-dev \ -libboost-dev \ -libpng12-dev \ -gdb +WORKDIR /ROCT-Thunk-Interface/build +RUN cmake -DCMAKE_BUILD_TYPE=Debug -DCMAKE_INSTALL_PREFIX=/opt/rocm .. && \ +make -j$(nproc) && make install +WORKDIR / -# Use python 3.9 by default -RUN update-alternatives --install /usr/bin/python3 python3 /usr/bin/python3.9 1 +# There was no rocm-4.0.x tag at the time, there was even a github post +# stating to use rocm-3.10.x +RUN git clone https://github.com/RadeonOpenCompute/ROCR-Runtime.git && \ +git -C /ROCR-Runtime/ checkout rocm-3.10.x && \ +mkdir -p /ROCR-Runtime/src/build -# Setuptools is needed for cmake for ROCm build. Install using pip. -# Instructions to install PIP from https://pypi.org/project/pip/ -RUN wget https://bootstrap.pypa.io/get-pip.py -qO get-pip.py -RUN python3 get-pip.py -RUN pip install -U setuptools scons==3.1.2 six +WORKDIR /ROCR-Runtime/src/build +# need MEMFD_CREATE=OFF as MEMFD_CREATE syscall isn't implemented +RUN cmake -DIMAGE_SUPPORT=OFF -DHAVE_MEMFD_CREATE=OFF -DCMAKE_BUILD_TYPE=Debug\ +-DCMAK
[gem5-dev] Change in gem5/gem5[develop]: configs,gpu-compute: Add render driver needed for ROCm 4
Kyle Roarty has uploaded this change for review. ( https://gem5-review.googlesource.com/c/public/gem5/+/46244 ) Change subject: configs,gpu-compute: Add render driver needed for ROCm 4 .. configs,gpu-compute: Add render driver needed for ROCm 4 ROCm 4 utilizes the render driver located at /dev/dri/renderDXXX. This patch implements a very simple driver that just returns a file descriptor when opened, as testing has shown that's all that's needed Change-Id: I65602346cbf17b2dc80e114046ebf5c9830a1507 --- M configs/example/apu_se.py M configs/example/hsaTopology.py M src/gpu-compute/GPU.py M src/gpu-compute/SConscript A src/gpu-compute/gpu_render_driver.cc A src/gpu-compute/gpu_render_driver.hh 6 files changed, 49 insertions(+), 1 deletion(-) diff --git a/configs/example/apu_se.py b/configs/example/apu_se.py index f779df3..b9e1e7c 100644 --- a/configs/example/apu_se.py +++ b/configs/example/apu_se.py @@ -436,6 +436,8 @@ gfxVersion = args.gfx_version, dGPUPoolID = 1, m_type = args.m_type) +render_driver = GPURenderDriver(filename = 'dri/renderD128') + # Creating the GPU kernel launching components: that is the HSA # packet processor (HSAPP), GPU command processor (CP), and the # dispatcher. 
@@ -498,7 +500,8 @@ "HSA_ENABLE_SDMA=0"] process = Process(executable = executable, cmd = [args.cmd] - + args.options.split(), drivers = [gpu_driver], env = env) + + args.options.split(), + drivers = [gpu_driver, render_driver], env = env) for cpu in cpu_list: cpu.createThreads() diff --git a/configs/example/hsaTopology.py b/configs/example/hsaTopology.py index 78fe1f7..b77d1c1 100644 --- a/configs/example/hsaTopology.py +++ b/configs/example/hsaTopology.py @@ -373,6 +373,7 @@ 'vendor_id 4098\n' + \ 'device_id 29440\n' + \ 'location_id 512\n' + \ +'drm_render_minor 128\n'+ \ 'max_engine_clk_fcompute %s\n' \ % int(toFrequency(options.gpu_clock) / 1e6) + \ 'local_mem_size 4294967296\n' + \ @@ -446,6 +447,7 @@ 'vendor_id 4098\n' + \ 'device_id 39028\n' + \ 'location_id 8\n' + \ +'drm_render_minor 128\n'+ \ 'max_engine_clk_fcompute %s\n' \ % int(toFrequency(options.gpu_clock) / 1e6) + \ 'local_mem_size 0\n'+ \ diff --git a/src/gpu-compute/GPU.py b/src/gpu-compute/GPU.py index 579c84b..ace83a5 100644 --- a/src/gpu-compute/GPU.py +++ b/src/gpu-compute/GPU.py @@ -254,6 +254,10 @@ # default value: 5/C_RO_S (only allow caching in GL2 for read. Shared) m_type = Param.Int("Default MTYPE for cache. 
Valid values between 0-7"); +class GPURenderDriver(EmulatedDriver): +type = 'GPURenderDriver' +cxx_header = 'gpu-compute/gpu_render_driver.hh' + class GPUDispatcher(SimObject): type = 'GPUDispatcher' cxx_header = 'gpu-compute/dispatcher.hh' diff --git a/src/gpu-compute/SConscript b/src/gpu-compute/SConscript index adb9b0e..ae0bfab 100644 --- a/src/gpu-compute/SConscript +++ b/src/gpu-compute/SConscript @@ -52,6 +52,7 @@ Source('gpu_compute_driver.cc') Source('gpu_dyn_inst.cc') Source('gpu_exec_context.cc') +Source('gpu_render_driver.cc') Source('gpu_static_inst.cc') Source('gpu_tlb.cc') Source('lds_state.cc') diff --git a/src/gpu-compute/gpu_render_driver.cc b/src/gpu-compute/gpu_render_driver.cc new file mode 100644 index 000..9d9cbd2 --- /dev/null +++ b/src/gpu-compute/gpu_render_driver.cc @@ -0,0 +1,17 @@ +#include "gpu-compute/gpu_render_driver.hh" + +#include "params/GPURenderDriver.hh" +#include "sim/fd_entry.hh" + +GPURenderDriver::GPURenderDriver(const GPURenderDriverParams &p) +: EmulatedDriver(p) +{ +} + +int GPURenderDriver::open(ThreadContext *tc, int mode, int flags) +{ +auto process = tc->getProcessPtr(); +auto device_fd_entry = std::make_shared(this, filename); +int tgt_fd = process->fds->allocFD(device_fd_entry); +return tgt_fd; +} diff --git a/src/gpu-compute/gpu_render_driver.hh b/src/gpu-compute/gpu_render_driver.hh new file mode 100644 index 000..46d1b8d --- /dev/null +++ b/src/gpu-compute/gpu_render_driver.hh @@ -0,0 +1,21 @@ +#ifndef __GPU_COMPUTE_GPU_RENDER_DRIVER_HH__ +#define __GPU_COMPUTE_GPU_RENDER
[gem5-dev] Change in gem5/gem5[develop]: configs: Add mem_banks to Carrizo topology
Kyle Roarty has uploaded this change for review. ( https://gem5-review.googlesource.com/c/public/gem5/+/46240 ) Change subject: configs: Add mem_banks to Carrizo topology .. configs: Add mem_banks to Carrizo topology ROCm 4 iterates through the mem_banks to find an appropriate place to allocate memory. Previously, Carrizo didn't have any mem_banks, which resulted in the ROCm 4 runtime erroring out, as it didn't know where to allocate memory. The implementation is fairly similar to the implementation used for the Fiji or Vega configs Change-Id: I5bb4e89657d44c6cb690fd224ee1bf1d4d6cf2a5 --- M configs/example/hsaTopology.py 1 file changed, 15 insertions(+), 2 deletions(-) diff --git a/configs/example/hsaTopology.py b/configs/example/hsaTopology.py index 51585de..78fe1f7 100644 --- a/configs/example/hsaTopology.py +++ b/configs/example/hsaTopology.py @@ -36,7 +36,7 @@ from os.path import join as joinpath from os.path import isdir from shutil import rmtree, copyfile -from m5.util.convert import toFrequency +from m5.util.convert import toFrequency, toMemorySize def file_append(path, contents): with open(joinpath(*path), 'a') as f: @@ -422,12 +422,14 @@ # must have marketing name file_append((node_dir, 'name'), 'Carrizo\n') +mem_banks_cnt = 1 + # populate global node properties # NOTE: SIMD count triggers a valid GPU agent creation node_prop = 'cpu_cores_count %s\n' % options.num_cpus + \ 'simd_count %s\n' \ % (options.num_compute_units * options.simds_per_cu)+ \ -'mem_banks_count 0\n' + \ +'mem_banks_count %s\n' % mem_banks_cnt + \ 'caches_count 0\n' + \ 'io_links_count 0\n'+ \ 'cpu_core_id_base 16\n' + \ @@ -453,3 +455,14 @@ % int(toFrequency(options.CPUClock) / 1e6) file_append((node_dir, 'properties'), node_prop) + +for i in range(mem_banks_cnt): +mem_dir = joinpath(node_dir, f'mem_banks/{i}') +remake_dir(mem_dir) + +mem_prop = f'heap_type 0\n' + \ + f'size_in_bytes {toMemorySize(options.mem_size)}'+ \ + f'flags 0\n' + \ + f'width 64\n'+ \ + f'mem_clk_max 1600\n' 
+file_append((mem_dir, 'properties'), mem_prop) -- To view, visit https://gem5-review.googlesource.com/c/public/gem5/+/46240 To unsubscribe, or for help writing mail filters, visit https://gem5-review.googlesource.com/settings Gerrit-Project: public/gem5 Gerrit-Branch: develop Gerrit-Change-Id: I5bb4e89657d44c6cb690fd224ee1bf1d4d6cf2a5 Gerrit-Change-Number: 46240 Gerrit-PatchSet: 1 Gerrit-Owner: Kyle Roarty Gerrit-MessageType: newchange ___ gem5-dev mailing list -- gem5-dev@gem5.org To unsubscribe send an email to gem5-dev-le...@gem5.org %(web_page_url)slistinfo%(cgiext)s/%(_internal_name)s
[gem5-dev] Change in gem5/gem5[develop]: arch-gcn3,arch-vega,gpu-compute: Move request counters
Kyle Roarty has uploaded this change for review. ( https://gem5-review.googlesource.com/c/public/gem5/+/45347 ) Change subject: arch-gcn3,arch-vega,gpu-compute: Move request counters .. arch-gcn3,arch-vega,gpu-compute: Move request counters When the Vega ISA got committed, it lacked the request counter tracking for memory requests that existed in the GCN3 code. Instead of copying over the same lines from the GCN3 code to the Vega code, this commit makes the various memory pipelines handle updating the request counter information instead, as every memory instruction calls a memory pipeline. This commit also adds an issueRequest in scalar_memory_pipeline, as previously, the gpuDynInsts were explicitly placed in the queue of issuedRequests. Change-Id: I5140d3b2f12be582f2ae9ff7c433167aeec5b68e --- M src/arch/amdgpu/gcn3/insts/instructions.cc M src/arch/amdgpu/vega/insts/instructions.cc M src/gpu-compute/global_memory_pipeline.cc M src/gpu-compute/local_memory_pipeline.cc M src/gpu-compute/scalar_memory_pipeline.cc M src/gpu-compute/scalar_memory_pipeline.hh 6 files changed, 82 insertions(+), 408 deletions(-) diff --git a/src/arch/amdgpu/gcn3/insts/instructions.cc b/src/arch/amdgpu/gcn3/insts/instructions.cc index a5f28e3..a51354e 100644 --- a/src/arch/amdgpu/gcn3/insts/instructions.cc +++ b/src/arch/amdgpu/gcn3/insts/instructions.cc @@ -4494,12 +4494,7 @@ calcAddr(gpuDynInst, addr, offset); gpuDynInst->computeUnit()->scalarMemoryPipe -.getGMReqFIFO().push(gpuDynInst); - -wf->scalarRdGmReqsInPipe--; -wf->scalarOutstandingReqsRdGm++; -gpuDynInst->wavefront()->outstandingReqs++; -gpuDynInst->wavefront()->validateRequestCounters(); +.issueRequest(gpuDynInst); } void @@ -4553,12 +4548,7 @@ calcAddr(gpuDynInst, addr, offset); gpuDynInst->computeUnit()->scalarMemoryPipe. 
-getGMReqFIFO().push(gpuDynInst); - -wf->scalarRdGmReqsInPipe--; -wf->scalarOutstandingReqsRdGm++; -gpuDynInst->wavefront()->outstandingReqs++; -gpuDynInst->wavefront()->validateRequestCounters(); +issueRequest(gpuDynInst); } void @@ -4610,12 +4600,7 @@ calcAddr(gpuDynInst, addr, offset); gpuDynInst->computeUnit()->scalarMemoryPipe. -getGMReqFIFO().push(gpuDynInst); - -wf->scalarRdGmReqsInPipe--; -wf->scalarOutstandingReqsRdGm++; -gpuDynInst->wavefront()->outstandingReqs++; -gpuDynInst->wavefront()->validateRequestCounters(); +issueRequest(gpuDynInst); } void @@ -4667,12 +4652,7 @@ calcAddr(gpuDynInst, addr, offset); gpuDynInst->computeUnit()->scalarMemoryPipe. -getGMReqFIFO().push(gpuDynInst); - -wf->scalarRdGmReqsInPipe--; -wf->scalarOutstandingReqsRdGm++; -gpuDynInst->wavefront()->outstandingReqs++; -gpuDynInst->wavefront()->validateRequestCounters(); +issueRequest(gpuDynInst); } void @@ -4724,12 +4704,7 @@ calcAddr(gpuDynInst, addr, offset); gpuDynInst->computeUnit()->scalarMemoryPipe. 
-getGMReqFIFO().push(gpuDynInst); - -wf->scalarRdGmReqsInPipe--; -wf->scalarOutstandingReqsRdGm++; -gpuDynInst->wavefront()->outstandingReqs++; -gpuDynInst->wavefront()->validateRequestCounters(); +issueRequest(gpuDynInst); } void @@ -4782,12 +4757,7 @@ calcAddr(gpuDynInst, rsrcDesc, offset); gpuDynInst->computeUnit()->scalarMemoryPipe -.getGMReqFIFO().push(gpuDynInst); - -wf->scalarRdGmReqsInPipe--; -wf->scalarOutstandingReqsRdGm++; -gpuDynInst->wavefront()->outstandingReqs++; -gpuDynInst->wavefront()->validateRequestCounters(); +.issueRequest(gpuDynInst); } // execute void @@ -4841,12 +4811,7 @@ calcAddr(gpuDynInst, rsrcDesc, offset); gpuDynInst->computeUnit()->scalarMemoryPipe -.getGMReqFIFO().push(gpuDynInst); - -wf->scalarRdGmReqsInPipe--; -wf->scalarOutstandingReqsRdGm++; -gpuDynInst->wavefront()->outstandingReqs++; -gpuDynInst->wavefront()->validateRequestCounters(); +.issueRequest(gpuDynInst); } // execute void @@ -4900,12 +4865,7 @@ calcAddr(gpuDynInst, rsrcDesc, offset); gpuDynInst->computeUnit()->scalarMemoryPipe -.getGMReqFIFO().push(gpuDynInst); - -wf->scalarRdGmReqsInPipe--; -wf->scalarOutstandingReqsRdGm++; -gpuDynInst->wavefront()->outstandingReqs++; -gpuDynInst->wavefront()->validateRequestCounters(); +.issueRequest(gpuDynInst); } // execute void @@ -4959,12 +4919,7 @@ calcAddr(gpuDynInst, rsrcDesc, offset); gpuDynInst->computeUnit()->scalarMemoryPipe -.getGMRe
[gem5-dev] Change in gem5/gem5[develop]: arch-gcn3,gpu-compute: Set gpuDynInst exec_mask before use
Kyle Roarty has uploaded this change for review. ( https://gem5-review.googlesource.com/c/public/gem5/+/45346 ) Change subject: arch-gcn3,gpu-compute: Set gpuDynInst exec_mask before use .. arch-gcn3,gpu-compute: Set gpuDynInst exec_mask before use vector_register_file uses the exec_mask of a memory instruction in order to determine if it should mark a register as in-use or not. Previously, the exec_mask of memory instructions was only set on execution of that instruction, which occurs after the code in vector_register_file. This led to the code reading potentially garbage data, leading to a scenario where a register would be marked used when it shouldn't be. This fix sets the exec_mask of memory instructions in schedule_stage, which works because the only time the wavefront execMask() is updated is on an instruction executing, and we know the previous instruction will have executed by the time schedule_stage executes, due to the order the pipeline is executed in. This also undoes part of a patch from last year (62ec973) which treated the symptom of accidental register allocation, without preventing the registers from being allocated in the first place. 
This patch also removes now redundant code that sets the exec_mask in instructions.cc for memory instructions Change-Id: Idabd3502764fb06133ac2458606c1aaf6f04 --- M src/arch/amdgpu/gcn3/insts/instructions.cc M src/gpu-compute/schedule_stage.cc 2 files changed, 29 insertions(+), 155 deletions(-) diff --git a/src/arch/amdgpu/gcn3/insts/instructions.cc b/src/arch/amdgpu/gcn3/insts/instructions.cc index 4ae4c29..a5f28e3 100644 --- a/src/arch/amdgpu/gcn3/insts/instructions.cc +++ b/src/arch/amdgpu/gcn3/insts/instructions.cc @@ -31240,7 +31240,6 @@ { Wavefront *wf = gpuDynInst->wavefront(); gpuDynInst->execUnitId = wf->execUnitId; -gpuDynInst->exec_mask = wf->execMask(); gpuDynInst->latency.init(gpuDynInst->computeUnit()); gpuDynInst->latency.set( gpuDynInst->computeUnit()->cyclesToTicks(Cycles(24))); @@ -31301,7 +31300,6 @@ { Wavefront *wf = gpuDynInst->wavefront(); gpuDynInst->execUnitId = wf->execUnitId; -gpuDynInst->exec_mask = wf->execMask(); gpuDynInst->latency.init(gpuDynInst->computeUnit()); gpuDynInst->latency.set( gpuDynInst->computeUnit()->cyclesToTicks(Cycles(24))); @@ -31365,7 +31363,6 @@ { Wavefront *wf = gpuDynInst->wavefront(); gpuDynInst->execUnitId = wf->execUnitId; -gpuDynInst->exec_mask = wf->execMask(); gpuDynInst->latency.init(gpuDynInst->computeUnit()); gpuDynInst->latency.set( gpuDynInst->computeUnit()->cyclesToTicks(Cycles(24))); @@ -31545,7 +31542,6 @@ { Wavefront *wf = gpuDynInst->wavefront(); gpuDynInst->execUnitId = wf->execUnitId; -gpuDynInst->exec_mask = wf->execMask(); gpuDynInst->latency.init(gpuDynInst->computeUnit()); gpuDynInst->latency.set( gpuDynInst->computeUnit()->cyclesToTicks(Cycles(24))); @@ -31605,7 +31601,6 @@ { Wavefront *wf = gpuDynInst->wavefront(); gpuDynInst->execUnitId = wf->execUnitId; -gpuDynInst->exec_mask = wf->execMask(); gpuDynInst->latency.init(gpuDynInst->computeUnit()); gpuDynInst->latency.set( gpuDynInst->computeUnit()->cyclesToTicks(Cycles(24))); @@ -32070,7 +32065,6 @@ { Wavefront *wf = 
gpuDynInst->wavefront(); gpuDynInst->execUnitId = wf->execUnitId; -gpuDynInst->exec_mask = wf->execMask(); gpuDynInst->latency.init(gpuDynInst->computeUnit()); gpuDynInst->latency.set( gpuDynInst->computeUnit()->cyclesToTicks(Cycles(24))); @@ -32132,7 +32126,6 @@ { Wavefront *wf = gpuDynInst->wavefront(); gpuDynInst->execUnitId = wf->execUnitId; -gpuDynInst->exec_mask = wf->execMask(); gpuDynInst->latency.init(gpuDynInst->computeUnit()); gpuDynInst->latency.set( gpuDynInst->computeUnit()->cyclesToTicks(Cycles(24))); @@ -32197,7 +32190,6 @@ { Wavefront *wf = gpuDynInst->wavefront(); gpuDynInst->execUnitId = wf->execUnitId; -gpuDynInst->exec_mask = wf->execMask(); gpuDynInst->latency.init(gpuDynInst->computeUnit()); gpuDynInst->latency.set( gpuDynInst->computeUnit()->cyclesToTicks(Cycles(24))); @@ -32281,7 +32273,6 @@ { Wavefront *wf = gpuDynInst->wavefront(); gpuDynInst->execUnitId = wf->execUnitId; -gpuDynInst->exec_mask = wf->execMask(); gpuDynInst->latency.init(gpuDynInst->computeUnit()); gpuDynInst->latency.set( gpuDynInst->computeUnit()->cyclesToTicks(Cycles(24))); @@ -32362,7 +32353,6 @@ { Wavefront *wf = gpuDynInst->wavefront();
[gem5-dev] Change in gem5/gem5[develop]: arch-gcn3: Read registers in execute instead of initiateAcc
Kyle Roarty has uploaded this change for review. ( https://gem5-review.googlesource.com/c/public/gem5/+/45345 ) Change subject: arch-gcn3: Read registers in execute instead of initiateAcc .. arch-gcn3: Read registers in execute instead of initiateAcc Certain memory writes were reading their registers in initiateAcc, which lead to scenarios where a subsequent instruction would execute, clobbering the value in that register before the memory writes' initiateAcc method was called, causing the memory write to read wrong data. This patch moves all register reads to execute, preventing the above scenario from happening. Change-Id: Iee107c19e4b82c2e172bf2d6cc95b79983a43d83 --- M src/arch/amdgpu/gcn3/insts/instructions.cc 1 file changed, 116 insertions(+), 125 deletions(-) diff --git a/src/arch/amdgpu/gcn3/insts/instructions.cc b/src/arch/amdgpu/gcn3/insts/instructions.cc index 8cadff7..4ae4c29 100644 --- a/src/arch/amdgpu/gcn3/insts/instructions.cc +++ b/src/arch/amdgpu/gcn3/insts/instructions.cc @@ -5065,8 +5065,13 @@ gpuDynInst->latency.set(gpuDynInst->computeUnit()->clockPeriod()); ScalarRegU32 offset(0); ConstScalarOperandU64 addr(gpuDynInst, instData.SBASE << 1); +ConstScalarOperandU32 sdata(gpuDynInst, instData.SDATA); addr.read(); +sdata.read(); + +std::memcpy((void*)gpuDynInst->scalar_data, sdata.rawDataPtr(), +sizeof(ScalarRegU32)); if (instData.IMM) { offset = extData.OFFSET; @@ -5090,10 +5095,6 @@ void Inst_SMEM__S_STORE_DWORD::initiateAcc(GPUDynInstPtr gpuDynInst) { -ConstScalarOperandU32 sdata(gpuDynInst, instData.SDATA); -sdata.read(); -std::memcpy((void*)gpuDynInst->scalar_data, sdata.rawDataPtr(), -sizeof(ScalarRegU32)); initMemWrite<1>(gpuDynInst); } // initiateAcc @@ -5124,8 +5125,13 @@ gpuDynInst->latency.set(gpuDynInst->computeUnit()->clockPeriod()); ScalarRegU32 offset(0); ConstScalarOperandU64 addr(gpuDynInst, instData.SBASE << 1); +ConstScalarOperandU64 sdata(gpuDynInst, instData.SDATA); addr.read(); +sdata.read(); + 
+std::memcpy((void*)gpuDynInst->scalar_data, sdata.rawDataPtr(), +sizeof(ScalarRegU64)); if (instData.IMM) { offset = extData.OFFSET; @@ -5149,10 +5155,6 @@ void Inst_SMEM__S_STORE_DWORDX2::initiateAcc(GPUDynInstPtr gpuDynInst) { -ConstScalarOperandU64 sdata(gpuDynInst, instData.SDATA); -sdata.read(); -std::memcpy((void*)gpuDynInst->scalar_data, sdata.rawDataPtr(), -sizeof(ScalarRegU64)); initMemWrite<2>(gpuDynInst); } // initiateAcc @@ -5183,8 +5185,13 @@ gpuDynInst->latency.set(gpuDynInst->computeUnit()->clockPeriod()); ScalarRegU32 offset(0); ConstScalarOperandU64 addr(gpuDynInst, instData.SBASE << 1); +ConstScalarOperandU128 sdata(gpuDynInst, instData.SDATA); addr.read(); +sdata.read(); + +std::memcpy((void*)gpuDynInst->scalar_data, sdata.rawDataPtr(), +4 * sizeof(ScalarRegU32)); if (instData.IMM) { offset = extData.OFFSET; @@ -5208,10 +5215,6 @@ void Inst_SMEM__S_STORE_DWORDX4::initiateAcc(GPUDynInstPtr gpuDynInst) { -ConstScalarOperandU128 sdata(gpuDynInst, instData.SDATA); -sdata.read(); -std::memcpy((void*)gpuDynInst->scalar_data, sdata.rawDataPtr(), -4 * sizeof(ScalarRegU32)); initMemWrite<4>(gpuDynInst); } // initiateAcc @@ -35743,9 +35746,18 @@ ConstVecOperandU32 addr1(gpuDynInst, extData.VADDR + 1); ConstScalarOperandU128 rsrcDesc(gpuDynInst, extData.SRSRC * 4); ConstScalarOperandU32 offset(gpuDynInst, extData.SOFFSET); +ConstVecOperandI8 data(gpuDynInst, extData.VDATA); rsrcDesc.read(); offset.read(); +data.read(); + +for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { +if (gpuDynInst->exec_mask[lane]) { +(reinterpret_cast(gpuDynInst->d_data))[lane] += data[lane]; +} +} int inst_offset = instData.OFFSET; @@ -35790,16 +35802,6 @@ void Inst_MUBUF__BUFFER_STORE_BYTE::initiateAcc(GPUDynInstPtr gpuDynInst) { -ConstVecOperandI8 data(gpuDynInst, extData.VDATA); -data.read(); - -for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { -if (gpuDynInst->exec_mask[lane]) { -(reinterpret_cast(gpuDynInst->d_data))[lane] -= data[lane]; -} -} - 
initMemWrite(gpuDynInst); } // initiateAcc @@ -35839,9 +35841,18 @@ ConstVecOperandU32 addr1(gpuDynInst, extData.VADDR + 1); ConstScalarOperandU128 rsrcDesc(gpuDynInst, extData.SRSRC * 4);
[gem5-dev] Change in gem5/gem5[develop]: gpu-compute: Fix scalar register ready check
Kyle Roarty has uploaded this change for review. ( https://gem5-review.googlesource.com/c/public/gem5/+/44045 ) Change subject: gpu-compute: Fix scalar register ready check .. gpu-compute: Fix scalar register ready check Replaces some curly braces that were accidentally removed causing the function to return false even when it shouldn't Change-Id: I15fb4167468c8e3dd1107f1ca3dc98c48df4611b --- M src/gpu-compute/scalar_register_file.cc 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/gpu-compute/scalar_register_file.cc b/src/gpu-compute/scalar_register_file.cc index 5fa7a62..14ea3fe 100644 --- a/src/gpu-compute/scalar_register_file.cc +++ b/src/gpu-compute/scalar_register_file.cc @@ -52,11 +52,12 @@ { for (const auto& srcScalarOp : ii->srcScalarRegOperands()) { for (const auto& physIdx : srcScalarOp.physIndices()) { -if (regBusy(physIdx)) +if (regBusy(physIdx)) { DPRINTF(GPUSRF, "RAW stall: WV[%d]: %s: physReg[%d]\n", w->wfDynId, ii->disassemble(), physIdx); w->stats.numTimesBlockedDueRAWDependencies++; return false; +} } } -- To view, visit https://gem5-review.googlesource.com/c/public/gem5/+/44045 To unsubscribe, or for help writing mail filters, visit https://gem5-review.googlesource.com/settings Gerrit-Project: public/gem5 Gerrit-Branch: develop Gerrit-Change-Id: I15fb4167468c8e3dd1107f1ca3dc98c48df4611b Gerrit-Change-Number: 44045 Gerrit-PatchSet: 1 Gerrit-Owner: Kyle Roarty Gerrit-MessageType: newchange ___ gem5-dev mailing list -- gem5-dev@gem5.org To unsubscribe send an email to gem5-dev-le...@gem5.org %(web_page_url)slistinfo%(cgiext)s/%(_internal_name)s
[gem5-dev] Change in gem5/gem5[develop]: dev-hsa,gpu-compute: Fix override for updateHsaSignal
Kyle Roarty has uploaded this change for review. ( https://gem5-review.googlesource.com/c/public/gem5/+/44046 ) Change subject: dev-hsa,gpu-compute: Fix override for updateHsaSignal .. dev-hsa,gpu-compute: Fix override for updateHsaSignal Change 965ad12 removed a parameter from the updateHsaSignal function. Change 25e8a14 added the parameter back, but only for the derived class, breaking the override. This patch adds that parameter back to the base class, fixing the override. Change-Id: Id1e96e29ca4be7f3ce244bac83a112e3250812d1 --- M src/dev/hsa/hsa_device.hh M src/gpu-compute/gpu_command_processor.hh 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/src/dev/hsa/hsa_device.hh b/src/dev/hsa/hsa_device.hh index 157c459..d722a5d 100644 --- a/src/dev/hsa/hsa_device.hh +++ b/src/dev/hsa/hsa_device.hh @@ -101,7 +101,8 @@ fatal("%s does not need HSA driver\n", name()); } virtual void -updateHsaSignal(Addr signal_handle, uint64_t signal_value) +updateHsaSignal(Addr signal_handle, uint64_t signal_value, +HsaSignalCallbackFunction function = [ = ] (const uint64_t &) { }) { fatal("%s does not have HSA signal update functionality.\n", name()); } diff --git a/src/gpu-compute/gpu_command_processor.hh b/src/gpu-compute/gpu_command_processor.hh index c78ae0b..67cda7d 100644 --- a/src/gpu-compute/gpu_command_processor.hh +++ b/src/gpu-compute/gpu_command_processor.hh @@ -90,7 +90,7 @@ void updateHsaSignal(Addr signal_handle, uint64_t signal_value, HsaSignalCallbackFunction function = -[] (const uint64_t &) { }); +[] (const uint64_t &) { }) override; uint64_t functionalReadHsaSignal(Addr signal_handle) override; -- To view, visit https://gem5-review.googlesource.com/c/public/gem5/+/44046 To unsubscribe, or for help writing mail filters, visit https://gem5-review.googlesource.com/settings Gerrit-Project: public/gem5 Gerrit-Branch: develop Gerrit-Change-Id: Id1e96e29ca4be7f3ce244bac83a112e3250812d1 Gerrit-Change-Number: 44046 Gerrit-PatchSet: 1 Gerrit-Owner: Kyle Roarty 
Gerrit-MessageType: newchange ___ gem5-dev mailing list -- gem5-dev@gem5.org To unsubscribe send an email to gem5-dev-le...@gem5.org %(web_page_url)slistinfo%(cgiext)s/%(_internal_name)s
[gem5-dev] Change in gem5/gem5[develop]: arch-x86: Add insts used in newer libstdc++ rehashing
Kyle Roarty has submitted this change. ( https://gem5-review.googlesource.com/c/public/gem5/+/42443 ) Change subject: arch-x86: Add insts used in newer libstdc++ rehashing .. arch-x86: Add insts used in newer libstdc++ rehashing For newer versions of libstdc++ (Like the one in the ubuntu-20.04_all-dependencies docker image), the variables used when rehashing, e.g., std::unordered_maps have been extended. This resulted in the rehashing function using different, unimplemented, instructions. Because these instructions are unimplemented, it resulted in a std::bad_alloc exception when inserting into an unordered_map. This patchset implements the following instructions: FCOMI, a floating point comparison instruction, using the compfp microop. The implementation mirrors that of the FUCOMI instruction (another floating point comparison instruction). FSUBRP, a reverse subtraction instruction, is implemented using the subfp microop like the FSUBP does, but with the operands flipped accordingly. FISTP, an instruction to convert a float to int and then store, is implemented by using a conversion microop (cvtf_d2i) and then a store. The cvtf_d2i microop is re-written to handle multiple data sizes, as is required by the FISTP instruction. 
Change-Id: I85c57acace1f7a547b0a97ec3a0f0500909c5d2a Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/42443 Reviewed-by: Gabe Black Maintainer: Gabe Black Tested-by: kokoro --- M src/arch/x86/insts/microfpop.hh M src/arch/x86/isa/decoder/x87.isa M src/arch/x86/isa/insts/x87/arithmetic/subtraction.py M src/arch/x86/isa/insts/x87/compare_and_test/floating_point_ordered_compare.py M src/arch/x86/isa/insts/x87/data_transfer_and_conversion/convert_and_load_or_store_integer.py M src/arch/x86/isa/microops/fpop.isa 6 files changed, 37 insertions(+), 13 deletions(-) Approvals: Gabe Black: Looks good to me, approved; Looks good to me, approved kokoro: Regressions pass diff --git a/src/arch/x86/insts/microfpop.hh b/src/arch/x86/insts/microfpop.hh index e9d32da..245a899 100644 --- a/src/arch/x86/insts/microfpop.hh +++ b/src/arch/x86/insts/microfpop.hh @@ -54,6 +54,7 @@ const RegIndex dest; const uint8_t dataSize; const int8_t spm; +RegIndex foldOBit; // Constructor FpOp(ExtMachInst _machInst, @@ -66,7 +67,9 @@ __opClass), src1(_src1.index()), src2(_src2.index()), dest(_dest.index()), dataSize(_dataSize), spm(_spm) -{} +{ +foldOBit = (dataSize == 1 && !_machInst.rex.present) ? 
1 << 6 : 0; +} std::string generateDisassembly( Addr pc, const Loader::SymbolTable *symtab) const override; diff --git a/src/arch/x86/isa/decoder/x87.isa b/src/arch/x86/isa/decoder/x87.isa index 258fcb5..e7f1747 100644 --- a/src/arch/x86/isa/decoder/x87.isa +++ b/src/arch/x86/isa/decoder/x87.isa @@ -185,7 +185,7 @@ } 0x3: decode MODRM_MOD { 0x3: fcmovnu(); -default: fistp(); +default: Inst::FISTP(Md); } 0x4: decode MODRM_MOD { 0x3: decode MODRM_RM { @@ -203,7 +203,7 @@ default: Inst::FLD80(M); } 0x6: decode MODRM_MOD { -0x3: fcomi(); +0x3: Inst::FCOMI(Rq); default: Inst::UD2(); } 0x7: decode MODRM_MOD { @@ -307,7 +307,7 @@ default: ficomp(); } 0x4: decode MODRM_MOD { -0x3: fsubrp(); +0x3: Inst::FSUBRP(Rq); default: fisub(); } 0x5: decode MODRM_MOD { @@ -344,7 +344,7 @@ } 0x3: decode MODRM_MOD { 0x3: Inst::UD2(); -default: fistp(); +default: Inst::FISTP(Mw); } 0x4: decode MODRM_MOD { 0x3: decode MODRM_RM { @@ -365,7 +365,7 @@ } 0x7: decode MODRM_MOD { 0x3: Inst::UD2(); -default: fistp(); +default: Inst::FISTP(Mq); } } } diff --git a/src/arch/x86/isa/insts/x87/arithmetic/subtraction.py b/src/arch/x86/isa/insts/x87/arithmetic/subtraction.py index 02c41f6..dea1277 100644 --- a/src/arch/x86/isa/insts/x87/arithmetic/subtraction.py +++ b/src/arch/x86/isa/insts/x87/arithmetic/subtraction.py @@ -91,8 +91,12 @@ fault "std::make_shared()" }; +def macroop FSUBRP_R +{ +subfp sti, st(0), sti, spm=1 +}; + # FISUB # FSUBR -# FSUBRP # FISUBR ''' diff --git a/src/arch/x86/isa/insts/x87/compare_and_test/floating_point_ordered_compare.py b/src/arch/x86/isa/insts/x87/compare_and_test/floating_point_ordered_compare.py index 5e03952..cd348cd 100644 --- a/src/arch/x86/isa/insts/x87/compare_and_test/floating_point_ordered_compare.py +++ b/src/arch
[gem5-dev] Change in gem5/gem5[develop]: mem-ruby: Add missing transitions + wakes for Dma events
Kyle Roarty has uploaded this change for review. ( https://gem5-review.googlesource.com/c/public/gem5/+/42463 ) Change subject: mem-ruby: Add missing transitions + wakes for Dma events .. mem-ruby: Add missing transitions + wakes for Dma events This also changes one of the wakeUpDependents calls to a wakeUpAllDependentsAddr call to prevent a hang. Change-Id: Ia076414e5c6d9c8c0b2576d1f442195d75d275fc --- M src/mem/ruby/protocol/MOESI_AMD_Base-dir.sm 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/src/mem/ruby/protocol/MOESI_AMD_Base-dir.sm b/src/mem/ruby/protocol/MOESI_AMD_Base-dir.sm index 684d03e..4d24891 100644 --- a/src/mem/ruby/protocol/MOESI_AMD_Base-dir.sm +++ b/src/mem/ruby/protocol/MOESI_AMD_Base-dir.sm @@ -1119,7 +1119,7 @@ // The exit state is always going to be U, so wakeUpDependents logic should be covered in all the // transitions which are flowing into U. - transition({BL, BS_M, BM_M, B_M, BP, BDW_P, BS_PM, BM_PM, B_PM, BS_Pm, BM_Pm, B_Pm, B}, {DmaRead,DmaWrite}){ + transition({BL, BDR_M, BS_M, BM_M, B_M, BP, BDR_PM, BDW_P, BS_PM, BM_PM, B_PM, BDR_Pm, BS_Pm, BM_Pm, B_Pm, B}, {DmaRead,DmaWrite}){ sd_stallAndWaitRequest; } @@ -1280,6 +1280,7 @@ transition(BDR_M, MemData, U) { mt_writeMemDataToTBE; dd_sendResponseDmaData; +wa_wakeUpAllDependentsAddr; dt_deallocateTBE; pm_popMemQueue; } @@ -1373,7 +1374,7 @@ dd_sendResponseDmaData; // Check for pending requests from the core we put to sleep while waiting // for a response -wa_wakeUpDependents; +wa_wakeUpAllDependentsAddr; dt_deallocateTBE; pt_popTriggerQueue; } -- To view, visit https://gem5-review.googlesource.com/c/public/gem5/+/42463 To unsubscribe, or for help writing mail filters, visit https://gem5-review.googlesource.com/settings Gerrit-Project: public/gem5 Gerrit-Branch: develop Gerrit-Change-Id: Ia076414e5c6d9c8c0b2576d1f442195d75d275fc Gerrit-Change-Number: 42463 Gerrit-PatchSet: 1 Gerrit-Owner: Kyle Roarty Gerrit-MessageType: newchange ___ gem5-dev mailing list -- gem5-dev@gem5.org 
To unsubscribe send an email to gem5-dev-le...@gem5.org %(web_page_url)slistinfo%(cgiext)s/%(_internal_name)s
[gem5-dev] Change in gem5/gem5[develop]: arch-x86: Add insts used in newer libstdc++ rehashing
Kyle Roarty has uploaded this change for review. ( https://gem5-review.googlesource.com/c/public/gem5/+/42443 ) Change subject: arch-x86: Add insts used in newer libstdc++ rehashing .. arch-x86: Add insts used in newer libstdc++ rehashing For newer versions of libstdc++ (Like the one in the ubuntu-20.04_all-dependencies docker image), the variables used when rehashing, e.g., std::unordered_maps have been extended. This resulted in the rehashing function using different, unimplemented, instructions. Because these instructions are unimplemented, it resulted in a std::bad_alloc exception when inserting into an unordered_map This patchset implements the following instructions: FCOMI FSUBRP FISTP Change-Id: I85c57acace1f7a547b0a97ec3a0f0500909c5d2a --- M src/arch/x86/isa/decoder/x87.isa M src/arch/x86/isa/insts/x87/arithmetic/subtraction.py M src/arch/x86/isa/insts/x87/compare_and_test/floating_point_ordered_compare.py M src/arch/x86/isa/insts/x87/data_transfer_and_conversion/convert_and_load_or_store_integer.py M src/arch/x86/isa/microops/ldstop.isa 5 files changed, 65 insertions(+), 8 deletions(-) diff --git a/src/arch/x86/isa/decoder/x87.isa b/src/arch/x86/isa/decoder/x87.isa index 258fcb5..c28bc2f 100644 --- a/src/arch/x86/isa/decoder/x87.isa +++ b/src/arch/x86/isa/decoder/x87.isa @@ -185,7 +185,7 @@ } 0x3: decode MODRM_MOD { 0x3: fcmovnu(); -default: fistp(); +default: Inst::FISTP(Md); // 32-bit int } 0x4: decode MODRM_MOD { 0x3: decode MODRM_RM { @@ -203,7 +203,7 @@ default: Inst::FLD80(M); } 0x6: decode MODRM_MOD { -0x3: fcomi(); +0x3: Inst::FCOMI(Rq); default: Inst::UD2(); } 0x7: decode MODRM_MOD { @@ -307,7 +307,10 @@ default: ficomp(); } 0x4: decode MODRM_MOD { -0x3: fsubrp(); +0x3: decode MODRM_RM { +0x1: Inst::FSUBRP(); +default: Inst::FSUBRP(Eq); +} default: fisub(); } 0x5: decode MODRM_MOD { @@ -344,7 +347,7 @@ } 0x3: decode MODRM_MOD { 0x3: Inst::UD2(); -default: fistp(); +default: Inst::FISTP(Mw); // 16-bit int } 0x4: decode MODRM_MOD { 0x3: decode 
MODRM_RM { @@ -365,7 +368,7 @@ } 0x7: decode MODRM_MOD { 0x3: Inst::UD2(); -default: fistp(); +default: Inst::FISTP(Mq); } } } diff --git a/src/arch/x86/isa/insts/x87/arithmetic/subtraction.py b/src/arch/x86/isa/insts/x87/arithmetic/subtraction.py index 02c41f6..97cdb45 100644 --- a/src/arch/x86/isa/insts/x87/arithmetic/subtraction.py +++ b/src/arch/x86/isa/insts/x87/arithmetic/subtraction.py @@ -91,8 +91,27 @@ fault "std::make_shared()" }; +def macroop FSUBRP +{ +subfp st(1), st(0), st(1), spm=1 +}; + +def macroop FSUBRP_R +{ +subfp sti, st(0), sti, spm=1 +}; + +def macroop FSUBRP_M +{ +fault "std::make_shared()" +}; + +def macroop FSUBRP_P +{ +fault "std::make_shared()" +}; + # FISUB # FSUBR -# FSUBRP # FISUBR ''' diff --git a/src/arch/x86/isa/insts/x87/compare_and_test/floating_point_ordered_compare.py b/src/arch/x86/isa/insts/x87/compare_and_test/floating_point_ordered_compare.py index 5e03952..a3e71e9 100644 --- a/src/arch/x86/isa/insts/x87/compare_and_test/floating_point_ordered_compare.py +++ b/src/arch/x86/isa/insts/x87/compare_and_test/floating_point_ordered_compare.py @@ -37,6 +37,11 @@ # FCOM # FCOMP # FCOMPP -# FCOMI # FCOMIP + +#fcomi +def macroop FCOMI_R { +compfp st(0), sti +}; + ''' diff --git a/src/arch/x86/isa/insts/x87/data_transfer_and_conversion/convert_and_load_or_store_integer.py b/src/arch/x86/isa/insts/x87/data_transfer_and_conversion/convert_and_load_or_store_integer.py index 1dbe79f..515b98b 100644 --- a/src/arch/x86/isa/insts/x87/data_transfer_and_conversion/convert_and_load_or_store_integer.py +++ b/src/arch/x86/isa/insts/x87/data_transfer_and_conversion/convert_and_load_or_store_integer.py @@ -50,6 +50,19 @@ }; # FIST -# FISTP + +def macroop FISTP_M { +movfp ufp1, st(0) +stifp87 ufp1, seg, sib, disp +pop87 +}; + +def macroop FISTP_P { +movfp ufp1, st(0) +rdip t7 +stifp87 ufp1, seg, riprel, disp +pop87 +}; + # FISTTP ''' diff --git a/src/arch/x86/isa/microops/ldstop.isa b/src/arch/x86/isa/microops/ldstop.isa index 79aadfa..0186bc6 
100644 --- a/src/arch/x86/isa/microops/ldstop.isa +++ b/src/arch/x86/isa/microops/ldstop.isa @@ -649,6 +649,23 @@ } ''') +defineMicroStoreOp('Stifp87', code=''' +switch (d
[gem5-dev] Change in gem5/gem5[develop]: dev-hsa: Fix size of HSA Queue
Kyle Roarty has uploaded this change for review. ( https://gem5-review.googlesource.com/c/public/gem5/+/42423 ) Change subject: dev-hsa: Fix size of HSA Queue .. dev-hsa: Fix size of HSA Queue In the HSAQueueDescriptor ptr function, we mod the index by numElts, but numElts was previously just set to size, which was the raw size of the queue. This led to indexing past the queue. We fix this by dividing the size by the AQL packet size to get the actual number of elements the queue can hold. Change-Id: Ie5e699379f303255305c279e58a34dc783df86a0 --- M src/dev/hsa/hsa_packet_processor.hh 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/dev/hsa/hsa_packet_processor.hh b/src/dev/hsa/hsa_packet_processor.hh index e79ffb1..8ef5ccd 100644 --- a/src/dev/hsa/hsa_packet_processor.hh +++ b/src/dev/hsa/hsa_packet_processor.hh @@ -84,7 +84,7 @@ uint64_t hri_ptr, uint32_t size) : basePointer(base_ptr), doorbellPointer(db_ptr), writeIndex(0), readIndex(0), -numElts(size), hostReadIndexPtr(hri_ptr), +numElts(size / AQL_PACKET_SIZE), hostReadIndexPtr(hri_ptr), stalledOnDmaBufAvailability(false), dmaInProgress(false) { } -- To view, visit https://gem5-review.googlesource.com/c/public/gem5/+/42423 To unsubscribe, or for help writing mail filters, visit https://gem5-review.googlesource.com/settings Gerrit-Project: public/gem5 Gerrit-Branch: develop Gerrit-Change-Id: Ie5e699379f303255305c279e58a34dc783df86a0 Gerrit-Change-Number: 42423 Gerrit-PatchSet: 1 Gerrit-Owner: Kyle Roarty Gerrit-MessageType: newchange ___ gem5-dev mailing list -- gem5-dev@gem5.org To unsubscribe send an email to gem5-dev-le...@gem5.org %(web_page_url)slistinfo%(cgiext)s/%(_internal_name)s
[gem5-dev] Change in gem5/gem5[develop]: gpu-compute: Explicitly set driver to NULL in constructor
Kyle Roarty has uploaded this change for review. ( https://gem5-review.googlesource.com/c/public/gem5/+/41973 ) Change subject: gpu-compute: Explicitly set driver to NULL in constructor .. gpu-compute: Explicitly set driver to NULL in constructor We have a fail_if in attachDriver to prevent driver from being overwritten. However, the fail_if only checks for if the driver is not NULL. Previously in some cases, driver was set to garbage, which made the fail_if trip the first time we were assigning the driver. This patch explicitly sets driver to NULL in the constructor, thus ensuring that it will be NULL the first time we call attachDriver Change-Id: I325f6033e785025a912e3af3888c66cee0332f40 --- M src/gpu-compute/gpu_command_processor.cc 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/gpu-compute/gpu_command_processor.cc b/src/gpu-compute/gpu_command_processor.cc index da21076..4901a93 100644 --- a/src/gpu-compute/gpu_command_processor.cc +++ b/src/gpu-compute/gpu_command_processor.cc @@ -42,7 +42,7 @@ #include "sim/syscall_emul_buf.hh" GPUCommandProcessor::GPUCommandProcessor(const Params &p) -: HSADevice(p), dispatcher(*p.dispatcher) +: HSADevice(p), dispatcher(*p.dispatcher), driver(NULL) { dispatcher.setCommandProcessor(this); } -- To view, visit https://gem5-review.googlesource.com/c/public/gem5/+/41973 To unsubscribe, or for help writing mail filters, visit https://gem5-review.googlesource.com/settings Gerrit-Project: public/gem5 Gerrit-Branch: develop Gerrit-Change-Id: I325f6033e785025a912e3af3888c66cee0332f40 Gerrit-Change-Number: 41973 Gerrit-PatchSet: 1 Gerrit-Owner: Kyle Roarty Gerrit-MessageType: newchange ___ gem5-dev mailing list -- gem5-dev@gem5.org To unsubscribe send an email to gem5-dev-le...@gem5.org %(web_page_url)slistinfo%(cgiext)s/%(_internal_name)s
[gem5-dev] Change in gem5/gem5[develop]: gpu-compute: Fix accidental execution when stopped at barrier
Kyle Roarty has uploaded this change for review. ( https://gem5-review.googlesource.com/c/public/gem5/+/41573 ) Change subject: gpu-compute: Fix accidental execution when stopped at barrier .. gpu-compute: Fix accidental execution when stopped at barrier Due to the compute unit pipeline being executed in reverse order, there exists a scenario where a compute unit will execute an extra instruction when it's supposed to be stopped at a barrier. It occurs as follows: * The ScheduleStage sets a barrier instruction ready to execute. * The ScoreboardCheckStage adds another instruction to the readyList. This is where the barrier is checked, but because the barrier isn't executing yet, the instruction can be passed along to ScheduleStage * The barrier executes, and stalls * The ScheduleStage sees that there's a new instruction and schedules it to be executed. * Only now will the ScoreboardCheckStage realize a barrier is active and stall accordingly * The subsequent instruction executes This patch checks for barrier status in the ScheduleStage to prevent an instruction from being scheduled when there is a barrier active. Change-Id: Ib683e2c68f361d7ee60a3beaf53b4b6c888c9f8d --- M src/gpu-compute/schedule_stage.cc 1 file changed, 15 insertions(+), 0 deletions(-) diff --git a/src/gpu-compute/schedule_stage.cc b/src/gpu-compute/schedule_stage.cc index 8a2ea18..5c51e76 100644 --- a/src/gpu-compute/schedule_stage.cc +++ b/src/gpu-compute/schedule_stage.cc @@ -106,6 +106,21 @@ wIt++; } } +/** + * Remove any wave that's at a barrier. Due to backwards execution + * of the pipeline, the ScoreboardCheckStage can mark an instruction + * as ready immediately before a barrier executes, which would then + * be executed when the barrier is active without this check. 
+ **/ +for (auto wIt = fromScoreboardCheck.readyWFs(j).begin(); + wIt != fromScoreboardCheck.readyWFs(j).end();) { +if ((*wIt)->getStatus() == Wavefront::S_BARRIER) { +*wIt = nullptr; +wIt = fromScoreboardCheck.readyWFs(j).erase(wIt); +} else { +wIt++; +} +} } // Attempt to add another wave for each EXE type to schList queues -- To view, visit https://gem5-review.googlesource.com/c/public/gem5/+/41573 To unsubscribe, or for help writing mail filters, visit https://gem5-review.googlesource.com/settings Gerrit-Project: public/gem5 Gerrit-Branch: develop Gerrit-Change-Id: Ib683e2c68f361d7ee60a3beaf53b4b6c888c9f8d Gerrit-Change-Number: 41573 Gerrit-PatchSet: 1 Gerrit-Owner: Kyle Roarty Gerrit-MessageType: newchange ___ gem5-dev mailing list -- gem5-dev@gem5.org To unsubscribe send an email to gem5-dev-le...@gem5.org %(web_page_url)slistinfo%(cgiext)s/%(_internal_name)s
[gem5-dev] Change in gem5/gem5[develop]: arch-gcn3: Fix sign extension for branches with multiplied offset
Kyle Roarty has submitted this change. ( https://gem5-review.googlesource.com/c/public/gem5/+/41053 ) Change subject: arch-gcn3: Fix sign extension for branches with multiplied offset .. arch-gcn3: Fix sign extension for branches with multiplied offset Certain branch instructions specify that the result of (simm16 * 4) gets sign-extended before being added to the PC. Previously, that result was being sign extended as if it was still a 16-bit number. This patch fixes that by having the result be sign extended as an 18-bit number. Change-Id: Id4d430f8daa71ca7910b570e7e39790626f1decf Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/41053 Reviewed-by: Matt Sinclair Reviewed-by: Matthew Poremba Maintainer: Matt Sinclair Tested-by: kokoro --- M src/arch/gcn3/insts/instructions.cc 1 file changed, 7 insertions(+), 7 deletions(-) Approvals: Matthew Poremba: Looks good to me, approved Matt Sinclair: Looks good to me, but someone else must approve; Looks good to me, approved kokoro: Regressions pass diff --git a/src/arch/gcn3/insts/instructions.cc b/src/arch/gcn3/insts/instructions.cc index 03b11ab..29de1a8 100644 --- a/src/arch/gcn3/insts/instructions.cc +++ b/src/arch/gcn3/insts/instructions.cc @@ -3900,7 +3900,7 @@ Addr pc = wf->pc(); ScalarRegI16 simm16 = instData.SIMM16; -pc = pc + ((ScalarRegI64)sext<16>(simm16 * 4LL)) + 4LL; +pc = pc + ((ScalarRegI64)sext<18>(simm16 * 4LL)) + 4LL; wf->pc(pc); } @@ -3946,7 +3946,7 @@ scc.read(); if (!scc.rawData()) { -pc = pc + ((ScalarRegI64)sext<16>(simm16 * 4LL)) + 4LL; +pc = pc + ((ScalarRegI64)sext<18>(simm16 * 4LL)) + 4LL; } wf->pc(pc); @@ -3975,7 +3975,7 @@ scc.read(); if (scc.rawData()) { -pc = pc + ((ScalarRegI64)sext<16>(simm16 * 4LL)) + 4LL; +pc = pc + ((ScalarRegI64)sext<18>(simm16 * 4LL)) + 4LL; } wf->pc(pc); @@ -4005,7 +4005,7 @@ vcc.read(); if (!vcc.rawData()) { -pc = pc + ((ScalarRegI64)sext<16>(simm16 * 4LL)) + 4LL; +pc = pc + ((ScalarRegI64)sext<18>(simm16 * 4LL)) + 4LL; } wf->pc(pc); @@ -4035,7 
+4035,7 @@ if (vcc.rawData()) { Addr pc = wf->pc(); ScalarRegI16 simm16 = instData.SIMM16; -pc = pc + ((ScalarRegI64)sext<16>(simm16 * 4LL)) + 4LL; +pc = pc + ((ScalarRegI64)sext<18>(simm16 * 4LL)) + 4LL; wf->pc(pc); } } @@ -4060,7 +4060,7 @@ if (wf->execMask().none()) { Addr pc = wf->pc(); ScalarRegI16 simm16 = instData.SIMM16; -pc = pc + ((ScalarRegI64)sext<16>(simm16 * 4LL)) + 4LL; +pc = pc + ((ScalarRegI64)sext<18>(simm16 * 4LL)) + 4LL; wf->pc(pc); } } @@ -4085,7 +4085,7 @@ if (wf->execMask().any()) { Addr pc = wf->pc(); ScalarRegI16 simm16 = instData.SIMM16; -pc = pc + ((ScalarRegI64)sext<16>(simm16 * 4LL)) + 4LL; +pc = pc + ((ScalarRegI64)sext<18>(simm16 * 4LL)) + 4LL; wf->pc(pc); } } -- To view, visit https://gem5-review.googlesource.com/c/public/gem5/+/41053 To unsubscribe, or for help writing mail filters, visit https://gem5-review.googlesource.com/settings Gerrit-Project: public/gem5 Gerrit-Branch: develop Gerrit-Change-Id: Id4d430f8daa71ca7910b570e7e39790626f1decf Gerrit-Change-Number: 41053 Gerrit-PatchSet: 2 Gerrit-Owner: Kyle Roarty Gerrit-Reviewer: Alexandru Duțu Gerrit-Reviewer: Kyle Roarty Gerrit-Reviewer: Matt Sinclair Gerrit-Reviewer: Matthew Poremba Gerrit-Reviewer: kokoro Gerrit-MessageType: merged ___ gem5-dev mailing list -- gem5-dev@gem5.org To unsubscribe send an email to gem5-dev-le...@gem5.org %(web_page_url)slistinfo%(cgiext)s/%(_internal_name)s
[gem5-dev] Change in gem5/gem5[develop]: arch-gcn3: Fix sign extension for branches with multiplied offset
Kyle Roarty has uploaded this change for review. ( https://gem5-review.googlesource.com/c/public/gem5/+/41053 ) Change subject: arch-gcn3: Fix sign extension for branches with multiplied offset .. arch-gcn3: Fix sign extension for branches with multiplied offset Certain branch instructions specify that the result of (simm16 * 4) gets sign-extended before being added to the PC. Previously, that result was being sign extended as if it was still a 16-bit number. This patch fixes that by having the result be sign extended as an 18-bit number. Change-Id: Id4d430f8daa71ca7910b570e7e39790626f1decf --- M src/arch/gcn3/insts/instructions.cc 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/src/arch/gcn3/insts/instructions.cc b/src/arch/gcn3/insts/instructions.cc index 03b11ab..29de1a8 100644 --- a/src/arch/gcn3/insts/instructions.cc +++ b/src/arch/gcn3/insts/instructions.cc @@ -3900,7 +3900,7 @@ Addr pc = wf->pc(); ScalarRegI16 simm16 = instData.SIMM16; -pc = pc + ((ScalarRegI64)sext<16>(simm16 * 4LL)) + 4LL; +pc = pc + ((ScalarRegI64)sext<18>(simm16 * 4LL)) + 4LL; wf->pc(pc); } @@ -3946,7 +3946,7 @@ scc.read(); if (!scc.rawData()) { -pc = pc + ((ScalarRegI64)sext<16>(simm16 * 4LL)) + 4LL; +pc = pc + ((ScalarRegI64)sext<18>(simm16 * 4LL)) + 4LL; } wf->pc(pc); @@ -3975,7 +3975,7 @@ scc.read(); if (scc.rawData()) { -pc = pc + ((ScalarRegI64)sext<16>(simm16 * 4LL)) + 4LL; +pc = pc + ((ScalarRegI64)sext<18>(simm16 * 4LL)) + 4LL; } wf->pc(pc); @@ -4005,7 +4005,7 @@ vcc.read(); if (!vcc.rawData()) { -pc = pc + ((ScalarRegI64)sext<16>(simm16 * 4LL)) + 4LL; +pc = pc + ((ScalarRegI64)sext<18>(simm16 * 4LL)) + 4LL; } wf->pc(pc); @@ -4035,7 +4035,7 @@ if (vcc.rawData()) { Addr pc = wf->pc(); ScalarRegI16 simm16 = instData.SIMM16; -pc = pc + ((ScalarRegI64)sext<16>(simm16 * 4LL)) + 4LL; +pc = pc + ((ScalarRegI64)sext<18>(simm16 * 4LL)) + 4LL; wf->pc(pc); } } @@ -4060,7 +4060,7 @@ if (wf->execMask().none()) { Addr pc = wf->pc(); ScalarRegI16 simm16 = instData.SIMM16; -pc = pc 
+ ((ScalarRegI64)sext<16>(simm16 * 4LL)) + 4LL; +pc = pc + ((ScalarRegI64)sext<18>(simm16 * 4LL)) + 4LL; wf->pc(pc); } } @@ -4085,7 +4085,7 @@ if (wf->execMask().any()) { Addr pc = wf->pc(); ScalarRegI16 simm16 = instData.SIMM16; -pc = pc + ((ScalarRegI64)sext<16>(simm16 * 4LL)) + 4LL; +pc = pc + ((ScalarRegI64)sext<18>(simm16 * 4LL)) + 4LL; wf->pc(pc); } } -- To view, visit https://gem5-review.googlesource.com/c/public/gem5/+/41053 To unsubscribe, or for help writing mail filters, visit https://gem5-review.googlesource.com/settings Gerrit-Project: public/gem5 Gerrit-Branch: develop Gerrit-Change-Id: Id4d430f8daa71ca7910b570e7e39790626f1decf Gerrit-Change-Number: 41053 Gerrit-PatchSet: 1 Gerrit-Owner: Kyle Roarty Gerrit-MessageType: newchange ___ gem5-dev mailing list -- gem5-dev@gem5.org To unsubscribe send an email to gem5-dev-le...@gem5.org %(web_page_url)slistinfo%(cgiext)s/%(_internal_name)s
[gem5-dev] Change in gem5/gem5[develop]: dev-hsa: Add missing include to hsa_driver.hh
Kyle Roarty has submitted this change. ( https://gem5-review.googlesource.com/c/public/gem5/+/40216 ) Change subject: dev-hsa: Add missing include to hsa_driver.hh .. dev-hsa: Add missing include to hsa_driver.hh Due to using ThreadContext::Suspended in hsa_driver.hh as of 965ad12b9a4ae4035b0f63e7ab083ac87258a071, we now need to include cpu/thread_context.hh. This change fixes that. Change-Id: I2c6882f2a29ca1638dd34cda42874b95cafbe548 Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/40216 Reviewed-by: Matt Sinclair Reviewed-by: Gabe Black Maintainer: Matt Sinclair Tested-by: kokoro --- M src/dev/hsa/hsa_driver.hh 1 file changed, 1 insertion(+), 1 deletion(-) Approvals: Matt Sinclair: Looks good to me, but someone else must approve; Looks good to me, approved Gabe Black: Looks good to me, approved kokoro: Regressions pass diff --git a/src/dev/hsa/hsa_driver.hh b/src/dev/hsa/hsa_driver.hh index fc8131e..616ec94 100644 --- a/src/dev/hsa/hsa_driver.hh +++ b/src/dev/hsa/hsa_driver.hh @@ -54,12 +54,12 @@ #include #include "base/types.hh" +#include "cpu/thread_context.hh" #include "sim/emul_driver.hh" struct HSADriverParams; class HSADevice; class PortProxy; -class ThreadContext; class HSADriver : public EmulatedDriver { -- To view, visit https://gem5-review.googlesource.com/c/public/gem5/+/40216 To unsubscribe, or for help writing mail filters, visit https://gem5-review.googlesource.com/settings Gerrit-Project: public/gem5 Gerrit-Branch: develop Gerrit-Change-Id: I2c6882f2a29ca1638dd34cda42874b95cafbe548 Gerrit-Change-Number: 40216 Gerrit-PatchSet: 2 Gerrit-Owner: Kyle Roarty Gerrit-Reviewer: Alexandru Duțu Gerrit-Reviewer: Gabe Black Gerrit-Reviewer: Kyle Roarty Gerrit-Reviewer: Matt Sinclair Gerrit-Reviewer: Matthew Poremba Gerrit-Reviewer: kokoro Gerrit-MessageType: merged ___ gem5-dev mailing list -- gem5-dev@gem5.org To unsubscribe send an email to gem5-dev-le...@gem5.org %(web_page_url)slistinfo%(cgiext)s/%(_internal_name)s
[gem5-dev] Change in gem5/gem5[develop]: dev-hsa: Add missing include to hsa_driver.hh
Kyle Roarty has uploaded this change for review. ( https://gem5-review.googlesource.com/c/public/gem5/+/40216 ) Change subject: dev-hsa: Add missing include to hsa_driver.hh .. dev-hsa: Add missing include to hsa_driver.hh Due to using ThreadContext::Suspended in hsa_driver.hh as of 965ad12b9a4ae4035b0f63e7ab083ac87258a071, we now need to include cpu/thread_context.hh. This change fixes that. Change-Id: I2c6882f2a29ca1638dd34cda42874b95cafbe548 --- M src/dev/hsa/hsa_driver.hh 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/dev/hsa/hsa_driver.hh b/src/dev/hsa/hsa_driver.hh index fc8131e..616ec94 100644 --- a/src/dev/hsa/hsa_driver.hh +++ b/src/dev/hsa/hsa_driver.hh @@ -54,12 +54,12 @@ #include #include "base/types.hh" +#include "cpu/thread_context.hh" #include "sim/emul_driver.hh" struct HSADriverParams; class HSADevice; class PortProxy; -class ThreadContext; class HSADriver : public EmulatedDriver { -- To view, visit https://gem5-review.googlesource.com/c/public/gem5/+/40216 To unsubscribe, or for help writing mail filters, visit https://gem5-review.googlesource.com/settings Gerrit-Project: public/gem5 Gerrit-Branch: develop Gerrit-Change-Id: I2c6882f2a29ca1638dd34cda42874b95cafbe548 Gerrit-Change-Number: 40216 Gerrit-PatchSet: 1 Gerrit-Owner: Kyle Roarty Gerrit-MessageType: newchange ___ gem5-dev mailing list -- gem5-dev@gem5.org To unsubscribe send an email to gem5-dev-le...@gem5.org %(web_page_url)slistinfo%(cgiext)s/%(_internal_name)s
[gem5-dev] Change in gem5/gem5[develop]: dev-hsa: enable interruptible hsa signal support
Kyle Roarty has submitted this change. ( https://gem5-review.googlesource.com/c/public/gem5/+/38335 ) Change subject: dev-hsa: enable interruptible hsa signal support .. dev-hsa: enable interruptible hsa signal support Event creation and management support from emulated drivers is required to support interruptible signals in HSA and this support was not available. This changeset adds the event creation and management support in the emulated driver. With this patch, each interruptible signal created by the HSA runtime is associated with a signal event. The HSA runtime can then put a thread waiting on a signal condition to sleep asking the driver to monitor the event associated with that signal. If the signal is modified by the GPU, the dispatcher notifies the driver about signal value change. If the modifier is a CPU thread, the thread will have to make HSA API calls to modify the signal and these API calls will notify the driver about signal value change. Once the driver is notified about a change in the signal value, the driver checks to see if any thread is sleeping on that signal and wakes up the sleeping thread associated with that event. The driver has also implemented the time_out wakeup that can wake up the thread after a certain time period has expired. This is also true for barrier packets. Each signal has an event address in a kernel managed and allocated event page that can be used as a mailbox pointer to notify an event. However, this feature used by non-CPU agents to communicate with the driver is not implemented by this changeset because the non-CPU HSA agents in our model can directly communicate with the driver in our implementation. Having said that, adding that feature should be trivial because the event address and event pages are correctly set up by this changeset and just adding the event page's virtual address to our PIO doorbell interface in the page tables and registering that pio address to the driver should be sufficient.
Managing mailbox pointer for an event is based on event ID and using this event ID as an index into event page, this changeset already provides a unique mailbox pointer for each event. Change-Id: Ic62794076ddd47526b1f952fdb4c1bad632bdd2e Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/38335 Reviewed-by: Jason Lowe-Power Reviewed-by: Matt Sinclair Maintainer: Matt Sinclair Tested-by: kokoro --- M configs/example/apu_se.py M src/dev/hsa/hsa_device.hh M src/dev/hsa/hsa_driver.cc M src/dev/hsa/hsa_driver.hh M src/dev/hsa/hsa_packet_processor.cc M src/dev/hsa/hsa_packet_processor.hh A src/dev/hsa/hsa_signal.hh M src/dev/hsa/hw_scheduler.cc A src/dev/hsa/kfd_event_defines.h M src/gpu-compute/dispatcher.cc M src/gpu-compute/gpu_command_processor.cc M src/gpu-compute/gpu_command_processor.hh M src/gpu-compute/gpu_compute_driver.cc M src/gpu-compute/gpu_compute_driver.hh M src/sim/emul_driver.hh 15 files changed, 545 insertions(+), 70 deletions(-) Approvals: Jason Lowe-Power: Looks good to me, approved Matt Sinclair: Looks good to me, approved; Looks good to me, approved kokoro: Regressions pass diff --git a/configs/example/apu_se.py b/configs/example/apu_se.py index 7edc733..feed8a7 100644 --- a/configs/example/apu_se.py +++ b/configs/example/apu_se.py @@ -470,7 +470,7 @@ "/usr/lib/x86_64-linux-gnu" ]), 'HOME=%s' % os.getenv('HOME','/'), - "HSA_ENABLE_INTERRUPT=0"] + "HSA_ENABLE_INTERRUPT=1"] process = Process(executable = executable, cmd = [options.cmd] + options.options.split(), drivers = [gpu_driver], env = env) diff --git a/src/dev/hsa/hsa_device.hh b/src/dev/hsa/hsa_device.hh index 68cbd82..6f981d6 100644 --- a/src/dev/hsa/hsa_device.hh +++ b/src/dev/hsa/hsa_device.hh @@ -43,10 +43,13 @@ #include "dev/hsa/hsa_packet_processor.hh" #include "params/HSADevice.hh" +class HSADriver; + class HSADevice : public DmaDevice { public: typedef HSADeviceParams Params; +typedef std::function HsaSignalCallbackFunction; HSADevice(const Params &p) : DmaDevice(p), 
hsaPP(p.hsapp) { @@ -92,7 +95,21 @@ { fatal("%s does not accept vendor specific packets\n", name()); } - +virtual void +attachDriver(HSADriver *driver) +{ +fatal("%s does not need HSA driver\n", name()); +} +virtual void +updateHsaSignal(Addr signal_handle, uint64_t signal_value) +{ +fatal("%s does not have HSA signal update functionality.\n", name()); +} +virtual uint64_t +functionalReadHsaSignal(Addr signal_handle) +{ +fatal("%s does not have HSA signal read functionality.\n", name()); +} void dmaReadVirt(Addr host_addr, unsigned size, DmaCallback *cb, void *data, Tick delay = 0); void dmaWriteVirt(Addr host_addr, unsigned size, DmaCallback *cb, diff --git a/src/dev/hsa/hsa_driver.cc
[gem5-dev] Change in gem5/gem5[develop]: arch-x86: Make JRCX instruction do 64-bit jump
Kyle Roarty has uploaded this change for review. ( https://gem5-review.googlesource.com/c/public/gem5/+/40195 ) Change subject: arch-x86: Make JRCX instruction do 64-bit jump .. arch-x86: Make JRCX instruction do 64-bit jump Per the AMD64 Architecture Programming Manual: The size of the count register (CX, ECX, or RCX) depends on the address-size attribute of the JrCXZ instruction. Therefore, JRCXZ can only be executed in 64-bit mode and In 64-bit mode, the operand size defaults to 64 bits. The processor sign-extends the 8-bit displacement value to 64 bits before adding it to the RIP. Change-Id: Id55147d0602ff41ad6aaef483bef722ff56cae62 --- M src/arch/x86/isa/insts/general_purpose/control_transfer/conditional_jump.py 1 file changed, 2 insertions(+), 0 deletions(-) diff --git a/src/arch/x86/isa/insts/general_purpose/control_transfer/conditional_jump.py b/src/arch/x86/isa/insts/general_purpose/control_transfer/conditional_jump.py index 390a08b..420d55b 100644 --- a/src/arch/x86/isa/insts/general_purpose/control_transfer/conditional_jump.py +++ b/src/arch/x86/isa/insts/general_purpose/control_transfer/conditional_jump.py @@ -212,6 +212,8 @@ def macroop JRCX_I { +# Make the default data size of jumps 64 bits in 64 bit mode +.adjust_env oszIn64Override .control_direct rdip t1 -- To view, visit https://gem5-review.googlesource.com/c/public/gem5/+/40195 To unsubscribe, or for help writing mail filters, visit https://gem5-review.googlesource.com/settings Gerrit-Project: public/gem5 Gerrit-Branch: develop Gerrit-Change-Id: Id55147d0602ff41ad6aaef483bef722ff56cae62 Gerrit-Change-Number: 40195 Gerrit-PatchSet: 1 Gerrit-Owner: Kyle Roarty Gerrit-MessageType: newchange ___ gem5-dev mailing list -- gem5-dev@gem5.org To unsubscribe send an email to gem5-dev-le...@gem5.org %(web_page_url)slistinfo%(cgiext)s/%(_internal_name)s
[gem5-dev] Change in gem5/gem5[develop]: gpu-compute: Simplify LGKM decrementing for Flat instructions
Kyle Roarty has submitted this change. ( https://gem5-review.googlesource.com/c/public/gem5/+/39396 ) Change subject: gpu-compute: Simplify LGKM decrementing for Flat instructions .. gpu-compute: Simplify LGKM decrementing for Flat instructions This commit makes it so LGKM count is decremented in a single place (after completeAcc), which fixes a couple of potential bugs 1. Data is only written by completeAcc, not after initiateAcc. LGKM count is supposed to be decremented after data is written. 2. LGKM count is now properly decremented for atomics without return Change-Id: Ic791af3b42e04f7baaa0ce50cb2a2c6286c54f5a Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/39396 Reviewed-by: Matt Sinclair Reviewed-by: Matthew Poremba Maintainer: Matt Sinclair Tested-by: kokoro --- M src/gpu-compute/global_memory_pipeline.cc 1 file changed, 1 insertion(+), 5 deletions(-) Approvals: Matthew Poremba: Looks good to me, approved Matt Sinclair: Looks good to me, but someone else must approve; Looks good to me, approved kokoro: Regressions pass diff --git a/src/gpu-compute/global_memory_pipeline.cc b/src/gpu-compute/global_memory_pipeline.cc index 48f767b..2f251e8 100644 --- a/src/gpu-compute/global_memory_pipeline.cc +++ b/src/gpu-compute/global_memory_pipeline.cc @@ -130,7 +130,7 @@ DPRINTF(GPUMem, "CU%d: WF[%d][%d]: Completing global mem instr %s\n", m->cu_id, m->simdId, m->wfSlotId, m->disassemble()); m->completeAcc(m); -if (m->isFlat() && (m->isLoad() || m->isAtomicRet())) { +if (m->isFlat()) { w->decLGKMInstsIssued(); } w->decVMemInstsIssued(); @@ -196,10 +196,6 @@ mp->disassemble(), mp->seqNum()); mp->initiateAcc(mp); -if (mp->isFlat() && mp->isStore()) { -mp->wavefront()->decLGKMInstsIssued(); -} - if (mp->isStore() && mp->isGlobalSeg()) { mp->wavefront()->decExpInstsIssued(); } -- To view, visit https://gem5-review.googlesource.com/c/public/gem5/+/39396 To unsubscribe, or for help writing mail filters, visit https://gem5-review.googlesource.com/settings 
Gerrit-Project: public/gem5 Gerrit-Branch: develop Gerrit-Change-Id: Ic791af3b42e04f7baaa0ce50cb2a2c6286c54f5a Gerrit-Change-Number: 39396 Gerrit-PatchSet: 2 Gerrit-Owner: Kyle Roarty Gerrit-Reviewer: Kyle Roarty Gerrit-Reviewer: Matt Sinclair Gerrit-Reviewer: Matthew Poremba Gerrit-Reviewer: kokoro Gerrit-MessageType: merged ___ gem5-dev mailing list -- gem5-dev@gem5.org To unsubscribe send an email to gem5-dev-le...@gem5.org %(web_page_url)slistinfo%(cgiext)s/%(_internal_name)s
[gem5-dev] Change in gem5/gem5[develop]: gpu-compute: Simplify LGKM decrementing for Flat instructions
Kyle Roarty has uploaded this change for review. ( https://gem5-review.googlesource.com/c/public/gem5/+/39396 ) Change subject: gpu-compute: Simplify LGKM decrementing for Flat instructions .. gpu-compute: Simplify LGKM decrementing for Flat instructions This commit makes it so LGKM count is decremented in a single place (after completeAcc), which fixes a couple of potential bugs 1. Data is only written by completeAcc, not after initiateAcc. LGKM count is supposed to be decremented after data is written. 2. LGKM count is now properly decremented for atomics without return Change-Id: Ic791af3b42e04f7baaa0ce50cb2a2c6286c54f5a --- M src/gpu-compute/global_memory_pipeline.cc 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/src/gpu-compute/global_memory_pipeline.cc b/src/gpu-compute/global_memory_pipeline.cc index 48f767b..2f251e8 100644 --- a/src/gpu-compute/global_memory_pipeline.cc +++ b/src/gpu-compute/global_memory_pipeline.cc @@ -130,7 +130,7 @@ DPRINTF(GPUMem, "CU%d: WF[%d][%d]: Completing global mem instr %s\n", m->cu_id, m->simdId, m->wfSlotId, m->disassemble()); m->completeAcc(m); -if (m->isFlat() && (m->isLoad() || m->isAtomicRet())) { +if (m->isFlat()) { w->decLGKMInstsIssued(); } w->decVMemInstsIssued(); @@ -196,10 +196,6 @@ mp->disassemble(), mp->seqNum()); mp->initiateAcc(mp); -if (mp->isFlat() && mp->isStore()) { -mp->wavefront()->decLGKMInstsIssued(); -} - if (mp->isStore() && mp->isGlobalSeg()) { mp->wavefront()->decExpInstsIssued(); } -- To view, visit https://gem5-review.googlesource.com/c/public/gem5/+/39396 To unsubscribe, or for help writing mail filters, visit https://gem5-review.googlesource.com/settings Gerrit-Project: public/gem5 Gerrit-Branch: develop Gerrit-Change-Id: Ic791af3b42e04f7baaa0ce50cb2a2c6286c54f5a Gerrit-Change-Number: 39396 Gerrit-PatchSet: 1 Gerrit-Owner: Kyle Roarty Gerrit-MessageType: newchange ___ gem5-dev mailing list -- gem5-dev@gem5.org To unsubscribe send an email to gem5-dev-le...@gem5.org 
%(web_page_url)slistinfo%(cgiext)s/%(_internal_name)s
[gem5-dev] Change in gem5/gem5[develop]: gpu-compute: Fix LGKM decrementing for flat atomic insts
Kyle Roarty has submitted this change. ( https://gem5-review.googlesource.com/c/public/gem5/+/39155 ) Change subject: gpu-compute: Fix LGKM decrementing for flat atomic insts .. gpu-compute: Fix LGKM decrementing for flat atomic insts A prior commit (f6ec145fc0) fixed early LGKM decrementing for flat loads and stores, but failed to address flat atomics. Per the GCN3 ISA, LGKM count is decremented on flat atomics with return when the data has been returned. This patch checks if the flat instruction is an atomic with return, and decrements LGKM count if so. Change-Id: I5c0c2c205a8b21327d4c42ba71c59842c15bd63b Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/39155 Reviewed-by: Matthew Poremba Reviewed-by: Matt Sinclair Maintainer: Matthew Poremba Tested-by: kokoro --- M src/gpu-compute/global_memory_pipeline.cc 1 file changed, 1 insertion(+), 1 deletion(-) Approvals: Matthew Poremba: Looks good to me, approved; Looks good to me, approved Matt Sinclair: Looks good to me, approved kokoro: Regressions pass diff --git a/src/gpu-compute/global_memory_pipeline.cc b/src/gpu-compute/global_memory_pipeline.cc index a2b24e4..f6d60cf 100644 --- a/src/gpu-compute/global_memory_pipeline.cc +++ b/src/gpu-compute/global_memory_pipeline.cc @@ -130,7 +130,7 @@ DPRINTF(GPUMem, "CU%d: WF[%d][%d]: Completing global mem instr %s\n", m->cu_id, m->simdId, m->wfSlotId, m->disassemble()); m->completeAcc(m); -if (m->isFlat() && m->isLoad()) { +if (m->isFlat() && (m->isLoad() || m->isAtomicRet())) { w->decLGKMInstsIssued(); } w->decVMemInstsIssued(); -- To view, visit https://gem5-review.googlesource.com/c/public/gem5/+/39155 To unsubscribe, or for help writing mail filters, visit https://gem5-review.googlesource.com/settings Gerrit-Project: public/gem5 Gerrit-Branch: develop Gerrit-Change-Id: I5c0c2c205a8b21327d4c42ba71c59842c15bd63b Gerrit-Change-Number: 39155 Gerrit-PatchSet: 2 Gerrit-Owner: Kyle Roarty Gerrit-Reviewer: Kyle Roarty Gerrit-Reviewer: Matt Sinclair 
Gerrit-Reviewer: Matthew Poremba Gerrit-Reviewer: kokoro Gerrit-MessageType: merged ___ gem5-dev mailing list -- gem5-dev@gem5.org To unsubscribe send an email to gem5-dev-le...@gem5.org %(web_page_url)slistinfo%(cgiext)s/%(_internal_name)s
[gem5-dev] Change in gem5/gem5[develop]: gpu-compute: Fix LGKM decrementing for flat atomic insts
Kyle Roarty has uploaded this change for review. ( https://gem5-review.googlesource.com/c/public/gem5/+/39155 ) Change subject: gpu-compute: Fix LGKM decrementing for flat atomic insts .. gpu-compute: Fix LGKM decrementing for flat atomic insts A prior commit (f6ec145fc0) fixed early LGKM decrementing for flat loads and stores, but failed to address flat atomics. Per the GCN3 ISA, LGKM count is decremented on flat atomics with return when the data has been returned. This patch checks if the flat instruction is an atomic with return, and decrements LGKM count if so. Change-Id: I5c0c2c205a8b21327d4c42ba71c59842c15bd63b --- M src/gpu-compute/global_memory_pipeline.cc 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/gpu-compute/global_memory_pipeline.cc b/src/gpu-compute/global_memory_pipeline.cc index a2b24e4..f6d60cf 100644 --- a/src/gpu-compute/global_memory_pipeline.cc +++ b/src/gpu-compute/global_memory_pipeline.cc @@ -130,7 +130,7 @@ DPRINTF(GPUMem, "CU%d: WF[%d][%d]: Completing global mem instr %s\n", m->cu_id, m->simdId, m->wfSlotId, m->disassemble()); m->completeAcc(m); -if (m->isFlat() && m->isLoad()) { +if (m->isFlat() && (m->isLoad() || m->isAtomicRet())) { w->decLGKMInstsIssued(); } w->decVMemInstsIssued(); -- To view, visit https://gem5-review.googlesource.com/c/public/gem5/+/39155 To unsubscribe, or for help writing mail filters, visit https://gem5-review.googlesource.com/settings Gerrit-Project: public/gem5 Gerrit-Branch: develop Gerrit-Change-Id: I5c0c2c205a8b21327d4c42ba71c59842c15bd63b Gerrit-Change-Number: 39155 Gerrit-PatchSet: 1 Gerrit-Owner: Kyle Roarty Gerrit-MessageType: newchange ___ gem5-dev mailing list -- gem5-dev@gem5.org To unsubscribe send an email to gem5-dev-le...@gem5.org %(web_page_url)slistinfo%(cgiext)s/%(_internal_name)s
[gem5-dev] Change in gem5/gem5[develop]: gpu-compute: Fix FLAT insts decrementing lgkm count early
Kyle Roarty has uploaded this change for review. ( https://gem5-review.googlesource.com/c/public/gem5/+/38696 ) Change subject: gpu-compute: Fix FLAT insts decrementing lgkm count early .. gpu-compute: Fix FLAT insts decrementing lgkm count early FLAT instructions used to decrement lgkm count on execute, while the GCN3 ISA specifies that lgkm count should be decremented on data being returned or data being written. This patch changes it so that lgkm is decremented after initiateAcc (for stores) and after completeAcc (for loads) to better reflect the ISA definition. This fixes a bug where waitcnts would be satisfied even though the memory access wasn't completed, which led to instructions using the wrong data. Change-Id: I596cb031af9cda8d47a1b5e146e4a4ffd793d36c --- M src/gpu-compute/global_memory_pipeline.cc M src/gpu-compute/gpu_dyn_inst.cc 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/src/gpu-compute/global_memory_pipeline.cc b/src/gpu-compute/global_memory_pipeline.cc index bcd93f8..e71f1a9 100644 --- a/src/gpu-compute/global_memory_pipeline.cc +++ b/src/gpu-compute/global_memory_pipeline.cc @@ -130,6 +130,9 @@ DPRINTF(GPUMem, "CU%d: WF[%d][%d]: Completing global mem instr %s\n", m->cu_id, m->simdId, m->wfSlotId, m->disassemble()); m->completeAcc(m); +if (m->isFlat()) { +w->decLGKMInstsIssued(); +} w->decVMemInstsIssued(); if (m->isLoad() || m->isAtomicRet()) { diff --git a/src/gpu-compute/gpu_dyn_inst.cc b/src/gpu-compute/gpu_dyn_inst.cc index 03ed689..38e4ecf 100644 --- a/src/gpu-compute/gpu_dyn_inst.cc +++ b/src/gpu-compute/gpu_dyn_inst.cc @@ -819,7 +819,6 @@ if (executedAs() == Enums::SC_GLOBAL) { // no transormation for global segment wavefront()->execUnitId = wavefront()->flatGmUnitId; -wavefront()->decLGKMInstsIssued(); if (isLoad()) { wavefront()->rdLmReqsInPipe--; } else if (isStore()) { -- To view, visit
https://gem5-review.googlesource.com/settings Gerrit-Project: public/gem5 Gerrit-Branch: develop Gerrit-Change-Id: I596cb031af9cda8d47a1b5e146e4a4ffd793d36c Gerrit-Change-Number: 38696 Gerrit-PatchSet: 1 Gerrit-Owner: Kyle Roarty Gerrit-MessageType: newchange ___ gem5-dev mailing list -- gem5-dev@gem5.org To unsubscribe send an email to gem5-dev-le...@gem5.org %(web_page_url)slistinfo%(cgiext)s/%(_internal_name)s
[gem5-dev] Change in gem5/gem5[develop]: dev-hsa: enable interruptible hsa signal support
Kyle Roarty has uploaded this change for review. ( https://gem5-review.googlesource.com/c/public/gem5/+/38335 ) Change subject: dev-hsa: enable interruptible hsa signal support .. dev-hsa: enable interruptible hsa signal support Event creation and management support from emulated drivers is required to support interruptible signals in HSA and this support was not available. This changeset adds the event creation and management support in the emulated driver. With this patch, each interruptible signal created by the HSA runtime is associated with a signal event. The HSA runtime can then put a thread waiting on a signal condition to sleep asking the driver to monitor the event associated with that signal. If the signal is modified by the GPU, the dispatcher notifies the driver about signal value change. If the modifier is a CPU thread, the thread will have to make HSA API calls to modify the signal and these API calls will notify the driver about signal value change. Once the driver is notified about a change in the signal value, the driver checks to see if any thread is sleeping on that signal and wakes up the sleeping thread associated with that event. The driver has also implemented the time_out wakeup that can wake up the thread after a certain time period has expired. This is also true for barrier packets. This changeset also fixes a bug in the mmap syscall. Each signal has an event address in a kernel managed and allocated event page that can be used as a mailbox pointer to notify an event. However, this feature used by non-CPU agents to communicate with the driver is not implemented by this changeset because the non-CPU HSA agents in our model can directly communicate with the driver in our implementation.
Having said that, adding that feature should be trivial because the event address and event pages are correctly setup by this changeset and just adding the event page's virtual address to our PIO doorbell interface in the page tables and registering that pio address to the driver should be sufficient. Managing mailbox pointer for an event is based on event ID and using this event ID as an index into event page, this changeset already provides a unique mailbox pointer for each event. Change-Id: Ic62794076ddd47526b1f952fdb4c1bad632bdd2e --- M configs/example/apu_se.py M src/dev/hsa/hsa_device.hh M src/dev/hsa/hsa_driver.cc M src/dev/hsa/hsa_driver.hh M src/dev/hsa/hsa_packet_processor.cc M src/dev/hsa/hsa_packet_processor.hh A src/dev/hsa/hsa_signal.hh M src/dev/hsa/hw_scheduler.cc A src/dev/hsa/kfd_event_defines.h M src/gpu-compute/dispatcher.cc M src/gpu-compute/gpu_command_processor.cc M src/gpu-compute/gpu_command_processor.hh M src/gpu-compute/gpu_compute_driver.cc M src/gpu-compute/gpu_compute_driver.hh M src/sim/emul_driver.hh 15 files changed, 515 insertions(+), 70 deletions(-) diff --git a/configs/example/apu_se.py b/configs/example/apu_se.py index 14f7163..aadf1ec 100644 --- a/configs/example/apu_se.py +++ b/configs/example/apu_se.py @@ -461,7 +461,7 @@ "/usr/lib/x86_64-linux-gnu" ]), 'HOME=%s' % os.getenv('HOME','/'), - "HSA_ENABLE_INTERRUPT=0"] + "HSA_ENABLE_INTERRUPT=1"] process = Process(executable = executable, cmd = [options.cmd] + options.options.split(), drivers = [gpu_driver], env = env) diff --git a/src/dev/hsa/hsa_device.hh b/src/dev/hsa/hsa_device.hh index 68cbd82..e9a5fd1 100644 --- a/src/dev/hsa/hsa_device.hh +++ b/src/dev/hsa/hsa_device.hh @@ -43,10 +43,13 @@ #include "dev/hsa/hsa_packet_processor.hh" #include "params/HSADevice.hh" +class HSADriver; + class HSADevice : public DmaDevice { public: typedef HSADeviceParams Params; +typedef std::function HsaSignalCallbackFunction; HSADevice(const Params &p) : DmaDevice(p), hsaPP(p.hsapp) { @@ 
-92,7 +95,22 @@ { fatal("%s does not accept vendor specific packets\n", name()); } - +virtual void +attachDriver(HSADriver *driver) +{ +fatal("%s does not need HSA driver\n", name()); +} +virtual void +updateHsaSignal(Addr signal_handle, uint64_t signal_value, +HsaSignalCallbackFunction function = [ = ] (const uint64_t &) { }) +{ +fatal("%s does not have HSA signal update functionality.\n", name()); +} +virtual uint64_t +functionalReadHsaSignal(Addr signal_handle) +{ +fatal("%s does not have HSA signal read functionality.\n", name()); +} void dmaReadVirt(Addr host_addr, unsigned size, DmaCallback *cb, void *data, Tick delay = 0); void dmaWriteVirt(Addr host_addr, unsigned size, DmaCallback *cb, diff --git a/src/dev/hsa/hsa_driver.cc b/src/dev/hsa/hsa_driver.cc index 190213a..40330b7 100644 --- a/src/dev/hsa/hsa_driver.cc +++ b/src/dev/hsa/hsa_driver.cc @@ -39,6 +39,8 @@ #include "cpu/thread_context.hh" #include "debug/HSA
[gem5-dev] Change in gem5/gem5[develop]: util: Update ROCm to 1.6.4 in gcn Dockerfile, install HIP by .deb
Kyle Roarty has submitted this change. ( https://gem5-review.googlesource.com/c/public/gem5/+/37675 ) Change subject: util: Update ROCm to 1.6.4 in gcn Dockerfile, install HIP by .deb .. util: Update ROCm to 1.6.4 in gcn Dockerfile, install HIP by .deb Previously, we were using ROCm 1.6.2 as there were issues with some of the machine learning applications that weren't present on 1.6.2. However, after re-running them we've found that they, and all other applications previously tested, run to completion. Additionally, there have been patches to enable BLIT kernels which made it so we no longer need to build HIP and MIOpen differently for APU and DGPU code. This allows us to install HIP directly from the .deb packages instead of from source. Installing from the .deb packages also avoids the hipDeviceSynchronize() bug. Finally, this makes it so most GPU programs can be run as-is without modifications to remove hipMalloc/hipMemcpy calls as was done previously. Change-Id: Ic61b09ed200b19f759d891487cde874abd607537 Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/37675 Reviewed-by: Bobby R. Bruce Maintainer: Bobby R. Bruce Tested-by: kokoro --- M util/dockerfiles/gcn-gpu/Dockerfile 1 file changed, 10 insertions(+), 14 deletions(-) Approvals: Bobby R.
Bruce: Looks good to me, approved; Looks good to me, approved kokoro: Regressions pass diff --git a/util/dockerfiles/gcn-gpu/Dockerfile b/util/dockerfiles/gcn-gpu/Dockerfile index e13367f..dad41b92 100644 --- a/util/dockerfiles/gcn-gpu/Dockerfile +++ b/util/dockerfiles/gcn-gpu/Dockerfile @@ -54,20 +54,21 @@ ARG gem5_dist=http://dist.gem5.org/dist/develop # Install ROCm 1.6 binaries -RUN wget -qO- ${gem5_dist}/apt_1.6.2.tar.bz2 \ +RUN wget -qO- ${gem5_dist}/apt_1.6.4.tar.bz2 \ | tar -xjv \ -&& cd apt_1.6.2/pool/main/ \ +&& cd apt_1.6.4/pool/main/ \ && dpkg -i h/hsakmt-roct-dev/* \ && dpkg -i h/hsa-ext-rocr-dev/* \ && dpkg -i h/hsa-rocr-dev/* \ && dpkg -i r/rocm-utils/* \ && dpkg -i h/hcc/* \ && dpkg -i r/rocm-opencl/* \ -&& dpkg -i r/rocm-opencl-dev/* +&& dpkg -i r/rocm-opencl-dev/* \ +&& dpkg -i h/hip_base/* \ +&& dpkg -i h/hip_hcc/* # Get ROCm libraries we need to compile from source (and ROCm-profiler) -RUN git clone --single-branch https://github.com/ROCm-Developer-Tools/HIP/ && \ -git clone --single-branch https://github.com/ROCmSoftwarePlatform/hipBLAS/ && \ +RUN git clone --single-branch https://github.com/ROCmSoftwarePlatform/hipBLAS/ && \ git clone --single-branch https://github.com/ROCmSoftwarePlatform/rocBLAS/ && \ git clone --single-branch https://github.com/ROCmSoftwarePlatform/MIOpenGEMM/ && \ git clone --single-branch https://github.com/ROCmSoftwarePlatform/MIOpen/ && \ @@ -77,12 +78,10 @@ # Apply patches to various repos RUN mkdir -p /patch && cd /patch && \ wget ${gem5_dist}/rocm_patches/hipBLAS.patch && \ -wget ${gem5_dist}/rocm_patches/hip.patch_v2 && \ wget ${gem5_dist}/rocm_patches/miopen-conv.patch && \ wget ${gem5_dist}/rocm_patches/rocBLAS.patch -RUN git -C /HIP/ checkout 0e3d824e && git -C /HIP/ apply /patch/hip.patch_v2 && \ -git -C /hipBLAS/ checkout ee57787e && git -C /hipBLAS/ apply /patch/hipBLAS.patch && \ +RUN git -C /hipBLAS/ checkout ee57787e && git -C /hipBLAS/ apply /patch/hipBLAS.patch && \ git -C /rocBLAS/ checkout cbff4b4e && 
git -C /rocBLAS/ apply /patch/rocBLAS.patch && \ git -C /rocm-cmake/ checkout 12670acb && \ git -C /MIOpenGEMM/ checkout 9547fb9e && \ @@ -97,17 +96,13 @@ ENV HCC_AMDGPU_TARGET gfx801 # Create build dirs for machine learning ROCm installs -RUN mkdir -p /HIP/build && \ -mkdir -p /rocBLAS/build && \ +RUN mkdir -p /rocBLAS/build && \ mkdir -p /hipBLAS/build && \ mkdir -p /rocm-cmake/build && \ mkdir -p /MIOpenGEMM/build && \ mkdir -p /MIOpen/build # Do the builds, empty build dir to trim image size -WORKDIR /HIP/build -RUN cmake .. && make -j$(nproc) && make install && rm -rf * - WORKDIR /rocBLAS/build RUN CXX=/opt/rocm/bin/hcc cmake -DCMAKE_CXX_FLAGS="--amdgpu-target=gfx801" .. && \ make -j$(nproc) && make install && rm -rf * @@ -144,7 +139,7 @@ -DMIOPEN_CACHE_DIR=/.cache/miopen \ -DMIOPEN_AMDGCN_ASSEMBLER_PATH=/opt/rocm/opencl/bin \ -DHALF_INCLUDE_DIR=/MIOpen/half-1.12.0/include \ --DCMAKE_CXX_FLAGS="-isystem /usr/include/x86_64-linux-gnu" .. && \ +-DCMAKE_CXX_FLAGS="-isystem /usr/include/x86_64-linux-gnu -DDGPU" .. && \ make -j$(nproc) && make install && rm -rf * # Re-set defaults @@ -166,4 +161,5 @@ # Always use python3 and create a link to config command for gem5 to find RUN ln -sf /usr/bin/python3 /usr/bin/python RUN ln -sf /usr/bin/python3.9-config /usr/bin/python3-config + WORKDIR / -- To view, visit https://gem5-review.googlesource.com/c/public/gem5/+/37675 To unsubscribe, or f
[gem5-dev] Change in gem5/gem5[develop]: gpu-compute: Use dict.get syntax for accessing buildEnv keys
Kyle Roarty has submitted this change. ( https://gem5-review.googlesource.com/c/public/gem5/+/38135 ) Change subject: gpu-compute: Use dict.get syntax for accessing buildEnv keys .. gpu-compute: Use dict.get syntax for accessing buildEnv keys 37775 removed SmartDict, which is the type buildEnv used to be. Because of that change, doing buildEnv[key] with a key not in the dict raises KeyError instead of returning False. By using buildEnv.get(key, False), we are able to return False when the key isn't in the dict. Change-Id: I4aae29b95b082efb2b021f21d608f9cd1c196379 Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/38135 Reviewed-by: Matt Sinclair Reviewed-by: Matthew Poremba Maintainer: Matt Sinclair Maintainer: Matthew Poremba Tested-by: kokoro --- M src/gpu-compute/X86GPUTLB.py 1 file changed, 2 insertions(+), 2 deletions(-) Approvals: Matthew Poremba: Looks good to me, approved; Looks good to me, approved Matt Sinclair: Looks good to me, approved; Looks good to me, approved kokoro: Regressions pass diff --git a/src/gpu-compute/X86GPUTLB.py b/src/gpu-compute/X86GPUTLB.py index fee9b9a..7e5d932 100644 --- a/src/gpu-compute/X86GPUTLB.py +++ b/src/gpu-compute/X86GPUTLB.py @@ -36,7 +36,7 @@ from m5.objects.ClockedObject import ClockedObject from m5.SimObject import SimObject -if buildEnv['FULL_SYSTEM']: +if buildEnv.get('FULL_SYSTEM', False): class X86PagetableWalker(SimObject): type = 'X86PagetableWalker' cxx_class = 'X86ISA::Walker' @@ -50,7 +50,7 @@ size = Param.Int(64, "TLB size (number of entries)") assoc = Param.Int(64, "TLB associativity") -if buildEnv['FULL_SYSTEM']: +if buildEnv.get('FULL_SYSTEM', False): walker = Param.X86PagetableWalker(X86PagetableWalker(), "page table walker") -- To view, visit https://gem5-review.googlesource.com/c/public/gem5/+/38135 To unsubscribe, or for help writing mail filters, visit https://gem5-review.googlesource.com/settings Gerrit-Project: public/gem5 Gerrit-Branch: develop Gerrit-Change-Id: 
I4aae29b95b082efb2b021f21d608f9cd1c196379 Gerrit-Change-Number: 38135 Gerrit-PatchSet: 2 Gerrit-Owner: Kyle Roarty Gerrit-Reviewer: Bobby R. Bruce Gerrit-Reviewer: Kyle Roarty Gerrit-Reviewer: Matt Sinclair Gerrit-Reviewer: Matthew Poremba Gerrit-Reviewer: kokoro Gerrit-MessageType: merged ___ gem5-dev mailing list -- gem5-dev@gem5.org To unsubscribe send an email to gem5-dev-le...@gem5.org %(web_page_url)slistinfo%(cgiext)s/%(_internal_name)s
[gem5-dev] Change in gem5/gem5[develop]: gpu-compute: Add exp_cnt tracking for buffer store instructions
Kyle Roarty has submitted this change. ( https://gem5-review.googlesource.com/c/public/gem5/+/37555 ) Change subject: gpu-compute: Add exp_cnt tracking for buffer store instructions .. gpu-compute: Add exp_cnt tracking for buffer store instructions exp_cnt (expInstsIssued in the code) is used in the waitcnt instruction to track that data has been read out of VGPRs in previous global memory instructions, making it safe to overwrite the VGPRs used in said global memory instructions. Previously, exp_cnt wasn't being tracked at all, which led to the waitcnt finishing immediately, leading to the memory instruction's VGPRs getting overwritten by subsequent instructions, causing errors. This patch makes it so waitcnts waiting on exp_cnt will wait for MUBUF buffer store instructions to read their VGPRs before completing Change-Id: Idd2b59511bc086cf316217da27b7a228272b0b0f Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/37555 Reviewed-by: Matt Sinclair Reviewed-by: Alexandru Duțu Maintainer: Matt Sinclair Tested-by: kokoro --- M src/gpu-compute/global_memory_pipeline.cc M src/gpu-compute/schedule_stage.cc 2 files changed, 7 insertions(+), 0 deletions(-) Approvals: Alexandru Duțu: Looks good to me, approved Matt Sinclair: Looks good to me, but someone else must approve; Looks good to me, approved kokoro: Regressions pass diff --git a/src/gpu-compute/global_memory_pipeline.cc b/src/gpu-compute/global_memory_pipeline.cc index 01f986c..bcd93f8 100644 --- a/src/gpu-compute/global_memory_pipeline.cc +++ b/src/gpu-compute/global_memory_pipeline.cc @@ -193,6 +193,10 @@ mp->disassemble(), mp->seqNum()); mp->initiateAcc(mp); +if (mp->isStore() && mp->isGlobalSeg()) { +mp->wavefront()->decExpInstsIssued(); +} + if (((mp->isMemSync() && !mp->isEndOfKernel()) | | !mp->isMemSync())) { /** * if we are not in out-of-order data delivery mode diff --git a/src/gpu-compute/schedule_stage.cc b/src/gpu-compute/schedule_stage.cc index 851cca8..54e9313 100644 --- 
a/src/gpu-compute/schedule_stage.cc +++ b/src/gpu-compute/schedule_stage.cc @@ -144,6 +144,9 @@ wf->incLGKMInstsIssued(); } } +if (gpu_dyn_inst->isStore() && gpu_dyn_inst->isGlobalSeg()) { +wf->incExpInstsIssued(); +} } } -- To view, visit https://gem5-review.googlesource.com/c/public/gem5/+/37555 To unsubscribe, or for help writing mail filters, visit https://gem5-review.googlesource.com/settings Gerrit-Project: public/gem5 Gerrit-Branch: develop Gerrit-Change-Id: Idd2b59511bc086cf316217da27b7a228272b0b0f Gerrit-Change-Number: 37555 Gerrit-PatchSet: 2 Gerrit-Owner: Kyle Roarty Gerrit-Reviewer: Alexandru Duțu Gerrit-Reviewer: Kyle Roarty Gerrit-Reviewer: Matt Sinclair Gerrit-Reviewer: Matthew Poremba Gerrit-Reviewer: kokoro Gerrit-CC: Anthony Gutierrez Gerrit-MessageType: merged ___ gem5-dev mailing list -- gem5-dev@gem5.org To unsubscribe send an email to gem5-dev-le...@gem5.org %(web_page_url)slistinfo%(cgiext)s/%(_internal_name)s
[gem5-dev] Change in gem5/gem5[develop]: gpu-compute: Use dict.get syntax for accessing buildEnv keys
Kyle Roarty has uploaded this change for review. ( https://gem5-review.googlesource.com/c/public/gem5/+/38135 ) Change subject: gpu-compute: Use dict.get syntax for accessing buildEnv keys .. gpu-compute: Use dict.get syntax for accessing buildEnv keys 37775 removed SmartDict, which is the type buildEnv used to be. Because of that change, doing buildEnv[key] with a key not in the dict raises KeyError instead of returning False. By using buildEnv.get(key, False), we are able to return False when the key isn't in the dict. Change-Id: I4aae29b95b082efb2b021f21d608f9cd1c196379 --- M src/gpu-compute/X86GPUTLB.py 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/gpu-compute/X86GPUTLB.py b/src/gpu-compute/X86GPUTLB.py index fee9b9a..7e5d932 100644 --- a/src/gpu-compute/X86GPUTLB.py +++ b/src/gpu-compute/X86GPUTLB.py @@ -36,7 +36,7 @@ from m5.objects.ClockedObject import ClockedObject from m5.SimObject import SimObject -if buildEnv['FULL_SYSTEM']: +if buildEnv.get('FULL_SYSTEM', False): class X86PagetableWalker(SimObject): type = 'X86PagetableWalker' cxx_class = 'X86ISA::Walker' @@ -50,7 +50,7 @@ size = Param.Int(64, "TLB size (number of entries)") assoc = Param.Int(64, "TLB associativity") -if buildEnv['FULL_SYSTEM']: +if buildEnv.get('FULL_SYSTEM', False): walker = Param.X86PagetableWalker(X86PagetableWalker(), "page table walker") -- To view, visit https://gem5-review.googlesource.com/c/public/gem5/+/38135 To unsubscribe, or for help writing mail filters, visit https://gem5-review.googlesource.com/settings Gerrit-Project: public/gem5 Gerrit-Branch: develop Gerrit-Change-Id: I4aae29b95b082efb2b021f21d608f9cd1c196379 Gerrit-Change-Number: 38135 Gerrit-PatchSet: 1 Gerrit-Owner: Kyle Roarty Gerrit-MessageType: newchange ___ gem5-dev mailing list -- gem5-dev@gem5.org To unsubscribe send an email to gem5-dev-le...@gem5.org %(web_page_url)slistinfo%(cgiext)s/%(_internal_name)s
[gem5-dev] Change in gem5/gem5[develop]: util: Update ROCm to 1.6.4 in gcn Dockerfile, install HIP by .deb
Kyle Roarty has uploaded this change for review. ( https://gem5-review.googlesource.com/c/public/gem5/+/37675 ) Change subject: util: Update ROCm to 1.6.4 in gcn Dockerfile, install HIP by .deb .. util: Update ROCm to 1.6.4 in gcn Dockerfile, install HIP by .deb Previously, we were using ROCm 1.6.2 as there were issues with some of the machine learning applications that weren't present on 1.6.2. However, after re-running them we've found that they, and all other applications previously tested, run to completion. Additionally, there have been patches to enable BLIT kernels which made it so we no longer need to build HIP and MIOpen differently for APU and DGPU code. This allows us to install HIP directly from the .deb packages instead of from source. Installing from the .deb packages also avoid the hipDeviceSynchronize() bug. Finally, this makes it so most GPU programs can be run as-is without modifications to remove hipMalloc/hipMemcpy calls as was done previously. Change-Id: Ic61b09ed200b19f759d891487cde874abd607537 --- M util/dockerfiles/gcn-gpu/Dockerfile 1 file changed, 10 insertions(+), 14 deletions(-) diff --git a/util/dockerfiles/gcn-gpu/Dockerfile b/util/dockerfiles/gcn-gpu/Dockerfile index e13367f..096435c 100644 --- a/util/dockerfiles/gcn-gpu/Dockerfile +++ b/util/dockerfiles/gcn-gpu/Dockerfile @@ -54,20 +54,21 @@ ARG gem5_dist=http://dist.gem5.org/dist/develop # Install ROCm 1.6 binaries -RUN wget -qO- ${gem5_dist}/apt_1.6.2.tar.bz2 \ +RUN wget -qO- repo.radeon.com/rocm/archive/apt_1.6.4.tar.bz2 \ | tar -xjv \ -&& cd apt_1.6.2/pool/main/ \ +&& cd apt_1.6.4/pool/main/ \ && dpkg -i h/hsakmt-roct-dev/* \ && dpkg -i h/hsa-ext-rocr-dev/* \ && dpkg -i h/hsa-rocr-dev/* \ && dpkg -i r/rocm-utils/* \ && dpkg -i h/hcc/* \ && dpkg -i r/rocm-opencl/* \ -&& dpkg -i r/rocm-opencl-dev/* +&& dpkg -i r/rocm-opencl-dev/* \ +&& dpkg -i h/hip_base/* \ +&& dpkg -i h/hip_hcc/* # Get ROCm libraries we need to compile from source (and ROCm-profiler) -RUN git clone 
--single-branch https://github.com/ROCm-Developer-Tools/HIP/ && \ -git clone --single-branch https://github.com/ROCmSoftwarePlatform/hipBLAS/ && \ +RUN git clone --single-branch https://github.com/ROCmSoftwarePlatform/hipBLAS/ && \ git clone --single-branch https://github.com/ROCmSoftwarePlatform/rocBLAS/ && \ git clone --single-branch https://github.com/ROCmSoftwarePlatform/MIOpenGEMM/ && \ git clone --single-branch https://github.com/ROCmSoftwarePlatform/MIOpen/ && \ @@ -77,12 +78,10 @@ # Apply patches to various repos RUN mkdir -p /patch && cd /patch && \ wget ${gem5_dist}/rocm_patches/hipBLAS.patch && \ -wget ${gem5_dist}/rocm_patches/hip.patch_v2 && \ wget ${gem5_dist}/rocm_patches/miopen-conv.patch && \ wget ${gem5_dist}/rocm_patches/rocBLAS.patch -RUN git -C /HIP/ checkout 0e3d824e && git -C /HIP/ apply /patch/hip.patch_v2 && \ -git -C /hipBLAS/ checkout ee57787e && git -C /hipBLAS/ apply /patch/hipBLAS.patch && \ +RUN git -C /hipBLAS/ checkout ee57787e && git -C /hipBLAS/ apply /patch/hipBLAS.patch && \ git -C /rocBLAS/ checkout cbff4b4e && git -C /rocBLAS/ apply /patch/rocBLAS.patch && \ git -C /rocm-cmake/ checkout 12670acb && \ git -C /MIOpenGEMM/ checkout 9547fb9e && \ @@ -97,17 +96,13 @@ ENV HCC_AMDGPU_TARGET gfx801 # Create build dirs for machine learning ROCm installs -RUN mkdir -p /HIP/build && \ -mkdir -p /rocBLAS/build && \ +RUN mkdir -p /rocBLAS/build && \ mkdir -p /hipBLAS/build && \ mkdir -p /rocm-cmake/build && \ mkdir -p /MIOpenGEMM/build && \ mkdir -p /MIOpen/build # Do the builds, empty build dir to trim image size -WORKDIR /HIP/build -RUN cmake .. && make -j$(nproc) && make install && rm -rf * - WORKDIR /rocBLAS/build RUN CXX=/opt/rocm/bin/hcc cmake -DCMAKE_CXX_FLAGS="--amdgpu-target=gfx801" .. 
&& \ make -j$(nproc) && make install && rm -rf * @@ -144,7 +139,7 @@ -DMIOPEN_CACHE_DIR=/.cache/miopen \ -DMIOPEN_AMDGCN_ASSEMBLER_PATH=/opt/rocm/opencl/bin \ -DHALF_INCLUDE_DIR=/MIOpen/half-1.12.0/include \ --DCMAKE_CXX_FLAGS="-isystem /usr/include/x86_64-linux-gnu" .. && \ +-DCMAKE_CXX_FLAGS="-isystem /usr/include/x86_64-linux-gnu -DDGPU" .. && \ make -j$(nproc) && make install && rm -rf * # Re-set defaults @@ -166,4 +161,5 @@ # Always use python3 and create a link to config command for gem5 to find RUN ln -sf /usr/bin/python3 /usr/bin/python RUN ln -sf /usr/bin/python3.9-config /usr/bin/python3-config + WORKDIR / -- To view, visit https://gem5-review.googlesource.com/c/public/gem5/+/37675 To unsubscribe, or for help writing mail filters, visit https://gem5-review.googlesource.com/settings Gerrit-Project: public/gem5 Gerrit-Branch: develop Gerrit-Change-Id: Ic61b09ed200b19f759d891487cde874abd607537 Gerrit-Change-Number: 37675 Gerrit-Pa
[gem5-dev] Change in gem5/gem5[develop]: arch-gcn3: Implement flat_load_sbyte instruction
Kyle Roarty has submitted this change. ( https://gem5-review.googlesource.com/c/public/gem5/+/37476 ) Change subject: arch-gcn3: Implement flat_load_sbyte instruction .. arch-gcn3: Implement flat_load_sbyte instruction Change-Id: I3aa7547a393b9ecb4b3d4d107394c54d690a0ac2 Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/37476 Reviewed-by: Matt Sinclair Reviewed-by: Matthew Poremba Maintainer: Matt Sinclair Tested-by: kokoro --- M src/arch/gcn3/insts/instructions.cc 1 file changed, 45 insertions(+), 1 deletion(-) Approvals: Matthew Poremba: Looks good to me, approved Matt Sinclair: Looks good to me, but someone else must approve; Looks good to me, approved kokoro: Regressions pass diff --git a/src/arch/gcn3/insts/instructions.cc b/src/arch/gcn3/insts/instructions.cc index 64f0c36..93fc142 100644 --- a/src/arch/gcn3/insts/instructions.cc +++ b/src/arch/gcn3/insts/instructions.cc @@ -39497,17 +39497,61 @@ void Inst_FLAT__FLAT_LOAD_SBYTE::execute(GPUDynInstPtr gpuDynInst) { -panicUnimplemented(); +Wavefront *wf = gpuDynInst->wavefront(); + +if (wf->execMask().none()) { +wf->decVMemInstsIssued(); +wf->decLGKMInstsIssued(); +wf->rdGmReqsInPipe--; +wf->rdLmReqsInPipe--; +gpuDynInst->exec_mask = wf->execMask(); +wf->computeUnit->vrf[wf->simdId]-> +scheduleWriteOperandsFromLoad(wf, gpuDynInst); +return; +} + +gpuDynInst->execUnitId = wf->execUnitId; +gpuDynInst->exec_mask = gpuDynInst->wavefront()->execMask(); +gpuDynInst->latency.init(gpuDynInst->computeUnit()); +gpuDynInst->latency.set(gpuDynInst->computeUnit()->clockPeriod()); + +ConstVecOperandU64 addr(gpuDynInst, extData.ADDR); + +addr.read(); + +calcAddr(gpuDynInst, addr); + +if (gpuDynInst->executedAs() == Enums::SC_GLOBAL) { +gpuDynInst->computeUnit()->globalMemoryPipe +.issueRequest(gpuDynInst); +wf->rdGmReqsInPipe--; +wf->outstandingReqsRdGm++; +} else { +fatal("Non global flat instructions not implemented yet.\n"); +} + +gpuDynInst->wavefront()->outstandingReqs++; 
+gpuDynInst->wavefront()->validateRequestCounters(); } void Inst_FLAT__FLAT_LOAD_SBYTE::initiateAcc(GPUDynInstPtr gpuDynInst) { +initMemRead(gpuDynInst); } // initiateAcc void Inst_FLAT__FLAT_LOAD_SBYTE::completeAcc(GPUDynInstPtr gpuDynInst) { +VecOperandI32 vdst(gpuDynInst, extData.VDST); + +for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { +if (gpuDynInst->exec_mask[lane]) { +vdst[lane] = (VecElemI32)((reinterpret_cast( +gpuDynInst->d_data))[lane]); +} +} +vdst.write(); } Inst_FLAT__FLAT_LOAD_USHORT::Inst_FLAT__FLAT_LOAD_USHORT(InFmt_FLAT *iFmt) -- To view, visit https://gem5-review.googlesource.com/c/public/gem5/+/37476 To unsubscribe, or for help writing mail filters, visit https://gem5-review.googlesource.com/settings Gerrit-Project: public/gem5 Gerrit-Branch: develop Gerrit-Change-Id: I3aa7547a393b9ecb4b3d4d107394c54d690a0ac2 Gerrit-Change-Number: 37476 Gerrit-PatchSet: 2 Gerrit-Owner: Kyle Roarty Gerrit-Reviewer: Alexandru Duțu Gerrit-Reviewer: Kyle Roarty Gerrit-Reviewer: Matt Sinclair Gerrit-Reviewer: Matthew Poremba Gerrit-Reviewer: kokoro Gerrit-CC: Anthony Gutierrez Gerrit-MessageType: merged ___ gem5-dev mailing list -- gem5-dev@gem5.org To unsubscribe send an email to gem5-dev-le...@gem5.org %(web_page_url)slistinfo%(cgiext)s/%(_internal_name)s
[gem5-dev] Change in gem5/gem5[develop]: arch-gcn3: Explicitly sign-extend simm16
Kyle Roarty has submitted this change. ( https://gem5-review.googlesource.com/c/public/gem5/+/37495 ) Change subject: arch-gcn3: Explicitly sign-extend simm16 .. arch-gcn3: Explicitly sign-extend simm16 In some instructions, simm16 needs to be sign extended. Previous code simply casted the simm16 to a 32-bit or 64-bit datatype, however this didn't actually sign-extend the value. This patch explicitly calls sext<16> on simm16 whenever it's supposed to be sign-extended. Change-Id: I32f02e51fbab220d1a73dc7e68c7410937db21c7 Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/37495 Reviewed-by: Matt Sinclair Reviewed-by: Matthew Poremba Maintainer: Matt Sinclair Tested-by: kokoro --- M src/arch/gcn3/insts/instructions.cc 1 file changed, 17 insertions(+), 17 deletions(-) Approvals: Matthew Poremba: Looks good to me, approved Matt Sinclair: Looks good to me, but someone else must approve; Looks good to me, approved kokoro: Regressions pass diff --git a/src/arch/gcn3/insts/instructions.cc b/src/arch/gcn3/insts/instructions.cc index 93fc142..e8951a9 100644 --- a/src/arch/gcn3/insts/instructions.cc +++ b/src/arch/gcn3/insts/instructions.cc @@ -1369,7 +1369,7 @@ void Inst_SOPK__S_MOVK_I32::execute(GPUDynInstPtr gpuDynInst) { -ScalarRegI32 simm16 = (ScalarRegI32)instData.SIMM16; +ScalarRegI32 simm16 = (ScalarRegI32)sext<16>(instData.SIMM16); ScalarOperandI32 sdst(gpuDynInst, instData.SDST); sdst = simm16; @@ -1393,7 +1393,7 @@ void Inst_SOPK__S_CMOVK_I32::execute(GPUDynInstPtr gpuDynInst) { -ScalarRegI32 simm16 = (ScalarRegI32)instData.SIMM16; +ScalarRegI32 simm16 = (ScalarRegI32)sext<16>(instData.SIMM16); ScalarOperandI32 sdst(gpuDynInst, instData.SDST); ConstScalarOperandU32 scc(gpuDynInst, REG_SCC); @@ -1419,7 +1419,7 @@ void Inst_SOPK__S_CMPK_EQ_I32::execute(GPUDynInstPtr gpuDynInst) { -ScalarRegI32 simm16 = (ScalarRegI32)instData.SIMM16; +ScalarRegI32 simm16 = (ScalarRegI32)sext<16>(instData.SIMM16); ConstScalarOperandI32 src(gpuDynInst, instData.SDST); 
ScalarOperandU32 scc(gpuDynInst, REG_SCC); @@ -1444,7 +1444,7 @@ void Inst_SOPK__S_CMPK_LG_I32::execute(GPUDynInstPtr gpuDynInst) { -ScalarRegI32 simm16 = (ScalarRegI32)instData.SIMM16; +ScalarRegI32 simm16 = (ScalarRegI32)sext<16>(instData.SIMM16); ConstScalarOperandI32 src(gpuDynInst, instData.SDST); ScalarOperandU32 scc(gpuDynInst, REG_SCC); @@ -1469,7 +1469,7 @@ void Inst_SOPK__S_CMPK_GT_I32::execute(GPUDynInstPtr gpuDynInst) { -ScalarRegI32 simm16 = (ScalarRegI32)instData.SIMM16; +ScalarRegI32 simm16 = (ScalarRegI32)sext<16>(instData.SIMM16); ConstScalarOperandI32 src(gpuDynInst, instData.SDST); ScalarOperandU32 scc(gpuDynInst, REG_SCC); @@ -1494,7 +1494,7 @@ void Inst_SOPK__S_CMPK_GE_I32::execute(GPUDynInstPtr gpuDynInst) { -ScalarRegI32 simm16 = (ScalarRegI32)instData.SIMM16; +ScalarRegI32 simm16 = (ScalarRegI32)sext<16>(instData.SIMM16); ConstScalarOperandI32 src(gpuDynInst, instData.SDST); ScalarOperandU32 scc(gpuDynInst, REG_SCC); @@ -1519,7 +1519,7 @@ void Inst_SOPK__S_CMPK_LT_I32::execute(GPUDynInstPtr gpuDynInst) { -ScalarRegI32 simm16 = (ScalarRegI32)instData.SIMM16; +ScalarRegI32 simm16 = (ScalarRegI32)sext<16>(instData.SIMM16); ConstScalarOperandI32 src(gpuDynInst, instData.SDST); ScalarOperandU32 scc(gpuDynInst, REG_SCC); @@ -1544,7 +1544,7 @@ void Inst_SOPK__S_CMPK_LE_I32::execute(GPUDynInstPtr gpuDynInst) { -ScalarRegI32 simm16 = (ScalarRegI32)instData.SIMM16; +ScalarRegI32 simm16 = (ScalarRegI32)sext<16>(instData.SIMM16); ConstScalarOperandI32 src(gpuDynInst, instData.SDST); ScalarOperandU32 scc(gpuDynInst, REG_SCC); @@ -1727,7 +1727,7 @@ src.read(); -sdst = src.rawData() + (ScalarRegI32)simm16; +sdst = src.rawData() + (ScalarRegI32)sext<16>(simm16); scc = (bits(src.rawData(), 31) == bits(simm16, 15) && bits(src.rawData(), 31) != bits(sdst.rawData(), 31)) ? 
1 : 0; @@ -1754,7 +1754,7 @@ sdst.read(); -sdst = sdst.rawData() * (ScalarRegI32)simm16; +sdst = sdst.rawData() * (ScalarRegI32)sext<16>(simm16); sdst.write(); } @@ -3902,7 +3902,7 @@ Addr pc = wf->pc(); ScalarRegI16 simm16 = instData.SIMM16; -pc = pc + ((ScalarRegI64)simm16 * 4LL) + 4LL; +pc = pc + ((ScalarRegI64)sext<16>(simm16 * 4LL)) + 4LL; wf->pc(pc); } @@ -3948,7 +3948,7 @@ scc.read(); if (!scc.rawData()) { -pc = pc + ((ScalarRegI64)simm16 * 4LL) + 4LL; +pc = pc + ((ScalarRegI64)sext<16>(simm16 * 4LL)
[gem5-dev] Change in gem5/gem5[develop]: arch-gcn3: Implement s_setreg_imm32_b32 instruction
Kyle Roarty has submitted this change. ( https://gem5-review.googlesource.com/c/public/gem5/+/37475 ) Change subject: arch-gcn3: Implement s_setreg_imm32_b32 instruction .. arch-gcn3: Implement s_setreg_imm32_b32 instruction Change-Id: I5383243403156dc17d4997106085a62fb0483fec Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/37475 Reviewed-by: Matt Sinclair Reviewed-by: Matthew Poremba Maintainer: Matt Sinclair Tested-by: kokoro --- M src/arch/gcn3/insts/instructions.cc 1 file changed, 23 insertions(+), 0 deletions(-) Approvals: Matthew Poremba: Looks good to me, approved Matt Sinclair: Looks good to me, but someone else must approve; Looks good to me, approved kokoro: Regressions pass diff --git a/src/arch/gcn3/insts/instructions.cc b/src/arch/gcn3/insts/instructions.cc index b501167..64f0c36 100644 --- a/src/arch/gcn3/insts/instructions.cc +++ b/src/arch/gcn3/insts/instructions.cc @@ -1847,6 +1847,7 @@ InFmt_SOPK *iFmt) : Inst_SOPK(iFmt, "s_setreg_imm32_b32") { +setFlag(ALU); } // Inst_SOPK__S_SETREG_IMM32_B32 Inst_SOPK__S_SETREG_IMM32_B32::~Inst_SOPK__S_SETREG_IMM32_B32() @@ -1860,6 +1861,28 @@ void Inst_SOPK__S_SETREG_IMM32_B32::execute(GPUDynInstPtr gpuDynInst) { +ScalarRegI16 simm16 = instData.SIMM16; +ScalarRegU32 hwregId = simm16 & 0x3f; +ScalarRegU32 offset = (simm16 >> 6) & 31; +ScalarRegU32 size = ((simm16 >> 11) & 31) + 1; + +ScalarOperandU32 hwreg(gpuDynInst, hwregId); +ScalarRegU32 simm32 = extData.imm_u32; +hwreg.read(); + +ScalarRegU32 mask = (((1U << size) - 1U) << offset); +hwreg = ((hwreg.rawData() & ~mask) +| ((simm32 << offset) & mask)); +hwreg.write(); + +if (hwregId==1 && size==2 +&& (offset==4 || offset==0)) { +warn_once("Be cautious that s_setreg_imm32_b32 has no real effect " +"on FP modes: %s\n", gpuDynInst->disassemble()); +return; +} + +// panic if not changing MODE of floating-point numbers panicUnimplemented(); } -- To view, visit https://gem5-review.googlesource.com/c/public/gem5/+/37475 To unsubscribe, or for help 
writing mail filters, visit https://gem5-review.googlesource.com/settings Gerrit-Project: public/gem5 Gerrit-Branch: develop Gerrit-Change-Id: I5383243403156dc17d4997106085a62fb0483fec Gerrit-Change-Number: 37475 Gerrit-PatchSet: 2 Gerrit-Owner: Kyle Roarty Gerrit-Reviewer: Alexandru Duțu Gerrit-Reviewer: Kyle Roarty Gerrit-Reviewer: Matt Sinclair Gerrit-Reviewer: Matthew Poremba Gerrit-Reviewer: kokoro Gerrit-CC: Anthony Gutierrez Gerrit-MessageType: merged ___ gem5-dev mailing list -- gem5-dev@gem5.org To unsubscribe send an email to gem5-dev-le...@gem5.org %(web_page_url)slistinfo%(cgiext)s/%(_internal_name)s
[gem5-dev] Change in gem5/gem5[develop]: mem-ruby: Fix deadlock in VIPERCoalescer
Kyle Roarty has submitted this change. ( https://gem5-review.googlesource.com/c/public/gem5/+/37477 ) Change subject: mem-ruby: Fix deadlock in VIPERCoalescer .. mem-ruby: Fix deadlock in VIPERCoalescer Certain instructions (some atomics and buffer_wbinvl1_vol) deadlock in the coalescer, where sendTimingReq fails, fails a retry, and then never retries again. This fix sets m_cache_inv_pkt to null before calling completeHitCallback(), as that allows the failed packets to be retried again. Change-Id: I4a51c741360f385f8b4c3f2a31a9410f18e095d9 Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/37477 Reviewed-by: Matt Sinclair Reviewed-by: Matthew Poremba Maintainer: Matt Sinclair Tested-by: kokoro --- M src/mem/ruby/system/VIPERCoalescer.cc 1 file changed, 1 insertion(+), 1 deletion(-) Approvals: Matthew Poremba: Looks good to me, approved Matt Sinclair: Looks good to me, but someone else must approve; Looks good to me, approved kokoro: Regressions pass diff --git a/src/mem/ruby/system/VIPERCoalescer.cc b/src/mem/ruby/system/VIPERCoalescer.cc index 111f9f2..1aea4a3 100644 --- a/src/mem/ruby/system/VIPERCoalescer.cc +++ b/src/mem/ruby/system/VIPERCoalescer.cc @@ -269,8 +269,8 @@ if (m_num_pending_invs == 0) { std::vector pkt_list { m_cache_inv_pkt }; -completeHitCallback(pkt_list); m_cache_inv_pkt = nullptr; +completeHitCallback(pkt_list); } } -- To view, visit https://gem5-review.googlesource.com/c/public/gem5/+/37477 To unsubscribe, or for help writing mail filters, visit https://gem5-review.googlesource.com/settings Gerrit-Project: public/gem5 Gerrit-Branch: develop Gerrit-Change-Id: I4a51c741360f385f8b4c3f2a31a9410f18e095d9 Gerrit-Change-Number: 37477 Gerrit-PatchSet: 2 Gerrit-Owner: Kyle Roarty Gerrit-Reviewer: Alexandru Duțu Gerrit-Reviewer: Jason Lowe-Power Gerrit-Reviewer: Kyle Roarty Gerrit-Reviewer: Matt Sinclair Gerrit-Reviewer: Matthew Poremba Gerrit-Reviewer: kokoro Gerrit-CC: Bradford Beckmann Gerrit-MessageType: merged ___ gem5-dev mailing 
list -- gem5-dev@gem5.org To unsubscribe send an email to gem5-dev-le...@gem5.org %(web_page_url)slistinfo%(cgiext)s/%(_internal_name)s
[gem5-dev] Change in gem5/gem5[develop]: gpu-compute: Add exp_cnt tracking for buffer store instructions
Kyle Roarty has uploaded this change for review. ( https://gem5-review.googlesource.com/c/public/gem5/+/37555 ) Change subject: gpu-compute: Add exp_cnt tracking for buffer store instructions .. gpu-compute: Add exp_cnt tracking for buffer store instructions exp_cnt (expInstsIssued in the code) is used in the waitcnt instruction to track that data has been read out of VGPRs in previous global memory instructions, making it safe to overwrite the VGPRs used in said global memory instructions. Previously, exp_cnt wasn't being tracked at all, which led to the waitcnt finishing immediately, leading to the memory instruction's VGPRs getting overwritten by subsequent instructions, causing errors. This patch makes it so waitcnts waiting on exp_cnt will wait for MUBUF buffer store instructions to read their VGPRs before completing Change-Id: Idd2b59511bc086cf316217da27b7a228272b0b0f --- M src/gpu-compute/global_memory_pipeline.cc M src/gpu-compute/schedule_stage.cc 2 files changed, 7 insertions(+), 0 deletions(-) diff --git a/src/gpu-compute/global_memory_pipeline.cc b/src/gpu-compute/global_memory_pipeline.cc index 01f986c..bcd93f8 100644 --- a/src/gpu-compute/global_memory_pipeline.cc +++ b/src/gpu-compute/global_memory_pipeline.cc @@ -193,6 +193,10 @@ mp->disassemble(), mp->seqNum()); mp->initiateAcc(mp); +if (mp->isStore() && mp->isGlobalSeg()) { +mp->wavefront()->decExpInstsIssued(); +} + if (((mp->isMemSync() && !mp->isEndOfKernel()) | | !mp->isMemSync())) { /** * if we are not in out-of-order data delivery mode diff --git a/src/gpu-compute/schedule_stage.cc b/src/gpu-compute/schedule_stage.cc index 851cca8..54e9313 100644 --- a/src/gpu-compute/schedule_stage.cc +++ b/src/gpu-compute/schedule_stage.cc @@ -144,6 +144,9 @@ wf->incLGKMInstsIssued(); } } +if (gpu_dyn_inst->isStore() && gpu_dyn_inst->isGlobalSeg()) { +wf->incExpInstsIssued(); +} } } -- To view, visit https://gem5-review.googlesource.com/c/public/gem5/+/37555 To unsubscribe, or for help writing mail 
filters, visit https://gem5-review.googlesource.com/settings Gerrit-Project: public/gem5 Gerrit-Branch: develop Gerrit-Change-Id: Idd2b59511bc086cf316217da27b7a228272b0b0f Gerrit-Change-Number: 37555 Gerrit-PatchSet: 1 Gerrit-Owner: Kyle Roarty Gerrit-MessageType: newchange ___ gem5-dev mailing list -- gem5-dev@gem5.org To unsubscribe send an email to gem5-dev-le...@gem5.org %(web_page_url)slistinfo%(cgiext)s/%(_internal_name)s
[gem5-dev] Change in gem5/gem5[develop]: arch-gcn3: Explicitly sign-extend simm16
Kyle Roarty has uploaded this change for review. ( https://gem5-review.googlesource.com/c/public/gem5/+/37495 ) Change subject: arch-gcn3: Explicitly sign-extend simm16 .. arch-gcn3: Explicitly sign-extend simm16 In some instructions, simm16 needs to be sign extended. Previous code simply casted the simm16 to a 32-bit or 64-bit datatype, however this didn't actually sign-extend the value. This patch explicitly calls sext<16> on simm16 whenever it's supposed to be sign-extended. Change-Id: I32f02e51fbab220d1a73dc7e68c7410937db21c7 --- M src/arch/gcn3/insts/instructions.cc 1 file changed, 17 insertions(+), 17 deletions(-) diff --git a/src/arch/gcn3/insts/instructions.cc b/src/arch/gcn3/insts/instructions.cc index 93fc142..e8951a9 100644 --- a/src/arch/gcn3/insts/instructions.cc +++ b/src/arch/gcn3/insts/instructions.cc @@ -1369,7 +1369,7 @@ void Inst_SOPK__S_MOVK_I32::execute(GPUDynInstPtr gpuDynInst) { -ScalarRegI32 simm16 = (ScalarRegI32)instData.SIMM16; +ScalarRegI32 simm16 = (ScalarRegI32)sext<16>(instData.SIMM16); ScalarOperandI32 sdst(gpuDynInst, instData.SDST); sdst = simm16; @@ -1393,7 +1393,7 @@ void Inst_SOPK__S_CMOVK_I32::execute(GPUDynInstPtr gpuDynInst) { -ScalarRegI32 simm16 = (ScalarRegI32)instData.SIMM16; +ScalarRegI32 simm16 = (ScalarRegI32)sext<16>(instData.SIMM16); ScalarOperandI32 sdst(gpuDynInst, instData.SDST); ConstScalarOperandU32 scc(gpuDynInst, REG_SCC); @@ -1419,7 +1419,7 @@ void Inst_SOPK__S_CMPK_EQ_I32::execute(GPUDynInstPtr gpuDynInst) { -ScalarRegI32 simm16 = (ScalarRegI32)instData.SIMM16; +ScalarRegI32 simm16 = (ScalarRegI32)sext<16>(instData.SIMM16); ConstScalarOperandI32 src(gpuDynInst, instData.SDST); ScalarOperandU32 scc(gpuDynInst, REG_SCC); @@ -1444,7 +1444,7 @@ void Inst_SOPK__S_CMPK_LG_I32::execute(GPUDynInstPtr gpuDynInst) { -ScalarRegI32 simm16 = (ScalarRegI32)instData.SIMM16; +ScalarRegI32 simm16 = (ScalarRegI32)sext<16>(instData.SIMM16); ConstScalarOperandI32 src(gpuDynInst, instData.SDST); ScalarOperandU32 scc(gpuDynInst, 
REG_SCC); @@ -1469,7 +1469,7 @@ void Inst_SOPK__S_CMPK_GT_I32::execute(GPUDynInstPtr gpuDynInst) { -ScalarRegI32 simm16 = (ScalarRegI32)instData.SIMM16; +ScalarRegI32 simm16 = (ScalarRegI32)sext<16>(instData.SIMM16); ConstScalarOperandI32 src(gpuDynInst, instData.SDST); ScalarOperandU32 scc(gpuDynInst, REG_SCC); @@ -1494,7 +1494,7 @@ void Inst_SOPK__S_CMPK_GE_I32::execute(GPUDynInstPtr gpuDynInst) { -ScalarRegI32 simm16 = (ScalarRegI32)instData.SIMM16; +ScalarRegI32 simm16 = (ScalarRegI32)sext<16>(instData.SIMM16); ConstScalarOperandI32 src(gpuDynInst, instData.SDST); ScalarOperandU32 scc(gpuDynInst, REG_SCC); @@ -1519,7 +1519,7 @@ void Inst_SOPK__S_CMPK_LT_I32::execute(GPUDynInstPtr gpuDynInst) { -ScalarRegI32 simm16 = (ScalarRegI32)instData.SIMM16; +ScalarRegI32 simm16 = (ScalarRegI32)sext<16>(instData.SIMM16); ConstScalarOperandI32 src(gpuDynInst, instData.SDST); ScalarOperandU32 scc(gpuDynInst, REG_SCC); @@ -1544,7 +1544,7 @@ void Inst_SOPK__S_CMPK_LE_I32::execute(GPUDynInstPtr gpuDynInst) { -ScalarRegI32 simm16 = (ScalarRegI32)instData.SIMM16; +ScalarRegI32 simm16 = (ScalarRegI32)sext<16>(instData.SIMM16); ConstScalarOperandI32 src(gpuDynInst, instData.SDST); ScalarOperandU32 scc(gpuDynInst, REG_SCC); @@ -1727,7 +1727,7 @@ src.read(); -sdst = src.rawData() + (ScalarRegI32)simm16; +sdst = src.rawData() + (ScalarRegI32)sext<16>(simm16); scc = (bits(src.rawData(), 31) == bits(simm16, 15) && bits(src.rawData(), 31) != bits(sdst.rawData(), 31)) ? 
1 : 0; @@ -1754,7 +1754,7 @@ sdst.read(); -sdst = sdst.rawData() * (ScalarRegI32)simm16; +sdst = sdst.rawData() * (ScalarRegI32)sext<16>(simm16); sdst.write(); } @@ -3902,7 +3902,7 @@ Addr pc = wf->pc(); ScalarRegI16 simm16 = instData.SIMM16; -pc = pc + ((ScalarRegI64)simm16 * 4LL) + 4LL; +pc = pc + ((ScalarRegI64)sext<16>(simm16 * 4LL)) + 4LL; wf->pc(pc); } @@ -3948,7 +3948,7 @@ scc.read(); if (!scc.rawData()) { -pc = pc + ((ScalarRegI64)simm16 * 4LL) + 4LL; +pc = pc + ((ScalarRegI64)sext<16>(simm16 * 4LL)) + 4LL; } wf->pc(pc); @@ -3977,7 +3977,7 @@ scc.read(); if (scc.rawData()) { -pc = pc + ((ScalarRegI64)simm16 * 4LL) + 4LL; +pc = pc + ((ScalarRegI64)sext<16>(simm16 * 4LL)) + 4LL; } wf->pc(pc); @@ -4007,7 +4007,7 @@ vcc.read(); if (!vcc.rawData
[gem5-dev] Change in gem5/gem5[develop]: mem-ruby: Fix deadlock in VIPERCoalescer
Kyle Roarty has uploaded this change for review. ( https://gem5-review.googlesource.com/c/public/gem5/+/37477 ) Change subject: mem-ruby: Fix deadlock in VIPERCoalescer .. mem-ruby: Fix deadlock in VIPERCoalescer Certain instructions (some atomics and buffer_wbinvl1_vol) deadlock in the coalescer, where sendTimingReq fails, fails a retry, and then never retries again. This fix sets m_cache_inv_pkt to null before calling completeHitCallback(), as that allows the failed packets to be retried again. Change-Id: I4a51c741360f385f8b4c3f2a31a9410f18e095d9 --- M src/mem/ruby/system/VIPERCoalescer.cc 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/mem/ruby/system/VIPERCoalescer.cc b/src/mem/ruby/system/VIPERCoalescer.cc index 111f9f2..1aea4a3 100644 --- a/src/mem/ruby/system/VIPERCoalescer.cc +++ b/src/mem/ruby/system/VIPERCoalescer.cc @@ -269,8 +269,8 @@ if (m_num_pending_invs == 0) { std::vector pkt_list { m_cache_inv_pkt }; -completeHitCallback(pkt_list); m_cache_inv_pkt = nullptr; +completeHitCallback(pkt_list); } } -- To view, visit https://gem5-review.googlesource.com/c/public/gem5/+/37477 To unsubscribe, or for help writing mail filters, visit https://gem5-review.googlesource.com/settings Gerrit-Project: public/gem5 Gerrit-Branch: develop Gerrit-Change-Id: I4a51c741360f385f8b4c3f2a31a9410f18e095d9 Gerrit-Change-Number: 37477 Gerrit-PatchSet: 1 Gerrit-Owner: Kyle Roarty Gerrit-MessageType: newchange ___ gem5-dev mailing list -- gem5-dev@gem5.org To unsubscribe send an email to gem5-dev-le...@gem5.org %(web_page_url)slistinfo%(cgiext)s/%(_internal_name)s
[gem5-dev] Change in gem5/gem5[develop]: arch-gcn3: Implement flat_load_sbyte instruction
Kyle Roarty has uploaded this change for review. ( https://gem5-review.googlesource.com/c/public/gem5/+/37476 ) Change subject: arch-gcn3: Implement flat_load_sbyte instruction .. arch-gcn3: Implement flat_load_sbyte instruction Change-Id: I3aa7547a393b9ecb4b3d4d107394c54d690a0ac2 --- M src/arch/gcn3/insts/instructions.cc 1 file changed, 45 insertions(+), 1 deletion(-) diff --git a/src/arch/gcn3/insts/instructions.cc b/src/arch/gcn3/insts/instructions.cc index 64f0c36..93fc142 100644 --- a/src/arch/gcn3/insts/instructions.cc +++ b/src/arch/gcn3/insts/instructions.cc @@ -39497,17 +39497,61 @@ void Inst_FLAT__FLAT_LOAD_SBYTE::execute(GPUDynInstPtr gpuDynInst) { -panicUnimplemented(); +Wavefront *wf = gpuDynInst->wavefront(); + +if (wf->execMask().none()) { +wf->decVMemInstsIssued(); +wf->decLGKMInstsIssued(); +wf->rdGmReqsInPipe--; +wf->rdLmReqsInPipe--; +gpuDynInst->exec_mask = wf->execMask(); +wf->computeUnit->vrf[wf->simdId]-> +scheduleWriteOperandsFromLoad(wf, gpuDynInst); +return; +} + +gpuDynInst->execUnitId = wf->execUnitId; +gpuDynInst->exec_mask = gpuDynInst->wavefront()->execMask(); +gpuDynInst->latency.init(gpuDynInst->computeUnit()); +gpuDynInst->latency.set(gpuDynInst->computeUnit()->clockPeriod()); + +ConstVecOperandU64 addr(gpuDynInst, extData.ADDR); + +addr.read(); + +calcAddr(gpuDynInst, addr); + +if (gpuDynInst->executedAs() == Enums::SC_GLOBAL) { +gpuDynInst->computeUnit()->globalMemoryPipe +.issueRequest(gpuDynInst); +wf->rdGmReqsInPipe--; +wf->outstandingReqsRdGm++; +} else { +fatal("Non global flat instructions not implemented yet.\n"); +} + +gpuDynInst->wavefront()->outstandingReqs++; +gpuDynInst->wavefront()->validateRequestCounters(); } void Inst_FLAT__FLAT_LOAD_SBYTE::initiateAcc(GPUDynInstPtr gpuDynInst) { +initMemRead(gpuDynInst); } // initiateAcc void Inst_FLAT__FLAT_LOAD_SBYTE::completeAcc(GPUDynInstPtr gpuDynInst) { +VecOperandI32 vdst(gpuDynInst, extData.VDST); + +for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { +if 
(gpuDynInst->exec_mask[lane]) { +vdst[lane] = (VecElemI32)((reinterpret_cast( +gpuDynInst->d_data))[lane]); +} +} +vdst.write(); } Inst_FLAT__FLAT_LOAD_USHORT::Inst_FLAT__FLAT_LOAD_USHORT(InFmt_FLAT *iFmt) -- To view, visit https://gem5-review.googlesource.com/c/public/gem5/+/37476 To unsubscribe, or for help writing mail filters, visit https://gem5-review.googlesource.com/settings Gerrit-Project: public/gem5 Gerrit-Branch: develop Gerrit-Change-Id: I3aa7547a393b9ecb4b3d4d107394c54d690a0ac2 Gerrit-Change-Number: 37476 Gerrit-PatchSet: 1 Gerrit-Owner: Kyle Roarty Gerrit-MessageType: newchange ___ gem5-dev mailing list -- gem5-dev@gem5.org To unsubscribe send an email to gem5-dev-le...@gem5.org %(web_page_url)slistinfo%(cgiext)s/%(_internal_name)s
[gem5-dev] Change in gem5/gem5[develop]: arch-gcn3: Implement s_setreg_imm32_b32 instruction
Kyle Roarty has uploaded this change for review. ( https://gem5-review.googlesource.com/c/public/gem5/+/37475 ) Change subject: arch-gcn3: Implement s_setreg_imm32_b32 instruction .. arch-gcn3: Implement s_setreg_imm32_b32 instruction Change-Id: I5383243403156dc17d4997106085a62fb0483fec --- M src/arch/gcn3/insts/instructions.cc 1 file changed, 23 insertions(+), 0 deletions(-) diff --git a/src/arch/gcn3/insts/instructions.cc b/src/arch/gcn3/insts/instructions.cc index b501167..64f0c36 100644 --- a/src/arch/gcn3/insts/instructions.cc +++ b/src/arch/gcn3/insts/instructions.cc @@ -1847,6 +1847,7 @@ InFmt_SOPK *iFmt) : Inst_SOPK(iFmt, "s_setreg_imm32_b32") { +setFlag(ALU); } // Inst_SOPK__S_SETREG_IMM32_B32 Inst_SOPK__S_SETREG_IMM32_B32::~Inst_SOPK__S_SETREG_IMM32_B32() @@ -1860,6 +1861,28 @@ void Inst_SOPK__S_SETREG_IMM32_B32::execute(GPUDynInstPtr gpuDynInst) { +ScalarRegI16 simm16 = instData.SIMM16; +ScalarRegU32 hwregId = simm16 & 0x3f; +ScalarRegU32 offset = (simm16 >> 6) & 31; +ScalarRegU32 size = ((simm16 >> 11) & 31) + 1; + +ScalarOperandU32 hwreg(gpuDynInst, hwregId); +ScalarRegU32 simm32 = extData.imm_u32; +hwreg.read(); + +ScalarRegU32 mask = (((1U << size) - 1U) << offset); +hwreg = ((hwreg.rawData() & ~mask) +| ((simm32 << offset) & mask)); +hwreg.write(); + +if (hwregId==1 && size==2 +&& (offset==4 || offset==0)) { +warn_once("Be cautious that s_setreg_imm32_b32 has no real effect " +"on FP modes: %s\n", gpuDynInst->disassemble()); +return; +} + +// panic if not changing MODE of floating-point numbers panicUnimplemented(); } -- To view, visit https://gem5-review.googlesource.com/c/public/gem5/+/37475 To unsubscribe, or for help writing mail filters, visit https://gem5-review.googlesource.com/settings Gerrit-Project: public/gem5 Gerrit-Branch: develop Gerrit-Change-Id: I5383243403156dc17d4997106085a62fb0483fec Gerrit-Change-Number: 37475 Gerrit-PatchSet: 1 Gerrit-Owner: Kyle Roarty Gerrit-MessageType: newchange ___ gem5-dev mailing list -- gem5-dev@gem5.org 
To unsubscribe send an email to gem5-dev-le...@gem5.org %(web_page_url)slistinfo%(cgiext)s/%(_internal_name)s
[gem5-dev] Change in gem5/gem5[develop]: arch-x86: include system syscall header in syscall table files
Kyle Roarty has submitted this change. ( https://gem5-review.googlesource.com/c/public/gem5/+/37195 ) Change subject: arch-x86: include system syscall header in syscall table files .. arch-x86: include system syscall header in syscall table files The getdents syscall is only implemented on hosts that define SYS_getdents, which is located in <sys/syscall.h>. That header was missed when splitting the syscall tables into their own files; this patch adds the header to the syscall table files. Change-Id: I28d54f6ea2874aa533c89ed7520561e19fe5e5f9 Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/37195 Reviewed-by: Matt Sinclair Reviewed-by: Matthew Poremba Maintainer: Matt Sinclair Maintainer: Gabe Black Tested-by: kokoro --- M src/arch/x86/linux/syscall_tbl32.cc M src/arch/x86/linux/syscall_tbl64.cc 2 files changed, 4 insertions(+), 0 deletions(-) Approvals: Matthew Poremba: Looks good to me, approved Matt Sinclair: Looks good to me, approved; Looks good to me, approved Gabe Black: Looks good to me, approved kokoro: Regressions pass diff --git a/src/arch/x86/linux/syscall_tbl32.cc b/src/arch/x86/linux/syscall_tbl32.cc index 855de88..50d0969 100644 --- a/src/arch/x86/linux/syscall_tbl32.cc +++ b/src/arch/x86/linux/syscall_tbl32.cc @@ -25,6 +25,8 @@ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ +#include <sys/syscall.h> + #include "arch/x86/linux/linux.hh" #include "arch/x86/linux/se_workload.hh" #include "arch/x86/linux/syscalls.hh" diff --git a/src/arch/x86/linux/syscall_tbl64.cc b/src/arch/x86/linux/syscall_tbl64.cc index 3516ea2..8630265 100644 --- a/src/arch/x86/linux/syscall_tbl64.cc +++ b/src/arch/x86/linux/syscall_tbl64.cc @@ -25,6 +25,8 @@ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
*/ +#include <sys/syscall.h> + #include "arch/x86/linux/linux.hh" #include "arch/x86/linux/se_workload.hh" #include "arch/x86/linux/syscalls.hh" -- To view, visit https://gem5-review.googlesource.com/c/public/gem5/+/37195 To unsubscribe, or for help writing mail filters, visit https://gem5-review.googlesource.com/settings Gerrit-Project: public/gem5 Gerrit-Branch: develop Gerrit-Change-Id: I28d54f6ea2874aa533c89ed7520561e19fe5e5f9 Gerrit-Change-Number: 37195 Gerrit-PatchSet: 2 Gerrit-Owner: Kyle Roarty Gerrit-Reviewer: Gabe Black Gerrit-Reviewer: Kyle Roarty Gerrit-Reviewer: Matt Sinclair Gerrit-Reviewer: Matthew Poremba Gerrit-Reviewer: kokoro Gerrit-CC: Daniel Gerzhoy Gerrit-MessageType: merged ___ gem5-dev mailing list -- gem5-dev@gem5.org To unsubscribe send an email to gem5-dev-le...@gem5.org %(web_page_url)slistinfo%(cgiext)s/%(_internal_name)s
[gem5-dev] Change in gem5/gem5[develop]: arch-x86: include system syscall header in syscall table files
Kyle Roarty has uploaded this change for review. ( https://gem5-review.googlesource.com/c/public/gem5/+/37195 ) Change subject: arch-x86: include system syscall header in syscall table files .. arch-x86: include system syscall header in syscall table files The getdents syscall is only implemented on hosts that define SYS_getdents, which is located in <sys/syscall.h>. That header was missed when splitting the syscall tables into their own files; this patch adds the header to the syscall table files. Change-Id: I28d54f6ea2874aa533c89ed7520561e19fe5e5f9 --- M src/arch/x86/linux/syscall_tbl32.cc M src/arch/x86/linux/syscall_tbl64.cc 2 files changed, 4 insertions(+), 0 deletions(-) diff --git a/src/arch/x86/linux/syscall_tbl32.cc b/src/arch/x86/linux/syscall_tbl32.cc index 855de88..50d0969 100644 --- a/src/arch/x86/linux/syscall_tbl32.cc +++ b/src/arch/x86/linux/syscall_tbl32.cc @@ -25,6 +25,8 @@ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ +#include <sys/syscall.h> + #include "arch/x86/linux/linux.hh" #include "arch/x86/linux/se_workload.hh" #include "arch/x86/linux/syscalls.hh" diff --git a/src/arch/x86/linux/syscall_tbl64.cc b/src/arch/x86/linux/syscall_tbl64.cc index 3516ea2..8630265 100644 --- a/src/arch/x86/linux/syscall_tbl64.cc +++ b/src/arch/x86/linux/syscall_tbl64.cc @@ -25,6 +25,8 @@ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
*/ +#include <sys/syscall.h> + #include "arch/x86/linux/linux.hh" #include "arch/x86/linux/se_workload.hh" #include "arch/x86/linux/syscalls.hh" -- To view, visit https://gem5-review.googlesource.com/c/public/gem5/+/37195 To unsubscribe, or for help writing mail filters, visit https://gem5-review.googlesource.com/settings Gerrit-Project: public/gem5 Gerrit-Branch: develop Gerrit-Change-Id: I28d54f6ea2874aa533c89ed7520561e19fe5e5f9 Gerrit-Change-Number: 37195 Gerrit-PatchSet: 1 Gerrit-Owner: Kyle Roarty Gerrit-MessageType: newchange ___ gem5-dev mailing list -- gem5-dev@gem5.org To unsubscribe send an email to gem5-dev-le...@gem5.org %(web_page_url)slistinfo%(cgiext)s/%(_internal_name)s
[gem5-dev] Change in gem5/gem5[develop]: util: Specify version of rocm-cmake in gcn3 Dockerfile
Kyle Roarty has submitted this change. ( https://gem5-review.googlesource.com/c/public/gem5/+/37155 ) Change subject: util: Specify version of rocm-cmake in gcn3 Dockerfile .. util: Specify version of rocm-cmake in gcn3 Dockerfile This patch updates the gcn3 Dockerfile to use the version of rocm-cmake that MIOpen specifies in its dev-requirements.txt. This fixes a build conflict with newer versions of rocm-cmake that require a higher version of SCons than we have in the Dockerfile. Change-Id: I70887fd91807b77e5015037830cfe96560ac8a31 Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/37155 Maintainer: Gabe Black Maintainer: Matt Sinclair Reviewed-by: Bobby R. Bruce Reviewed-by: Matt Sinclair Tested-by: kokoro --- M util/dockerfiles/gcn-gpu/Dockerfile 1 file changed, 1 insertion(+), 0 deletions(-) Approvals: Matt Sinclair: Looks good to me, approved; Looks good to me, approved Bobby R. Bruce: Looks good to me, approved Gabe Black: Looks good to me, approved kokoro: Regressions pass diff --git a/util/dockerfiles/gcn-gpu/Dockerfile b/util/dockerfiles/gcn-gpu/Dockerfile index 7a9ec8f..19f3ad7 100644 --- a/util/dockerfiles/gcn-gpu/Dockerfile +++ b/util/dockerfiles/gcn-gpu/Dockerfile @@ -73,6 +73,7 @@ RUN git -C /HIP/ checkout 0e3d824e && git -C /HIP/ apply /patch/hip.patch_v2 && \ git -C /hipBLAS/ checkout ee57787e && git -C /hipBLAS/ apply /patch/hipBLAS.patch && \ git -C /rocBLAS/ checkout cbff4b4e && git -C /rocBLAS/ apply /patch/rocBLAS.patch && \ +git -C /rocm-cmake/ checkout 12670acb && \ git -C /MIOpenGEMM/ checkout 9547fb9e && \ git -C /MIOpen/ checkout 01d6ca55c && git -C /MIOpen/ apply /patch/miopen-conv.patch -- To view, visit https://gem5-review.googlesource.com/c/public/gem5/+/37155 To unsubscribe, or for help writing mail filters, visit https://gem5-review.googlesource.com/settings Gerrit-Project: public/gem5 Gerrit-Branch: develop Gerrit-Change-Id: I70887fd91807b77e5015037830cfe96560ac8a31 Gerrit-Change-Number: 37155 Gerrit-PatchSet: 2 
Gerrit-Owner: Kyle Roarty Gerrit-Reviewer: Bobby R. Bruce Gerrit-Reviewer: Gabe Black Gerrit-Reviewer: Kyle Roarty Gerrit-Reviewer: Matt Sinclair Gerrit-Reviewer: kokoro Gerrit-MessageType: merged ___ gem5-dev mailing list -- gem5-dev@gem5.org To unsubscribe send an email to gem5-dev-le...@gem5.org %(web_page_url)slistinfo%(cgiext)s/%(_internal_name)s
[gem5-dev] Change in gem5/gem5[develop]: util: Specify version of rocm-cmake in gcn3 Dockerfile
Kyle Roarty has uploaded this change for review. ( https://gem5-review.googlesource.com/c/public/gem5/+/37155 ) Change subject: util: Specify version of rocm-cmake in gcn3 Dockerfile .. util: Specify version of rocm-cmake in gcn3 Dockerfile This patch updates the gcn3 Dockerfile to use the version of rocm-cmake that MIOpen specifies in its dev-requirements.txt. This fixes a build conflict with newer versions of rocm-cmake that require a higher version of SCons than we have in the Dockerfile. Change-Id: I70887fd91807b77e5015037830cfe96560ac8a31 --- M util/dockerfiles/gcn-gpu/Dockerfile 1 file changed, 1 insertion(+), 0 deletions(-) diff --git a/util/dockerfiles/gcn-gpu/Dockerfile b/util/dockerfiles/gcn-gpu/Dockerfile index 7a9ec8f..19f3ad7 100644 --- a/util/dockerfiles/gcn-gpu/Dockerfile +++ b/util/dockerfiles/gcn-gpu/Dockerfile @@ -73,6 +73,7 @@ RUN git -C /HIP/ checkout 0e3d824e && git -C /HIP/ apply /patch/hip.patch_v2 && \ git -C /hipBLAS/ checkout ee57787e && git -C /hipBLAS/ apply /patch/hipBLAS.patch && \ git -C /rocBLAS/ checkout cbff4b4e && git -C /rocBLAS/ apply /patch/rocBLAS.patch && \ +git -C /rocm-cmake/ checkout 12670acb && \ git -C /MIOpenGEMM/ checkout 9547fb9e && \ git -C /MIOpen/ checkout 01d6ca55c && git -C /MIOpen/ apply /patch/miopen-conv.patch -- To view, visit https://gem5-review.googlesource.com/c/public/gem5/+/37155 To unsubscribe, or for help writing mail filters, visit https://gem5-review.googlesource.com/settings Gerrit-Project: public/gem5 Gerrit-Branch: develop Gerrit-Change-Id: I70887fd91807b77e5015037830cfe96560ac8a31 Gerrit-Change-Number: 37155 Gerrit-PatchSet: 1 Gerrit-Owner: Kyle Roarty Gerrit-MessageType: newchange ___ gem5-dev mailing list -- gem5-dev@gem5.org To unsubscribe send an email to gem5-dev-le...@gem5.org %(web_page_url)slistinfo%(cgiext)s/%(_internal_name)s
[gem5-dev] Change in gem5/gem5[develop]: arch-gcn3: Fix operand size reporting for Flat insts
Kyle Roarty has submitted this change. ( https://gem5-review.googlesource.com/c/public/gem5/+/36275 ) Change subject: arch-gcn3: Fix operand size reporting for Flat insts .. arch-gcn3: Fix operand size reporting for Flat insts Some Flat instructions were reporting their operand sizes in bits instead of bytes. This led to panics occurring in StaticRegisterManagerPolicy::mapVgpr. This patch updates those insts to report their operand sizes in bytes. Change-Id: I48f485e638864a1f2a1a3be66ed20893e73e9705 Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/36275 Reviewed-by: Matt Sinclair Reviewed-by: Alexandru Duțu Maintainer: Matt Sinclair Tested-by: kokoro --- M src/arch/gcn3/insts/instructions.hh 1 file changed, 22 insertions(+), 22 deletions(-) Approvals: Alexandru Duțu: Looks good to me, approved Matt Sinclair: Looks good to me, approved; Looks good to me, approved kokoro: Regressions pass diff --git a/src/arch/gcn3/insts/instructions.hh b/src/arch/gcn3/insts/instructions.hh index 471c130..207da29 100644 --- a/src/arch/gcn3/insts/instructions.hh +++ b/src/arch/gcn3/insts/instructions.hh @@ -79137,7 +79137,7 @@ case 0: //vgpr_addr return 8; case 1: //vgpr_dst -return 32; +return 1; default: fatal("op idx %i out of bounds\n", opIdx); return -1; @@ -79261,7 +79261,7 @@ case 0: //vgpr_addr return 8; case 1: //vgpr_dst -return 32; +return 2; default: fatal("op idx %i out of bounds\n", opIdx); return -1; @@ -80147,9 +80147,9 @@ case 0: //vgpr_addr return 8; case 1: //vgpr_src -return 32; +return 4; case 2: //vgpr_dst -return 32; +return 4; default: fatal("op idx %i out of bounds\n", opIdx); return -1; @@ -80215,9 +80215,9 @@ case 0: //vgpr_addr return 8; case 1: //vgpr_src -return 32; +return 4; case 2: //vgpr_dst -return 32; +return 4; default: fatal("op idx %i out of bounds\n", opIdx); return -1; @@ -80281,9 +80281,9 @@ case 0: //vgpr_addr return 8; case 1: //vgpr_src -return 32; +return 4; case 2: //vgpr_dst -return 32; +return 4; default: fatal("op idx 
%i out of bounds\n", opIdx); return -1; @@ -80347,9 +80347,9 @@ case 0: //vgpr_addr return 8; case 1: //vgpr_src -return 32; +return 4; case 2: //vgpr_dst -return 32; +return 4; default: fatal("op idx %i out of bounds\n", opIdx); return -1; @@ -80413,9 +80413,9 @@ case 0: //vgpr_addr return 8; case 1: //vgpr_src -return 32; +return 4; case 2: //vgpr_dst -return 32; +return 4; default: fatal("op idx %i out of bounds\n", opIdx); return -1; @@ -80479,9 +80479,9 @@ case 0: //vgpr_addr return 8; case 1: //vgpr_src -return 32; +return 4; case 2: //vgpr_dst -return 32; +return 4; default: fatal("op idx %i out of bounds\n", opIdx); return -1; @@ -80545,9 +80545,9 @@ case 0: //vgpr_addr return 8; case 1: //vgpr_src -return 32; +return 4; case 2: //vgpr_dst -return 32; +return 4; default: fatal("op idx %i out of bounds\n", opIdx); return -1; @@ -80611,9 +80611,9 @@ case 0: //vgpr_addr return 8; case 1: //vgpr_src -return 32; +return 4; case 2: //vgpr_dst -return 32; +return 4; default: fatal("op idx %i out of bounds\n", opIdx); return -1; @@ -80677,9 +80677,9 @@ case 0: //vgpr_addr return 8; case 1: //vgpr_src -return 32; +return 4; case 2: //vgpr_d
[gem5-dev] Change in gem5/gem5[develop]: arch-gcn3: Fix operand size reporting for Flat insts
Kyle Roarty has uploaded this change for review. ( https://gem5-review.googlesource.com/c/public/gem5/+/36275 ) Change subject: arch-gcn3: Fix operand size reporting for Flat insts .. arch-gcn3: Fix operand size reporting for Flat insts Some Flat instructions were reporting their operand sizes in bits instead of bytes. This led to panics occurring in StaticRegisterManagerPolicy::mapVgpr. This patch updates those insts to report their operand sizes in bytes. Change-Id: I48f485e638864a1f2a1a3be66ed20893e73e9705 --- M src/arch/gcn3/insts/instructions.hh 1 file changed, 22 insertions(+), 22 deletions(-) diff --git a/src/arch/gcn3/insts/instructions.hh b/src/arch/gcn3/insts/instructions.hh index 471c130..207da29 100644 --- a/src/arch/gcn3/insts/instructions.hh +++ b/src/arch/gcn3/insts/instructions.hh @@ -79137,7 +79137,7 @@ case 0: //vgpr_addr return 8; case 1: //vgpr_dst -return 32; +return 1; default: fatal("op idx %i out of bounds\n", opIdx); return -1; @@ -79261,7 +79261,7 @@ case 0: //vgpr_addr return 8; case 1: //vgpr_dst -return 32; +return 2; default: fatal("op idx %i out of bounds\n", opIdx); return -1; @@ -80147,9 +80147,9 @@ case 0: //vgpr_addr return 8; case 1: //vgpr_src -return 32; +return 4; case 2: //vgpr_dst -return 32; +return 4; default: fatal("op idx %i out of bounds\n", opIdx); return -1; @@ -80215,9 +80215,9 @@ case 0: //vgpr_addr return 8; case 1: //vgpr_src -return 32; +return 4; case 2: //vgpr_dst -return 32; +return 4; default: fatal("op idx %i out of bounds\n", opIdx); return -1; @@ -80281,9 +80281,9 @@ case 0: //vgpr_addr return 8; case 1: //vgpr_src -return 32; +return 4; case 2: //vgpr_dst -return 32; +return 4; default: fatal("op idx %i out of bounds\n", opIdx); return -1; @@ -80347,9 +80347,9 @@ case 0: //vgpr_addr return 8; case 1: //vgpr_src -return 32; +return 4; case 2: //vgpr_dst -return 32; +return 4; default: fatal("op idx %i out of bounds\n", opIdx); return -1; @@ -80413,9 +80413,9 @@ case 0: //vgpr_addr return 8; case 1: 
//vgpr_src -return 32; +return 4; case 2: //vgpr_dst -return 32; +return 4; default: fatal("op idx %i out of bounds\n", opIdx); return -1; @@ -80479,9 +80479,9 @@ case 0: //vgpr_addr return 8; case 1: //vgpr_src -return 32; +return 4; case 2: //vgpr_dst -return 32; +return 4; default: fatal("op idx %i out of bounds\n", opIdx); return -1; @@ -80545,9 +80545,9 @@ case 0: //vgpr_addr return 8; case 1: //vgpr_src -return 32; +return 4; case 2: //vgpr_dst -return 32; +return 4; default: fatal("op idx %i out of bounds\n", opIdx); return -1; @@ -80611,9 +80611,9 @@ case 0: //vgpr_addr return 8; case 1: //vgpr_src -return 32; +return 4; case 2: //vgpr_dst -return 32; +return 4; default: fatal("op idx %i out of bounds\n", opIdx); return -1; @@ -80677,9 +80677,9 @@ case 0: //vgpr_addr return 8; case 1: //vgpr_src -return 32; +return 4; case 2: //vgpr_dst -return 32; +return 4; default: fatal("op idx %i out of bounds\n", opIdx); return -1; @@ -80745,9 +80745,9 @@ case 0: //vgpr_addr return 8; case 1: //vgpr_src -return 32; +
[gem5-dev] Change in gem5/gem5[develop]: configs: python3 compatibility for apu_se
Kyle Roarty has submitted this change. ( https://gem5-review.googlesource.com/c/public/gem5/+/36159 ) Change subject: configs: python3 compatibility for apu_se .. configs: python3 compatibility for apu_se This patch changes xrange to range, as the former doesn't exist in python3. Change-Id: Ibe2c1fb073194e3e0713bb1718f2e323f7c4e397 Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/36159 Reviewed-by: Matt Sinclair Reviewed-by: Jason Lowe-Power Reviewed-by: Matthew Poremba Maintainer: Matt Sinclair Tested-by: kokoro --- M configs/example/apu_se.py 1 file changed, 3 insertions(+), 3 deletions(-) Approvals: Jason Lowe-Power: Looks good to me, but someone else must approve Matthew Poremba: Looks good to me, approved Matt Sinclair: Looks good to me, approved; Looks good to me, approved kokoro: Regressions pass diff --git a/configs/example/apu_se.py b/configs/example/apu_se.py index ba0d9ea..6faca5f 100644 --- a/configs/example/apu_se.py +++ b/configs/example/apu_se.py @@ -299,8 +299,8 @@ vrf_pool_mgrs = [] srfs = [] srf_pool_mgrs = [] -for j in xrange(options.simds_per_cu): -for k in xrange(shader.n_wf): +for j in range(options.simds_per_cu): +for k in range(shader.n_wf): wavefronts.append(Wavefront(simdId = j, wf_slot_id = k, wf_size = options.wf_size)) vrf_pool_mgrs.append(SimplePoolManager(pool_size = \ @@ -593,7 +593,7 @@ system.ruby._cpu_ports[gpu_port_idx].slave gpu_port_idx = gpu_port_idx + 1 -for i in xrange(n_cu): +for i in range(n_cu): if i > 0 and not i % options.cu_per_scalar_cache: print("incrementing idx on ", i) gpu_port_idx += 1 -- To view, visit https://gem5-review.googlesource.com/c/public/gem5/+/36159 To unsubscribe, or for help writing mail filters, visit https://gem5-review.googlesource.com/settings Gerrit-Project: public/gem5 Gerrit-Branch: develop Gerrit-Change-Id: Ibe2c1fb073194e3e0713bb1718f2e323f7c4e397 Gerrit-Change-Number: 36159 Gerrit-PatchSet: 2 Gerrit-Owner: Kyle Roarty Gerrit-Reviewer: Jason Lowe-Power Gerrit-Reviewer: Kyle 
Roarty Gerrit-Reviewer: Matt Sinclair Gerrit-Reviewer: Matthew Poremba Gerrit-Reviewer: kokoro Gerrit-MessageType: merged ___ gem5-dev mailing list -- gem5-dev@gem5.org To unsubscribe send an email to gem5-dev-le...@gem5.org %(web_page_url)slistinfo%(cgiext)s/%(_internal_name)s