https://github.com/petar-avramovic created https://github.com/llvm/llvm-project/pull/157845
Use the same rules for G_ZEXTLOAD and G_SEXTLOAD as for G_LOAD. Flat addrspace(0) and private addrspace(5) G_ZEXTLOAD and G_SEXTLOAD should always be divergent.

From 8573b1705133c5284a283d170643b8e30bfc4a20 Mon Sep 17 00:00:00 2001
From: Petar Avramovic <petar.avramo...@amd.com>
Date: Wed, 10 Sep 2025 13:04:20 +0200
Subject: [PATCH] AMDGPU/UniformityAnalysis: fix G_ZEXTLOAD and G_SEXTLOAD

Use the same rules for G_ZEXTLOAD and G_SEXTLOAD as for G_LOAD. Flat
addrspace(0) and private addrspace(5) G_ZEXTLOAD and G_SEXTLOAD should
always be divergent.

---
 llvm/lib/Target/AMDGPU/SIInstrInfo.cpp | 15 +++++++-------
 .../AMDGPU/MIR/loads-gmir.mir          | 20 +++++++++++--------
 2 files changed, 20 insertions(+), 15 deletions(-)

diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
index 5c958dfe6954f..398c99b3bd127 100644
--- a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
@@ -10281,7 +10281,7 @@ unsigned SIInstrInfo::getInstrLatency(const InstrItineraryData *ItinData,
 InstructionUniformity
 SIInstrInfo::getGenericInstructionUniformity(const MachineInstr &MI) const {
   const MachineRegisterInfo &MRI = MI.getMF()->getRegInfo();
-  unsigned opcode = MI.getOpcode();
+  unsigned Opcode = MI.getOpcode();

   auto HandleAddrSpaceCast = [this, &MRI](const MachineInstr &MI) {
     Register Dst = MI.getOperand(0).getReg();
@@ -10301,7 +10301,7 @@ SIInstrInfo::getGenericInstructionUniformity(const MachineInstr &MI) const {
   // If the target supports globally addressable scratch, the mapping from
   // scratch memory to the flat aperture changes therefore an address space cast
   // is no longer uniform.
-  if (opcode == TargetOpcode::G_ADDRSPACE_CAST)
+  if (Opcode == TargetOpcode::G_ADDRSPACE_CAST)
     return HandleAddrSpaceCast(MI);

   if (auto *GI = dyn_cast<GIntrinsic>(&MI)) {
@@ -10329,7 +10329,8 @@ SIInstrInfo::getGenericInstructionUniformity(const MachineInstr &MI) const {
   //
   // All other loads are not divergent, because if threads issue loads with the
   // same arguments, they will always get the same result.
-  if (opcode == AMDGPU::G_LOAD) {
+  if (Opcode == AMDGPU::G_LOAD || Opcode == AMDGPU::G_ZEXTLOAD ||
+      Opcode == AMDGPU::G_SEXTLOAD) {
     if (MI.memoperands_empty())
       return InstructionUniformity::NeverUniform; // conservative assumption

@@ -10343,10 +10344,10 @@ SIInstrInfo::getGenericInstructionUniformity(const MachineInstr &MI) const {
     return InstructionUniformity::Default;
   }

-  if (SIInstrInfo::isGenericAtomicRMWOpcode(opcode) ||
-      opcode == AMDGPU::G_ATOMIC_CMPXCHG ||
-      opcode == AMDGPU::G_ATOMIC_CMPXCHG_WITH_SUCCESS ||
-      AMDGPU::isGenericAtomic(opcode)) {
+  if (SIInstrInfo::isGenericAtomicRMWOpcode(Opcode) ||
+      Opcode == AMDGPU::G_ATOMIC_CMPXCHG ||
+      Opcode == AMDGPU::G_ATOMIC_CMPXCHG_WITH_SUCCESS ||
+      AMDGPU::isGenericAtomic(Opcode)) {
     return InstructionUniformity::NeverUniform;
   }
   return InstructionUniformity::Default;
diff --git a/llvm/test/Analysis/UniformityAnalysis/AMDGPU/MIR/loads-gmir.mir b/llvm/test/Analysis/UniformityAnalysis/AMDGPU/MIR/loads-gmir.mir
index cb3c2de5b8753..d799cd2057f47 100644
--- a/llvm/test/Analysis/UniformityAnalysis/AMDGPU/MIR/loads-gmir.mir
+++ b/llvm/test/Analysis/UniformityAnalysis/AMDGPU/MIR/loads-gmir.mir
@@ -46,13 +46,13 @@ body: |
     %6:_(p5) = G_IMPLICIT_DEF

     ; Atomic load
-    ; CHECK-NOT: DIVERGENT
-
+    ; CHECK: DIVERGENT
+    ; CHECK-SAME: G_ZEXTLOAD
     %0:_(s32) = G_ZEXTLOAD %1(p0) :: (load seq_cst (s16) from `ptr undef`)

     ; flat load
-    ; CHECK-NOT: DIVERGENT
-
+    ; CHECK: DIVERGENT
+    ; CHECK-SAME: G_ZEXTLOAD
     %2:_(s32) = G_ZEXTLOAD %1(p0) :: (load (s16) from `ptr undef`)

     ; Gloabal load
@@ -60,7 +60,8 @@ body: |
     %3:_(s32) = G_ZEXTLOAD %4(p1) :: (load (s16) from `ptr addrspace(1) undef`, addrspace 1)

     ; Private load
-    ; CHECK-NOT: DIVERGENT
+    ; CHECK: DIVERGENT
+    ; CHECK-SAME: G_ZEXTLOAD
     %5:_(s32) = G_ZEXTLOAD %6(p5) :: (volatile load (s16) from `ptr addrspace(5) undef`, addrspace 5)
     G_STORE %2(s32), %4(p1) :: (volatile store (s32) into `ptr addrspace(1) undef`, addrspace 1)
     G_STORE %3(s32), %4(p1) :: (volatile store (s32) into `ptr addrspace(1) undef`, addrspace 1)
@@ -80,11 +81,13 @@ body: |
     %6:_(p5) = G_IMPLICIT_DEF

     ; Atomic load
-    ; CHECK-NOT: DIVERGENT
+    ; CHECK: DIVERGENT
+    ; CHECK-SAME: G_SEXTLOAD
     %0:_(s32) = G_SEXTLOAD %1(p0) :: (load seq_cst (s16) from `ptr undef`)

     ; flat load
-    ; CHECK-NOT: DIVERGENT
+    ; CHECK: DIVERGENT
+    ; CHECK-SAME: G_SEXTLOAD
     %2:_(s32) = G_SEXTLOAD %1(p0) :: (load (s16) from `ptr undef`)

     ; Gloabal load
@@ -92,7 +95,8 @@ body: |
     %3:_(s32) = G_SEXTLOAD %4(p1) :: (load (s16) from `ptr addrspace(1) undef`, addrspace 1)

     ; Private load
-    ; CHECK-NOT: DIVERGENT
+    ; CHECK: DIVERGENT
+    ; CHECK-SAME: G_SEXTLOAD
     %5:_(s32) = G_SEXTLOAD %6(p5) :: (volatile load (s16) from `ptr addrspace(5) undef`, addrspace 5)
     G_STORE %2(s32), %4(p1) :: (volatile store (s32) into `ptr addrspace(1) undef`, addrspace 1)
     G_STORE %3(s32), %4(p1) :: (volatile store (s32) into `ptr addrspace(1) undef`, addrspace 1)
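For readers skimming the patch, here is a minimal standalone sketch of the classification rule that the change extends from G_LOAD to the extending loads. It models only the behavior described above: no memory operands is conservatively divergent, any access that may touch flat addrspace(0) or private addrspace(5) memory is divergent, and everything else keeps the default classification. The enum and function names below are made up for illustration and are not the LLVM API; the real check lives in SIInstrInfo::getGenericInstructionUniformity and walks the instruction's MachineMemOperands.

#include <cstdio>
#include <vector>

// AMDGPU address-space numbers relevant to the rule.
enum AddrSpace : unsigned {
  Flat = 0,    // generic/flat pointers
  Global = 1,  // global memory
  Private = 5, // scratch/private memory
};

enum class Uniformity { Default, NeverUniform };

// Hypothetical helper: classify a G_LOAD / G_ZEXTLOAD / G_SEXTLOAD from the
// address spaces of its memory operands.
Uniformity classifyLoadUniformity(const std::vector<AddrSpace> &MemOperandAS) {
  if (MemOperandAS.empty())
    return Uniformity::NeverUniform; // conservative assumption
  for (AddrSpace AS : MemOperandAS)
    if (AS == Flat || AS == Private)
      return Uniformity::NeverUniform; // may touch per-lane or flat memory
  return Uniformity::Default;
}

int main() {
  // A flat G_ZEXTLOAD is always divergent; a purely global one can stay
  // uniform when its address is uniform.
  std::printf("flat zextload:   %s\n",
              classifyLoadUniformity({Flat}) == Uniformity::NeverUniform
                  ? "divergent" : "default");
  std::printf("global zextload: %s\n",
              classifyLoadUniformity({Global}) == Uniformity::NeverUniform
                  ? "divergent" : "default");
  return 0;
}

The intuition matches the comment kept in the patch: loads from global-like address spaces return the same value for the same address in every lane, while flat and private accesses may resolve to per-lane scratch memory, so the analysis must not assume their results are uniform.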