For non blocking mode during SmmMpBroadcastProcedure, multiple APs might contended access the RunningApCount in the PROCEDURE_TOKEN:
Step1: RunningApCount is initialized to the mMaxNumberOfCpus (See GetFreeToken). Step2: Decrease RunningApCount if the AP is not present (See InterlockedDecrement in InternalSmmStartupAllAPs). Step3: multiple APs are contended to decrease RunningApCount in the same token (See ReleaseToken in APHandler). So, Contended lock case happen during Step3. For multiple APs access the shared memory (RunningApCount), we shall use exclusive cache line with WB attribute for SMM Performance Tuning. This patch makes RunningApCount on exclusive cacheline. Cc: Ray Ni <ray...@intel.com> Cc: Laszlo Ersek <ler...@redhat.com> Cc: Eric Dong <eric.d...@intel.com> Cc: Zeng Star <star.z...@intel.com> Cc: Gerd Hoffmann <kra...@redhat.com> Cc: Rahul Kumar <rahul1.ku...@intel.com> Signed-off-by: Jiaxin Wu <jiaxin...@intel.com> --- UefiCpuPkg/PiSmmCpuDxeSmm/MpService.c | 35 +++++++++++++++++++----------- UefiCpuPkg/PiSmmCpuDxeSmm/PiSmmCpuDxeSmm.h | 2 +- 2 files changed, 23 insertions(+), 14 deletions(-) diff --git a/UefiCpuPkg/PiSmmCpuDxeSmm/MpService.c b/UefiCpuPkg/PiSmmCpuDxeSmm/MpService.c index 9790b4f888..05fa6854fe 100644 --- a/UefiCpuPkg/PiSmmCpuDxeSmm/MpService.c +++ b/UefiCpuPkg/PiSmmCpuDxeSmm/MpService.c @@ -421,11 +421,11 @@ ReleaseToken ( { PROCEDURE_TOKEN *Token; Token = mSmmMpSyncData->CpuData[CpuIndex].Token; - if (InterlockedDecrement (&Token->RunningApCount) == 0) { + if (InterlockedDecrement (Token->RunningApCount) == 0) { ReleaseSpinLock (Token->SpinLock); } mSmmMpSyncData->CpuData[CpuIndex].Token = NULL; } @@ -970,12 +970,12 @@ AllocateTokenBuffer ( ) { UINTN SpinLockSize; UINT32 TokenCountPerChunk; UINTN Index; - SPIN_LOCK *SpinLock; - UINT8 *SpinLockBuffer; + UINTN BufferAddr; + VOID *Buffer; PROCEDURE_TOKEN *ProcTokens; SpinLockSize = GetSpinLockProperties (); TokenCountPerChunk = FixedPcdGet32 (PcdCpuSmmMpTokenCountPerChunk); @@ -986,25 +986,34 @@ AllocateTokenBuffer ( } DEBUG ((DEBUG_INFO, "CpuSmm: SpinLock Size = 0x%x, PcdCpuSmmMpTokenCountPerChunk = 0x%x\n", SpinLockSize, TokenCountPerChunk)); // - // Separate the Spin_lock and Proc_token because the alignment requires by Spin_Lock. + // Allocate the buffer for SpinLock and RunningApCount to meet the alignment requirement. // - SpinLockBuffer = AllocatePool (SpinLockSize * TokenCountPerChunk); - ASSERT (SpinLockBuffer != NULL); + Buffer = AllocatePages (EFI_SIZE_TO_PAGES (SpinLockSize * TokenCountPerChunk * 2)); + if (Buffer == NULL) { + DEBUG ((DEBUG_ERROR, "AllocateTokenBuffer: Failed to allocate the buffer for SpinLock and RunningApCount!\n")); + CpuDeadLoop (); + } ProcTokens = AllocatePool (sizeof (PROCEDURE_TOKEN) * TokenCountPerChunk); ASSERT (ProcTokens != NULL); + BufferAddr = (UINTN)Buffer; for (Index = 0; Index < TokenCountPerChunk; Index++) { - SpinLock = (SPIN_LOCK *)(SpinLockBuffer + SpinLockSize * Index); - InitializeSpinLock (SpinLock); + ProcTokens[Index].Signature = PROCEDURE_TOKEN_SIGNATURE; + + ProcTokens[Index].SpinLock = (SPIN_LOCK *)BufferAddr; + InitializeSpinLock (ProcTokens[Index].SpinLock); + + BufferAddr += SpinLockSize; + + ProcTokens[Index].RunningApCount = (volatile UINT32 *)BufferAddr; + *ProcTokens[Index].RunningApCount = 0; - ProcTokens[Index].Signature = PROCEDURE_TOKEN_SIGNATURE; - ProcTokens[Index].SpinLock = SpinLock; - ProcTokens[Index].RunningApCount = 0; + BufferAddr += SpinLockSize; InsertTailList (&gSmmCpuPrivate->TokenList, &ProcTokens[Index].Link); } return &ProcTokens[0].Link; @@ -1036,11 +1045,11 @@ GetFreeToken ( } NewToken = PROCEDURE_TOKEN_FROM_LINK (gSmmCpuPrivate->FirstFreeToken); gSmmCpuPrivate->FirstFreeToken = GetNextNode (&gSmmCpuPrivate->TokenList, gSmmCpuPrivate->FirstFreeToken); - NewToken->RunningApCount = RunningApsCount; + *NewToken->RunningApCount = RunningApsCount; AcquireSpinLock (NewToken->SpinLock); return NewToken; } @@ -1298,11 +1307,11 @@ InternalSmmStartupAllAPs ( // // Decrease the count to mark this processor(AP or BSP) as finished. // if (ProcToken != NULL) { - InterlockedDecrement (&ProcToken->RunningApCount); + InterlockedDecrement (ProcToken->RunningApCount); } } } ReleaseAllAPs (); diff --git a/UefiCpuPkg/PiSmmCpuDxeSmm/PiSmmCpuDxeSmm.h b/UefiCpuPkg/PiSmmCpuDxeSmm/PiSmmCpuDxeSmm.h index 7f244ea803..07473208fd 100644 --- a/UefiCpuPkg/PiSmmCpuDxeSmm/PiSmmCpuDxeSmm.h +++ b/UefiCpuPkg/PiSmmCpuDxeSmm/PiSmmCpuDxeSmm.h @@ -213,11 +213,11 @@ typedef struct { typedef struct { UINTN Signature; LIST_ENTRY Link; SPIN_LOCK *SpinLock; - volatile UINT32 RunningApCount; + volatile UINT32 *RunningApCount; } PROCEDURE_TOKEN; #define PROCEDURE_TOKEN_FROM_LINK(a) CR (a, PROCEDURE_TOKEN, Link, PROCEDURE_TOKEN_SIGNATURE) #define TOKEN_BUFFER_SIGNATURE SIGNATURE_32 ('T', 'K', 'B', 'S') -- 2.16.2.windows.1 -=-=-=-=-=-=-=-=-=-=-=- Groups.io Links: You receive all messages sent to this group. View/Reply Online (#116435): https://edk2.groups.io/g/devel/message/116435 Mute This Topic: https://groups.io/mt/104764105/21656 Group Owner: devel+ow...@edk2.groups.io Unsubscribe: https://edk2.groups.io/g/devel/unsub [arch...@mail-archive.com] -=-=-=-=-=-=-=-=-=-=-=-