superficial comments only; the patch is nearly ready:

On 02/22/21 08:19, Ankur Arora wrote:
> Add EjectCpu(), which handles the CPU ejection, and provides a holding
> area for said CPUs. It is called via SmmCpuFeaturesRendezvousExit(),
> at the tail end of the SMI handling.
>
> Also UnplugCpus() now stashes QEMU Selectors of CPUs which need to be
> ejected in CPU_HOT_EJECT_DATA.QemuSelectorMap. This is used by
> EjectCpu() to identify CPUs marked for ejection.
>
> Cc: Laszlo Ersek <ler...@redhat.com>
> Cc: Jordan Justen <jordan.l.jus...@intel.com>
> Cc: Ard Biesheuvel <ard.biesheu...@arm.com>
> Cc: Igor Mammedov <imamm...@redhat.com>
> Cc: Boris Ostrovsky <boris.ostrov...@oracle.com>
> Cc: Aaron Young <aaron.yo...@oracle.com>
> Ref: https://bugzilla.tianocore.org/show_bug.cgi?id=3132
> Signed-off-by: Ankur Arora <ankur.a.ar...@oracle.com>
> ---
>
> Notes:
>     Address these review comments from v6:
>      (1) s/CpuEject/EjectCpu/g
>      (2) Ensure that the added include is in sorted order.
>      (3) Switch to a cheaper CpuSleep() based loop instead of
>       CpuDeadLoop().  Also add the CpuLib LibraryClass.
>      (4) Remove the nested else clause
>      (5) Use Laszlo's much clearer comment when we try to map multiple
>       QemuSelector to the same ProcessorNum.
>      (6a) Fix indentation of the debug print in the block in (5).
>      (6b,6c,6d) Fix printf types for ProcessorNum, use FMT_APIC_ID for
>       APIC_ID and 0x%Lx for QemuSelector[].
>      () As discussed elsewhere add an DEBUG_INFO print logging the
>       correspondence between ProcessorNum, APIC_ID, QemuSelector.
>      (7a,7b) Use EFI_ALREADY_STARTED instead of EFI_INVALID_PARAMETER and
>       document it in the UnplugCpus() comment block.
>      ()  As discussed elsewhere, add the import statement for
>       PcdCpuHotEjectDataAddress.
>      (9) Use Laszlo's comment in the PcdGet64(PcdCpuHotEjectDataAddress)
>       description block.
>      (10) Change mCpuHotEjectData init state checks from ASSERT to ones
>       consistent with similar checks for mCpuHotPlugData.
>      (11-14) Get rid of mCpuHotEjectData init loop: moved to a prior
>       patch so it can be done at allocation time.
>      (15) s/SmmCpuFeaturesSmiRendezvousExit/SmmCpuFeaturesRendezvousExit/
>      (16,17) Document the ordering requirements of
>       mCpuHotEjectData->Handler, and mCpuHotEjectData->QemuSelectorMap.
>
>     Not addressed:
>      (8) Not removing the EjectCount variable as I'd like to minimize
>       stores/loads to CPU_HOT_EJECT_DATA->Handler and so would like to do this
>       a single time at the end of the iteration.  (It is safe to write 
> multiple
>       times to the handler in UnplugCpus() but given the ordering concerns
>       around it, it seems cleaner to not access it unnecessarily.)
>
>  OvmfPkg/CpuHotplugSmm/CpuHotplugSmm.inf |   2 +
>  OvmfPkg/CpuHotplugSmm/CpuHotplug.c      | 157 
> ++++++++++++++++++++++++++++++--
>  2 files changed, 151 insertions(+), 8 deletions(-)
>
> diff --git a/OvmfPkg/CpuHotplugSmm/CpuHotplugSmm.inf 
> b/OvmfPkg/CpuHotplugSmm/CpuHotplugSmm.inf
> index 04322b0d7855..ebcc7e2ac63a 100644
> --- a/OvmfPkg/CpuHotplugSmm/CpuHotplugSmm.inf
> +++ b/OvmfPkg/CpuHotplugSmm/CpuHotplugSmm.inf
> @@ -40,6 +40,7 @@ [Packages]
>  [LibraryClasses]
>    BaseLib
>    BaseMemoryLib
> +  CpuLib
>    DebugLib
>    LocalApicLib
>    MmServicesTableLib
> @@ -54,6 +55,7 @@ [Protocols]
>
>  [Pcd]
>    gUefiCpuPkgTokenSpaceGuid.PcdCpuHotPlugDataAddress                ## 
> CONSUMES
> +  gUefiOvmfPkgTokenSpaceGuid.PcdCpuHotEjectDataAddress              ## 
> CONSUMES
>    gUefiOvmfPkgTokenSpaceGuid.PcdQ35SmramAtDefaultSmbase             ## 
> CONSUMES
>
>  [FeaturePcd]
> diff --git a/OvmfPkg/CpuHotplugSmm/CpuHotplug.c 
> b/OvmfPkg/CpuHotplugSmm/CpuHotplug.c
> index f07b5072749a..851e2b28aad9 100644
> --- a/OvmfPkg/CpuHotplugSmm/CpuHotplug.c
> +++ b/OvmfPkg/CpuHotplugSmm/CpuHotplug.c
> @@ -10,10 +10,12 @@
>  #include <IndustryStandard/Q35MchIch9.h>     // ICH9_APM_CNT
>  #include <IndustryStandard/QemuCpuHotplug.h> // QEMU_CPUHP_CMD_GET_PENDING
>  #include <Library/BaseLib.h>                 // CpuDeadLoop()
> +#include <Library/CpuLib.h>                  // CpuSleep()
>  #include <Library/DebugLib.h>                // ASSERT()
>  #include <Library/MmServicesTableLib.h>      // gMmst
>  #include <Library/PcdLib.h>                  // PcdGetBool()
>  #include <Library/SafeIntLib.h>              // SafeUintnSub()
> +#include <Pcd/CpuHotEjectData.h>             // CPU_HOT_EJECT_DATA
>  #include <Protocol/MmCpuIo.h>                // EFI_MM_CPU_IO_PROTOCOL
>  #include <Protocol/SmmCpuService.h>          // EFI_SMM_CPU_SERVICE_PROTOCOL
>  #include <Uefi/UefiBaseType.h>               // EFI_STATUS
> @@ -32,11 +34,12 @@ STATIC EFI_MM_CPU_IO_PROTOCOL *mMmCpuIo;
>  //
>  STATIC EFI_SMM_CPU_SERVICE_PROTOCOL *mMmCpuService;
>  //
> -// This structure is a communication side-channel between the
> +// These structures serve as communication side-channels between the
>  // EFI_SMM_CPU_SERVICE_PROTOCOL consumer (i.e., this driver) and provider
>  // (i.e., PiSmmCpuDxeSmm).
>  //
>  STATIC CPU_HOT_PLUG_DATA *mCpuHotPlugData;
> +STATIC CPU_HOT_EJECT_DATA *mCpuHotEjectData;
>  //
>  // SMRAM arrays for fetching the APIC IDs of processors with pending events 
> (of
>  // known event types), for the time of just one MMI.
> @@ -188,18 +191,72 @@ RevokeNewSlot:
>  }
>
>  /**
> +  CPU Hot-eject handler, called from SmmCpuFeaturesRendezvousExit()
> +  on each CPU at exit from SMM.
> +
> +  If, the executing CPU is not being ejected, nothing to be done.
> +  If, the executing CPU is being ejected, wait in a halted loop
> +  until ejected.
> +
> +  @param[in] ProcessorNum      ProcessorNum denotes the CPU exiting SMM,
> +                               and will be used as an index into
> +                               CPU_HOT_EJECT_DATA->QemuSelectorMap. It is
> +                               identical to the processor handle number in
> +                               EFI_SMM_CPU_SERVICE_PROTOCOL.
> +**/
> +VOID
> +EFIAPI
> +EjectCpu (
> +  IN UINTN ProcessorNum
> +  )
> +{
> +  UINT64 QemuSelector;
> +
> +  QemuSelector = mCpuHotEjectData->QemuSelectorMap[ProcessorNum];
> +  if (QemuSelector == CPU_EJECT_QEMU_SELECTOR_INVALID) {
> +    return;
> +  }
> +
> +  //
> +  // CPU(s) being unplugged get here from SmmCpuFeaturesRendezvousExit()
> +  // after having been cleared to exit the SMI by the BSP and thus have
> +  // no SMM processing remaining.
> +  //
> +  // Given that we cannot allow them to escape to the guest, we pen them
> +  // here until the BSP tells QEMU to unplug them.
> +  //
> +  for (;;) {
> +    DisableInterrupts ();
> +    CpuSleep ();
> +  }
> +}
> +
> +/**
>    Process to be hot-unplugged CPUs, per QemuCpuhpCollectApicIds().
>
>    For each such CPU, report the CPU to PiSmmCpuDxeSmm via
> -  EFI_SMM_CPU_SERVICE_PROTOCOL. If the to be hot-unplugged CPU is
> -  unknown, skip it silently.
> +  EFI_SMM_CPU_SERVICE_PROTOCOL and stash the APIC ID for later ejection.
> +  If the to be hot-unplugged CPU is unknown, skip it silently.
> +
> +  Additonally, if we do stash any APIC IDs, also install a CPU eject handler
> +  which would handle the ejection.

(1) Please update the two APIC ID references above to QEMU CPU selectors
(the commit message has been updated already, correctly).


>
>    @param[in] ToUnplugApicIds    The APIC IDs of the CPUs that are about to be
>                                  hot-unplugged.
>
> +  @param[in] ToUnplugSelector   The QEMU Selectors of the CPUs that are 
> about to
> +                                be hot-unplugged.
> +

(2) Please rename the parameter to "ToUnplugSelectors" (plural), both
here in the comment and in the actual parameter list.


>    @param[in] ToUnplugCount      The number of filled-in APIC IDs in
>                                  ToUnplugApicIds.
>
> +  @retval EFI_ALREADY_STARTED   For the ProcessorNum that
> +                                EFI_SMM_CPU_SERVICE_PROTOCOL had assigned to
> +                                one of the APIC ID in ToUnplugApicIds,
> +                                mCpuHotEjectData->QemuSelectorMap already has
> +                                the QemuSelector value stashed. (This should
> +                                never happen.)
> +

(3) Unfortunately I made a typing error in my v6 review where I
suggested this language: it should say

  one of the APIC ID*s* in ToUnplugApicIds

(emphasis only here, in this comment)


>    @retval EFI_SUCCESS           Known APIC IDs have been removed from SMM 
> data
>                                  structures.
>
> @@ -210,23 +267,36 @@ STATIC
>  EFI_STATUS
>  UnplugCpus (
>    IN APIC_ID                      *ToUnplugApicIds,
> +  IN UINT32                       *ToUnplugSelector,
>    IN UINT32                       ToUnplugCount
>    )
>  {
>    EFI_STATUS Status;
>    UINT32     ToUnplugIdx;
> +  UINT32     EjectCount;
>    UINTN      ProcessorNum;
>
>    ToUnplugIdx = 0;
> +  EjectCount = 0;
>    while (ToUnplugIdx < ToUnplugCount) {
>      APIC_ID    RemoveApicId;
> +    UINT32     QemuSelector;
>
>      RemoveApicId = ToUnplugApicIds[ToUnplugIdx];
> +    QemuSelector = ToUnplugSelector[ToUnplugIdx];
>
>      //
> -    // mCpuHotPlugData->ApicId maps ProcessorNum -> ApicId. Use it to find
> -    // the ProcessorNum for the APIC ID to be removed.
> +    // mCpuHotPlugData->ApicId maps ProcessorNum -> ApicId. Use RemoveApicId
> +    // to find the corresponding ProcessorNum for the CPU to be removed.
>      //
> +    // With this we can establish a 3 way mapping:
> +    //    APIC_ID -- ProcessorNum -- QemuSelector
> +    //
> +    // We stash the ProcessorNum -> QemuSelector mapping so it can later be
> +    // used for CPU hot-eject in SmmCpuFeaturesRendezvousExit() context 
> (where
> +    // we only have ProcessorNum available.)
> +    //
> +
>      for (ProcessorNum = 0;
>           ProcessorNum < mCpuHotPlugData->ArrayLength;
>           ProcessorNum++) {
> @@ -255,11 +325,64 @@ UnplugCpus (
>        return Status;
>      }
>
> +    if (mCpuHotEjectData->QemuSelectorMap[ProcessorNum] !=
> +          CPU_EJECT_QEMU_SELECTOR_INVALID) {

(4) Invalid indentation; in this (controlling expression) context,
CPU_EJECT_QEMU_SELECTOR_INVALID should line up with "mCpuHotEjectData".


> +      //
> +      // mCpuHotEjectData->QemuSelectorMap[ProcessorNum] is set to
> +      // CPU_EJECT_QEMU_SELECTOR_INVALID when 
> mCpuHotEjectData->QemuSelectorMap
> +      // is allocated, and once the subject processsor is ejected.
> +      //
> +      // Additionally, mMmCpuService->RemoveProcessor(ProcessorNum) 
> invalidates
> +      // mCpuHotPlugData->ApicId[ProcessorNum], so a given ProcessorNum can
> +      // never match more than one APIC ID and by transitivity, more than one
> +      // QemuSelector in a single invocation of UnplugCpus().
> +      //

(5) good comment, but please make it a tiny bit easier to read: please
insert two "em dashes", as follows:

      // never match more than one APIC ID -- and by transitivity, designate
      // more than one QemuSelector -- in a single invocation of UnplugCpus().

(I've also inserted the verb "designate", because "match" doesn't seem
to apply in that context.)


> +      DEBUG ((DEBUG_ERROR, "%a: ProcessorNum %Lu maps to QemuSelector 0x%Lx, 
> "
> +        "cannot also map to 0x%Lx\n", __FUNCTION__, (UINT64)ProcessorNum,
> +        (UINT64)mCpuHotEjectData->QemuSelectorMap[ProcessorNum], 
> QemuSelector));

(6a) "mCpuHotEjectData->QemuSelectorMap[ProcessorNum]" has type UINT64,
so the cast is superfluous

(6b) we log selectors in all other places in decimal, so please use %Lu
here, not 0x%Lx, for logging
"mCpuHotEjectData->QemuSelectorMap[ProcessorNum]"

(6c) "QemuSelector" has type UINT32 (correctly), so we need to log it
with either "0x%x" or "%u". Given my above point about logging selectors
in decimal everywhere else, please use "%u".

      DEBUG ((DEBUG_ERROR, "%a: ProcessorNum %Lu maps to QemuSelector %Lu, "
        "cannot also map to %u\n", __FUNCTION__, (UINT64)ProcessorNum,
        mCpuHotEjectData->QemuSelectorMap[ProcessorNum], QemuSelector));


> +
> +      Status = EFI_ALREADY_STARTED;
> +      return Status;
> +    }

(7) style nit -- this looks better as "return EFI_ALREADY_STARTED".


> +
> +    //
> +    // Stash the QemuSelector so we can do the actual ejection later.
> +    //
> +    mCpuHotEjectData->QemuSelectorMap[ProcessorNum] = (UINT64)QemuSelector;
> +
> +    DEBUG ((DEBUG_INFO, "%a: Started hot-unplug on ProcessorNum %Lu, APIC ID 
> "
> +      FMT_APIC_ID ", QemuSelector 0x%Lx\n", __FUNCTION__, 
> (UINT64)ProcessorNum,
> +      RemoveApicId, mCpuHotEjectData->QemuSelectorMap[ProcessorNum]));

(8a) In the format string, please replace "QemuSelector 0x%Lx" with
"QemuSelector %u" -- the main point is the decimal notation

(8b) As a suggestion, I think we should simplify this DEBUG call by
replacing the "mCpuHotEjectData->QemuSelectorMap[ProcessorNum]" argument
with just "QemuSelector".

    DEBUG ((DEBUG_INFO, "%a: Started hot-unplug on ProcessorNum %Lu, APIC ID "
      FMT_APIC_ID ", QemuSelector %u\n", __FUNCTION__, (UINT64)ProcessorNum,
      RemoveApicId, QemuSelector));


> +
> +    EjectCount++;

(I'm fine with keeping the "EjectCount" variable.)


>      ToUnplugIdx++;
>    }
>
> +  if (EjectCount != 0) {
> +    //
> +    // We have processors to be ejected; install the handler.
> +    //
> +    mCpuHotEjectData->Handler = EjectCpu;
> +
> +    //
> +    // The BSP, CPUs to be ejected dereference mCpuHotEjectData->Handler, and
> +    // mCpuHotEjectData->QemuSelectorMap[] in SmmCpuFeaturesRendezvousExit().
> +    //
> +    // Assignments to both of these are ordered-before the BSP's SMI exit 
> signal
> +    // which happens via a write to 
> SMM_DISPATCHER_MP_SYNC_DATA->AllCpusInSync.
> +    // Dereferences of both are ordered-after the synchronization via
> +    // "AllCpusInSync".
> +    //
> +    // So we are guaranteed that the Handler would see the assignments above.
> +    // However, add a MemoryFence() here in-lieu of a compiler barrier to
> +    // ensure that the compiler doesn't monkey around with the stores.
> +    //
> +    MemoryFence ();
> +  }
> +

(9) Per previous discussion (under patch v8 #7), please replace the last
paragraph of this comment, with a "ReleaseMemoryFence" reference.

The rest looks good.

Thanks!
Laszlo

>    //
> -  // We've removed this set of APIC IDs from SMM data structures.
> +  // We've removed this set of APIC IDs from SMM data structures and
> +  // have installed an ejection handler if needed.
>    //
>    return EFI_SUCCESS;
>  }
> @@ -387,7 +510,7 @@ CpuHotplugMmi (
>    }
>
>    if (ToUnplugCount > 0) {
> -    Status = UnplugCpus (mToUnplugApicIds, ToUnplugCount);
> +    Status = UnplugCpus (mToUnplugApicIds, mToUnplugSelector, ToUnplugCount);
>      if (EFI_ERROR (Status)) {
>        goto Fatal;
>      }
> @@ -458,9 +581,14 @@ CpuHotplugEntry (
>
>    //
>    // Our DEPEX on EFI_SMM_CPU_SERVICE_PROTOCOL guarantees that PiSmmCpuDxeSmm
> -  // has pointed PcdCpuHotPlugDataAddress to CPU_HOT_PLUG_DATA in SMRAM.
> +  // has pointed:
> +  // - PcdCpuHotPlugDataAddress to CPU_HOT_PLUG_DATA in SMRAM,
> +  // - PcdCpuHotEjectDataAddress to CPU_HOT_EJECT_DATA in SMRAM, if the
> +  //   possible CPU count is greater than 1.
>    //
>    mCpuHotPlugData = (VOID *)(UINTN)PcdGet64 (PcdCpuHotPlugDataAddress);
> +  mCpuHotEjectData = (VOID *)(UINTN)PcdGet64 (PcdCpuHotEjectDataAddress);
> +
>    if (mCpuHotPlugData == NULL) {
>      Status = EFI_NOT_FOUND;
>      DEBUG ((DEBUG_ERROR, "%a: CPU_HOT_PLUG_DATA: %r\n", __FUNCTION__, 
> Status));
> @@ -472,6 +600,19 @@ CpuHotplugEntry (
>    if (mCpuHotPlugData->ArrayLength == 1) {
>      return EFI_UNSUPPORTED;
>    }
> +
> +  if (mCpuHotEjectData == NULL) {
> +    Status = EFI_NOT_FOUND;
> +  } else if (mCpuHotPlugData->ArrayLength != mCpuHotEjectData->ArrayLength) {
> +    Status = EFI_INVALID_PARAMETER;
> +  } else {
> +    Status = EFI_SUCCESS;
> +  }
> +  if (EFI_ERROR (Status)) {
> +    DEBUG ((DEBUG_ERROR, "%a: CPU_HOT_EJECT_DATA: %r\n", __FUNCTION__, 
> Status));
> +    goto Fatal;
> +  }
> +
>    //
>    // Allocate the data structures that depend on the possible CPU count.
>    //
>



-=-=-=-=-=-=-=-=-=-=-=-
Groups.io Links: You receive all messages sent to this group.
View/Reply Online (#72113): https://edk2.groups.io/g/devel/message/72113
Mute This Topic: https://groups.io/mt/80819863/21656
Group Owner: devel+ow...@edk2.groups.io
Unsubscribe: https://edk2.groups.io/g/devel/unsub [arch...@mail-archive.com]
-=-=-=-=-=-=-=-=-=-=-=-


Reply via email to