On 6/3/2025 4:45 PM, Dave Jiang wrote:


On 6/3/25 3:19 PM, Smita Koralahalli wrote:
Introduce a waitqueue mechanism to coordinate initialization between the
cxl_pci and cxl_mem drivers.

Launch a background worker from cxl_acpi_probe() that waits for both
drivers to complete initialization before invoking wait_for_device_probe().
Without this, the probe completion wait could begin prematurely, before
the drivers are present, leading to missed updates.

Co-developed-by: Nathan Fontenot <nathan.fonte...@amd.com>
Signed-off-by: Nathan Fontenot <nathan.fonte...@amd.com>
Co-developed-by: Terry Bowman <terry.bow...@amd.com>
Signed-off-by: Terry Bowman <terry.bow...@amd.com>
Signed-off-by: Smita Koralahalli <smita.koralahallichannabasa...@amd.com>
---
  drivers/cxl/acpi.c         | 23 +++++++++++++++++++++++
  drivers/cxl/core/suspend.c | 21 +++++++++++++++++++++
  drivers/cxl/cxl.h          |  2 ++
  3 files changed, 46 insertions(+)

diff --git a/drivers/cxl/acpi.c b/drivers/cxl/acpi.c
index cb14829bb9be..978f63b32b41 100644
--- a/drivers/cxl/acpi.c
+++ b/drivers/cxl/acpi.c
@@ -813,6 +813,24 @@ static int pair_cxl_resource(struct device *dev, void *data)
        return 0;
  }
+static void cxl_softreserv_mem_work_fn(struct work_struct *work)
+{
+       /* Wait for cxl_pci and cxl_mem drivers to load */
+       cxl_wait_for_pci_mem();
+
+       /*
+        * Wait for the driver probe routines to complete after cxl_pci
+        * and cxl_mem drivers are loaded.
+        */
+       wait_for_device_probe();
+}
+static DECLARE_WORK(cxl_sr_work, cxl_softreserv_mem_work_fn);
+
+static void cxl_softreserv_mem_update(void)
+{
+       schedule_work(&cxl_sr_work);
+}
+
  static int cxl_acpi_probe(struct platform_device *pdev)
  {
        int rc;
@@ -887,6 +905,10 @@ static int cxl_acpi_probe(struct platform_device *pdev)
        /* In case PCI is scanned before ACPI re-trigger memdev attach */
        cxl_bus_rescan();
+
+       /* Update SOFT RESERVE resources that intersect with CXL regions */
+       cxl_softreserv_mem_update();
+
        return 0;
  }
@@ -918,6 +940,7 @@ static int __init cxl_acpi_init(void)
  static void __exit cxl_acpi_exit(void)
  {
+       cancel_work_sync(&cxl_sr_work);
        platform_driver_unregister(&cxl_acpi_driver);
        cxl_bus_drain();
  }
diff --git a/drivers/cxl/core/suspend.c b/drivers/cxl/core/suspend.c
index 72818a2c8ec8..c0d8f70aed56 100644
--- a/drivers/cxl/core/suspend.c
+++ b/drivers/cxl/core/suspend.c
@@ -2,12 +2,15 @@
  /* Copyright(c) 2022 Intel Corporation. All rights reserved. */
  #include <linux/atomic.h>
  #include <linux/export.h>
+#include <linux/wait.h>
  #include "cxlmem.h"
  #include "cxlpci.h"
  static atomic_t mem_active;
  static atomic_t pci_loaded;
+static DECLARE_WAIT_QUEUE_HEAD(cxl_wait_queue);
+
  bool cxl_mem_active(void)
  {
        if (IS_ENABLED(CONFIG_CXL_MEM))
@@ -19,6 +22,7 @@ bool cxl_mem_active(void)
  void cxl_mem_active_inc(void)
  {
        atomic_inc(&mem_active);
+       wake_up(&cxl_wait_queue);
  }
  EXPORT_SYMBOL_NS_GPL(cxl_mem_active_inc, "CXL");
@@ -28,8 +32,25 @@ void cxl_mem_active_dec(void)
  }
  EXPORT_SYMBOL_NS_GPL(cxl_mem_active_dec, "CXL");
+static bool cxl_pci_loaded(void)
+{
+       if (IS_ENABLED(CONFIG_CXL_PCI))
+               return atomic_read(&pci_loaded) != 0;
+
+       return false;
+}
+
  void mark_cxl_pci_loaded(void)
  {
        atomic_inc(&pci_loaded);
+       wake_up(&cxl_wait_queue);
  }
  EXPORT_SYMBOL_NS_GPL(mark_cxl_pci_loaded, "CXL");
+
+void cxl_wait_for_pci_mem(void)
+{
+       if (!wait_event_timeout(cxl_wait_queue, cxl_pci_loaded() &&
+                               cxl_mem_active(), 30 * HZ))

I'm trying to understand why cxl_pci_loaded() is needed. cxl_mem_active() goes 
above 0 when a cxl_mem_probe() instance succeeds. cxl_mem_probe() being 
triggered implies that an instance of cxl_pci_probe() has already been called, 
since cxl_mem_probe() is triggered from devm_cxl_add_memdev() when a memdev is 
added, and that the cxl_mem driver has also been loaded. So doesn't 
cxl_mem_active() imply cxl_pci_loaded(), making the latter check unnecessary?

Yeah, you are right. I will remove this check.

Thanks
Smita

DJ


+               pr_debug("Timeout waiting for cxl_pci or cxl_mem probing\n");
+}
+EXPORT_SYMBOL_NS_GPL(cxl_wait_for_pci_mem, "CXL");
diff --git a/drivers/cxl/cxl.h b/drivers/cxl/cxl.h
index a9ab46eb0610..1ba7d39c2991 100644
--- a/drivers/cxl/cxl.h
+++ b/drivers/cxl/cxl.h
@@ -902,6 +902,8 @@ void cxl_coordinates_combine(struct access_coordinate *out,
  bool cxl_endpoint_decoder_reset_detected(struct cxl_port *port);
+void cxl_wait_for_pci_mem(void);
+
  /*
   * Unit test builds overrides this to __weak, find the 'strong' version
   * of these symbols in tools/testing/cxl/.



Reply via email to