This is an automated email from the ASF dual-hosted git repository.

ligd pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/nuttx.git

commit d18fd4640e05ce42c90f409eaa4438d167a76f45
Author: ouyangxiangzhen <[email protected]>
AuthorDate: Thu Jan 15 10:58:21 2026 +0800

    sched/hrtimer: Fix functional correctness issue in SMP.
    
    Since the hrtimer->expired is not monotonic, it can not be used as the
    version. This commit added the ownership encoding to ensure the
    invariant that the cancelled the timer can not modify the hrtimer again.
    
    Signed-off-by: ouyangxiangzhen <[email protected]>
---
 include/nuttx/hrtimer.h            |  39 +++++++------
 sched/hrtimer/hrtimer.h            | 112 ++++++++++++++++++++++++++++++++++++-
 sched/hrtimer/hrtimer_cancel.c     |  74 +++++++++++++-----------
 sched/hrtimer/hrtimer_initialize.c |   2 +-
 sched/hrtimer/hrtimer_process.c    |  17 ++----
 sched/hrtimer/hrtimer_start.c      |   4 ++
 6 files changed, 185 insertions(+), 63 deletions(-)

diff --git a/include/nuttx/hrtimer.h b/include/nuttx/hrtimer.h
index e77041f1105..16db12805d1 100644
--- a/include/nuttx/hrtimer.h
+++ b/include/nuttx/hrtimer.h
@@ -125,23 +125,31 @@ extern "C"
  * Name: hrtimer_cancel
  *
  * Description:
- *   Cancel a high-resolution timer.
- *
- *   If the timer is armed but has not yet expired, it will be removed from
- *   the timer queue and the callback will not be invoked.
- *
- *   If the timer callback is currently executing, this function will mark
- *   the timer as canceled and return immediately. The running callback is
- *   allowed to complete, but it will not be invoked again.
+ *   Cancel a high-resolution timer asynchronously. This function set the
+ *   timer to the cancelled state. The caller will acquire the limited
+ *   ownership of the hrtimer, which allow the caller restart the hrtimer,
+ *   but the callback function may still be executing on another CPU, which
+ *   prevent the caller from freeing the hrtimer. The caller must call
+ *   `hrtimer_cancel` to wait for the callback to be finished. Please use
+ *   the function with care. Concurrency errors are prone to occur in this
+ *   use case.
  *
  *   This function is non-blocking and does not wait for a running callback
  *   to finish.
  *
  * Input Parameters:
- *   hrtimer - Timer instance to cancel
+ *   hrtimer - Pointer to the high-resolution timer instance to cancel.
  *
  * Returned Value:
- *   OK on success; a negated errno value on failure.
+ *   OK (0) on success; a negated errno value on failure.
+ *   > 0 on if the timer callback is running.
+ *
+ * Assumptions/Notes:
+ *   - This function acquires the global hrtimer spinlock to protect both
+ *     the red-black tree and the timer state.
+ *   - The caller must ensure that the timer structure is not freed until
+ *     it is guaranteed that any running callback has returned.
+ *
  ****************************************************************************/
 
 int hrtimer_cancel(FAR hrtimer_t *hrtimer);
@@ -152,12 +160,11 @@ int hrtimer_cancel(FAR hrtimer_t *hrtimer);
  * Description:
  *   Cancel a high-resolution timer and wait until it becomes inactive.
  *
- *   - Calls hrtimer_cancel() to request timer cancellation.
- *   - If the timer callback is running, waits until it completes and
- *     the timer state transitions to HRTIMER_STATE_INACTIVE.
- *   - If sleeping is allowed (normal task context), yields CPU briefly
- *     to avoid busy-waiting.
- *   - Otherwise (interrupt or idle task context), spins until completion.
+ *   Cancel a high-resolution timer and synchronously wait the callback to
+ *   be finished. This function set the timer to the cancelled state and wait
+ *   for all references to be released. The caller will then acquire full
+ *   ownership of the hrtimer. After the function returns, the caller can
+ *   safely deallocate the hrtimer.
  *
  * Input Parameters:
  *   hrtimer - Pointer to the high-resolution timer instance to cancel.
diff --git a/sched/hrtimer/hrtimer.h b/sched/hrtimer/hrtimer.h
index fabcbcf2dd2..ff1d40a947a 100644
--- a/sched/hrtimer/hrtimer.h
+++ b/sched/hrtimer/hrtimer.h
@@ -71,7 +71,7 @@ extern struct list_node g_hrtimer_list;
  */
 
 #ifdef CONFIG_SMP
-extern FAR hrtimer_t *g_hrtimer_running[CONFIG_SMP_NCPUS];
+extern uintptr_t g_hrtimer_running[CONFIG_SMP_NCPUS];
 #endif
 
 /****************************************************************************
@@ -310,5 +310,115 @@ static inline_function bool hrtimer_is_first(FAR 
hrtimer_t *hrtimer)
 #endif
 }
 
+/****************************************************************************
+ * Name: hrtimer_mark_running
+ *
+ * Description:
+ *   Mark the timer as running.
+ *
+ * Input Parameters:
+ *   timer - The timer to be marked.
+ *   cpu   - The CPU core Id.
+ *
+ * Returned Value:
+ *   None.
+ *
+ * Assumption:
+ *   The caller must hold the lock.
+ *
+ ****************************************************************************/
+
+#ifdef CONFIG_SMP
+#  define hrtimer_mark_running(timer, cpu) \
+  (g_hrtimer_running[cpu] = (uintptr_t)(timer))
+#else
+#  define hrtimer_mark_running(timer, cpu) UNUSED(cpu)
+#endif
+#define hrtimer_unmark_running(cpu) hrtimer_mark_running(NULL, cpu)
+
+/****************************************************************************
+ * Name: hrtimer_is_running
+ *
+ * Description:
+ *   Check if the CPU core is running the timer.
+ *
+ * Input Parameters:
+ *   timer - The timer to be marked.
+ *   cpu   - The CPU core Id.
+ *
+ * Returned Value:
+ *   true if the CPU core is running the timer, false otherwise.
+ *
+ * Assumption:
+ *   The caller must hold the lock.
+ *
+ ****************************************************************************/
+
+#ifdef CONFIG_SMP
+#  define hrtimer_is_running(timer, cpu) \
+  (g_hrtimer_running[cpu] == (uintptr_t)(timer))
+#else
+#  define hrtimer_is_running(timer, cpu) (true)
+#endif
+#define hrtimer_is_cancelling(timer, cpu) \
+  hrtimer_is_running((uintptr_t)(timer) | 0x1u, cpu)
+
+/****************************************************************************
+ * Name: hrtimer_cancel_running
+ *
+ * Description:
+ *   Cancel the timer, revoke the ownership of the cancelled timer, and
+ *   return the references count to the timer.
+ *
+ * Input Parameters:
+ *   hrtimer - The cancelled timer.
+ *
+ * Returned Value:
+ *   The references count to the timer.
+ *
+ * Assumption:
+ *   The caller must hold the queue lock.
+ *
+ ****************************************************************************/
+
+static inline_function
+int hrtimer_cancel_running(FAR hrtimer_t *timer)
+{
+  int refs            = 0;
+#ifdef CONFIG_SMP
+  uintptr_t cancelled = (uintptr_t)timer | 0x1u;
+  int cpu;
+
+  /* Check if the timer is referenced by any CPU core.
+   * Generally, only one reference to a timer can exist at the same time.
+   * However, when a timer may be restarted at the cancelled state,
+   * more references to the timer may exist.
+   */
+
+  unroll_loop(CONFIG_SMP_NCPUS) /* Tell the compiler to unroll the loop. */
+
+  for (cpu = 0; cpu < CONFIG_SMP_NCPUS; cpu++)
+    {
+      /* This is a faster implementation equivalent to
+       * (g_hrtimer_running[cpu] & (~(uintptr_t)0x1u)) == timer,
+       * Assuming the pointer of the timer is at least 2-bytes aligned
+       * (the last bit must be zero).
+       */
+
+      if ((g_hrtimer_running[cpu] ^ cancelled) <= 0x1u)
+        {
+          /* Set the timer to the cancelled state and revoke the write
+           * ownership of the timer from the queue.
+           */
+
+          g_hrtimer_running[cpu] = cancelled;
+          refs++;
+        }
+    }
+#endif
+
+  return refs;
+}
+
 #endif /* CONFIG_HRTIMER */
 #endif /* __SCHED_HRTIMER_HRTIMER_H */
diff --git a/sched/hrtimer/hrtimer_cancel.c b/sched/hrtimer/hrtimer_cancel.c
index a93f9ba437d..43a6d2f371e 100644
--- a/sched/hrtimer/hrtimer_cancel.c
+++ b/sched/hrtimer/hrtimer_cancel.c
@@ -60,11 +60,12 @@
 #ifdef CONFIG_SMP
 static inline_function bool hrtimer_is_active(FAR hrtimer_t *hrtimer)
 {
+  int  cpu;
   bool is_active = false;
 
-  for (int i = 0; i < CONFIG_SMP_NCPUS; i++)
+  for (cpu = 0; cpu < CONFIG_SMP_NCPUS; cpu++)
     {
-      if (g_hrtimer_running[i] == hrtimer)
+      if (hrtimer_is_cancelling(hrtimer, cpu))
         {
           is_active = true;
           break;
@@ -83,16 +84,21 @@ static inline_function bool hrtimer_is_active(FAR hrtimer_t 
*hrtimer)
  * Name: hrtimer_cancel
  *
  * Description:
- *   Cancel a high-resolution timer.
+ *   Cancel a high-resolution timer asynchronously.
  *
- *   If the timer is currently armed, it will be removed from the active
- *   hrtimer container (tree or list) and will not be executed.
+ *   If the timer is currently pending, it will be removed from the
+ *   hrtimer queue and will not be executed.
  *
- *   If the timer callback is currently executing, the timer will be marked
- *   as canceled. The running callback is allowed to complete, but the timer
- *   will not be re-armed or executed again.
+ *   If the timer callback is currently executing. This function set the
+ *   timer to the cancelled state. The caller will acquire the limited
+ *   ownership of the hrtimer, which allow the caller restart the hrtimer,
+ *   but the callback function may still be executing on another CPU,
+ *   which prevent the caller from freeing the hrtimer.
+ *   The caller must call `hrtimer_cancel_sync` to wait for the callback
+ *   to be finished. Please use the function with care.
+ *   Concurrency errors are prone to occur in this use case.
  *
- *   If the canceled timer was the earliest (head) timer in the container,
+ *   If the canceled timer was the earliest expired timer in the queue,
  *   the expiration of the underlying hardware timer will be updated to the
  *   expiration time of the next earliest timer
  *
@@ -104,6 +110,7 @@ static inline_function bool hrtimer_is_active(FAR hrtimer_t 
*hrtimer)
  *
  * Returned Value:
  *   OK (0) on success; a negated errno value on failure.
+ *   > 0 on if the timer callback is running.
  *
  * Assumptions/Notes:
  *   - This function acquires the global hrtimer spinlock to protect
@@ -117,7 +124,7 @@ int hrtimer_cancel(FAR hrtimer_t *hrtimer)
 {
   FAR hrtimer_t *first;
   irqstate_t flags;
-  int ret = OK;
+  int ret;
 
   DEBUGASSERT(hrtimer != NULL);
 
@@ -125,17 +132,15 @@ int hrtimer_cancel(FAR hrtimer_t *hrtimer)
 
   flags = spin_lock_irqsave(&g_hrtimer_spinlock);
 
+  /* Ensure no core can write the hrtimer. */
+
+  ret = hrtimer_cancel_running(hrtimer);
+
   if (hrtimer_is_armed(hrtimer))
     {
       hrtimer_remove(hrtimer);
     }
 
-  /* If the timer was running, increment its expiration count to prevent
-   * it from being re-armed by the callback.
-   */
-
-  hrtimer->expired++;
-
   /* If the canceled timer was the earliest one, update the hardware timer */
 
   if (hrtimer_is_first(hrtimer))
@@ -157,10 +162,13 @@ int hrtimer_cancel(FAR hrtimer_t *hrtimer)
  * Name: hrtimer_cancel_sync
  *
  * Description:
- *   Cancel a high-resolution timer and wait until it becomes inactive.
+ *   Cancel a high-resolution timer and synchronously wait the callback to
+ *   be finished.
  *
- *   - Calls hrtimer_cancel() to request timer cancellation.
- *   - If the timer callback is running, waits until it completes.
+ *   If the timer callback is running, this function set the timer to the
+ *   cancelled state and wait for all all references to be released.
+ *   The caller will then acquire full ownership of the hrtimer. After the
+ *   function returns, the caller can safely deallocate the hrtimer.
  *   - If sleeping is allowed (normal task context), yields CPU briefly
  *     to avoid busy-waiting.
  *   - Otherwise (interrupt or idle task context), spins until completion.
@@ -182,26 +190,24 @@ int hrtimer_cancel_sync(FAR hrtimer_t *hrtimer)
   /* Request cancellation of the timer */
 
   ret = hrtimer_cancel(hrtimer);
-  if (ret < 0)
+  if (ret > 0)
     {
-      return ret;
-    }
-
-  /* Wait until all cpu finish running the timer callback.
-   *
-   * If sleeping is permitted, yield the CPU briefly to avoid
-   * busy-waiting. Otherwise, spin until the callback completes.
-   */
+      /* Wait until the timer transitions to the inactive state.
+       *
+       * If sleeping is permitted, yield the CPU briefly to avoid
+       * busy-waiting. Otherwise, spin until the callback completes.
+       */
 #ifdef CONFIG_SMP
-  while (hrtimer_is_active(hrtimer))
-    {
-      if (!up_interrupt_context() &&
-          !is_idle_task(this_task()))
+      while (hrtimer_is_active(hrtimer))
         {
-          nxsched_msleep(HRTIMER_CANCEL_SYNC_DELAY_MS);
+          if (!up_interrupt_context() &&
+              !is_idle_task(this_task()))
+            {
+              nxsched_msleep(HRTIMER_CANCEL_SYNC_DELAY_MS);
+            }
         }
-    }
 #endif
+    }
 
   return ret;
 }
diff --git a/sched/hrtimer/hrtimer_initialize.c 
b/sched/hrtimer/hrtimer_initialize.c
index a79d52b8e26..43010f074d2 100644
--- a/sched/hrtimer/hrtimer_initialize.c
+++ b/sched/hrtimer/hrtimer_initialize.c
@@ -37,7 +37,7 @@
  */
 
 #ifdef CONFIG_SMP
-FAR hrtimer_t *g_hrtimer_running[CONFIG_SMP_NCPUS];
+uintptr_t g_hrtimer_running[CONFIG_SMP_NCPUS];
 #endif
 
 /* Global spinlock protecting the high-resolution timer subsystem.
diff --git a/sched/hrtimer/hrtimer_process.c b/sched/hrtimer/hrtimer_process.c
index ff7f945b6d3..97266cacf8c 100644
--- a/sched/hrtimer/hrtimer_process.c
+++ b/sched/hrtimer/hrtimer_process.c
@@ -75,9 +75,7 @@ void hrtimer_process(uint64_t now)
   hrtimer_entry_t func;
   uint64_t expired;
   uint64_t period;
-#ifdef CONFIG_SMP
   int cpu = this_cpu();
-#endif
 
   /* Lock the hrtimer container to protect access */
 
@@ -108,9 +106,8 @@ void hrtimer_process(uint64_t now)
 
       hrtimer_remove(hrtimer);
 
-#ifdef CONFIG_SMP
-      g_hrtimer_running[cpu] = hrtimer;
-#endif
+      hrtimer_mark_running(hrtimer, cpu);
+
       /* Leave critical section before invoking the callback */
 
       spin_unlock_irqrestore(&g_hrtimer_spinlock, flags);
@@ -123,22 +120,18 @@ void hrtimer_process(uint64_t now)
 
       flags = spin_lock_irqsave(&g_hrtimer_spinlock);
 
-#ifdef CONFIG_SMP
-      g_hrtimer_running[cpu] = NULL;
-#endif
-
       /* If the timer is periodic and has not been rearmed or
        * cancelled concurrently,
        * compute next expiration and reinsert into container
        */
 
-      if (period > 0 && hrtimer->expired == expired)
+      if (period != 0u && hrtimer_is_running(hrtimer, cpu))
         {
           hrtimer->expired += period;
 
           /* Ensure no overflow occurs */
 
-          DEBUGASSERT(hrtimer->expired > period);
+          DEBUGASSERT(hrtimer->expired >= period);
 
           hrtimer->func = func;
           hrtimer_insert(hrtimer);
@@ -149,6 +142,8 @@ void hrtimer_process(uint64_t now)
       hrtimer = hrtimer_get_first();
     }
 
+  hrtimer_unmark_running(cpu);
+
   /* Schedule the next timer expiration */
 
   if (hrtimer != NULL)
diff --git a/sched/hrtimer/hrtimer_start.c b/sched/hrtimer/hrtimer_start.c
index 81f688de23d..61ed886dd39 100644
--- a/sched/hrtimer/hrtimer_start.c
+++ b/sched/hrtimer/hrtimer_start.c
@@ -73,6 +73,10 @@ int hrtimer_start(FAR hrtimer_t *hrtimer, hrtimer_entry_t 
func,
 
   flags = spin_lock_irqsave(&g_hrtimer_spinlock);
 
+  /* Ensure no core can write the hrtimer. */
+
+  hrtimer_cancel_running(hrtimer);
+
   if (hrtimer_is_armed(hrtimer))
     {
       hrtimer_remove(hrtimer);

Reply via email to