Fix-Point commented on PR #17675:
URL: https://github.com/apache/nuttx/pull/17675#issuecomment-3698712790
@wangchdo As I have emphasized time and time again, there are **fundamental
issues** with your concurrent state machine and API design. Even with the
addition of reference counting, your `hrtimer_cancel` implementation cannot
guarantee that a periodic timer that has already been canceled—but is still
executing—will not restart and overwrite a new timer. As I claimed before:
**After carefully reviewing your implementation, I could not really find a good
solution.**.
Here is incorrect thread interleaving I mentioned before.
```bash
Timeline:
CPU1 (Timer Callback) CPU2 (Set New Timer)
------|--------------------------------------|-------------------------
| |
t1: timer triggers at t1 |
|--- Callback starts |
| hrtimer->state <- running |
| |
| [Unlock] |--- hrtimer_cancel(hrtimer)
| |--- hrtimer_start(hrtimer, t2)
| | hrtimer->state <- armed
| ... | [Unlock]
| | ...
| Callback executes... | [Lock]
| |--- New timer triggered
| | hrtimer->state <- running
| | [Unlock]
| | Calllback executes....
| |
| Returns next = UINT32_MAX |
| [Lock] |
| if (hrtimer->state == running) |
| hrtimer->expired |
| <- t2 + UINT32_MAX (Incorrect! expected t1 + UINT32_MAX)
| hrtimer->state <- armed |
| [Unlock] |
| | Returns next = 1
| | [Lock]
| |--- hrtimer->state != running
| | failed to set next
(Incorrect!)
| | The previous cancelled
timer
| | callback restarted the new
timer.
------|--------------------------------------|-------------------------
```
You can make a very easy test-case to reproduce this interleaving, where the
**new timer is overwritten by a cancelled timer callback**.
```c
#include <nuttx/config.h>
#include <stdio.h>
#include <nuttx/hrtimer.h>
#include <nuttx/spinlock.h>
#define HRTIMER_TEST_THREAD_NR (4)
/****************************************************************************
* Public Functions
****************************************************************************/
static volatile int count = 0;
static hrtimer_t timer;
static spinlock_t lock = SP_UNLOCKED;
/****************************************************************************
* Name: hrtimer_process
*
* Description:
* Called from the timer interrupt handler to process expired
* high-resolution timers. If a timer has expired, its callback
* function will be executed in the context of the timer interrupt.
*
* Input Parameters:
* now - The current time (nsecs).
*
* Returned Value:
* None
****************************************************************************/
void hrtimer_process(uint64_t now);
/****************************************************************************
* Inline Functions
****************************************************************************/
/****************************************************************************
* Name: hrtimer_gettime
*
* Description:
* Get the current high-resolution time in nanoseconds.
*
* Returned Value:
* Current time in nanoseconds.
****************************************************************************/
static inline_function
uint64_t hrtimer_gettime(void)
{
struct timespec ts;
/* Get current time from platform-specific timer */
clock_systime_timespec(&ts);
/* Convert timespec to nanoseconds */
return clock_time2nsec(&ts);
}
static uint32_t test_callback_period(FAR struct hrtimer_s *hrtimer)
{
irqstate_t flags;
flags = spin_lock_irqsave(&lock);
count++;
spin_unlock_irqrestore(&lock, flags);
return 100 * NSEC_PER_USEC;
}
static uint32_t test_callback(FAR struct hrtimer_s *hrtimer)
{
count++;
return 0;
}
static void* test_thread(void *arg)
{
irqstate_t flags;
int tid = (int)arg;
int ret;
if (tid == 0)
{
hrtimer_init(&timer, test_callback, NULL);
}
while (1)
{
if (tid == 0)
{
int tmp_count;
flags = spin_lock_irqsave(&lock);
ret = hrtimer_cancel(&timer);
hrtimer_init(&timer, test_callback_period, NULL);
ret = hrtimer_start(&timer, 0, HRTIMER_MODE_REL);
spin_unlock_irqrestore(&lock, flags);
up_ndelay(1000);
spin_lock_irqsave(&lock);
ret = hrtimer_cancel(&timer);
hrtimer_init(&timer, test_callback, NULL);
ret = hrtimer_start(&timer, 0, HRTIMER_MODE_REL);
spin_unlock_irqrestore(&lock, flags);
// Wait until the oneshot hrtimer finished.
while (*(volatile int *)(&timer.state) != HRTIMER_STATE_INACTIVE)
{
up_ndelay(NSEC_PER_SEC); // Stucked at here, Because the old
peridical callback restarted the timer.
printf("Waitting... timer state: %d\n", *(volatile int
*)(&timer.state));
}
printf("passed %d\n", count);
}
else
{
flags = up_irq_save();
hrtimer_process(hrtimer_gettime());
up_irq_restore(flags);
}
}
UNUSED(ret);
return NULL;
}
/****************************************************************************
* hello_main
****************************************************************************/
int main(int argc, FAR char *argv[])
{
unsigned int thread_id;
pthread_attr_t attr;
pthread_t pthreads[HRTIMER_TEST_THREAD_NR];
printf("hrtimer_test start...\n");
ASSERT(pthread_attr_init(&attr) == 0);
/* Create wdog test thread */
for (thread_id = 0; thread_id < HRTIMER_TEST_THREAD_NR; thread_id++)
{
ASSERT(pthread_create(&pthreads[thread_id], &attr,
test_thread, (void *)thread_id) == 0);
}
for (thread_id = 0; thread_id < HRTIMER_TEST_THREAD_NR; thread_id++)
{
pthread_join(pthreads[thread_id], NULL);
}
ASSERT(pthread_attr_destroy(&attr) == 0);
printf("hrtimer_test end...\n");
return 0;
}
```
The test result on your latest code base:
```bash
➜ qemu-system-riscv32 -semihosting -M virt,aclint=on -cpu rv32 -smp 8
-bios none -nographic -kernel nuttx
NuttShell (NSH) NuttX-12.11.0
nsh> hello
hrtimer_test start...
Waitting... timer state: 0
passed 2
Waitting... timer state: 0
passed 3
Waitting... timer state: 0
passed 4
Waitting... timer state: 0
passed 5
Waitting... timer state: 0
passed 6
Waitting... timer state: 0
passed 8
Waitting... timer state: 0
passed 9
Waitting... timer state: 0
passed 10
Waitting... timer state: 0
passed 11
Waitting... timer state: 0
passed 12
Waitting... timer state: 0
passed 13
Waitting... timer state: 2
Waitting... timer state: 2
Waitting... timer state: 2
Waitting... timer state: 2
Waitting... timer state: 2
Waitting... timer state: 2
Waitting... timer state: 2
Waitting... timer state: 2
Waitting... timer state: 2
Waitting... timer state: 2
.... (Expected passed)
```
Sorry, I have to say that I am currently occupied with other tasks and do
not have time to test your implementation further. I have already provided
**multiple concurrent test cases** demonstrating issues in your implementation,
yet you stubbornly insist that you are correct. I believe **your understanding
of parallel programming is superficial, and this meaningless debate is wasting
both our time**.
As for your suggestion that I conduct performance tests on your
implementation, my response is: please first ensure the functional correctness
of your implementation, because performance built upon incorrect functionality
is meaningless.
I have spent a considerable amount of time carefully explaining why your
implementation is functionally incorrect, why my implementation cannot be
improved on your implementation, and have provided analysis, data, and test
cases. Yet, you have failed to present any data, theoretically possible
interleaving scenarios, or test cases proving that my implementation has issues.
So I have to wonder why my implementation was ignored, allowing such a
functional incorrect piece of code to be merged into the master branch. Isn’t
this simply a case of factional bias clouding judgment? I am sorry that I have
no interest in the factional struggles at all. My only goal is to do my best to
make NuttX faster and more reliable. If your implementation is proved to be
superior than mine, I will surely support you.
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]