[qpid-proton] 03/03: PROTON-2362: epoll proactor fix for tsan_tr2.txt. Make scheduling and re-scheduling completely separate.

2021-11-22 Thread cliffjansen
This is an automated email from the ASF dual-hosted git repository.

cliffjansen pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/qpid-proton.git

commit d31f829baafd3a45208c85e7d791452b4e997235
Author: Cliff Jansen 
AuthorDate: Mon Nov 22 10:25:23 2021 -0800

PROTON-2362: epoll proactor fix for tsan_tr2.txt. Make scheduling and 
re-scheduling completely separate.
---
 c/src/proactor/epoll-internal.h |  14 ++-
 c/src/proactor/epoll.c  | 226 +++-
 c/tests/proactor_test.cpp   |  30 --
 3 files changed, 168 insertions(+), 102 deletions(-)

diff --git a/c/src/proactor/epoll-internal.h b/c/src/proactor/epoll-internal.h
index f0f57af..8e9e1b2 100644
--- a/c/src/proactor/epoll-internal.h
+++ b/c/src/proactor/epoll-internal.h
@@ -88,8 +88,9 @@ typedef struct task_t {
   bool working;
   bool ready;// ready to run and on ready list.  Poller 
notified by eventfd.
   bool waking;
-  bool on_ready_list;// todo: protected by eventfd_mutex or sched 
mutex?  needed?
+  unsigned int ready_generation;
   struct task_t *ready_next; // ready list, guarded by proactor eventfd_mutex
+  struct task_t *resched_next; // resched list, guarded by sched mutex
   bool closing;
   // Next 4 are protected by the proactor mutex
   struct task_t* next;  /* Protected by proactor.mutex */
@@ -164,6 +165,8 @@ struct pn_proactor_t {
   bool ready_list_active;
   task_t *ready_list_first;
   task_t *ready_list_last;
+  unsigned int ready_list_count;
+  unsigned int ready_list_generation; // protected by both eventfd_mutex and a 
single p->poller instance
   // Interrupts have a dedicated eventfd because they must be async-signal 
safe.
   int interruptfd;
   // If the process runs out of file descriptors, disarm listening sockets 
temporarily and save them here.
@@ -188,7 +191,14 @@ struct pn_proactor_t {
   tslot_t *last_earmark;
   task_t *sched_ready_first;
   task_t *sched_ready_last;
-  task_t *sched_ready_current;
+  task_t *sched_ready_current; // TODO: remove or use for sceduling priority 
or fairness
+  unsigned int sched_ready_count;
+  task_t *resched_first;
+  task_t *resched_last;
+  task_t *resched_cutoff; // last resched task of current poller work 
snapshot.  TODO: superseded by polled_resched_count?
+  task_t *resched_next;
+  unsigned int resched_count;
+  unsigned int polled_resched_count; 
   pmutex tslot_mutex;
   int earmark_count;
   bool earmark_drain;
diff --git a/c/src/proactor/epoll.c b/c/src/proactor/epoll.c
index 31edfbe..ea2e25a 100644
--- a/c/src/proactor/epoll.c
+++ b/c/src/proactor/epoll.c
@@ -260,28 +260,31 @@ static void pop_ready_task(task_t *tsk) {
   // !ready .. schedule() .. on ready_list .. on sched_ready_list .. working 
task .. !sched_ready && !ready
   //
   // Intervening locks at each transition ensures ready_next has memory 
coherence throughout the ready task scheduling cycle.
+  // TODO: sched_ready list changed to sequential processing.  Review need for 
sched_ready_current.
   pn_proactor_t *p = tsk->proactor;
   if (tsk == p->sched_ready_current)
 p->sched_ready_current = tsk->ready_next;
-  if (tsk == p->sched_ready_first) {
-// normal code path
-if (tsk == p->sched_ready_last) {
-  p->sched_ready_first = p->sched_ready_last = NULL;
-} else {
-  p->sched_ready_first = tsk->ready_next;
-}
-if (!p->sched_ready_first)
-  p->sched_ready_last = NULL;
+  assert (tsk == p->sched_ready_first);
+  assert (p->sched_ready_count);
+  p->sched_ready_count--;
+  if (tsk == p->sched_ready_last) {
+p->sched_ready_first = p->sched_ready_last = NULL;
   } else {
-// tsk is not first in a multi-element list
-task_t *prev = NULL;
-for (task_t *i = p->sched_ready_first; i != tsk; i = i->ready_next)
-  prev = i;
-prev->ready_next = tsk->ready_next;
-if (tsk == p->sched_ready_last)
-  p->sched_ready_last = prev;
+p->sched_ready_first = tsk->ready_next;
   }
-  tsk->on_ready_list = false;
+  if (!p->sched_ready_first) {
+p->sched_ready_last = NULL;
+assert(p->sched_ready_count == 0);
+  }
+}
+
+// Call only as the poller task that has already called schedule_ready_list() 
and already
+// incremented p->ready_list_generation.  All list elements before 
sched_ready_last have
+// correct generation from mutex barrier and cannot have tsk->ready_generation 
set to a
+// new generation until after the poller task releases the sched lock and 
allows tsk to
+// run again.
+inline static bool on_sched_ready_list(task_t *tsk, pn_proactor_t *p) {
+  return tsk->ready_generation && (tsk->ready_generation != 
p->ready_list_generation);
 }
 
 // part1: call with tsk->owner lock held, return true if notify_poller 
required by caller.
@@ -294,8 +297,10 @@ bool schedule(task_t *tsk) {
   tsk->ready = true;
   pn_proactor_t *p = tsk->proactor;
   lock(>eventfd_mutex);
+  assert(tsk->ready_generation == 0);  // Can't be on list 

[qpid-proton] 01/03: PROTON-2362: epoll proactor fix for tsan_tr1.txt. Check earmark edge case at same time and with same lock as for unassign_thread.

2021-11-22 Thread cliffjansen
This is an automated email from the ASF dual-hosted git repository.

cliffjansen pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/qpid-proton.git

commit 239a39eb2d04f0588081975a4f173bb5f121d1fa
Author: Cliff Jansen 
AuthorDate: Sun Nov 21 12:37:32 2021 -0800

PROTON-2362: epoll proactor fix for tsan_tr1.txt.  Check earmark edge case 
at same time and with same lock as for unassign_thread.
---
 c/src/proactor/epoll-internal.h   |   3 +-
 c/src/proactor/epoll.c| 114 ++
 c/src/proactor/epoll_raw_connection.c |   4 +-
 3 files changed, 65 insertions(+), 56 deletions(-)

diff --git a/c/src/proactor/epoll-internal.h b/c/src/proactor/epoll-internal.h
index 5f7ed9b..f0f57af 100644
--- a/c/src/proactor/epoll-internal.h
+++ b/c/src/proactor/epoll-internal.h
@@ -348,7 +348,7 @@ int pclosefd(pn_proactor_t *p, int fd);
 void proactor_add(task_t *tsk);
 bool proactor_remove(task_t *tsk);
 
-bool unassign_thread(tslot_t *ts, tslot_state new_state);
+bool unassign_thread(pn_proactor_t *p, tslot_t *ts, tslot_state new_state, 
tslot_t **resume_thread);
 
 void task_init(task_t *tsk, task_type_t t, pn_proactor_t *p);
 static void task_finalize(task_t* tsk) {
@@ -385,6 +385,7 @@ void pni_timer_manager_finalize(pni_timer_manager_t *tm);
 pn_event_batch_t *pni_timer_manager_process(pni_timer_manager_t *tm, bool 
timeout, bool sched_ready);
 void pni_pconnection_timeout(pconnection_t *pc);
 void pni_proactor_timeout(pn_proactor_t *p);
+void pni_resume(pn_proactor_t *p, tslot_t *ts);
 
 // Generic wake primitives for a task.
 
diff --git a/c/src/proactor/epoll.c b/c/src/proactor/epoll.c
index 1adaeb3..c481274 100644
--- a/c/src/proactor/epoll.c
+++ b/c/src/proactor/epoll.c
@@ -406,7 +406,26 @@ static void resume(pn_proactor_t *p, tslot_t *ts) {
 pthread_cond_signal(>cond);
   }
   unlock(>mutex);
+}
+
+// Call with no lock
+void pni_resume(pn_proactor_t *p, tslot_t *ts) {
+  resume(p, ts);
+}
+
+// Call with shed_lock held
+// Caller must resume() return value if not null
+static tslot_t *resume_one_thread(pn_proactor_t *p) {
+  // If pn_proactor_get has an early return, we need to resume one suspended 
thread (if any)
+  // to be the new poller.
 
+  tslot_t *ts = p->suspend_list_head;
+  if (ts) {
+LL_REMOVE(p, suspend_list, ts);
+p->suspend_list_count--;
+ts->state = PROCESSING;
+  }
+  return ts;
 }
 
 // Call with sched lock
@@ -445,11 +464,14 @@ static bool reschedule(task_t *tsk) {
   return notify;
 }
 
-// Call with sched lock
-bool unassign_thread(tslot_t *ts, tslot_state new_state) {
+// Call with sched lock.
+// If true returned, caller must call notify_poller() after releasing the lock.
+// If resume_thread is set, the caller must call resume() after releasing the 
lock.
+bool unassign_thread(pn_proactor_t *p, tslot_t *ts, tslot_state new_state, 
tslot_t **resume_thread) {
   task_t *tsk = ts->task;
   bool notify = false;
   bool deleting = (ts->state == DELETING);
+  *resume_thread = NULL;
   ts->task = NULL;
   ts->state = new_state;
   if (tsk) {
@@ -460,7 +482,6 @@ bool unassign_thread(tslot_t *ts, tslot_state new_state) {
   // Check if unseen events or schedule() calls occurred while task was 
working.
 
   if (tsk && !deleting) {
-pn_proactor_t *p = tsk->proactor;
 ts->prev_task = tsk;
 if (tsk->sched_pending) {
   // Make sure the task is already scheduled or put it on the ready list
@@ -482,6 +503,19 @@ bool unassign_thread(tslot_t *ts, tslot_state new_state) {
   }
 }
   }
+
+  // Earmark drain accounting.
+  if (ts->earmark_override) {
+// This thread "stole" the task previously assigned to thread 
ts->earmark_override.
+if (ts->earmark_override->generation == ts->earmark_override_gen) {
+  // Other (overridden) thread not seen since this thread completed the 
pending work on the task.
+  // Thread is perhaps gone forever, which may leave us short of a poller 
thread and hanging.
+  // Find a thread to resume if available.  Worst case is a spurious 
resume/suspend by an idle thread.
+  *resume_thread = resume_one_thread(p);
+}
+ts->earmark_override = NULL;
+  }
+
   return notify;
 }
 
@@ -1014,10 +1048,11 @@ static void pconnection_done(pconnection_t *pc) {
   pconnection_cleanup(pc);
   // pc may be undefined now
   lock(>sched_mutex);
-  notify = unassign_thread(ts, UNUSED);
+  tslot_t *resume_thread;
+  notify = unassign_thread(p, ts, UNUSED, _thread);
   unlock(>sched_mutex);
-  if (notify)
-notify_poller(p);
+  if (notify) notify_poller(p);
+  if (resume_thread) resume(p, resume_thread);
   return;
 }
   }
@@ -1029,10 +1064,12 @@ static void pconnection_done(pconnection_t *pc) {
 
   if (wanted) pconnection_rearm(pc, wanted);  // May free pc on another 
thread.  Return without touching pc again.
   lock(>sched_mutex);
-  if (unassign_thread(ts, UNUSED))
+  tslot_t *resume_thread;
+  if 

[qpid-proton] 02/03: PROTON-2362: epoll proactor fix for tsan_tr3.txt. Use safe local variable not subject to deletion in another thread.

2021-11-22 Thread cliffjansen
This is an automated email from the ASF dual-hosted git repository.

cliffjansen pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/qpid-proton.git

commit 586d94464d2ad6fc69e04aaa42f0df54a788561c
Author: Cliff Jansen 
AuthorDate: Sun Nov 21 13:09:25 2021 -0800

PROTON-2362: epoll proactor fix for tsan_tr3.txt. Use safe local variable 
not subject to deletion in another thread.
---
 c/src/proactor/epoll.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/c/src/proactor/epoll.c b/c/src/proactor/epoll.c
index c481274..31edfbe 100644
--- a/c/src/proactor/epoll.c
+++ b/c/src/proactor/epoll.c
@@ -1451,7 +1451,7 @@ void pn_proactor_connect2(pn_proactor_t *p, 
pn_connection_t *c, pn_transport_t *
   }
   /* We need to issue INACTIVE on immediate failure */
   unlock(>task.mutex);
-  if (notify) notify_poller(pc->task.proactor);
+  if (notify) notify_poller(p);
 }
 
 static void pconnection_tick(pconnection_t *pc) {

-
To unsubscribe, e-mail: commits-unsubscr...@qpid.apache.org
For additional commands, e-mail: commits-h...@qpid.apache.org



[qpid-proton] branch main updated (8b816f4 -> d31f829)

2021-11-22 Thread cliffjansen
This is an automated email from the ASF dual-hosted git repository.

cliffjansen pushed a change to branch main
in repository https://gitbox.apache.org/repos/asf/qpid-proton.git.


from 8b816f4  NO-JIRA upgrade from Catch v2.13.6 to v2.13.7 (#342)
 new 239a39e  PROTON-2362: epoll proactor fix for tsan_tr1.txt.  Check 
earmark edge case at same time and with same lock as for unassign_thread.
 new 586d944  PROTON-2362: epoll proactor fix for tsan_tr3.txt. Use safe 
local variable not subject to deletion in another thread.
 new d31f829  PROTON-2362: epoll proactor fix for tsan_tr2.txt. Make 
scheduling and re-scheduling completely separate.

The 3 revisions listed above as "new" are entirely new to this
repository and will be described in separate emails.  The revisions
listed as "add" were already present in the repository and have only
been added to this reference.


Summary of changes:
 c/src/proactor/epoll-internal.h   |  17 +-
 c/src/proactor/epoll.c| 348 +++---
 c/src/proactor/epoll_raw_connection.c |   4 +-
 c/tests/proactor_test.cpp |  30 ++-
 4 files changed, 237 insertions(+), 162 deletions(-)

-
To unsubscribe, e-mail: commits-unsubscr...@qpid.apache.org
For additional commands, e-mail: commits-h...@qpid.apache.org