Instead of waking up frequently to check if actors need to run, set an
alarm for the first timeout and use a signalfd to get us out of poll() when
the alarm expires and SIGALRM is sent.

alarm(2) only has second granularity but we are using delayed actors for
multi-second timeout handling so this is ok. All callsites were multiplying
by 1000 for wait-in-msec, so just change actor_timer() callsites to pass
timeout in seconds.

Fold actor_check() into actor_poll().

Remove actor_timer_mod, not used.

Rename actor_list to ready_list for clarity.

Remove poll_list, no longer needed.

Remove ACTOR_POLL_WAITING state, no longer needed.

Add some more helpful macros to list.h.

Based on earlier work by Chris Leech.

Signed-off-by: Andy Grover <agro...@redhat.com>
---
 include/list.h   |   6 ++
 usr/Makefile     |   4 +-
 usr/actor.c      | 264 +++++++++++++++++++++++++++----------------------------
 usr/actor.h      |  11 +--
 usr/event_poll.c |  47 ++++++++--
 usr/initiator.c  |  14 +--
 6 files changed, 190 insertions(+), 156 deletions(-)

diff --git a/include/list.h b/include/list.h
index cccc3c3..94ad99b 100644
--- a/include/list.h
+++ b/include/list.h
@@ -38,6 +38,12 @@ static inline int list_empty(const struct list_head *head)
 #define list_entry(ptr, type, member) \
        list_container_of(ptr, type, member)
 
+#define list_first_entry(ptr, type, member) \
+       list_entry((ptr)->next, type, member)
+
+#define list_first_entry_or_null(ptr, type, member) \
+       (!list_empty(ptr) ? list_first_entry(ptr, type, member) : NULL)
+
 #define list_for_each(pos, head) \
        for (pos = (head)->next; pos != (head); pos = pos->next)
 
diff --git a/usr/Makefile b/usr/Makefile
index 3d8ee22..550fdff 100644
--- a/usr/Makefile
+++ b/usr/Makefile
@@ -55,14 +55,14 @@ all: $(PROGRAMS)
 
 iscsid: $(ISCSI_LIB_SRCS) $(INITIATOR_SRCS) $(DISCOVERY_SRCS) \
        iscsid.o session_mgmt.o discoveryd.o
-       $(CC) $(CFLAGS) $^ -o $@  -L../utils/open-isns -lisns
+       $(CC) $(CFLAGS) $^ -o $@  -L../utils/open-isns -lisns -lrt
 
 iscsiadm: $(ISCSI_LIB_SRCS) $(DISCOVERY_SRCS) iscsiadm.o session_mgmt.o
        $(CC) $(CFLAGS) $^ -o $@ -L../utils/open-isns -lisns
 
 iscsistart: $(ISCSI_LIB_SRCS) $(INITIATOR_SRCS) $(FW_BOOT_SRCS) \
                iscsistart.o statics.o
-       $(CC) $(CFLAGS) -static $^ -o $@
+       $(CC) $(CFLAGS) -static $^ -o $@ -lrt
 clean:
        rm -f *.o $(PROGRAMS) .depend $(LIBSYS)
 
diff --git a/usr/actor.c b/usr/actor.c
index a1373d6..37b5024 100644
--- a/usr/actor.c
+++ b/usr/actor.c
@@ -1,7 +1,8 @@
 /*
- * iSCSI usermode single-threaded scheduler
+ * iSCSI timeout & deferred work handling
  *
  * Copyright (C) 2004 Dmitry Yusupov, Alex Aizman
+ * Copyright (C) 2014 Red Hat Inc.
  * maintained by open-iscsi@googlegroups.com
  *
  * This program is free software; you can redistribute it and/or modify
@@ -17,34 +18,25 @@
  * See the file COPYING included with this distribution for more details.
  */
 #include <inttypes.h>
+#include <time.h>
+#include <sys/signalfd.h>
+#include <assert.h>
+#include <unistd.h>
 #include "actor.h"
 #include "log.h"
 #include "list.h"
 
 static LIST_HEAD(pend_list);
-static LIST_HEAD(poll_list);
-static LIST_HEAD(actor_list);
+static LIST_HEAD(ready_list);
 static volatile int poll_in_progress;
-static volatile uint64_t actor_jiffies = 0;
-
-#define actor_diff(_time1, _time2) ({ \
-        uint64_t __ret; \
-        if ((_time2) >= (_time1)) \
-           __ret = (_time2) - (_time1); \
-        else \
-           __ret = ((~0ULL) - (_time1)) + (_time2); \
-        __ret; \
-})
-
-#define ACTOR_MS_TO_TICKS(_a)  ((_a)/ACTOR_RESOLUTION)
 
 static uint64_t
-actor_diff_time(actor_t *thread, uint64_t current_time)
+actor_time_left(actor_t *thread, uint64_t current_time)
 {
-       uint64_t diff_time = actor_diff(thread->scheduled_at, current_time);
-       if(diff_time >= thread->ttschedule)
+       if (current_time > thread->ttschedule)
                return 0;
-       return (thread->ttschedule - diff_time);
+       else
+               return (thread->ttschedule - current_time);
 }
 
 #define time_after(a,b) \
@@ -65,11 +57,18 @@ actor_delete(actor_t *thread)
        log_debug(7, "thread %08lx delete: state %d", (long)thread,
                        thread->state);
        switch(thread->state) {
-       case ACTOR_SCHEDULED:
        case ACTOR_WAITING:
-       case ACTOR_POLL_WAITING:
+               /* TODO: remove/reset alarm if we were 1st entry in pend_list */
+               /* priority: low */
+               /* fallthrough */
+       case ACTOR_SCHEDULED:
                log_debug(1, "deleting a scheduled/waiting thread!");
                list_del_init(&thread->list);
+               if (list_empty(&pend_list)) {
+                       log_debug(7, "nothing left on pend_list, deactivating 
alarm\n");
+                       alarm(0);
+               }
+
                break;
        default:
                break;
@@ -77,73 +76,81 @@ actor_delete(actor_t *thread)
        thread->state = ACTOR_NOTSCHEDULED;
 }
 
+/*
+ * Inserts actor on pend list and sets alarm if new item is
+ * sooner than previous entries.
+ */
+static void
+actor_insert_on_pend_list(actor_t *thread, uint32_t delay_secs)
+{
+       struct actor *orig_head;
+       struct actor *new_head;
+       struct actor *next_thread;
+
+       orig_head = list_first_entry_or_null(&pend_list,
+                                            struct actor, list);
+
+       /* insert new entry in sort order */
+       list_for_each_entry(next_thread, &pend_list, list) {
+               log_debug(7, "thread %p %lld", next_thread,
+                         (long long)next_thread->ttschedule);
+
+               if (time_after(next_thread->ttschedule, thread->ttschedule)) {
+                       list_add(&thread->list, &next_thread->list);
+                       goto inserted;
+               }
+       }
+
+       /* Not before any existing entries */
+       list_add_tail(&thread->list, &pend_list);
+
+inserted:
+       new_head = list_first_entry(&pend_list, struct actor, list);
+       if (orig_head != new_head) {
+               int result = alarm(delay_secs);
+               log_debug(7, "new alarm set for %d seconds, old alarm %d",
+                         delay_secs, result);
+       }
+}
+
 static void
-actor_schedule_private(actor_t *thread, uint32_t ttschedule, int head)
+actor_schedule_private(actor_t *thread, uint32_t delay_secs, int head)
 {
-       uint64_t delay_time, current_time;
-       actor_t *next_thread;
+       time_t current_time;
+
+       struct timespec tv;
 
-       delay_time = ACTOR_MS_TO_TICKS(ttschedule);
-       current_time = actor_jiffies;
+       if (clock_gettime(CLOCK_MONOTONIC_COARSE, &tv)) {
+               log_error("clock_getime failed, can't schedule!\n");
+               return;
+       }
+
+       current_time = tv.tv_sec;
 
-       log_debug(7, "thread %p schedule: delay %" PRIu64 " state %d",
-               thread, delay_time, thread->state);
+       log_debug(7, "thread %p schedule: delay %u state %d",
+               thread, delay_secs, thread->state);
 
-       /* convert ttscheduled msecs in 10s of msecs by dividing for now.
-        * later we will change param to 10s of msecs */
        switch(thread->state) {
        case ACTOR_WAITING:
                log_error("rescheduling a waiting thread!");
                list_del(&thread->list);
+               /* fall-through */
        case ACTOR_NOTSCHEDULED:
                INIT_LIST_HEAD(&thread->list);
-               /* if ttschedule is 0, put in scheduled queue and change
-                * state to scheduled, else add current time to ttschedule and
-                * insert in the queue at the correct point */
-               if (delay_time == 0) {
-                       /* For head addition, it must go onto the head of the
-                          actor_list regardless if poll is in progress or not
-                        */
-                       if (poll_in_progress && !head) {
-                               thread->state = ACTOR_POLL_WAITING;
-                               list_add_tail(&thread->list,
-                                             &poll_list);
-                       } else {
-                               thread->state = ACTOR_SCHEDULED;
-                               if (head)
-                                       list_add(&thread->list,
-                                                &actor_list);
-                               else
-                                       list_add_tail(&thread->list,
-                                                     &actor_list);
-                       }
+
+               if (delay_secs == 0) {
+                       thread->state = ACTOR_SCHEDULED;
+                       if (head)
+                               list_add(&thread->list, &ready_list);
+                       else
+                               list_add_tail(&thread->list, &ready_list);
                } else {
                        thread->state = ACTOR_WAITING;
-                       thread->ttschedule = delay_time;
-                       thread->scheduled_at = current_time;
-
-                       /* insert new entry in sort order */
-                       list_for_each_entry(next_thread, &pend_list, list) {
-                               log_debug(7, "thread %p %" PRIu64 " %"PRIu64,
-                                       next_thread,
-                                       next_thread->scheduled_at +
-                                       next_thread->ttschedule,
-                                       current_time + delay_time);
-
-                               if (time_after(next_thread->scheduled_at +
-                                                      next_thread->ttschedule,
-                                               current_time + delay_time)) {
-                                       list_add(&thread->list,
-                                                &next_thread->list);
-                                       goto done;
-                               }
-                       }
-
-                       list_add_tail(&thread->list, &pend_list);
+                       thread->ttschedule = current_time + delay_secs;
+
+                       actor_insert_on_pend_list(thread, delay_secs);
                }
-done:
                break;
-       case ACTOR_POLL_WAITING:
        case ACTOR_SCHEDULED:
                // don't do anything
                break;
@@ -168,75 +175,82 @@ actor_schedule(actor_t *thread)
 }
 
 void
-actor_timer(actor_t *thread, uint32_t timeout, void (*callback)(void *),
+actor_timer(actor_t *thread, uint32_t timeout_secs, void (*callback)(void *),
            void *data)
 {
-       actor_new(thread, callback, data);
-       actor_schedule_private(thread, timeout, 0);
+       actor_init(thread, callback, data);
+       actor_schedule_private(thread, timeout_secs, 0);
 }
 
-int
-actor_timer_mod(actor_t *thread, uint32_t timeout, void *data)
+/*
+ * Execute all items that have expired.
+ *
+ * Set an alarm if items remain. Caller must catch SIGALRM and
+ * then re-invoke this function.
+ */
+void
+actor_poll(void)
 {
-       if (thread->state == ACTOR_WAITING) {
-               list_del_init(&thread->list);
-               thread->data = data;
-               actor_schedule_private(thread, timeout, 0);
-               return 1;
+       struct actor *thread, *tmp;
+       uint64_t current_time;
+       struct timespec tv;
+
+       if (poll_in_progress) {
+               log_error("recursive actor_poll() is not allowed");
+               return;
        }
-       return 0;
-}
 
-static void
-actor_check(uint64_t current_time)
-{
-       struct actor *thread, *tmp;
+       if (clock_gettime(CLOCK_MONOTONIC_COARSE, &tv)) {
+               log_error("clock_gettime failed, can't schedule!\n");
+               return;
+       }
 
+       current_time = tv.tv_sec;
+
+       /*
+        * Move items that are ripe from pend_list to ready_list.
+        * Actors are in sorted order of ascending run time, so
+        * stop at the first unripe entry.
+        */
        list_for_each_entry_safe(thread, tmp, &pend_list, list) {
-               if (actor_diff_time(thread, current_time)) {
+               uint64_t time_left = actor_time_left(thread, current_time);
+               if (time_left) {
                        log_debug(7, "thread %08lx wait some more",
-                               (long)thread);
-                       /* wait some more */
+                                 (long)thread);
+
+                       alarm(time_left);
                        break;
                }
 
-               /* it is time to schedule this entry */
+               /* This entry can be run now */
                list_del_init(&thread->list);
 
-               log_debug(2, "thread %08lx was scheduled at %" PRIu64 ":"
-                       "%" PRIu64 ", curtime %" PRIu64 " q_forw %p "
-                       "&pend_list %p",
-                       (long)thread, thread->scheduled_at, thread->ttschedule,
-                       current_time, pend_list.next, &pend_list);
+               log_debug(2, "thread %08lx was scheduled for "
+                         "%" PRIu64 ", curtime %" PRIu64 " q_forw %p "
+                         "&pend_list %p",
+                         (long)thread, thread->ttschedule,
+                         current_time, pend_list.next, &pend_list);
 
+               list_add_tail(&thread->list, &ready_list);
+               assert(thread->state == ACTOR_WAITING);
                thread->state = ACTOR_SCHEDULED;
-               list_add_tail(&thread->list, &actor_list);
-               log_debug(7, "thread %08lx now in actor_list",
-                       (long)thread);
+               log_debug(7, "thread %08lx now in ready_list",
+                         (long)thread);
        }
-}
 
-void
-actor_poll(void)
-{
-       struct actor *thread;
-
-       /* check that there are no any concurrency */
-       if (poll_in_progress) {
-               log_error("concurrent actor_poll() is not allowed");
+       /* Disable alarm if nothing else pending */
+       if (list_empty(&pend_list)) {
+               log_debug(7, "nothing on pend_list, deactivating alarm\n");
+               alarm(0);
        }
 
-       /* check the wait list */
-       actor_check(actor_jiffies);
-
-       /* the following code to check in the main data path */
        poll_in_progress = 1;
-       while (!list_empty(&actor_list)) {
-               thread = list_entry(actor_list.next, struct actor, list);
+       while (!list_empty(&ready_list)) {
+               thread = list_first_entry(&ready_list, struct actor, list);
                list_del_init(&thread->list);
 
                if (thread->state != ACTOR_SCHEDULED)
-                       log_error("actor_list: thread state corrupted! "
+                       log_error("ready_list: thread state corrupted! "
                                  "Thread with state %d in actor list.",
                                  thread->state);
                thread->state = ACTOR_NOTSCHEDULED;
@@ -245,20 +259,4 @@ actor_poll(void)
                log_debug(7, "thread removed\n");
        }
        poll_in_progress = 0;
-
-       while (!list_empty(&poll_list)) {
-               thread = list_entry(poll_list.next, struct actor, list);
-               list_del_init(&thread->list);
-
-               if (thread->state != ACTOR_POLL_WAITING)
-                       log_error("poll_list: thread state corrupted!"
-                                 "Thread with state %d in poll list.",
-                                 thread->state);
-               thread->state = ACTOR_SCHEDULED;
-               list_add_tail(&thread->list, &actor_list);
-               log_debug(7, "thread %08lx removed from poll_list",
-                       (long)thread);
-       }
-
-       actor_jiffies++;
 }
diff --git a/usr/actor.h b/usr/actor.h
index 697f13c..7283dce 100644
--- a/usr/actor.h
+++ b/usr/actor.h
@@ -22,14 +22,11 @@
 #include "types.h"
 #include "list.h"
 
-#define ACTOR_RESOLUTION       250     /* in millis */
-
 typedef enum actor_state_e {
     ACTOR_INVALID,
     ACTOR_WAITING,
     ACTOR_SCHEDULED,
     ACTOR_NOTSCHEDULED,
-    ACTOR_POLL_WAITING
 } actor_state_e;
 
 typedef struct actor {
@@ -37,18 +34,16 @@ typedef struct actor {
        actor_state_e state;
        void *data;
        void (*callback)(void * );
-       uint64_t scheduled_at;
-       uint64_t ttschedule;
+       time_t ttschedule;
 } actor_t;
 
 extern void actor_init(actor_t *thread, void (*callback)(void *), void * data);
 extern void actor_delete(actor_t *thread);
 extern void actor_schedule_head(actor_t *thread);
 extern void actor_schedule(actor_t *thread);
-extern void actor_timer(actor_t *thread, uint32_t timeout,
+extern void actor_timer(actor_t *thread, uint32_t delay_secs,
                        void (*callback)(void *), void *data);
-extern int actor_timer_mod(actor_t *thread, uint32_t new_timeout, void *data);
+extern int actor_timer_mod(actor_t *thread, uint32_t new_delay_secs, void 
*data);
 extern void actor_poll(void);
-extern void actor_init(void);
 
 #endif /* ACTOR_H */
diff --git a/usr/event_poll.c b/usr/event_poll.c
index 939f1a2..a4ab3ea 100644
--- a/usr/event_poll.c
+++ b/usr/event_poll.c
@@ -26,6 +26,8 @@
 #include <sys/poll.h>
 #include <sys/types.h>
 #include <sys/wait.h>
+#include <sys/signalfd.h>
+#include <unistd.h>
 
 #include "mgmt_ipc.h"
 #include "iscsi_ipc.h"
@@ -116,12 +118,12 @@ static int shutdown_wait_pids(void)
 
 #define POLL_CTRL      0
 #define POLL_IPC       1
-#define POLL_MAX       2
+#define POLL_ALARM     2
+#define POLL_MAX       3
 
 static int event_loop_stop;
 static queue_task_t *shutdown_qtask; 
 
-
 void event_loop_exit(queue_task_t *qtask)
 {
        shutdown_qtask = qtask;
@@ -132,11 +134,26 @@ void event_loop(struct iscsi_ipc *ipc, int control_fd, 
int mgmt_ipc_fd)
 {
        struct pollfd poll_array[POLL_MAX];
        int res, has_shutdown_children = 0;
+       sigset_t sigset;
+       int sig_fd;
+
+       /* Mask off SIGALRM so we can recv it via signalfd */
+       sigemptyset(&sigset);
+       sigaddset(&sigset, SIGALRM);
+       sigprocmask(SIG_SETMASK, &sigset, NULL);
+
+       sig_fd = signalfd(-1, &sigset, SFD_NONBLOCK);
+       if (sig_fd == -1) {
+               log_error("signalfd failed: %m\n");
+               return;
+       }
 
        poll_array[POLL_CTRL].fd = control_fd;
        poll_array[POLL_CTRL].events = POLLIN;
        poll_array[POLL_IPC].fd = mgmt_ipc_fd;
        poll_array[POLL_IPC].events = POLLIN;
+       poll_array[POLL_ALARM].fd = sig_fd;
+       poll_array[POLL_ALARM].events = POLLIN;
 
        event_loop_stop = 0;
        while (1) {
@@ -149,7 +166,11 @@ void event_loop(struct iscsi_ipc *ipc, int control_fd, int 
mgmt_ipc_fd)
                                break;
                }
 
-               res = poll(poll_array, POLL_MAX, ACTOR_RESOLUTION);
+               /* Runs actors and may set alarm for future actors */
+               actor_poll();
+
+               res = poll(poll_array, POLL_MAX, -1);
+
                if (res > 0) {
                        log_debug(6, "poll result %d", res);
                        if (poll_array[POLL_CTRL].revents)
@@ -157,6 +178,18 @@ void event_loop(struct iscsi_ipc *ipc, int control_fd, int 
mgmt_ipc_fd)
 
                        if (poll_array[POLL_IPC].revents)
                                mgmt_ipc_handle(mgmt_ipc_fd);
+
+                       if (poll_array[POLL_ALARM].revents) {
+                               struct signalfd_siginfo si;
+
+                               if (read(sig_fd, &si, sizeof(si)) == -1) {
+                                       log_error("got sigfd read() error, 
errno (%d), "
+                                                 "exiting", errno);
+                                       break;
+                               } else {
+                                       log_debug(1, "Poll was woken by an 
alarm");
+                               }
+                       }
                } else if (res < 0) {
                        if (errno == EINTR) {
                                log_debug(1, "event_loop interrupted");
@@ -167,16 +200,18 @@ void event_loop(struct iscsi_ipc *ipc, int control_fd, 
int mgmt_ipc_fd)
                        }
                }
 
-               if (res >= 0)
-                       actor_poll();
-
                reap_proc();
+
                /*
                 * flush sysfs cache since kernel objs may
                 * have changed as a result of handling op
                 */
                sysfs_cleanup();
        }
+
        if (shutdown_qtask)
                mgmt_ipc_write_rsp(shutdown_qtask, ISCSI_SUCCESS);
+
+       close(sig_fd);
+       sigprocmask(SIG_UNBLOCK, &sigset, NULL);
 }
diff --git a/usr/initiator.c b/usr/initiator.c
index 1e55f3a..f54b708 100644
--- a/usr/initiator.c
+++ b/usr/initiator.c
@@ -527,7 +527,7 @@ queue_delayed_reopen(queue_task_t *qtask, int delay)
         * if it were login time out
         */
        actor_delete(&conn->login_timer);
-       actor_timer(&conn->login_timer, delay * 1000,
+       actor_timer(&conn->login_timer, delay,
                    iscsi_login_timedout, qtask);
 }
 
@@ -563,7 +563,7 @@ static int iscsi_conn_connect(struct iscsi_conn *conn, 
queue_task_t *qtask)
        iscsi_sched_ev_context(ev_context, conn, 0, EV_CONN_POLL);
        log_debug(3, "Setting login timer %p timeout %d", &conn->login_timer,
                  conn->login_timeout);
-       actor_timer(&conn->login_timer, conn->login_timeout * 1000,
+       actor_timer(&conn->login_timer, conn->login_timeout,
                    iscsi_login_timedout, qtask);
        return 0;
 }
@@ -605,7 +605,7 @@ static int iscsi_sched_uio_poll(queue_task_t *qtask)
 
        log_debug(3, "Setting login UIO poll timer %p timeout %d",
                  &conn->login_timer, conn->login_timeout);
-       actor_timer(&conn->login_timer, conn->login_timeout * 1000,
+       actor_timer(&conn->login_timer, conn->login_timeout,
                    iscsi_uio_poll_login_timedout, qtask);
        return -EAGAIN;
 }
@@ -1012,7 +1012,7 @@ static void conn_send_nop_out(void *data)
 
        __send_nopout(conn);
 
-       actor_timer(&conn->nop_out_timer, conn->noop_out_timeout*1000,
+       actor_timer(&conn->nop_out_timer, conn->noop_out_timeout,
                    conn_nop_out_timeout, conn);
        log_debug(3, "noop out timeout timer %p start, timeout %d\n",
                 &conn->nop_out_timer, conn->noop_out_timeout);
@@ -1115,7 +1115,7 @@ setup_full_feature_phase(iscsi_conn_t *conn)
 
        /* noop_out */
        if (conn->userspace_nop && conn->noop_out_interval) {
-               actor_timer(&conn->nop_out_timer, conn->noop_out_interval*1000,
+               actor_timer(&conn->nop_out_timer, conn->noop_out_interval,
                           conn_send_nop_out, conn);
                log_debug(3, "noop out timer %p start\n",
                          &conn->nop_out_timer);
@@ -1199,7 +1199,7 @@ static void iscsi_recv_nop_in(iscsi_conn_t *conn, struct 
iscsi_hdr *hdr)
                /* noop out rsp */
                actor_delete(&conn->nop_out_timer);
                /* schedule a new ping */
-               actor_timer(&conn->nop_out_timer, conn->noop_out_interval*1000,
+               actor_timer(&conn->nop_out_timer, conn->noop_out_interval,
                            conn_send_nop_out, conn);
        } else /*  noop in req */
                if (!__send_nopin_rsp(conn, (struct iscsi_nopin*)hdr,
@@ -2064,7 +2064,7 @@ static int queue_session_login_task_retry(struct 
login_task_retry_info *info,
        info->retry_count++;
        log_debug(4, "queue session setup attempt in %d secs, retries %d\n",
                  3, info->retry_count);
-       actor_timer(&info->retry_actor, 3000, session_login_task_retry, info);
+       actor_timer(&info->retry_actor, 3, session_login_task_retry, info);
        return 0;
 }
 
-- 
1.9.3

-- 
You received this message because you are subscribed to the Google Groups 
"open-iscsi" group.
To unsubscribe from this group and stop receiving emails from it, send an email 
to open-iscsi+unsubscr...@googlegroups.com.
To post to this group, send email to open-iscsi@googlegroups.com.
Visit this group at http://groups.google.com/group/open-iscsi.
For more options, visit https://groups.google.com/d/optout.

Reply via email to