Instead of waking up frequently to check if actors need to run, set an alarm for the first timeout and use a signalfd to get us out of poll() when the alarm expires and SIGALRM is sent.
alarm(2) only has second granularity but we are using delayed actors for multi-second timeout handling so this is ok. All callsites were multiplying by 1000 for wait-in-msec, so just change actor_timer() callsites to pass timeout in seconds. Fold actor_check() into actor_poll(). Remove actor_timer_mod, not used. Rename actor_list to ready_list for clarity. Remove poll_list, no longer needed. Remove ACTOR_POLL_WAITING state, no longer needed. Add some more helpful macros to list.h. Based on earlier work by Chris Leech. Signed-off-by: Andy Grover <agro...@redhat.com> --- include/list.h | 6 ++ usr/Makefile | 4 +- usr/actor.c | 264 +++++++++++++++++++++++++++---------------------------- usr/actor.h | 11 +-- usr/event_poll.c | 47 ++++++++-- usr/initiator.c | 14 +-- 6 files changed, 190 insertions(+), 156 deletions(-) diff --git a/include/list.h b/include/list.h index cccc3c3..94ad99b 100644 --- a/include/list.h +++ b/include/list.h @@ -38,6 +38,12 @@ static inline int list_empty(const struct list_head *head) #define list_entry(ptr, type, member) \ list_container_of(ptr, type, member) +#define list_first_entry(ptr, type, member) \ + list_entry((ptr)->next, type, member) + +#define list_first_entry_or_null(ptr, type, member) \ + (!list_empty(ptr) ? list_first_entry(ptr, type, member) : NULL) + #define list_for_each(pos, head) \ for (pos = (head)->next; pos != (head); pos = pos->next) diff --git a/usr/Makefile b/usr/Makefile index 3d8ee22..550fdff 100644 --- a/usr/Makefile +++ b/usr/Makefile @@ -55,14 +55,14 @@ all: $(PROGRAMS) iscsid: $(ISCSI_LIB_SRCS) $(INITIATOR_SRCS) $(DISCOVERY_SRCS) \ iscsid.o session_mgmt.o discoveryd.o - $(CC) $(CFLAGS) $^ -o $@ -L../utils/open-isns -lisns + $(CC) $(CFLAGS) $^ -o $@ -L../utils/open-isns -lisns -lrt iscsiadm: $(ISCSI_LIB_SRCS) $(DISCOVERY_SRCS) iscsiadm.o session_mgmt.o $(CC) $(CFLAGS) $^ -o $@ -L../utils/open-isns -lisns iscsistart: $(ISCSI_LIB_SRCS) $(INITIATOR_SRCS) $(FW_BOOT_SRCS) \ iscsistart.o statics.o - $(CC) $(CFLAGS) -static $^ -o $@ + $(CC) $(CFLAGS) -static $^ -o $@ -lrt clean: rm -f *.o $(PROGRAMS) .depend $(LIBSYS) diff --git a/usr/actor.c b/usr/actor.c index a1373d6..37b5024 100644 --- a/usr/actor.c +++ b/usr/actor.c @@ -1,7 +1,8 @@ /* - * iSCSI usermode single-threaded scheduler + * iSCSI timeout & deferred work handling * * Copyright (C) 2004 Dmitry Yusupov, Alex Aizman + * Copyright (C) 2014 Red Hat Inc. * maintained by open-iscsi@googlegroups.com * * This program is free software; you can redistribute it and/or modify @@ -17,34 +18,25 @@ * See the file COPYING included with this distribution for more details. */ #include <inttypes.h> +#include <time.h> +#include <sys/signalfd.h> +#include <assert.h> +#include <unistd.h> #include "actor.h" #include "log.h" #include "list.h" static LIST_HEAD(pend_list); -static LIST_HEAD(poll_list); -static LIST_HEAD(actor_list); +static LIST_HEAD(ready_list); static volatile int poll_in_progress; -static volatile uint64_t actor_jiffies = 0; - -#define actor_diff(_time1, _time2) ({ \ - uint64_t __ret; \ - if ((_time2) >= (_time1)) \ - __ret = (_time2) - (_time1); \ - else \ - __ret = ((~0ULL) - (_time1)) + (_time2); \ - __ret; \ -}) - -#define ACTOR_MS_TO_TICKS(_a) ((_a)/ACTOR_RESOLUTION) static uint64_t -actor_diff_time(actor_t *thread, uint64_t current_time) +actor_time_left(actor_t *thread, uint64_t current_time) { - uint64_t diff_time = actor_diff(thread->scheduled_at, current_time); - if(diff_time >= thread->ttschedule) + if (current_time > thread->ttschedule) return 0; - return (thread->ttschedule - diff_time); + else + return (thread->ttschedule - current_time); } #define time_after(a,b) \ @@ -65,11 +57,18 @@ actor_delete(actor_t *thread) log_debug(7, "thread %08lx delete: state %d", (long)thread, thread->state); switch(thread->state) { - case ACTOR_SCHEDULED: case ACTOR_WAITING: - case ACTOR_POLL_WAITING: + /* TODO: remove/reset alarm if we were 1st entry in pend_list */ + /* priority: low */ + /* fallthrough */ + case ACTOR_SCHEDULED: log_debug(1, "deleting a scheduled/waiting thread!"); list_del_init(&thread->list); + if (list_empty(&pend_list)) { + log_debug(7, "nothing left on pend_list, deactivating alarm\n"); + alarm(0); + } + break; default: break; @@ -77,73 +76,81 @@ actor_delete(actor_t *thread) thread->state = ACTOR_NOTSCHEDULED; } +/* + * Inserts actor on pend list and sets alarm if new item is + * sooner than previous entries. + */ +static void +actor_insert_on_pend_list(actor_t *thread, uint32_t delay_secs) +{ + struct actor *orig_head; + struct actor *new_head; + struct actor *next_thread; + + orig_head = list_first_entry_or_null(&pend_list, + struct actor, list); + + /* insert new entry in sort order */ + list_for_each_entry(next_thread, &pend_list, list) { + log_debug(7, "thread %p %lld", next_thread, + (long long)next_thread->ttschedule); + + if (time_after(next_thread->ttschedule, thread->ttschedule)) { + list_add(&thread->list, &next_thread->list); + goto inserted; + } + } + + /* Not before any existing entries */ + list_add_tail(&thread->list, &pend_list); + +inserted: + new_head = list_first_entry(&pend_list, struct actor, list); + if (orig_head != new_head) { + int result = alarm(delay_secs); + log_debug(7, "new alarm set for %d seconds, old alarm %d", + delay_secs, result); + } +} + static void -actor_schedule_private(actor_t *thread, uint32_t ttschedule, int head) +actor_schedule_private(actor_t *thread, uint32_t delay_secs, int head) { - uint64_t delay_time, current_time; - actor_t *next_thread; + time_t current_time; + + struct timespec tv; - delay_time = ACTOR_MS_TO_TICKS(ttschedule); - current_time = actor_jiffies; + if (clock_gettime(CLOCK_MONOTONIC_COARSE, &tv)) { + log_error("clock_getime failed, can't schedule!\n"); + return; + } + + current_time = tv.tv_sec; - log_debug(7, "thread %p schedule: delay %" PRIu64 " state %d", - thread, delay_time, thread->state); + log_debug(7, "thread %p schedule: delay %u state %d", + thread, delay_secs, thread->state); - /* convert ttscheduled msecs in 10s of msecs by dividing for now. - * later we will change param to 10s of msecs */ switch(thread->state) { case ACTOR_WAITING: log_error("rescheduling a waiting thread!"); list_del(&thread->list); + /* fall-through */ case ACTOR_NOTSCHEDULED: INIT_LIST_HEAD(&thread->list); - /* if ttschedule is 0, put in scheduled queue and change - * state to scheduled, else add current time to ttschedule and - * insert in the queue at the correct point */ - if (delay_time == 0) { - /* For head addition, it must go onto the head of the - actor_list regardless if poll is in progress or not - */ - if (poll_in_progress && !head) { - thread->state = ACTOR_POLL_WAITING; - list_add_tail(&thread->list, - &poll_list); - } else { - thread->state = ACTOR_SCHEDULED; - if (head) - list_add(&thread->list, - &actor_list); - else - list_add_tail(&thread->list, - &actor_list); - } + + if (delay_secs == 0) { + thread->state = ACTOR_SCHEDULED; + if (head) + list_add(&thread->list, &ready_list); + else + list_add_tail(&thread->list, &ready_list); } else { thread->state = ACTOR_WAITING; - thread->ttschedule = delay_time; - thread->scheduled_at = current_time; - - /* insert new entry in sort order */ - list_for_each_entry(next_thread, &pend_list, list) { - log_debug(7, "thread %p %" PRIu64 " %"PRIu64, - next_thread, - next_thread->scheduled_at + - next_thread->ttschedule, - current_time + delay_time); - - if (time_after(next_thread->scheduled_at + - next_thread->ttschedule, - current_time + delay_time)) { - list_add(&thread->list, - &next_thread->list); - goto done; - } - } - - list_add_tail(&thread->list, &pend_list); + thread->ttschedule = current_time + delay_secs; + + actor_insert_on_pend_list(thread, delay_secs); } -done: break; - case ACTOR_POLL_WAITING: case ACTOR_SCHEDULED: // don't do anything break; @@ -168,75 +175,82 @@ actor_schedule(actor_t *thread) } void -actor_timer(actor_t *thread, uint32_t timeout, void (*callback)(void *), +actor_timer(actor_t *thread, uint32_t timeout_secs, void (*callback)(void *), void *data) { - actor_new(thread, callback, data); - actor_schedule_private(thread, timeout, 0); + actor_init(thread, callback, data); + actor_schedule_private(thread, timeout_secs, 0); } -int -actor_timer_mod(actor_t *thread, uint32_t timeout, void *data) +/* + * Execute all items that have expired. + * + * Set an alarm if items remain. Caller must catch SIGALRM and + * then re-invoke this function. + */ +void +actor_poll(void) { - if (thread->state == ACTOR_WAITING) { - list_del_init(&thread->list); - thread->data = data; - actor_schedule_private(thread, timeout, 0); - return 1; + struct actor *thread, *tmp; + uint64_t current_time; + struct timespec tv; + + if (poll_in_progress) { + log_error("recursive actor_poll() is not allowed"); + return; } - return 0; -} -static void -actor_check(uint64_t current_time) -{ - struct actor *thread, *tmp; + if (clock_gettime(CLOCK_MONOTONIC_COARSE, &tv)) { + log_error("clock_gettime failed, can't schedule!\n"); + return; + } + current_time = tv.tv_sec; + + /* + * Move items that are ripe from pend_list to ready_list. + * Actors are in sorted order of ascending run time, so + * stop at the first unripe entry. + */ list_for_each_entry_safe(thread, tmp, &pend_list, list) { - if (actor_diff_time(thread, current_time)) { + uint64_t time_left = actor_time_left(thread, current_time); + if (time_left) { log_debug(7, "thread %08lx wait some more", - (long)thread); - /* wait some more */ + (long)thread); + + alarm(time_left); break; } - /* it is time to schedule this entry */ + /* This entry can be run now */ list_del_init(&thread->list); - log_debug(2, "thread %08lx was scheduled at %" PRIu64 ":" - "%" PRIu64 ", curtime %" PRIu64 " q_forw %p " - "&pend_list %p", - (long)thread, thread->scheduled_at, thread->ttschedule, - current_time, pend_list.next, &pend_list); + log_debug(2, "thread %08lx was scheduled for " + "%" PRIu64 ", curtime %" PRIu64 " q_forw %p " + "&pend_list %p", + (long)thread, thread->ttschedule, + current_time, pend_list.next, &pend_list); + list_add_tail(&thread->list, &ready_list); + assert(thread->state == ACTOR_WAITING); thread->state = ACTOR_SCHEDULED; - list_add_tail(&thread->list, &actor_list); - log_debug(7, "thread %08lx now in actor_list", - (long)thread); + log_debug(7, "thread %08lx now in ready_list", + (long)thread); } -} -void -actor_poll(void) -{ - struct actor *thread; - - /* check that there are no any concurrency */ - if (poll_in_progress) { - log_error("concurrent actor_poll() is not allowed"); + /* Disable alarm if nothing else pending */ + if (list_empty(&pend_list)) { + log_debug(7, "nothing on pend_list, deactivating alarm\n"); + alarm(0); } - /* check the wait list */ - actor_check(actor_jiffies); - - /* the following code to check in the main data path */ poll_in_progress = 1; - while (!list_empty(&actor_list)) { - thread = list_entry(actor_list.next, struct actor, list); + while (!list_empty(&ready_list)) { + thread = list_first_entry(&ready_list, struct actor, list); list_del_init(&thread->list); if (thread->state != ACTOR_SCHEDULED) - log_error("actor_list: thread state corrupted! " + log_error("ready_list: thread state corrupted! " "Thread with state %d in actor list.", thread->state); thread->state = ACTOR_NOTSCHEDULED; @@ -245,20 +259,4 @@ actor_poll(void) log_debug(7, "thread removed\n"); } poll_in_progress = 0; - - while (!list_empty(&poll_list)) { - thread = list_entry(poll_list.next, struct actor, list); - list_del_init(&thread->list); - - if (thread->state != ACTOR_POLL_WAITING) - log_error("poll_list: thread state corrupted!" - "Thread with state %d in poll list.", - thread->state); - thread->state = ACTOR_SCHEDULED; - list_add_tail(&thread->list, &actor_list); - log_debug(7, "thread %08lx removed from poll_list", - (long)thread); - } - - actor_jiffies++; } diff --git a/usr/actor.h b/usr/actor.h index 697f13c..7283dce 100644 --- a/usr/actor.h +++ b/usr/actor.h @@ -22,14 +22,11 @@ #include "types.h" #include "list.h" -#define ACTOR_RESOLUTION 250 /* in millis */ - typedef enum actor_state_e { ACTOR_INVALID, ACTOR_WAITING, ACTOR_SCHEDULED, ACTOR_NOTSCHEDULED, - ACTOR_POLL_WAITING } actor_state_e; typedef struct actor { @@ -37,18 +34,16 @@ typedef struct actor { actor_state_e state; void *data; void (*callback)(void * ); - uint64_t scheduled_at; - uint64_t ttschedule; + time_t ttschedule; } actor_t; extern void actor_init(actor_t *thread, void (*callback)(void *), void * data); extern void actor_delete(actor_t *thread); extern void actor_schedule_head(actor_t *thread); extern void actor_schedule(actor_t *thread); -extern void actor_timer(actor_t *thread, uint32_t timeout, +extern void actor_timer(actor_t *thread, uint32_t delay_secs, void (*callback)(void *), void *data); -extern int actor_timer_mod(actor_t *thread, uint32_t new_timeout, void *data); +extern int actor_timer_mod(actor_t *thread, uint32_t new_delay_secs, void *data); extern void actor_poll(void); -extern void actor_init(void); #endif /* ACTOR_H */ diff --git a/usr/event_poll.c b/usr/event_poll.c index 939f1a2..a4ab3ea 100644 --- a/usr/event_poll.c +++ b/usr/event_poll.c @@ -26,6 +26,8 @@ #include <sys/poll.h> #include <sys/types.h> #include <sys/wait.h> +#include <sys/signalfd.h> +#include <unistd.h> #include "mgmt_ipc.h" #include "iscsi_ipc.h" @@ -116,12 +118,12 @@ static int shutdown_wait_pids(void) #define POLL_CTRL 0 #define POLL_IPC 1 -#define POLL_MAX 2 +#define POLL_ALARM 2 +#define POLL_MAX 3 static int event_loop_stop; static queue_task_t *shutdown_qtask; - void event_loop_exit(queue_task_t *qtask) { shutdown_qtask = qtask; @@ -132,11 +134,26 @@ void event_loop(struct iscsi_ipc *ipc, int control_fd, int mgmt_ipc_fd) { struct pollfd poll_array[POLL_MAX]; int res, has_shutdown_children = 0; + sigset_t sigset; + int sig_fd; + + /* Mask off SIGALRM so we can recv it via signalfd */ + sigemptyset(&sigset); + sigaddset(&sigset, SIGALRM); + sigprocmask(SIG_SETMASK, &sigset, NULL); + + sig_fd = signalfd(-1, &sigset, SFD_NONBLOCK); + if (sig_fd == -1) { + log_error("signalfd failed: %m\n"); + return; + } poll_array[POLL_CTRL].fd = control_fd; poll_array[POLL_CTRL].events = POLLIN; poll_array[POLL_IPC].fd = mgmt_ipc_fd; poll_array[POLL_IPC].events = POLLIN; + poll_array[POLL_ALARM].fd = sig_fd; + poll_array[POLL_ALARM].events = POLLIN; event_loop_stop = 0; while (1) { @@ -149,7 +166,11 @@ void event_loop(struct iscsi_ipc *ipc, int control_fd, int mgmt_ipc_fd) break; } - res = poll(poll_array, POLL_MAX, ACTOR_RESOLUTION); + /* Runs actors and may set alarm for future actors */ + actor_poll(); + + res = poll(poll_array, POLL_MAX, -1); + if (res > 0) { log_debug(6, "poll result %d", res); if (poll_array[POLL_CTRL].revents) @@ -157,6 +178,18 @@ void event_loop(struct iscsi_ipc *ipc, int control_fd, int mgmt_ipc_fd) if (poll_array[POLL_IPC].revents) mgmt_ipc_handle(mgmt_ipc_fd); + + if (poll_array[POLL_ALARM].revents) { + struct signalfd_siginfo si; + + if (read(sig_fd, &si, sizeof(si)) == -1) { + log_error("got sigfd read() error, errno (%d), " + "exiting", errno); + break; + } else { + log_debug(1, "Poll was woken by an alarm"); + } + } } else if (res < 0) { if (errno == EINTR) { log_debug(1, "event_loop interrupted"); @@ -167,16 +200,18 @@ void event_loop(struct iscsi_ipc *ipc, int control_fd, int mgmt_ipc_fd) } } - if (res >= 0) - actor_poll(); - reap_proc(); + /* * flush sysfs cache since kernel objs may * have changed as a result of handling op */ sysfs_cleanup(); } + if (shutdown_qtask) mgmt_ipc_write_rsp(shutdown_qtask, ISCSI_SUCCESS); + + close(sig_fd); + sigprocmask(SIG_UNBLOCK, &sigset, NULL); } diff --git a/usr/initiator.c b/usr/initiator.c index 1e55f3a..f54b708 100644 --- a/usr/initiator.c +++ b/usr/initiator.c @@ -527,7 +527,7 @@ queue_delayed_reopen(queue_task_t *qtask, int delay) * if it were login time out */ actor_delete(&conn->login_timer); - actor_timer(&conn->login_timer, delay * 1000, + actor_timer(&conn->login_timer, delay, iscsi_login_timedout, qtask); } @@ -563,7 +563,7 @@ static int iscsi_conn_connect(struct iscsi_conn *conn, queue_task_t *qtask) iscsi_sched_ev_context(ev_context, conn, 0, EV_CONN_POLL); log_debug(3, "Setting login timer %p timeout %d", &conn->login_timer, conn->login_timeout); - actor_timer(&conn->login_timer, conn->login_timeout * 1000, + actor_timer(&conn->login_timer, conn->login_timeout, iscsi_login_timedout, qtask); return 0; } @@ -605,7 +605,7 @@ static int iscsi_sched_uio_poll(queue_task_t *qtask) log_debug(3, "Setting login UIO poll timer %p timeout %d", &conn->login_timer, conn->login_timeout); - actor_timer(&conn->login_timer, conn->login_timeout * 1000, + actor_timer(&conn->login_timer, conn->login_timeout, iscsi_uio_poll_login_timedout, qtask); return -EAGAIN; } @@ -1012,7 +1012,7 @@ static void conn_send_nop_out(void *data) __send_nopout(conn); - actor_timer(&conn->nop_out_timer, conn->noop_out_timeout*1000, + actor_timer(&conn->nop_out_timer, conn->noop_out_timeout, conn_nop_out_timeout, conn); log_debug(3, "noop out timeout timer %p start, timeout %d\n", &conn->nop_out_timer, conn->noop_out_timeout); @@ -1115,7 +1115,7 @@ setup_full_feature_phase(iscsi_conn_t *conn) /* noop_out */ if (conn->userspace_nop && conn->noop_out_interval) { - actor_timer(&conn->nop_out_timer, conn->noop_out_interval*1000, + actor_timer(&conn->nop_out_timer, conn->noop_out_interval, conn_send_nop_out, conn); log_debug(3, "noop out timer %p start\n", &conn->nop_out_timer); @@ -1199,7 +1199,7 @@ static void iscsi_recv_nop_in(iscsi_conn_t *conn, struct iscsi_hdr *hdr) /* noop out rsp */ actor_delete(&conn->nop_out_timer); /* schedule a new ping */ - actor_timer(&conn->nop_out_timer, conn->noop_out_interval*1000, + actor_timer(&conn->nop_out_timer, conn->noop_out_interval, conn_send_nop_out, conn); } else /* noop in req */ if (!__send_nopin_rsp(conn, (struct iscsi_nopin*)hdr, @@ -2064,7 +2064,7 @@ static int queue_session_login_task_retry(struct login_task_retry_info *info, info->retry_count++; log_debug(4, "queue session setup attempt in %d secs, retries %d\n", 3, info->retry_count); - actor_timer(&info->retry_actor, 3000, session_login_task_retry, info); + actor_timer(&info->retry_actor, 3, session_login_task_retry, info); return 0; } -- 1.9.3 -- You received this message because you are subscribed to the Google Groups "open-iscsi" group. To unsubscribe from this group and stop receiving emails from it, send an email to open-iscsi+unsubscr...@googlegroups.com. To post to this group, send email to open-iscsi@googlegroups.com. Visit this group at http://groups.google.com/group/open-iscsi. For more options, visit https://groups.google.com/d/optout.