These 2 patches result in the library returning NO_RESOURCES when corosync is running out of file descriptors. I think it's better...
-Angus

Signed-off-by: Angus Salkeld <asalk...@redhat.com>
---
 exec/coropoll.c                   |   69 +++++++++++++++++++++++++++++++++++++
 exec/main.c                       |   19 ++++++++++
 include/corosync/totem/coropoll.h |    7 ++++
 3 files changed, 95 insertions(+), 0 deletions(-)

diff --git a/exec/coropoll.c b/exec/coropoll.c
index 7910214..bad4607 100644
--- a/exec/coropoll.c
+++ b/exec/coropoll.c
@@ -42,6 +42,8 @@
 #include <string.h>
 #include <stdio.h>
 #include <unistd.h>
+#include <sys/time.h>
+#include <sys/resource.h>

 #include <corosync/hdb.h>
 #include <corosync/totem/coropoll.h>
@@ -63,6 +65,8 @@ struct poll_instance {
 	struct timerlist timerlist;
 	int stop_requested;
 	int pipefds[2];
+	poll_low_fds_event_fn low_fds_event_fn;
+	int32_t not_enough_fds;
 };

 DECLARE_HDB_DATABASE (poll_instance_database,NULL);
@@ -92,6 +96,7 @@ hdb_handle_t poll_create (void)
 	poll_instance->ufds = 0;
 	poll_instance->poll_entry_count = 0;
 	poll_instance->stop_requested = 0;
+	poll_instance->not_enough_fds = 0;
 	timerlist_init (&poll_instance->timerlist);

 	res = pipe (poll_instance->pipefds);
@@ -380,6 +385,69 @@ error_exit:
 	return (res);
 }

+int poll_low_fds_event_set(
+	hdb_handle_t handle,
+	poll_low_fds_event_fn fn)
+{
+	struct poll_instance *poll_instance;
+
+	if (hdb_handle_get (&poll_instance_database, handle,
+		(void *)&poll_instance) != 0) {
+		return -ENOENT;
+	}
+
+	poll_instance->low_fds_event_fn = fn;
+
+	hdb_handle_put (&poll_instance_database, handle);
+	return 0;
+}
+
+/* logs, std(in|out|err), pipe */
+#define POLL_FDS_USED_MISC 20
+
+static void poll_fds_usage_check(struct poll_instance *poll_instance)
+{
+	struct rlimit lim;
+	static int32_t socks_limit = 0;
+	int32_t send_event = 0;
+	int32_t socks_used = 0;
+	int32_t socks_avail = 0;
+	int32_t i;
+
+	if (socks_limit == 0) {
+		if (getrlimit(RLIMIT_NOFILE, &lim) == -1) {
+			char error_str[100];
+			strerror_r(errno, error_str, 100);
+			printf("getrlimit: %s\n", error_str);
+			return;
+		}
+		socks_limit = lim.rlim_cur;
+		socks_limit -= POLL_FDS_USED_MISC;
+	}
+
+	for (i = 0; i < poll_instance->poll_entry_count; i++) {
+		if (poll_instance->poll_entries[i].ufd.fd != -1) {
+			socks_used++;
+		}
+	}
+	socks_avail = socks_limit - socks_used;
+	send_event = 0;
+	if (poll_instance->not_enough_fds) {
+		if (socks_avail > 2) {
+			poll_instance->not_enough_fds = 0;
+			send_event = 1;
+		}
+	} else {
+		if (socks_avail <= 1) {
+			poll_instance->not_enough_fds = 1;
+			send_event = 1;
+		}
+	}
+	if (send_event) {
+		poll_instance->low_fds_event_fn(poll_instance->not_enough_fds,
+			socks_avail);
+	}
+}

 int poll_run (
 	hdb_handle_t handle)
@@ -403,6 +471,7 @@ rebuild_poll:
 				&poll_instance->poll_entries[i].ufd,
 				sizeof (struct pollfd));
 		}
+		poll_fds_usage_check(poll_instance);
 		expire_timeout_msec = timerlist_msec_duration_to_expire (&poll_instance->timerlist);

 		if (expire_timeout_msec != -1 && expire_timeout_msec > 0xFFFFFFFF) {
diff --git a/exec/main.c b/exec/main.c
index 4ddb5f2..d5306a3 100644
--- a/exec/main.c
+++ b/exec/main.c
@@ -139,6 +139,8 @@ static pthread_t corosync_exit_thread;

 static sem_t corosync_exit_sem;

+static int32_t corosync_not_enough_fds_left = 0;
+
 static void serialize_unlock (void);

 hdb_handle_t corosync_poll_handle_get (void)
@@ -921,6 +923,11 @@ static coroipcs_handler_fn_lvalue corosync_handler_fn_get (unsigned int service,
 static int corosync_security_valid (int euid, int egid)
 {
 	struct list_head *iter;
+
+	if (corosync_not_enough_fds_left) {
+		return 0;
+	}
+
 	if (euid == 0 || egid == 0) {
 		return (1);
 	}
@@ -1329,6 +1336,17 @@ static void corosync_stats_init (void)
 		OBJDB_VALUETYPE_UINT64);
 }

+static void main_low_fds_event(int32_t not_enough, size_t fds_available)
+{
+	corosync_not_enough_fds_left = not_enough;
+	if (not_enough) {
+		log_printf(LOGSYS_LEVEL_WARNING, "refusing new connections (fds_available:%zu)\n",
+			fds_available);
+	} else {
+		log_printf(LOGSYS_LEVEL_NOTICE, "allowing new connections (fds_available:%zu)\n",
+			fds_available);
+	}
+}

 static void main_service_ready (void)
 {
@@ -1614,6 +1632,7 @@ int main (int argc, char **argv, char **envp)
 		sched_priority);

 	corosync_poll_handle = poll_create ();
+	poll_low_fds_event_set(corosync_poll_handle, main_low_fds_event);

 	/*
 	 * Sleep for a while to let other nodes in the cluster
diff --git a/include/corosync/totem/coropoll.h b/include/corosync/totem/coropoll.h
index 3c0bca7..1e9c703 100644
--- a/include/corosync/totem/coropoll.h
+++ b/include/corosync/totem/coropoll.h
@@ -36,11 +36,14 @@

 #include <corosync/hdb.h>
 #include <pthread.h>
+#include <stdlib.h>

 #ifdef __cplusplus
 extern "C" {
 #endif

+typedef void (*poll_low_fds_event_fn) (int32_t not_enough, size_t fds_available);
+
 typedef void * poll_timer_handle;

 hdb_handle_t poll_create (void);
@@ -73,6 +76,10 @@ int poll_dispatch_delete (
 	hdb_handle_t handle,
 	int fd);

+int poll_low_fds_event_set(
+	hdb_handle_t handle,
+	poll_low_fds_event_fn fn);
+
 int poll_timer_add (
 	hdb_handle_t handle,
 	int msec_in_future, void *data,
--
1.7.1

_______________________________________________
Openais mailing list
Openais@lists.linux-foundation.org
https://lists.linux-foundation.org/mailman/listinfo/openais