On Mon, 2012-06-18 at 13:32 -0400, Simo Sorce wrote: > On Mon, 2012-06-18 at 11:33 -0400, Stephen Gallagher wrote: > > On Mon, 2012-06-18 at 09:33 -0400, Stephen Gallagher wrote: > > > On Mon, 2012-06-18 at 06:30 -0700, Shantanu Goel wrote: > > > > Hi Stephen, > > > > > > > > > > > > Please feel free to modify the patch in any way or shape you deem > > > > necessary for inclusion. We are just glad that you agree there is a > > > > real problem which needs fixing. One thing I ask is if you expect to > > > > have rhel 5 or 6 test RPMs that we could test with the ultimate fix > > > > any time soon, please drop me a note and we will gladly install them > > > > on some of our problematic machines here to see if they address the > > > > problems we have seen. > > > > > > Sure, once this is done I'm going to be committing it upstream for the > > > master branch (future 1.9), the sssd-1-8 branch (our current LTM > > > release) and the sssd-1-5 branch (our previous LTM release). > > > > > > You should be able to pull the patches from the sssd-1-5 branch and > > > build them for your systems once they're ready. > > > > Ok, new patches attached. Shantanu, these are currently designed for the > > master branch. We'll get them committed there first and tested out for a > > little while, then we'll backport them. > > > > Patch 0001: Return the correct errno value. Previously it could have > > been reset by closing the socket. > > > > Patch 0002: Add some additional debugging to the client_destructor() > > > > Patch 0003: On systems that support MSG_NOSIGNAL, we should use it. This > > way, if a client app isn't configured to listen for SIGPIPE, it will not > > crash. > > > > Patch 0004: Add a timer to each client context. If sixty seconds pass > > (configurable in the patch 0005) without either read or write activity, > > we will free the client context and close the socket. The client code is > > already written to be tolerant of this and will reconnect on the next > > request. This will help us avoid resource exhaustion if we have clients > > that hang on to NSS and PAM file descriptors indefinitely (like 'su' and > > 'login' do for PAM). > > > > Patch 0005: Make the client idle timeout value configurable and add it > > to the manpages and config API. > > 0001 ack > 0002 ack > 0003 Please always use send with a default set of flags, make the ifdef > set the default set of flags (0 vs MSG_NOSIGNAL) > 0004 ack (not like much the TODO but I was told 4/5 got split for > reviewability, so ok) > 0005 ack
Thanks for the review. New patches attached.
From 488bdb948f35fd155a478fc8634ad1d77eb259d2 Mon Sep 17 00:00:00 2001 From: Shantanu Goel <sg...@trade4.test-jc.tower-research.com> Date: Mon, 18 Jun 2012 08:45:32 -0400 Subject: [PATCH 1/5] Set return errno to the value prior to calling close(). --- src/sss_client/common.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/sss_client/common.c b/src/sss_client/common.c index 28adb4424dfc72dab79ca00fc905152a7cff3e8a..90713d5b25785a137f1d0f03689c8877ede12d28 100644 --- a/src/sss_client/common.c +++ b/src/sss_client/common.c @@ -154,7 +154,7 @@ static enum sss_status sss_cli_send_req(enum sss_cli_command cmd, /* Write failed */ sss_cli_close_socket(); - *errnop = errno; + *errnop = error; return SSS_STATUS_UNAVAIL; } @@ -264,7 +264,7 @@ static enum sss_status sss_cli_recv_rep(enum sss_cli_command cmd, * through. */ sss_cli_close_socket(); - *errnop = errno; + *errnop = error; ret = SSS_STATUS_UNAVAIL; goto failed; } -- 1.7.10.2
From 4bb4d571cb597d7b444ae75f517d3dc57306c398 Mon Sep 17 00:00:00 2001 From: Shantanu Goel <sg...@trade4.test-jc.tower-research.com> Date: Mon, 18 Jun 2012 08:49:10 -0400 Subject: [PATCH 2/5] Log message if close() fails in destructor. --- src/responder/common/responder_common.c | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) diff --git a/src/responder/common/responder_common.c b/src/responder/common/responder_common.c index 2e3e98a9cceec66cdac200d93274c6f3cea41f0c..5d5248809d095420e8a4d11ce80fc13e1949c873 100644 --- a/src/responder/common/responder_common.c +++ b/src/responder/common/responder_common.c @@ -87,7 +87,18 @@ static errno_t set_close_on_exec(int fd) static int client_destructor(struct cli_ctx *ctx) { - if (ctx->cfd > 0) close(ctx->cfd); + errno_t ret; + + if ((ctx->cfd > 0) && close(ctx->cfd) < 0) { + ret = errno; + DEBUG(SSSDBG_CRIT_FAILURE, + ("Failed to close fd [%d]: [%s]\n", + ctx->cfd, strerror(ret))); + } + + DEBUG(SSSDBG_TRACE_INTERNAL, + ("Terminated client [%p][%d]\n", + ctx, ctx->cfd)); return 0; } -- 1.7.10.2
From 6e4c3aa12bbe790216f1eff511ef1d18ddd8a2c9 Mon Sep 17 00:00:00 2001 From: Shantanu Goel <sg...@trade4.test-jc.tower-research.com> Date: Mon, 18 Jun 2012 09:26:45 -0400 Subject: [PATCH 3/5] Do not send SIGPIPE on disconnection Note we set MSG_NOSIGNAL to avoid having to fiddle with signal masks but also do not want to die in case SIGPIPE gets raised and the application does not handle it. --- src/sss_client/common.c | 27 +++++++++++++++++++++------ 1 file changed, 21 insertions(+), 6 deletions(-) diff --git a/src/sss_client/common.c b/src/sss_client/common.c index 90713d5b25785a137f1d0f03689c8877ede12d28..9fbb5887c4fa1a426604675080995a4ac6c937f3 100644 --- a/src/sss_client/common.c +++ b/src/sss_client/common.c @@ -49,6 +49,19 @@ #include <pthread.h> #endif +/* +* Note we set MSG_NOSIGNAL to avoid +* having to fiddle with signal masks +* but also do not want to die in case +* SIGPIPE gets raised and the application +* does not handle it. +*/ +#ifdef MSG_NOSIGNAL +#define SSS_DEFAULT_WRITE_FLAGS MSG_NOSIGNAL +#else +#define SSS_DEFAULT_WRITE_FLAGS 0 +#endif + /* common functions */ int sss_cli_sd = -1; /* the sss client socket descriptor */ @@ -133,14 +146,16 @@ static enum sss_status sss_cli_send_req(enum sss_cli_command cmd, errno = 0; if (datasent < SSS_NSS_HEADER_SIZE) { - res = write(sss_cli_sd, - (char *)header + datasent, - SSS_NSS_HEADER_SIZE - datasent); + res = send(sss_cli_sd, + (char *)header + datasent, + SSS_NSS_HEADER_SIZE - datasent, + SSS_DEFAULT_WRITE_FLAGS); } else { rdsent = datasent - SSS_NSS_HEADER_SIZE; - res = write(sss_cli_sd, - (const char *)rd->data + rdsent, - rd->len - rdsent); + res = send(sss_cli_sd, + (const char *)rd->data + rdsent, + rd->len - rdsent, + SSS_DEFAULT_WRITE_FLAGS); } error = errno; -- 1.7.10.2
From 036da0953a10b77d4d0723c11e558cfe0f5c85b9 Mon Sep 17 00:00:00 2001 From: Shantanu Goel <sg...@trade4.test-jc.tower-research.com> Date: Mon, 18 Jun 2012 10:54:44 -0400 Subject: [PATCH 4/5] Add support for terminating idle connections --- src/responder/common/responder.h | 2 + src/responder/common/responder_common.c | 67 ++++++++++++++++++++++++++++++- 2 files changed, 67 insertions(+), 2 deletions(-) diff --git a/src/responder/common/responder.h b/src/responder/common/responder.h index 2cc85445c6d802ba2de275c143c8aeccd2eb5ece..67884ed7659d3d99ac8d693ad083b4f7e293bf71 100644 --- a/src/responder/common/responder.h +++ b/src/responder/common/responder.h @@ -126,6 +126,8 @@ struct cli_ctx { int netgrent_cur; char *automntmap_name; + + struct tevent_timer *idle; }; struct sss_cmd_table { diff --git a/src/responder/common/responder_common.c b/src/responder/common/responder_common.c index 5d5248809d095420e8a4d11ce80fc13e1949c873..3c269725256c88cfad65a0fc9f1b0d4ac4009797 100644 --- a/src/responder/common/responder_common.c +++ b/src/responder/common/responder_common.c @@ -219,12 +219,24 @@ static void client_recv(struct cli_ctx *cctx) return; } +static errno_t reset_idle_timer(struct cli_ctx *cctx); + static void client_fd_handler(struct tevent_context *ev, struct tevent_fd *fde, uint16_t flags, void *ptr) { + errno_t ret; struct cli_ctx *cctx = talloc_get_type(ptr, struct cli_ctx); + /* Always reset the idle timer on any activity */ + ret = reset_idle_timer(cctx); + if (ret != EOK) { + DEBUG(SSSDBG_CRIT_FAILURE, + ("Could not create idle timer for client. " + "This connection may not auto-terminate\n")); + /* Non-fatal, continue */ + } + if (flags & TEVENT_FD_READ) { client_recv(cctx); return; @@ -240,6 +252,11 @@ struct accept_fd_ctx { bool is_private; }; +static void idle_handler(struct tevent_context *ev, + struct tevent_timer *te, + struct timeval current_time, + void *data); + static void accept_fd_handler(struct tevent_context *ev, struct tevent_fd *fde, uint16_t flags, void *ptr) @@ -317,12 +334,58 @@ static void accept_fd_handler(struct tevent_context *ev, talloc_set_destructor(cctx, client_destructor); - DEBUG(4, ("Client connected%s!\n", - accept_ctx->is_private ? " to privileged pipe" : "")); + /* Set up the idle timer */ + ret = reset_idle_timer(cctx); + if (ret != EOK) { + DEBUG(SSSDBG_CRIT_FAILURE, + ("Could not create idle timer for client. " + "This connection may not auto-terminate\n")); + /* Non-fatal, continue */ + } + + DEBUG(SSSDBG_TRACE_FUNC, + ("Client connected%s!\n", + accept_ctx->is_private ? " to privileged pipe" : "")); return; } +static errno_t reset_idle_timer(struct cli_ctx *cctx) +{ + struct timeval tv; + + /* TODO: make this configurable */ + tv = tevent_timeval_current_ofs(60, 0); + + talloc_zfree(cctx->idle); + + cctx->idle = tevent_add_timer(cctx->ev, cctx, tv, idle_handler, cctx); + if (!cctx->idle) return ENOMEM; + + DEBUG(SSSDBG_TRACE_ALL, + ("Idle timer re-set for client [%p][%d]\n", + cctx, cctx->cfd)); + + return EOK; +} + +static void idle_handler(struct tevent_context *ev, + struct tevent_timer *te, + struct timeval current_time, + void *data) +{ + /* This connection is idle. Terminate it */ + struct cli_ctx *cctx = + talloc_get_type(data, struct cli_ctx); + + DEBUG(SSSDBG_TRACE_INTERNAL, + ("Terminating idle client [%p][%d]\n", + cctx, cctx->cfd)); + + /* The cli_ctx destructor will handle the rest */ + talloc_free(cctx); +} + static int sss_dp_init(struct resp_ctx *rctx, struct sbus_interface *intf, const char *cli_name, -- 1.7.10.2
From 32105397d4466266ee70809cb22889f18fbc87ab Mon Sep 17 00:00:00 2001 From: Stephen Gallagher <sgall...@redhat.com> Date: Mon, 18 Jun 2012 11:23:04 -0400 Subject: [PATCH 5/5] Make the client idle timeout configurable --- src/confdb/confdb.h | 2 ++ src/config/SSSDConfig/__init__.py.in | 1 + src/config/SSSDConfigTest.py | 3 ++- src/config/etc/sssd.api.conf | 1 + src/man/sssd.conf.5.xml | 15 +++++++++++++++ src/responder/common/responder.h | 1 + src/responder/common/responder_common.c | 22 ++++++++++++++++++---- 7 files changed, 40 insertions(+), 5 deletions(-) diff --git a/src/confdb/confdb.h b/src/confdb/confdb.h index db247b184d2dbecdd281d16d7b5bc9659bb1cfc3..3fa8b0377d701548e737ae413925358fda6b6884 100644 --- a/src/confdb/confdb.h +++ b/src/confdb/confdb.h @@ -72,6 +72,8 @@ /* Responders */ #define CONFDB_RESPONDER_GET_DOMAINS_TIMEOUT "get_domains_timeout" +#define CONFDB_RESPONDER_CLI_IDLE_TIMEOUT "client_idle_timeout" +#define CONFDB_RESPONDER_CLI_IDLE_DEFAULT_TIMEOUT 60 /* NSS */ #define CONFDB_NSS_CONF_ENTRY "config/nss" diff --git a/src/config/SSSDConfig/__init__.py.in b/src/config/SSSDConfig/__init__.py.in index d7895b49af9e6c9a3b48bb282679c61d6ebbfa6a..b90f505d110bf293e8b65964a38c7d620e779a11 100644 --- a/src/config/SSSDConfig/__init__.py.in +++ b/src/config/SSSDConfig/__init__.py.in @@ -45,6 +45,7 @@ option_strings = { 'command' : _('Command to start service'), 'reconnection_retries' : _('Number of times to attempt connection to Data Providers'), 'fd_limit' : _('The number of file descriptors that may be opened by this responder'), + 'client_idle_timeout' : _('Idle time before automatic disconnection of a client'), # [sssd] 'services' : _('SSSD Services to start'), diff --git a/src/config/SSSDConfigTest.py b/src/config/SSSDConfigTest.py index 1e1fe98e6850f7539579db00fc0bd96d2a865c9b..ef696e98c0b1ccce3dccda8ead7a3bda10049cf8 100755 --- a/src/config/SSSDConfigTest.py +++ b/src/config/SSSDConfigTest.py @@ -274,7 +274,8 @@ class SSSDConfigTestSSSDService(unittest.TestCase): 'debug_to_files', 'command', 'reconnection_retries', - 'fd_limit'] + 'fd_limit', + 'client_idle_timeout'] self.assertTrue(type(options) == dict, "Options should be a dictionary") diff --git a/src/config/etc/sssd.api.conf b/src/config/etc/sssd.api.conf index e09a8bf033f38144cd9824e2592728c2898e3db2..c3d6fa8119213c874c5f9f1d5f77746a0e1fc88f 100644 --- a/src/config/etc/sssd.api.conf +++ b/src/config/etc/sssd.api.conf @@ -10,6 +10,7 @@ debug_to_files = bool, None, false command = str, None, false reconnection_retries = int, None, false fd_limit = int, None, false +client_idle_timeout = int, None, false [sssd] # Monitor service diff --git a/src/man/sssd.conf.5.xml b/src/man/sssd.conf.5.xml index 1dae3ccbcbef6ac73c2bfae82a1ab04251e84259..bdf2543b7900aed7b4e75999a4088f91985fe69b 100644 --- a/src/man/sssd.conf.5.xml +++ b/src/man/sssd.conf.5.xml @@ -303,6 +303,21 @@ </para> </listitem> </varlistentry> + <varlistentry> + <term>client_idle_timeout</term> + <listitem> + <para> + This option specifies the number of seconds that + a client of an SSSD process can hold onto a file + descriptor without communicating on it. This value + is limited in order to avoid resource exhasution + on the system. + </para> + <para> + Default: 60 + </para> + </listitem> + </varlistentry> </variablelist> </refsect2> diff --git a/src/responder/common/responder.h b/src/responder/common/responder.h index 67884ed7659d3d99ac8d693ad083b4f7e293bf71..43a4fa0236c313c3cdeb2a0b181ffd4c96167bb7 100644 --- a/src/responder/common/responder.h +++ b/src/responder/common/responder.h @@ -87,6 +87,7 @@ struct resp_ctx { struct sss_domain_info *domains; int domains_timeout; + int client_idle_timeout; struct sysdb_ctx_list *db_list; struct sss_cmd_table *sss_cmds; diff --git a/src/responder/common/responder_common.c b/src/responder/common/responder_common.c index 3c269725256c88cfad65a0fc9f1b0d4ac4009797..242fae996634469cb6a4e29c137613fb4bf19a03 100644 --- a/src/responder/common/responder_common.c +++ b/src/responder/common/responder_common.c @@ -352,10 +352,8 @@ static void accept_fd_handler(struct tevent_context *ev, static errno_t reset_idle_timer(struct cli_ctx *cctx) { - struct timeval tv; - - /* TODO: make this configurable */ - tv = tevent_timeval_current_ofs(60, 0); + struct timeval tv = + tevent_timeval_current_ofs(cctx->rctx->client_idle_timeout, 0); talloc_zfree(cctx->idle); @@ -620,6 +618,22 @@ int sss_process_init(TALLOC_CTX *mem_ctx, rctx->confdb_service_path = confdb_service_path; ret = confdb_get_int(rctx->cdb, rctx->confdb_service_path, + CONFDB_RESPONDER_CLI_IDLE_TIMEOUT, + CONFDB_RESPONDER_CLI_IDLE_DEFAULT_TIMEOUT, + &rctx->client_idle_timeout); + if (ret != EOK) { + DEBUG(SSSDBG_OP_FAILURE, + ("Cannot get the client idle timeout [%d]: %s\n", + ret, strerror(ret))); + return ret; + } + + /* Ensure that the client timeout is at least ten seconds */ + if (rctx->client_idle_timeout < 10) { + rctx->client_idle_timeout = 10; + } + + ret = confdb_get_int(rctx->cdb, rctx->confdb_service_path, CONFDB_RESPONDER_GET_DOMAINS_TIMEOUT, GET_DOMAINS_DEFAULT_TIMEOUT, &rctx->domains_timeout); if (ret != EOK) { -- 1.7.10.2
signature.asc
Description: This is a digitally signed message part
_______________________________________________ sssd-devel mailing list sssd-devel@lists.fedorahosted.org https://fedorahosted.org/mailman/listinfo/sssd-devel