Module Name: src Committed By: tls Date: Sun Aug 10 07:00:38 UTC 2014
Modified Files: src/usr.sbin/ypbind [tls-earlyentropy]: ypbind.8 ypbind.c Log Message: Rebase. To generate a diff of this commit: cvs rdiff -u -r1.18 -r1.18.38.1 src/usr.sbin/ypbind/ypbind.8 cvs rdiff -u -r1.90 -r1.90.18.1 src/usr.sbin/ypbind/ypbind.c Please note that diffs are not public domain; they are subject to the copyright notices on the relevant files.
Modified files: Index: src/usr.sbin/ypbind/ypbind.8 diff -u src/usr.sbin/ypbind/ypbind.8:1.18 src/usr.sbin/ypbind/ypbind.8:1.18.38.1 --- src/usr.sbin/ypbind/ypbind.8:1.18 Wed Apr 30 13:11:03 2008 +++ src/usr.sbin/ypbind/ypbind.8 Sun Aug 10 07:00:38 2014 @@ -1,4 +1,4 @@ -.\" $NetBSD: ypbind.8,v 1.18 2008/04/30 13:11:03 martin Exp $ +.\" $NetBSD: ypbind.8,v 1.18.38.1 2014/08/10 07:00:38 tls Exp $ .\" .\" Copyright (c) 1996 The NetBSD Foundation, Inc. .\" All rights reserved. @@ -27,7 +27,7 @@ .\" ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE .\" POSSIBILITY OF SUCH DAMAGE. .\" -.Dd February 26, 2005 +.Dd June 14, 2014 .Dt YPBIND 8 .Os .Sh NAME @@ -94,9 +94,9 @@ it is bound. If the binding is somehow lost, e.g by server reboot, .Nm marks the domain as unbound and attempts to re-establish the binding. -When the binding is once again successful, +If a binding cannot be re-established within 60 seconds, .Nm -marks the domain as bound and resumes its periodic check. +backs off exponentially to trying only once per hour. .Pp The options are as follows: .Bl -tag -width "-broadcast" @@ -114,7 +114,7 @@ or servers. .It Fl ypset .Xr ypset 8 -may be used to change the server to which a domain is bound. +may be used from anywhere to change the server to which a domain is bound. .It Fl ypsetme .Xr ypset 8 may be used only from this machine to change the server @@ -122,11 +122,22 @@ to which a domain is bound. .El .Pp The -.Fl broadcast +.Fl broadcast , .Fl ypset , and -.Fl ypsetme , +.Fl ypsetme options are inherently insecure and should be avoided. +.Sh SIGNALS +.Nm +responds to the following signals: +.Bl -tag -width TERM -compact +.It Dv HUP +causes +.Nm +to immediately retry any unbound domains that are currently in +exponential backoff. +Use this to resume immediately after a long network outage is +resolved. .Sh FILES .Pa /var/yp/binding/\*[Lt]domain\*[Gt].version - binding file for \*[Lt]domain\*[Gt]. @@ -147,7 +158,10 @@ facility. 
.Xr yppoll 8 , .Xr ypset 8 .Sh AUTHORS +.An -nosplit This version of .Nm -was originally implemented by Theo de Raadt. -The ypservers support was implemented by Luke Mewburn. +was originally implemented by +.An Theo de Raadt . +The ypservers support was implemented by +.An Luke Mewburn . Index: src/usr.sbin/ypbind/ypbind.c diff -u src/usr.sbin/ypbind/ypbind.c:1.90 src/usr.sbin/ypbind/ypbind.c:1.90.18.1 --- src/usr.sbin/ypbind/ypbind.c:1.90 Tue Aug 30 17:06:22 2011 +++ src/usr.sbin/ypbind/ypbind.c Sun Aug 10 07:00:38 2014 @@ -1,4 +1,4 @@ -/* $NetBSD: ypbind.c,v 1.90 2011/08/30 17:06:22 plunky Exp $ */ +/* $NetBSD: ypbind.c,v 1.90.18.1 2014/08/10 07:00:38 tls Exp $ */ /* * Copyright (c) 1992, 1993 Theo de Raadt <dera...@fsa.ca> @@ -28,7 +28,7 @@ #include <sys/cdefs.h> #ifndef LINT -__RCSID("$NetBSD: ypbind.c,v 1.90 2011/08/30 17:06:22 plunky Exp $"); +__RCSID("$NetBSD: ypbind.c,v 1.90.18.1 2014/08/10 07:00:38 tls Exp $"); #endif #include <sys/types.h> @@ -50,6 +50,7 @@ __RCSID("$NetBSD: ypbind.c,v 1.90 2011/0 #include <ifaddrs.h> #include <limits.h> #include <netdb.h> +#include <signal.h> #include <stdarg.h> #include <stdio.h> #include <stdlib.h> @@ -84,16 +85,26 @@ typedef enum { YPBIND_DIRECT, YPBIND_BROADCAST, } ypbind_mode_t; +enum domainstates { + DOM_NEW, /* not yet bound */ + DOM_ALIVE, /* bound and healthy */ + DOM_PINGING, /* ping outstanding */ + DOM_LOST, /* binding timed out, looking for a new one */ + DOM_DEAD, /* long-term lost, in exponential backoff */ +}; + struct domain { struct domain *dom_next; char dom_name[YPMAXDOMAIN + 1]; struct sockaddr_in dom_server_addr; long dom_vers; - time_t dom_checktime; - time_t dom_asktime; + time_t dom_checktime; /* time of next check/contact */ + time_t dom_asktime; /* time we were last DOMAIN'd */ + time_t dom_losttime; /* time the binding was lost, or 0 */ + unsigned dom_backofftime; /* current backoff period, when DEAD */ int dom_lockfd; - int dom_alive; + enum domainstates dom_state; uint32_t dom_xid; FILE 
*dom_serversfile; /* /var/yp/binding/foo.ypservers */ int dom_been_ypset; /* ypset been done on this domain? */ @@ -102,26 +113,36 @@ struct domain { #define BUFSIZE 1400 -static char *domainname; - +/* the list of all domains */ static struct domain *domains; static int check; +/* option settings */ static ypbind_mode_t default_ypbindmode; - static int allow_local_ypset = 0, allow_any_ypset = 0; static int insecure; +/* the sockets we use to interact with servers */ static int rpcsock, pingsock; + +/* stuff used for manually interacting with servers */ static struct rmtcallargs rmtca; static struct rmtcallres rmtcr; static bool_t rmtcr_outval; static unsigned long rmtcr_port; + +/* The ypbind service transports */ static SVCXPRT *udptransp, *tcptransp; +/* set if we get SIGHUP */ +static sig_atomic_t hupped; + //////////////////////////////////////////////////////////// // utilities +/* + * Combo of open() and flock(). + */ static int open_locked(const char *path, int flags, mode_t mode) { @@ -138,6 +159,39 @@ open_locked(const char *path, int flags, return fd; } +/* + * Exponential backoff for pinging servers for a dead domain. + * + * We go 10 -> 20 -> 40 -> 60 seconds, then 2 -> 4 -> 8 -> 15 -> 30 -> + * 60 minutes, and stay at 60 minutes. This is overengineered. + * + * With a 60 minute max backoff the response time for when things come + * back is not awful, but we only try (and log) about 60 times even if + * things are down for a whole long weekend. This is an acceptable log + * load, I think. + */ +static void +backoff(unsigned *psecs) +{ + unsigned secs; + + secs = *psecs; + if (secs < 60) { + secs *= 2; + if (secs > 60) { + secs = 60; + } + } else if (secs < 60 * 15) { + secs *= 2; + if (secs > 60 * 15) { + secs = 60 * 15; + } + } else if (secs < 60 * 60) { + secs *= 2; + } + *psecs = secs; +} + //////////////////////////////////////////////////////////// // logging @@ -150,6 +204,9 @@ static int debug; static void yp_log(int, const char *, ...) 
__printflike(2, 3); +/* + * Log some stuff, to syslog or stderr depending on the debug setting. + */ static void yp_log(int pri, const char *fmt, ...) { @@ -187,6 +244,34 @@ ypservers_filename(const char *domain) //////////////////////////////////////////////////////////// // struct domain +/* + * The state transitions of a domain work as follows: + * + * in state NEW: + * nag_servers every 5 seconds + * upon answer, state is ALIVE + * + * in state ALIVE: + * every 60 seconds, send ping and switch to state PINGING + * + * in state PINGING: + * upon answer, go to state ALIVE + * if no answer in 5 seconds, go to state LOST and do nag_servers + * + * in state LOST: + * do nag_servers every 5 seconds + * upon answer, go to state ALIVE + * if no answer in 60 seconds, go to state DEAD + * + * in state DEAD + * do nag_servers every backofftime seconds (starts at 10) + * upon answer go to state ALIVE + * backofftime doubles (approximately) each try, with a cap of 1 hour + */ + +/* + * Look up a domain by the XID we assigned it. + */ static struct domain * domain_find(uint32_t xid) { @@ -198,6 +283,11 @@ domain_find(uint32_t xid) return dom; } +/* + * Pick an XID for a domain. + * + * XXX: this should just generate a random number. + */ static uint32_t unique_xid(struct domain *dom) { @@ -210,6 +300,10 @@ unique_xid(struct domain *dom) return tmp_xid; } +/* + * Construct a new domain. Adds it to the global linked list of all + * domains. 
+ */ static struct domain * domain_create(const char *name) { @@ -230,8 +324,10 @@ domain_create(const char *name) dom->dom_vers = YPVERS; dom->dom_checktime = 0; dom->dom_asktime = 0; + dom->dom_losttime = 0; + dom->dom_backofftime = 10; dom->dom_lockfd = -1; - dom->dom_alive = 0; + dom->dom_state = DOM_NEW; dom->dom_xid = unique_xid(dom); dom->dom_been_ypset = 0; dom->dom_serversfile = NULL; @@ -265,6 +361,10 @@ domain_create(const char *name) //////////////////////////////////////////////////////////// // locks +/* + * Open a new binding file. Does not write the contents out; the + * caller (there's only one) does that. + */ static int makelock(struct domain *dom) { @@ -286,6 +386,9 @@ makelock(struct domain *dom) return fd; } +/* + * Remove a binding file. + */ static void removelock(struct domain *dom) { @@ -297,12 +400,14 @@ removelock(struct domain *dom) } /* - * purge_bindingdir: remove old binding files (i.e. "rm BINDINGDIR\/\*.[0-9]") + * purge_bindingdir: remove old binding files (i.e. "rm *.[0-9]" in BINDINGDIR) + * + * The local YP functions [e.g. yp_master()] will fail without even + * talking to ypbind if there is a stale (non-flock'd) binding file + * present. * - * local YP functions [e.g. yp_master()] will fail without even talking - * to ypbind if there is a stale (non-flock'd) binding file present. - * we have to scan the entire BINDINGDIR for binding files, because - * ypbind may bind more than just the yp_get_default_domain() domain. + * We have to remove all binding files in BINDINGDIR, not just the one + * for the default domain. */ static int purge_bindingdir(const char *dirpath) @@ -344,7 +449,39 @@ purge_bindingdir(const char *dirpath) // sunrpc twaddle /* - * LOOPBACK IS MORE IMPORTANT: PUT IN HACK + * Check if the info coming in is (at least somewhat) valid. 
+ */ +static int +rpc_is_valid_response(char *name, struct sockaddr_in *addr) +{ + if (name == NULL) { + return 0; + } + + if (_yp_invalid_domain(name)) { + return 0; + } + + /* don't support insecure servers by default */ + if (!insecure && ntohs(addr->sin_port) >= IPPORT_RESERVED) { + return 0; + } + + return 1; +} + +/* + * Take note of the fact that we've received a reply from a ypserver. + * Or, in the case of being ypset, that we've been ypset, which + * functions much the same. + * + * Note that FORCE is set if and only if IS_YPSET is set. + * + * This function has also for the past 20+ years carried the annotation + * + * LOOPBACK IS MORE IMPORTANT: PUT IN HACK + * + * whose meaning isn't entirely clear. */ static void rpc_received(char *dom_name, struct sockaddr_in *raddrp, int force, @@ -359,46 +496,152 @@ rpc_received(char *dom_name, struct sock DPRINTF("returned from %s about %s\n", inet_ntoa(raddrp->sin_addr), dom_name); - if (dom_name == NULL) - return; - - if (_yp_invalid_domain(dom_name)) - return; - - /* don't support insecure servers by default */ - if (!insecure && ntohs(raddrp->sin_port) >= IPPORT_RESERVED) + /* validate some stuff */ + if (!rpc_is_valid_response(dom_name, raddrp)) { return; + } + /* look for the domain */ for (dom = domains; dom != NULL; dom = dom->dom_next) if (!strcmp(dom->dom_name, dom_name)) break; + /* if not found, create it, but only if FORCE; otherwise ignore */ if (dom == NULL) { if (force == 0) return; dom = domain_create(dom_name); } + /* the domain needs to know if it's been explicitly ypset */ if (is_ypset) { dom->dom_been_ypset = 1; } - /* soft update, alive */ - if (dom->dom_alive == 1 && force == 0) { + /* + * If the domain is alive and we aren't being called by ypset, + * we shouldn't be getting a response at all. Log it, as it + * might be hostile. 
+ */ + if (dom->dom_state == DOM_ALIVE && force == 0) { + if (!memcmp(&dom->dom_server_addr, raddrp, + sizeof(dom->dom_server_addr))) { + yp_log(LOG_WARNING, + "Unexpected reply from server %s for domain %s", + inet_ntoa(dom->dom_server_addr.sin_addr), + dom->dom_name); + } else { + yp_log(LOG_WARNING, + "Falsified reply from %s for domain %s", + inet_ntoa(dom->dom_server_addr.sin_addr), + dom->dom_name); + } + return; + } + + /* + * If we're expecting a ping response, and we've got it + * (meaning we aren't being called by ypset), we don't need to + * do anything. + */ + if (dom->dom_state == DOM_PINGING && force == 0) { + /* + * If the reply came from the server we expect, set + * dom_state back to ALIVE and ping again in 60 + * seconds. + * + * If it came from somewhere else, log it. + */ if (!memcmp(&dom->dom_server_addr, raddrp, sizeof(dom->dom_server_addr))) { - dom->dom_alive = 1; + dom->dom_state = DOM_ALIVE; /* recheck binding in 60 sec */ dom->dom_checktime = time(NULL) + 60; + } else { + yp_log(LOG_WARNING, + "Falsified reply from %s for domain %s", + inet_ntoa(dom->dom_server_addr.sin_addr), + dom->dom_name); } return; } - +#ifdef HEURISTIC + /* + * If transitioning to the alive state from a non-alive state, + * clear dom_asktime. This will help prevent any requests that + * are still coming in from triggering unnecessary pings via + * the HEURISTIC code. + * + * XXX: this may not be an adequate measure; we may need to + * keep more state so we can disable the HEURISTIC code for + * the first few seconds after rebinding. + */ + if (dom->dom_state == DOM_NEW || + dom->dom_state == DOM_LOST || + dom->dom_state == DOM_DEAD) { + dom->dom_asktime = 0; + } +#endif + + /* + * Take the address we got the message from (or in the case of + * ypset, the explicit address we were given) as the server + * address for this domain, mark the domain alive, and we'll + * check it again in 60 seconds. 
+ * + * XXX: it looks like if we get a random unsolicited reply + * from somewhere, we'll silently switch to that server + * address, regardless of merit. + * + * 1. If we have a foo.ypservers file the address should be + * checked against it and rejected if it's not one of the + * addresses of one of the listed hostnames. Note that it + * might not be the same address we sent to; even fairly smart + * UDP daemons don't always handle multihomed hosts correctly + * and we can't expect sunrpc code to do anything intelligent + * at all. + * + * 2. If we're in broadcast mode the address should be + * checked against the local addresses and netmasks so we + * don't accept responses from Mars. + * + * 2a. If we're in broadcast mode and we've been ypset, we + * should not accept anything else until we drop the ypset + * state for not responding. + * + * 3. Either way we should not accept a response from an + * arbitrary host unless we don't currently have a binding. + * (This is now fixed above.) + * + * Note that for a random unsolicited reply to work it has to + * carry the XID of one of the domains we know about; but + * those values are predictable. + */ (void)memcpy(&dom->dom_server_addr, raddrp, sizeof(dom->dom_server_addr)); /* recheck binding in 60 seconds */ dom->dom_checktime = time(NULL) + 60; - dom->dom_alive = 1; + dom->dom_state = DOM_ALIVE; + + /* Clear the dead/backoff state. */ + dom->dom_losttime = 0; + dom->dom_backofftime = 10; + + if (is_ypset == 0) { + yp_log(LOG_NOTICE, "Domain %s is alive; server %s", + dom->dom_name, + inet_ntoa(dom->dom_server_addr.sin_addr)); + } + + /* + * Generate a new binding file. If this fails, forget about it. + * (But we keep the binding and we'll report it to anyone who + * asks via the ypbind service.) XXX: this will interact badly, + * maybe very badly, with the code in HEURISTIC. + * + * Note that makelock() doesn't log on failure. 
+ */ if (dom->dom_lockfd != -1) (void)close(dom->dom_lockfd); @@ -406,10 +649,6 @@ rpc_received(char *dom_name, struct sock if ((fd = makelock(dom)) == -1) return; - /* - * ok, if BINDINGDIR exists, and we can create the binding file, - * then write to it.. - */ dom->dom_lockfd = fd; iov[0].iov_base = &(udptransp->xp_port); @@ -436,6 +675,10 @@ rpc_received(char *dom_name, struct sock } } +/* + * The NULL call: do nothing. This is obliged to exist because of + * sunrpc silliness. + */ static void * /*ARGSUSED*/ ypbindproc_null_2(SVCXPRT *transp, void *argp) @@ -447,6 +690,9 @@ ypbindproc_null_2(SVCXPRT *transp, void return (void *)&res; } +/* + * The DOMAIN call: look up the ypserver for a specified domain. + */ static void * /*ARGSUSED*/ ypbindproc_domain_2(SVCXPRT *transp, void *argp) @@ -458,12 +704,23 @@ ypbindproc_domain_2(SVCXPRT *transp, voi int count; DPRINTF("ypbindproc_domain_2 %s\n", arg); + + /* Reject invalid domains. */ if (_yp_invalid_domain(arg)) return NULL; (void)memset(&res, 0, sizeof res); res.ypbind_status = YPBIND_FAIL_VAL; + /* + * Look for the domain. XXX: Behave erratically if we have + * more than 100 domains. The intent here is to avoid allowing + * arbitrary incoming requests to create more than 100 + * domains; but this logic means that if we legitimately have + * more than 100 (e.g. via ypset) we'll only actually bind the + * first 100 and the rest will fail. The test on 'count' should + * be moved further down. + */ for (count = 0, dom = domains; dom != NULL; dom = dom->dom_next, count++) { @@ -473,6 +730,16 @@ ypbindproc_domain_2(SVCXPRT *transp, voi break; } + /* + * If the domain doesn't exist, create it, then fail the call + * because we have no information yet. + * + * Set "check" so that checkwork() will run and look for a + * server. + * + * XXX: like during startup there's a spurious call to + * removelock() after domain_create(). 
+ */ if (dom == NULL) { dom = domain_create(arg); removelock(dom); @@ -481,14 +748,30 @@ ypbindproc_domain_2(SVCXPRT *transp, voi return NULL; } - if (dom->dom_alive == 0) { - DPRINTF("dead domain %s\n", arg); + if (dom->dom_state == DOM_NEW) { + DPRINTF("new domain %s\n", arg); return NULL; } #ifdef HEURISTIC + /* + * Keep track of the last time we were explicitly asked about + * this domain. If it happens a lot, force a ping. This works + * (or "works") because we only get asked specifically when + * things aren't going; otherwise the client code in libc and + * elsewhere uses the binding file. + * + * Note: HEURISTIC is enabled by default. + * + * dholland 20140609: I think this is part of the mechanism + * that causes ypbind to spam. I'm changing this logic so it + * only triggers when the state is DOM_ALIVE: if the domain + * is new, lost, or dead we shouldn't send more requests than + * the ones already scheduled, and if we're already in the + * middle of pinging there's no point doing it again. + */ (void)time(&now); - if (now < dom->dom_asktime + 5) { + if (dom->dom_state == DOM_ALIVE && now < dom->dom_asktime + 5) { /* * Hmm. More than 2 requests in 5 seconds have indicated * that my binding is possibly incorrect. @@ -514,6 +797,19 @@ ypbindproc_domain_2(SVCXPRT *transp, voi return &res; } +/* + * The SETDOM call: ypset. + * + * Unless -ypsetme was given on the command line, this is rejected; + * even then it's only allowed from localhost unless -ypset was + * given on the command line. + * + * Allowing anyone anywhere to ypset you (and therefore provide your + * password file and such) is a horrible thing and it isn't clear to + * me why this functionality even exists. + * + * ypset from localhost has some but limited utility. 
+ */ static void * ypbindproc_setdom_2(SVCXPRT *transp, void *argp) { @@ -525,6 +821,10 @@ ypbindproc_setdom_2(SVCXPRT *transp, voi fromsin = svc_getcaller(transp); DPRINTF("ypbindproc_setdom_2 from %s\n", inet_ntoa(fromsin->sin_addr)); + /* + * Reject unless enabled. + */ + if (allow_any_ypset) { /* nothing */ } else if (allow_local_ypset) { @@ -538,16 +838,27 @@ ypbindproc_setdom_2(SVCXPRT *transp, voi return NULL; } + /* Make a "security" check. */ if (ntohs(fromsin->sin_port) >= IPPORT_RESERVED) { DPRINTF("ypset from unprivileged port denied\n"); return &res; } + /* Ignore requests we don't understand. */ if (sd->ypsetdom_vers != YPVERS) { DPRINTF("ypset with wrong version denied\n"); return &res; } + /* + * Fetch the arguments out of the xdr-decoded blob and call + * rpc_received(), setting FORCE so that the domain will be + * created if we don't already know about it, and also saying + * that it's actually a ypset. + * + * Effectively we're telling rpc_received() that we got an + * RPC response from the server specified by ypset. + */ (void)memset(&bindsin, 0, sizeof bindsin); bindsin.sin_family = AF_INET; bindsin.sin_len = sizeof(bindsin); @@ -561,6 +872,13 @@ ypbindproc_setdom_2(SVCXPRT *transp, voi return &res; } +/* + * Dispatcher for the ypbind service. + * + * There are three calls: NULL, which does nothing, DOMAIN, which + * gets the binding for a particular domain, and SETDOM, which + * does ypset. + */ static void ypbindprog_2(struct svc_req *rqstp, register SVCXPRT *transp) { @@ -622,6 +940,12 @@ ypbindprog_2(struct svc_req *rqstp, regi return; } +/* + * Set up sunrpc stuff. + * + * This sets up the ypbind service (both TCP and UDP) and also opens + * the sockets we use for talking to ypservers. + */ static void sunrpc_setup(void) { @@ -670,6 +994,10 @@ sunrpc_setup(void) //////////////////////////////////////////////////////////// // operational logic +/* + * Broadcast an RPC packet to hopefully contact some servers for a + * domain. 
+ */ static int broadcast(char *buf, int outlen) { @@ -720,6 +1048,13 @@ broadcast(char *buf, int outlen) return (0); } +/* + * Send an RPC packet to all the configured (in /var/yp/foo.ypservers) + * servers for a domain. + * + * XXX: we should read and parse the file up front and reread it only + * if it changes. + */ static int direct(char *buf, int outlen, struct domain *dom) { @@ -801,6 +1136,11 @@ direct(char *buf, int outlen, struct dom return 0; } +/* + * Send an RPC packet to the server that's been selected with ypset. + * (This is only used when in broadcast mode and when ypset is + * allowed.) + */ static int direct_set(char *buf, int outlen, struct domain *dom) { @@ -857,6 +1197,9 @@ direct_set(char *buf, int outlen, struct return 0; } +/* + * Receive and dispatch packets on the general RPC socket. + */ static enum clnt_stat handle_replies(void) { @@ -911,6 +1254,9 @@ try_again: return RPC_SUCCESS; } +/* + * Receive and dispatch packets on the ping socket. + */ static enum clnt_stat handle_ping(void) { @@ -965,6 +1311,14 @@ try_again: return RPC_SUCCESS; } +/* + * Contact all known servers for a domain in the hopes that one of + * them's awake. Also, if we previously had a binding but it timed + * out, try the portmapper on that host in case ypserv moved ports for + * some reason. + * + * As a side effect, wipe out any existing binding file. + */ static int nag_servers(struct domain *dom) { @@ -1023,7 +1377,7 @@ nag_servers(struct domain *dom) removelock(dom); } - if (dom->dom_alive == 2) { + if (dom->dom_state == DOM_PINGING || dom->dom_state == DOM_LOST) { /* * This resolves the following situation: * ypserver on other subnet was once bound, @@ -1058,6 +1412,9 @@ nag_servers(struct domain *dom) return -1; } +/* + * Send a ping message to a domain's current ypserver. 
+ */ static int ping(struct domain *dom) { @@ -1107,7 +1464,6 @@ ping(struct domain *dom) } AUTH_DESTROY(rpcua); - dom->dom_alive = 2; DPRINTF("ping %x\n", dom->dom_server_addr.sin_addr.s_addr); if (sendto(pingsock, buf, outlen, 0, @@ -1119,14 +1475,20 @@ ping(struct domain *dom) } /* - * State transition is done like this: + * Scan for timer-based work to do. + * + * If the domain is currently alive, ping the server we're currently + * bound to. Otherwise, try all known servers and/or broadcast for a + * server via nag_servers. + * + * Try again in five seconds. * - * STATE EVENT ACTION NEWSTATE TIMEOUT - * no binding timeout broadcast no binding 5 sec - * no binding answer -- binding 60 sec - * binding timeout ping server checking 5 sec - * checking timeout ping server + broadcast checking 5 sec - * checking answer -- binding 60 sec + * If we get back here and the state is still DOM_PINGING, it means + * we didn't receive a ping response within five seconds. Declare the + * binding lost. If the binding is already lost, and it's been lost + * for 60 seconds, switch to DOM_DEAD and begin exponential backoff. + * The exponential backoff starts at 10 seconds and tops out at one + * hour; see above. 
*/ static void checkwork(void) @@ -1138,20 +1500,106 @@ checkwork(void) (void)time(&t); for (dom = domains; dom != NULL; dom = dom->dom_next) { - if (dom->dom_checktime < t) { - if (dom->dom_alive == 1) - (void)ping(dom); - else - (void)nag_servers(dom); - (void)time(&t); + if (dom->dom_checktime >= t) { + continue; + } + switch (dom->dom_state) { + case DOM_NEW: + /* XXX should be a timeout for this state */ + dom->dom_checktime = t + 5; + (void)nag_servers(dom); + break; + + case DOM_ALIVE: + dom->dom_state = DOM_PINGING; + dom->dom_checktime = t + 5; + (void)ping(dom); + break; + + case DOM_PINGING: + dom->dom_state = DOM_LOST; + dom->dom_losttime = t; dom->dom_checktime = t + 5; + yp_log(LOG_NOTICE, "Domain %s lost its binding to " + "server %s", dom->dom_name, + inet_ntoa(dom->dom_server_addr.sin_addr)); + (void)nag_servers(dom); + break; + + case DOM_LOST: + if (t > dom->dom_losttime + 60) { + dom->dom_state = DOM_DEAD; + dom->dom_backofftime = 10; + yp_log(LOG_NOTICE, "Domain %s dead; " + "going to exponential backoff", + dom->dom_name); + } + dom->dom_checktime = t + 5; + (void)nag_servers(dom); + break; + + case DOM_DEAD: + dom->dom_checktime = t + dom->dom_backofftime; + backoff(&dom->dom_backofftime); + (void)nag_servers(dom); + break; } + /* re-fetch the time in case we hung sending packets */ + (void)time(&t); + } +} + +/* + * Process a hangup signal. + * + * Do an extra nag_servers() for any domains that are DEAD. This way + * if you know things are back up you can restore service by sending + * ypbind a SIGHUP rather than waiting for the timeout period. + */ +static void +dohup(void) +{ + struct domain *dom; + + hupped = 0; + for (dom = domains; dom != NULL; dom = dom->dom_next) { + if (dom->dom_state == DOM_DEAD) { + (void)nag_servers(dom); + } + } +} + +/* + * Receive a hangup signal. + */ +static void +hup(int __unused sig) +{ + hupped = 1; +} + +/* + * Initialize hangup processing. 
+ */ +static void +starthup(void) +{ + struct sigaction sa; + + sa.sa_handler = hup; + sigemptyset(&sa.sa_mask); + sa.sa_flags = SA_RESTART; + if (sigaction(SIGHUP, &sa, NULL) == -1) { + err(1, "sigaction"); } } //////////////////////////////////////////////////////////// // main +/* + * Usage message. + */ __dead static void usage(void) { @@ -1166,23 +1614,25 @@ usage(void) exit(1); } +/* + * Main. + */ int main(int argc, char *argv[]) { struct timeval tv; fd_set fdsr; int width, lockfd; - int evil = 0; + int started = 0; + char *domainname; setprogname(argv[0]); - (void)yp_get_default_domain(&domainname); - if (domainname[0] == '\0') - errx(1, "Domainname not set. Aborting."); - if (_yp_invalid_domain(domainname)) - errx(1, "Invalid domainname: %s", domainname); - default_ypbindmode = YPBIND_DIRECT; + /* + * Process arguments. + */ + default_ypbindmode = YPBIND_DIRECT; while (--argc) { ++argv; if (!strcmp("-insecure", *argv)) { @@ -1204,27 +1654,72 @@ main(int argc, char *argv[]) } } - /* initialise syslog */ + /* + * Look up the name of the default domain. + */ + + (void)yp_get_default_domain(&domainname); + if (domainname[0] == '\0') + errx(1, "Domainname not set. Aborting."); + if (_yp_invalid_domain(domainname)) + errx(1, "Invalid domainname: %s", domainname); + + /* + * Start things up. + */ + + /* Open the system log. */ openlog("ypbind", LOG_PERROR | LOG_PID, LOG_DAEMON); - /* acquire ypbind.lock */ + /* Acquire /var/run/ypbind.lock. */ lockfd = open_locked(_PATH_YPBIND_LOCK, O_CREAT|O_RDWR|O_TRUNC, 0644); if (lockfd == -1) err(1, "Cannot create %s", _PATH_YPBIND_LOCK); - /* initialize sunrpc stuff */ + /* Accept hangups. */ + starthup(); + + /* Initialize sunrpc stuff. 
*/ sunrpc_setup(); - /* blow away old bindings in BINDINGDIR */ + /* Clean out BINDINGDIR, deleting all existing (now stale) bindings */ if (purge_bindingdir(BINDINGDIR) < 0) - errx(1, "unable to purge old bindings from %s", BINDINGDIR); + errx(1, "Unable to purge old bindings from %s", BINDINGDIR); + + /* + * We start with one binding, for the default domain. It starts + * out "unsuccessful". + * + * XXX: domain_create adds the new domain to 'domains' (the + * global linked list) and therefore we shouldn't assign + * 'domains' again on return. + */ - /* build initial domain binding, make it "unsuccessful" */ domains = domain_create(domainname); + + /* + * Delete the lock for the default domain again, just in case something + * magically caused it to appear since purge_bindingdir() was called. + * XXX: this is useless and redundant; remove it. + */ removelock(domains); + /* + * Main loop. Wake up at least once a second and check for + * timer-based work to do (checkwork) and also handle incoming + * responses from ypservers and any RPCs made to the ypbind + * service. + * + * There are two sockets used for ypserver traffic: one for + * pings and one for everything else. These call XDR manually + * for encoding and are *not* dispatched via the sunrpc + * libraries. + * + * The ypbind service *is* dispatched via the sunrpc libraries. + * svc_getreqset() does whatever internal muck and ultimately + * ypbind service calls arrive at ypbindprog_2(). 
+ */ checkwork(); - for (;;) { width = svc_maxfd; if (rpcsock > width) @@ -1240,24 +1735,55 @@ main(int argc, char *argv[]) switch (select(width, &fdsr, NULL, NULL, &tv)) { case 0: + /* select timed out - check for timer-based work */ + if (hupped) { + dohup(); + } checkwork(); break; case -1: - yp_log(LOG_WARNING, "select: %s", strerror(errno)); + if (hupped) { + dohup(); + } + if (errno != EINTR) { + yp_log(LOG_WARNING, "select: %s", + strerror(errno)); + } break; default: + if (hupped) { + dohup(); + } + /* incoming of our own; read it */ if (FD_ISSET(rpcsock, &fdsr)) (void)handle_replies(); if (FD_ISSET(pingsock, &fdsr)) (void)handle_ping(); + + /* read any incoming packets for the ypbind service */ svc_getreqset(&fdsr); + + /* + * Only check for timer-based work if + * something in the incoming RPC logic said + * to. This might be just a hack to avoid + * scanning the list unnecessarily, but I + * suspect it's also a hack to cover wrong + * state logic. - dholland 20140609 + */ if (check) checkwork(); break; } - if (!evil && domains->dom_alive) { - evil = 1; + /* + * Defer daemonizing until the default domain binds + * successfully. XXX: there seems to be no timeout + * on this, which means that if the default domain + * is dead upstream boot will hang indefinitely. + */ + if (!started && domains->dom_state == DOM_ALIVE) { + started = 1; #ifdef DEBUG if (!debug) #endif