Module Name:    src
Committed By:   tls
Date:           Sun Aug 10 07:00:38 UTC 2014

Modified Files:
        src/usr.sbin/ypbind [tls-earlyentropy]: ypbind.8 ypbind.c

Log Message:
Rebase.


To generate a diff of this commit:
cvs rdiff -u -r1.18 -r1.18.38.1 src/usr.sbin/ypbind/ypbind.8
cvs rdiff -u -r1.90 -r1.90.18.1 src/usr.sbin/ypbind/ypbind.c

Please note that diffs are not public domain; they are subject to the
copyright notices on the relevant files.

Modified files:

Index: src/usr.sbin/ypbind/ypbind.8
diff -u src/usr.sbin/ypbind/ypbind.8:1.18 src/usr.sbin/ypbind/ypbind.8:1.18.38.1
--- src/usr.sbin/ypbind/ypbind.8:1.18	Wed Apr 30 13:11:03 2008
+++ src/usr.sbin/ypbind/ypbind.8	Sun Aug 10 07:00:38 2014
@@ -1,4 +1,4 @@
-.\"	$NetBSD: ypbind.8,v 1.18 2008/04/30 13:11:03 martin Exp $
+.\"	$NetBSD: ypbind.8,v 1.18.38.1 2014/08/10 07:00:38 tls Exp $
 .\"
 .\" Copyright (c) 1996 The NetBSD Foundation, Inc.
 .\" All rights reserved.
@@ -27,7 +27,7 @@
 .\" ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 .\" POSSIBILITY OF SUCH DAMAGE.
 .\"
-.Dd February 26, 2005
+.Dd June 14, 2014
 .Dt YPBIND 8
 .Os
 .Sh NAME
@@ -94,9 +94,9 @@ it is bound.
 If the binding is somehow lost, e.g by server reboot,
 .Nm
 marks the domain as unbound and attempts to re-establish the binding.
-When the binding is once again successful,
+If a binding cannot be re-established within 60 seconds,
 .Nm
-marks the domain as bound and resumes its periodic check.
+backs off exponentially to trying only once per hour.
 .Pp
 The options are as follows:
 .Bl -tag -width "-broadcast"
@@ -114,7 +114,7 @@ or
 servers.
 .It Fl ypset
 .Xr ypset 8
-may be used to change the server to which a domain is bound.
+may be used from anywhere to change the server to which a domain is bound.
 .It Fl ypsetme
 .Xr ypset 8
 may be used only from this machine to change the server
@@ -122,11 +122,22 @@ to which a domain is bound.
 .El
 .Pp
 The
-.Fl broadcast
+.Fl broadcast ,
 .Fl ypset ,
 and
-.Fl ypsetme ,
+.Fl ypsetme
 options are inherently insecure and should be avoided.
+.Sh SIGNALS
+.Nm
+responds to the following signals:
+.Bl -tag -width TERM -compact
+.It Dv HUP
+causes
+.Nm
+to immediately retry any unbound domains that are currently in
+exponential backoff.
+Use this to resume immediately after a long network outage is
+resolved.
 .Sh FILES
 .Pa /var/yp/binding/\*[Lt]domain\*[Gt].version
 - binding file for \*[Lt]domain\*[Gt].
@@ -147,7 +158,10 @@ facility.
 .Xr yppoll 8 ,
 .Xr ypset 8
 .Sh AUTHORS
+.An -nosplit
 This version of
 .Nm
-was originally implemented by Theo de Raadt.
-The ypservers support was implemented by Luke Mewburn.
+was originally implemented by
+.An Theo de Raadt .
+The ypservers support was implemented by
+.An Luke Mewburn .

Index: src/usr.sbin/ypbind/ypbind.c
diff -u src/usr.sbin/ypbind/ypbind.c:1.90 src/usr.sbin/ypbind/ypbind.c:1.90.18.1
--- src/usr.sbin/ypbind/ypbind.c:1.90	Tue Aug 30 17:06:22 2011
+++ src/usr.sbin/ypbind/ypbind.c	Sun Aug 10 07:00:38 2014
@@ -1,4 +1,4 @@
-/*	$NetBSD: ypbind.c,v 1.90 2011/08/30 17:06:22 plunky Exp $	*/
+/*	$NetBSD: ypbind.c,v 1.90.18.1 2014/08/10 07:00:38 tls Exp $	*/
 
 /*
  * Copyright (c) 1992, 1993 Theo de Raadt <dera...@fsa.ca>
@@ -28,7 +28,7 @@
 
 #include <sys/cdefs.h>
 #ifndef LINT
-__RCSID("$NetBSD: ypbind.c,v 1.90 2011/08/30 17:06:22 plunky Exp $");
+__RCSID("$NetBSD: ypbind.c,v 1.90.18.1 2014/08/10 07:00:38 tls Exp $");
 #endif
 
 #include <sys/types.h>
@@ -50,6 +50,7 @@ __RCSID("$NetBSD: ypbind.c,v 1.90 2011/0
 #include <ifaddrs.h>
 #include <limits.h>
 #include <netdb.h>
+#include <signal.h>
 #include <stdarg.h>
 #include <stdio.h>
 #include <stdlib.h>
@@ -84,16 +85,26 @@ typedef enum {
 	YPBIND_DIRECT, YPBIND_BROADCAST,
 } ypbind_mode_t;
 
+enum domainstates {
+	DOM_NEW,		/* not yet bound */
+	DOM_ALIVE,		/* bound and healthy */
+	DOM_PINGING,		/* ping outstanding */
+	DOM_LOST,		/* binding timed out, looking for a new one */
+	DOM_DEAD,		/* long-term lost, in exponential backoff */
+};
+
 struct domain {
 	struct domain *dom_next;
 
 	char dom_name[YPMAXDOMAIN + 1];
 	struct sockaddr_in dom_server_addr;
 	long dom_vers;
-	time_t dom_checktime;
-	time_t dom_asktime;
+	time_t dom_checktime;		/* time of next check/contact */
+	time_t dom_asktime;		/* time we were last DOMAIN'd */
+	time_t dom_losttime;		/* time the binding was lost, or 0 */
+	unsigned dom_backofftime;	/* current backoff period, when DEAD */
 	int dom_lockfd;
-	int dom_alive;
+	enum domainstates dom_state;
 	uint32_t dom_xid;
 	FILE *dom_serversfile;		/* /var/yp/binding/foo.ypservers */
 	int dom_been_ypset;		/* ypset been done on this domain? */
@@ -102,26 +113,36 @@ struct domain {
 
 #define BUFSIZE		1400
 
-static char *domainname;
-
+/* the list of all domains */
 static struct domain *domains;
 static int check;
 
+/* option settings */
 static ypbind_mode_t default_ypbindmode;
-
 static int allow_local_ypset = 0, allow_any_ypset = 0;
 static int insecure;
 
+/* the sockets we use to interact with servers */
 static int rpcsock, pingsock;
+
+/* stuff used for manually interacting with servers */
 static struct rmtcallargs rmtca;
 static struct rmtcallres rmtcr;
 static bool_t rmtcr_outval;
 static unsigned long rmtcr_port;
+
+/* The ypbind service transports */
 static SVCXPRT *udptransp, *tcptransp;
 
+/* set if we get SIGHUP */
+static sig_atomic_t hupped;
+
 ////////////////////////////////////////////////////////////
 // utilities
 
+/*
+ * Combo of open() and flock().
+ */
 static int
 open_locked(const char *path, int flags, mode_t mode)
 {
@@ -138,6 +159,39 @@ open_locked(const char *path, int flags,
 	return fd;
 }
 
+/*
+ * Exponential backoff for pinging servers for a dead domain.
+ *
+ * We go 10 -> 20 -> 40 -> 60 seconds, then 2 -> 4 -> 8 -> 15 -> 30 ->
+ * 60 minutes, and stay at 60 minutes. This is overengineered.
+ *
+ * With a 60 minute max backoff the response time for when things come
+ * back is not awful, but we only try (and log) about 60 times even if
+ * things are down for a whole long weekend. This is an acceptable log
+ * load, I think.
+ */
+static void
+backoff(unsigned *psecs)
+{
+	unsigned secs;
+
+	secs = *psecs;
+	if (secs < 60) {
+		secs *= 2;
+		if (secs > 60) {
+			secs = 60;
+		}
+	} else if (secs < 60 * 15) {
+		secs *= 2;
+		if (secs > 60 * 15) {
+			secs = 60 * 15;
+		}
+	} else if (secs < 60 * 60) {
+		secs *= 2;
+	}
+	*psecs = secs;
+}
+
 ////////////////////////////////////////////////////////////
 // logging
 
@@ -150,6 +204,9 @@ static int debug;
 
 static void yp_log(int, const char *, ...) __printflike(2, 3);
 
+/*
+ * Log some stuff, to syslog or stderr depending on the debug setting.
+ */
 static void
 yp_log(int pri, const char *fmt, ...)
 {
@@ -187,6 +244,34 @@ ypservers_filename(const char *domain)
 ////////////////////////////////////////////////////////////
 // struct domain
 
+/*
+ * The state transitions of a domain work as follows:
+ *
+ * in state NEW:
+ *    nag_servers every 5 seconds
+ *    upon answer, state is ALIVE
+ *
+ * in state ALIVE:
+ *    every 60 seconds, send ping and switch to state PINGING
+ *
+ * in state PINGING:
+ *    upon answer, go to state ALIVE
+ *    if no answer in 5 seconds, go to state LOST and do nag_servers
+ *
+ * in state LOST:
+ *    do nag_servers every 5 seconds
+ *    upon answer, go to state ALIVE
+ *    if no answer in 60 seconds, go to state DEAD
+ *
+ * in state DEAD
+ *    do nag_servers every backofftime seconds (starts at 10)
+ *    upon answer go to state ALIVE
+ *    backofftime doubles (approximately) each try, with a cap of 1 hour
+ */
+
+/*
+ * Look up a domain by the XID we assigned it.
+ */
 static struct domain *
 domain_find(uint32_t xid)
 {
@@ -198,6 +283,11 @@ domain_find(uint32_t xid)
 	return dom;
 }
 
+/*
+ * Pick an XID for a domain.
+ *
+ * XXX: this should just generate a random number.
+ */
 static uint32_t
 unique_xid(struct domain *dom)
 {
@@ -210,6 +300,10 @@ unique_xid(struct domain *dom)
 	return tmp_xid;
 }
 
+/*
+ * Construct a new domain. Adds it to the global linked list of all
+ * domains.
+ */
 static struct domain *
 domain_create(const char *name)
 {
@@ -230,8 +324,10 @@ domain_create(const char *name)
 	dom->dom_vers = YPVERS;
 	dom->dom_checktime = 0;
 	dom->dom_asktime = 0;
+	dom->dom_losttime = 0;
+	dom->dom_backofftime = 10;
 	dom->dom_lockfd = -1;
-	dom->dom_alive = 0;
+	dom->dom_state = DOM_NEW;
 	dom->dom_xid = unique_xid(dom);
 	dom->dom_been_ypset = 0;
 	dom->dom_serversfile = NULL;
@@ -265,6 +361,10 @@ domain_create(const char *name)
 ////////////////////////////////////////////////////////////
 // locks
 
+/*
+ * Open a new binding file. Does not write the contents out; the
+ * caller (there's only one) does that.
+ */
 static int
 makelock(struct domain *dom)
 {
@@ -286,6 +386,9 @@ makelock(struct domain *dom)
 	return fd;
 }
 
+/*
+ * Remove a binding file.
+ */
 static void
 removelock(struct domain *dom)
 {
@@ -297,12 +400,14 @@ removelock(struct domain *dom)
 }
 
 /*
- * purge_bindingdir: remove old binding files (i.e. "rm BINDINGDIR\/\*.[0-9]")
+ * purge_bindingdir: remove old binding files (i.e. "rm *.[0-9]" in BINDINGDIR)
+ *
+ * The local YP functions [e.g. yp_master()] will fail without even
+ * talking to ypbind if there is a stale (non-flock'd) binding file
+ * present.
  *
- * local YP functions [e.g. yp_master()] will fail without even talking
- * to ypbind if there is a stale (non-flock'd) binding file present.
- * we have to scan the entire BINDINGDIR for binding files, because
- * ypbind may bind more than just the yp_get_default_domain() domain.
+ * We have to remove all binding files in BINDINGDIR, not just the one
+ * for the default domain.
  */
 static int
 purge_bindingdir(const char *dirpath)
@@ -344,7 +449,39 @@ purge_bindingdir(const char *dirpath)
 // sunrpc twaddle
 
 /*
- * LOOPBACK IS MORE IMPORTANT: PUT IN HACK
+ * Check if the info coming in is (at least somewhat) valid.
+ */
+static int
+rpc_is_valid_response(char *name, struct sockaddr_in *addr)
+{
+	if (name == NULL) {
+		return 0;
+	}
+
+	if (_yp_invalid_domain(name)) {
+		return 0;
+	}
+
+	/* don't support insecure servers by default */
+	if (!insecure && ntohs(addr->sin_port) >= IPPORT_RESERVED) {
+		return 0;
+	}
+
+	return 1;
+}
+
+/*
+ * Take note of the fact that we've received a reply from a ypserver.
+ * Or, in the case of being ypset, that we've been ypset, which
+ * functions much the same.
+ *
+ * Note that FORCE is set if and only if IS_YPSET is set.
+ *
+ * This function has also for the past 20+ years carried the annotation
+ *
+ *      LOOPBACK IS MORE IMPORTANT: PUT IN HACK
+ *
+ * whose meaning isn't entirely clear.
  */
 static void
 rpc_received(char *dom_name, struct sockaddr_in *raddrp, int force,
@@ -359,46 +496,152 @@ rpc_received(char *dom_name, struct sock
 	DPRINTF("returned from %s about %s\n",
 		inet_ntoa(raddrp->sin_addr), dom_name);
 
-	if (dom_name == NULL)
-		return;
-
-	if (_yp_invalid_domain(dom_name))
-		return;	
-
-		/* don't support insecure servers by default */
-	if (!insecure && ntohs(raddrp->sin_port) >= IPPORT_RESERVED)
+	/* validate some stuff */
+	if (!rpc_is_valid_response(dom_name, raddrp)) {
 		return;
+	}
 
+	/* look for the domain */
 	for (dom = domains; dom != NULL; dom = dom->dom_next)
 		if (!strcmp(dom->dom_name, dom_name))
 			break;
 
+	/* if not found, create it, but only if FORCE; otherwise ignore */
 	if (dom == NULL) {
 		if (force == 0)
 			return;
 		dom = domain_create(dom_name);
 	}
 
+	/* the domain needs to know if it's been explicitly ypset */
 	if (is_ypset) {
 		dom->dom_been_ypset = 1;
 	}
 
-	/* soft update, alive */
-	if (dom->dom_alive == 1 && force == 0) {
+	/*
+	 * If the domain is alive and we aren't being called by ypset,
+	 * we shouldn't be getting a response at all. Log it, as it
+	 * might be hostile.
+	 */
+	if (dom->dom_state == DOM_ALIVE && force == 0) {
+		if (!memcmp(&dom->dom_server_addr, raddrp,
+			    sizeof(dom->dom_server_addr))) {
+			yp_log(LOG_WARNING,
+			       "Unexpected reply from server %s for domain %s",
+			       inet_ntoa(dom->dom_server_addr.sin_addr),
+			       dom->dom_name);
+		} else {
+			yp_log(LOG_WARNING,
+			       "Falsified reply from %s for domain %s",
+			       inet_ntoa(dom->dom_server_addr.sin_addr),
+			       dom->dom_name);
+		}
+		return;
+	}
+
+	/*
+	 * If we're expecting a ping response, and we've got it
+	 * (meaning we aren't being called by ypset), we don't need to
+	 * do anything.
+	 */
+	if (dom->dom_state == DOM_PINGING && force == 0) {
+		/*
+		 * If the reply came from the server we expect, set
+		 * dom_state back to ALIVE and ping again in 60
+		 * seconds.
+		 *
+		 * If it came from somewhere else, log it.
+		 */
 		if (!memcmp(&dom->dom_server_addr, raddrp,
 			    sizeof(dom->dom_server_addr))) {
-			dom->dom_alive = 1;
+			dom->dom_state = DOM_ALIVE;
 			/* recheck binding in 60 sec */
 			dom->dom_checktime = time(NULL) + 60;
+		} else {
+			yp_log(LOG_WARNING,
+			       "Falsified reply from %s for domain %s",
+			       inet_ntoa(dom->dom_server_addr.sin_addr),
+			       dom->dom_name);
 		}
 		return;
 	}
-	
+
+#ifdef HEURISTIC
+	/*
+	 * If transitioning to the alive state from a non-alive state,
+	 * clear dom_asktime. This will help prevent any requests that
+	 * are still coming in from triggering unnecessary pings via
+	 * the HEURISTIC code.
+	 *
+	 * XXX: this may not be an adequate measure; we may need to
+	 * keep more state so we can disable the HEURISTIC code for
+	 * the first few seconds after rebinding.
+	 */
+	if (dom->dom_state == DOM_NEW ||
+	    dom->dom_state == DOM_LOST ||
+	    dom->dom_state == DOM_DEAD) {
+		dom->dom_asktime = 0;
+	}
+#endif
+
+	/*
+	 * Take the address we got the message from (or in the case of
+	 * ypset, the explicit address we were given) as the server
+	 * address for this domain, mark the domain alive, and we'll
+	 * check it again in 60 seconds.
+	 *
+	 * XXX: it looks like if we get a random unsolicited reply
+	 * from somewhere, we'll silently switch to that server
+	 * address, regardless of merit.
+	 *
+	 * 1. If we have a foo.ypservers file the address should be
+	 * checked against it and rejected if it's not one of the
+	 * addresses of one of the listed hostnames. Note that it
+	 * might not be the same address we sent to; even fairly smart
+	 * UDP daemons don't always handle multihomed hosts correctly
+	 * and we can't expect sunrpc code to do anything intelligent
+	 * at all.
+	 *
+	 * 2. If we're in broadcast mode the address should be
+	 * checked against the local addresses and netmasks so we
+	 * don't accept responses from Mars.
+	 *
+	 * 2a. If we're in broadcast mode and we've been ypset, we
+	 * should not accept anything else until we drop the ypset
+	 * state for not responding.
+	 *
+	 * 3. Either way we should not accept a response from an
+	 * arbitrary host unless we don't currently have a binding.
+	 * (This is now fixed above.)
+	 *
+	 * Note that for a random unsolicited reply to work it has to
+	 * carry the XID of one of the domains we know about; but
+	 * those values are predictable.
+	 */
 	(void)memcpy(&dom->dom_server_addr, raddrp,
 	    sizeof(dom->dom_server_addr));
 	/* recheck binding in 60 seconds */
 	dom->dom_checktime = time(NULL) + 60;
-	dom->dom_alive = 1;
+	dom->dom_state = DOM_ALIVE;
+
+	/* Clear the dead/backoff state. */
+	dom->dom_losttime = 0;
+	dom->dom_backofftime = 10;
+
+	if (is_ypset == 0) {
+		yp_log(LOG_NOTICE, "Domain %s is alive; server %s",
+		       dom->dom_name,
+		       inet_ntoa(dom->dom_server_addr.sin_addr));
+	}
+
+	/*
+	 * Generate a new binding file. If this fails, forget about it.
+	 * (But we keep the binding and we'll report it to anyone who
+	 * asks via the ypbind service.) XXX: this will interact badly,
+	 * maybe very badly, with the code in HEURISTIC.
+	 *
+	 * Note that makelock() doesn't log on failure.
+	 */
 
 	if (dom->dom_lockfd != -1)
 		(void)close(dom->dom_lockfd);
@@ -406,10 +649,6 @@ rpc_received(char *dom_name, struct sock
 	if ((fd = makelock(dom)) == -1)
 		return;
 
-	/*
-	 * ok, if BINDINGDIR exists, and we can create the binding file,
-	 * then write to it..
-	 */
 	dom->dom_lockfd = fd;
 
 	iov[0].iov_base = &(udptransp->xp_port);
@@ -436,6 +675,10 @@ rpc_received(char *dom_name, struct sock
 	}
 }
 
+/*
+ * The NULL call: do nothing. This is obliged to exist because of
+ * sunrpc silliness.
+ */
 static void *
 /*ARGSUSED*/
 ypbindproc_null_2(SVCXPRT *transp, void *argp)
@@ -447,6 +690,9 @@ ypbindproc_null_2(SVCXPRT *transp, void 
 	return (void *)&res;
 }
 
+/*
+ * The DOMAIN call: look up the ypserver for a specified domain.
+ */
 static void *
 /*ARGSUSED*/
 ypbindproc_domain_2(SVCXPRT *transp, void *argp)
@@ -458,12 +704,23 @@ ypbindproc_domain_2(SVCXPRT *transp, voi
 	int count;
 
 	DPRINTF("ypbindproc_domain_2 %s\n", arg);
+
+	/* Reject invalid domains. */
 	if (_yp_invalid_domain(arg))
 		return NULL;
 
 	(void)memset(&res, 0, sizeof res);
 	res.ypbind_status = YPBIND_FAIL_VAL;
 
+	/*
+	 * Look for the domain. XXX: Behave erratically if we have
+	 * more than 100 domains. The intent here is to avoid allowing
+	 * arbitrary incoming requests to create more than 100
+	 * domains; but this logic means that if we legitimately have
+	 * more than 100 (e.g. via ypset) we'll only actually bind the
+	 * first 100 and the rest will fail. The test on 'count' should
+	 * be moved further down.
+	 */
 	for (count = 0, dom = domains;
 	    dom != NULL;
 	    dom = dom->dom_next, count++) {
@@ -473,6 +730,16 @@ ypbindproc_domain_2(SVCXPRT *transp, voi
 			break;
 	}
 
+	/*
+	 * If the domain doesn't exist, create it, then fail the call
+	 * because we have no information yet.
+	 *
+	 * Set "check" so that checkwork() will run and look for a
+	 * server.
+	 *
+	 * XXX: like during startup there's a spurious call to
+	 * removelock() after domain_create().
+	 */
 	if (dom == NULL) {
 		dom = domain_create(arg);
 		removelock(dom);
@@ -481,14 +748,30 @@ ypbindproc_domain_2(SVCXPRT *transp, voi
 		return NULL;
 	}
 
-	if (dom->dom_alive == 0) {
-		DPRINTF("dead domain %s\n", arg);
+	if (dom->dom_state == DOM_NEW) {
+		DPRINTF("new domain %s\n", arg);
 		return NULL;
 	}
 
 #ifdef HEURISTIC
+	/*
+	 * Keep track of the last time we were explicitly asked about
+	 * this domain. If it happens a lot, force a ping. This works
+	 * (or "works") because we only get asked specifically when
+	 * things aren't going; otherwise the client code in libc and
+	 * elsewhere uses the binding file.
+	 *
+	 * Note: HEURISTIC is enabled by default.
+	 *
+	 * dholland 20140609: I think this is part of the mechanism
+	 * that causes ypbind to spam. I'm changing this logic so it
+	 * only triggers when the state is DOM_ALIVE: if the domain
+	 * is new, lost, or dead we shouldn't send more requests than
+	 * the ones already scheduled, and if we're already in the
+	 * middle of pinging there's no point doing it again.
+	 */
 	(void)time(&now);
-	if (now < dom->dom_asktime + 5) {
+	if (dom->dom_state == DOM_ALIVE && now < dom->dom_asktime + 5) {
 		/*
 		 * Hmm. More than 2 requests in 5 seconds have indicated
 		 * that my binding is possibly incorrect.
@@ -514,6 +797,19 @@ ypbindproc_domain_2(SVCXPRT *transp, voi
 	return &res;
 }
 
+/*
+ * The SETDOM call: ypset.
+ *
+ * Unless -ypsetme was given on the command line, this is rejected;
+ * even then it's only allowed from localhost unless -ypset was
+ * given on the command line.
+ *
+ * Allowing anyone anywhere to ypset you (and therefore provide your
+ * password file and such) is a horrible thing and it isn't clear to
+ * me why this functionality even exists.
+ *
+ * ypset from localhost has some but limited utility.
+ */
 static void *
 ypbindproc_setdom_2(SVCXPRT *transp, void *argp)
 {
@@ -525,6 +821,10 @@ ypbindproc_setdom_2(SVCXPRT *transp, voi
 	fromsin = svc_getcaller(transp);
 	DPRINTF("ypbindproc_setdom_2 from %s\n", inet_ntoa(fromsin->sin_addr));
 
+	/*
+	 * Reject unless enabled.
+	 */
+
 	if (allow_any_ypset) {
 		/* nothing */
 	} else if (allow_local_ypset) {
@@ -538,16 +838,27 @@ ypbindproc_setdom_2(SVCXPRT *transp, voi
 		return NULL;
 	}
 
+	/* Make a "security" check. */
 	if (ntohs(fromsin->sin_port) >= IPPORT_RESERVED) {
 		DPRINTF("ypset from unprivileged port denied\n");
 		return &res;
 	}
 
+	/* Ignore requests we don't understand. */
 	if (sd->ypsetdom_vers != YPVERS) {
 		DPRINTF("ypset with wrong version denied\n");
 		return &res;
 	}
 
+	/*
+	 * Fetch the arguments out of the xdr-decoded blob and call
+	 * rpc_received(), setting FORCE so that the domain will be
+	 * created if we don't already know about it, and also saying
+	 * that it's actually a ypset.
+	 *
+	 * Effectively we're telling rpc_received() that we got an
+	 * RPC response from the server specified by ypset.
+	 */
 	(void)memset(&bindsin, 0, sizeof bindsin);
 	bindsin.sin_family = AF_INET;
 	bindsin.sin_len = sizeof(bindsin);
@@ -561,6 +872,13 @@ ypbindproc_setdom_2(SVCXPRT *transp, voi
 	return &res;
 }
 
+/*
+ * Dispatcher for the ypbind service.
+ *
+ * There are three calls: NULL, which does nothing, DOMAIN, which
+ * gets the binding for a particular domain, and SETDOM, which
+ * does ypset.
+ */
 static void
 ypbindprog_2(struct svc_req *rqstp, register SVCXPRT *transp)
 {
@@ -622,6 +940,12 @@ ypbindprog_2(struct svc_req *rqstp, regi
 	return;
 }
 
+/*
+ * Set up sunrpc stuff.
+ *
+ * This sets up the ypbind service (both TCP and UDP) and also opens
+ * the sockets we use for talking to ypservers.
+ */
 static void
 sunrpc_setup(void)
 {
@@ -670,6 +994,10 @@ sunrpc_setup(void)
 ////////////////////////////////////////////////////////////
 // operational logic
 
+/*
+ * Broadcast an RPC packet to hopefully contact some servers for a
+ * domain.
+ */
 static int
 broadcast(char *buf, int outlen)
 {
@@ -720,6 +1048,13 @@ broadcast(char *buf, int outlen)
 	return (0);
 }
 
+/*
+ * Send an RPC packet to all the configured (in /var/yp/foo.ypservers)
+ * servers for a domain.
+ *
+ * XXX: we should read and parse the file up front and reread it only
+ * if it changes.
+ */
 static int
 direct(char *buf, int outlen, struct domain *dom)
 {
@@ -801,6 +1136,11 @@ direct(char *buf, int outlen, struct dom
 	return 0;
 }
 
+/*
+ * Send an RPC packet to the server that's been selected with ypset.
+ * (This is only used when in broadcast mode and when ypset is
+ * allowed.)
+ */
 static int
 direct_set(char *buf, int outlen, struct domain *dom)
 {
@@ -857,6 +1197,9 @@ direct_set(char *buf, int outlen, struct
 	return 0;
 }
 
+/*
+ * Receive and dispatch packets on the general RPC socket.
+ */
 static enum clnt_stat
 handle_replies(void)
 {
@@ -911,6 +1254,9 @@ try_again:
 	return RPC_SUCCESS;
 }
 
+/*
+ * Receive and dispatch packets on the ping socket.
+ */
 static enum clnt_stat
 handle_ping(void)
 {
@@ -965,6 +1311,14 @@ try_again:
 	return RPC_SUCCESS;
 }
 
+/*
+ * Contact all known servers for a domain in the hopes that one of
+ * them's awake. Also, if we previously had a binding but it timed
+ * out, try the portmapper on that host in case ypserv moved ports for
+ * some reason.
+ *
+ * As a side effect, wipe out any existing binding file.
+ */
 static int
 nag_servers(struct domain *dom)
 {
@@ -1023,7 +1377,7 @@ nag_servers(struct domain *dom)
 		removelock(dom);
 	}
 
-	if (dom->dom_alive == 2) {
+	if (dom->dom_state == DOM_PINGING || dom->dom_state == DOM_LOST) {
 		/*
 		 * This resolves the following situation:
 		 * ypserver on other subnet was once bound,
@@ -1058,6 +1412,9 @@ nag_servers(struct domain *dom)
 	return -1;
 }
 
+/*
+ * Send a ping message to a domain's current ypserver.
+ */
 static int
 ping(struct domain *dom)
 {
@@ -1107,7 +1464,6 @@ ping(struct domain *dom)
 	}
 	AUTH_DESTROY(rpcua);
 
-	dom->dom_alive = 2;
 	DPRINTF("ping %x\n", dom->dom_server_addr.sin_addr.s_addr);
 
 	if (sendto(pingsock, buf, outlen, 0, 
@@ -1119,14 +1475,20 @@ ping(struct domain *dom)
 }
 
 /*
- * State transition is done like this: 
+ * Scan for timer-based work to do.
+ *
+ * If the domain is currently alive, ping the server we're currently
+ * bound to. Otherwise, try all known servers and/or broadcast for a
+ * server via nag_servers.
+ *
+ * Try again in five seconds.
  *
- * STATE	EVENT		ACTION			NEWSTATE	TIMEOUT
- * no binding	timeout		broadcast 		no binding	5 sec
- * no binding	answer		--			binding		60 sec
- * binding	timeout		ping server		checking	5 sec
- * checking	timeout		ping server + broadcast	checking	5 sec
- * checking	answer		--			binding		60 sec
+ * If we get back here and the state is still DOM_PINGING, it means
+ * we didn't receive a ping response within five seconds. Declare the
+ * binding lost. If the binding is already lost, and it's been lost
+ * for 60 seconds, switch to DOM_DEAD and begin exponential backoff.
+ * The exponential backoff starts at 10 seconds and tops out at one
+ * hour; see above.
  */
 static void
 checkwork(void)
@@ -1138,20 +1500,106 @@ checkwork(void)
 
 	(void)time(&t);
 	for (dom = domains; dom != NULL; dom = dom->dom_next) {
-		if (dom->dom_checktime < t) {
-			if (dom->dom_alive == 1)
-				(void)ping(dom);
-			else
-				(void)nag_servers(dom);
-			(void)time(&t);
+		if (dom->dom_checktime >= t) {
+			continue;
+		}
+		switch (dom->dom_state) {
+		    case DOM_NEW:
+			/* XXX should be a timeout for this state */
+			dom->dom_checktime = t + 5;
+			(void)nag_servers(dom);
+			break;
+
+		    case DOM_ALIVE:
+			dom->dom_state = DOM_PINGING;
+			dom->dom_checktime = t + 5;
+			(void)ping(dom);
+			break;
+
+		    case DOM_PINGING:
+			dom->dom_state = DOM_LOST;
+			dom->dom_losttime = t;
 			dom->dom_checktime = t + 5;
+			yp_log(LOG_NOTICE, "Domain %s lost its binding to "
+			       "server %s", dom->dom_name,
+			       inet_ntoa(dom->dom_server_addr.sin_addr));
+			(void)nag_servers(dom);
+			break;
+
+		    case DOM_LOST:
+			if (t > dom->dom_losttime + 60) {
+				dom->dom_state = DOM_DEAD;
+				dom->dom_backofftime = 10;
+				yp_log(LOG_NOTICE, "Domain %s dead; "
+				       "going to exponential backoff",
+				       dom->dom_name);
+			}
+			dom->dom_checktime = t + 5;
+			(void)nag_servers(dom);
+			break;
+
+		    case DOM_DEAD:
+			dom->dom_checktime = t + dom->dom_backofftime;
+			backoff(&dom->dom_backofftime);
+			(void)nag_servers(dom);
+			break;
 		}
+		/* re-fetch the time in case we hung sending packets */
+		(void)time(&t);
+	}
+}
+
+/*
+ * Process a hangup signal.
+ *
+ * Do an extra nag_servers() for any domains that are DEAD. This way
+ * if you know things are back up you can restore service by sending
+ * ypbind a SIGHUP rather than waiting for the timeout period.
+ */
+static void
+dohup(void)
+{
+	struct domain *dom;
+
+	hupped = 0;
+	for (dom = domains; dom != NULL; dom = dom->dom_next) {
+		if (dom->dom_state == DOM_DEAD) {
+			(void)nag_servers(dom);
+		}
+	}
+}
+
+/*
+ * Receive a hangup signal.
+ */
+static void
+hup(int __unused sig)
+{
+	hupped = 1;
+}
+
+/*
+ * Initialize hangup processing.
+ */
+static void
+starthup(void)
+{
+	struct sigaction sa;
+
+	sa.sa_handler = hup;
+	sigemptyset(&sa.sa_mask);
+	sa.sa_flags = SA_RESTART;
+	if (sigaction(SIGHUP, &sa, NULL) == -1) {
+		err(1, "sigaction");
 	}
 }
 
 ////////////////////////////////////////////////////////////
 // main
 
+/*
+ * Usage message.
+ */
 __dead static void
 usage(void)
 {
@@ -1166,23 +1614,25 @@ usage(void)
 	exit(1);
 }
 
+/*
+ * Main.
+ */
 int
 main(int argc, char *argv[])
 {
 	struct timeval tv;
 	fd_set fdsr;
 	int width, lockfd;
-	int evil = 0;
+	int started = 0;
+	char *domainname;
 
 	setprogname(argv[0]);
-	(void)yp_get_default_domain(&domainname);
-	if (domainname[0] == '\0')
-		errx(1, "Domainname not set. Aborting.");
-	if (_yp_invalid_domain(domainname))
-		errx(1, "Invalid domainname: %s", domainname);
 
-	default_ypbindmode = YPBIND_DIRECT;
+	/*
+	 * Process arguments.
+	 */
 
+	default_ypbindmode = YPBIND_DIRECT;
 	while (--argc) {
 		++argv;
 		if (!strcmp("-insecure", *argv)) {
@@ -1204,27 +1654,72 @@ main(int argc, char *argv[])
 		}
 	}
 
-	/* initialise syslog */
+	/*
+	 * Look up the name of the default domain.
+	 */
+
+	(void)yp_get_default_domain(&domainname);
+	if (domainname[0] == '\0')
+		errx(1, "Domainname not set. Aborting.");
+	if (_yp_invalid_domain(domainname))
+		errx(1, "Invalid domainname: %s", domainname);
+
+	/*
+	 * Start things up.
+	 */
+
+	/* Open the system log. */
 	openlog("ypbind", LOG_PERROR | LOG_PID, LOG_DAEMON);
 
-	/* acquire ypbind.lock */
+	/* Acquire /var/run/ypbind.lock. */
 	lockfd = open_locked(_PATH_YPBIND_LOCK, O_CREAT|O_RDWR|O_TRUNC, 0644);
 	if (lockfd == -1)
 		err(1, "Cannot create %s", _PATH_YPBIND_LOCK);
 
-	/* initialize sunrpc stuff */
+	/* Accept hangups. */
+	starthup();
+
+	/* Initialize sunrpc stuff. */
 	sunrpc_setup();
 
-	/* blow away old bindings in BINDINGDIR */
+	/* Clean out BINDINGDIR, deleting all existing (now stale) bindings */
 	if (purge_bindingdir(BINDINGDIR) < 0)
-		errx(1, "unable to purge old bindings from %s", BINDINGDIR);
+		errx(1, "Unable to purge old bindings from %s", BINDINGDIR);
+
+	/*
+	 * We start with one binding, for the default domain. It starts
+	 * out "unsuccessful".
+	 *
+	 * XXX: domain_create adds the new domain to 'domains' (the
+	 * global linked list) and therefore we shouldn't assign
+	 * 'domains' again on return.
+	 */
 
-	/* build initial domain binding, make it "unsuccessful" */
 	domains = domain_create(domainname);
+
+	/*
+	 * Delete the lock for the default domain again, just in case something
+	 * magically caused it to appear since purge_bindingdir() was called.
+	 * XXX: this is useless and redundant; remove it.
+	 */
 	removelock(domains);
 
+	/*
+	 * Main loop. Wake up at least once a second and check for
+	 * timer-based work to do (checkwork) and also handle incoming
+	 * responses from ypservers and any RPCs made to the ypbind
+	 * service.
+	 *
+	 * There are two sockets used for ypserver traffic: one for
+	 * pings and one for everything else. These call XDR manually
+	 * for encoding and are *not* dispatched via the sunrpc
+	 * libraries.
+	 *
+	 * The ypbind service *is* dispatched via the sunrpc libraries.
+	 * svc_getreqset() does whatever internal muck and ultimately
+	 * ypbind service calls arrive at ypbindprog_2().
+	 */
 	checkwork();
-
 	for (;;) {
 		width = svc_maxfd;
 		if (rpcsock > width)
@@ -1240,24 +1735,55 @@ main(int argc, char *argv[])
 
 		switch (select(width, &fdsr, NULL, NULL, &tv)) {
 		case 0:
+			/* select timed out - check for timer-based work */
+			if (hupped) {
+				dohup();
+			}
 			checkwork();
 			break;
 		case -1:
-			yp_log(LOG_WARNING, "select: %s", strerror(errno));
+			if (hupped) {
+				dohup();
+			}
+			if (errno != EINTR) {
+				yp_log(LOG_WARNING, "select: %s",
+				       strerror(errno));
+			}
 			break;
 		default:
+			if (hupped) {
+				dohup();
+			}
+			/* incoming packets of our own; read them */
 			if (FD_ISSET(rpcsock, &fdsr))
 				(void)handle_replies();
 			if (FD_ISSET(pingsock, &fdsr))
 				(void)handle_ping();
+
+			/* read any incoming packets for the ypbind service */
 			svc_getreqset(&fdsr);
+
+			/*
+			 * Only check for timer-based work if
+			 * something in the incoming RPC logic said
+			 * to. This might be just a hack to avoid
+			 * scanning the list unnecessarily, but I
+			 * suspect it's also a hack to cover wrong
+			 * state logic. - dholland 20140609
+			 */
 			if (check)
 				checkwork();
 			break;
 		}
 
-		if (!evil && domains->dom_alive) {
-			evil = 1;
+		/*
+		 * Defer daemonizing until the default domain binds
+		 * successfully. XXX: there seems to be no timeout
+		 * on this, which means that if the default domain
+		 * is dead upstream boot will hang indefinitely.
+		 */
+		if (!started && domains->dom_state == DOM_ALIVE) {
+			started = 1;
 #ifdef DEBUG
 			if (!debug)
 #endif

Reply via email to