hastd

Pawel Jakub Dawidek Thu, 05 Aug 2010 12:17:17 -0700

Author: pjd
Date: Thu Aug  5 19:16:31 2010
New Revision: 210886
URL: http://svn.freebsd.org/changeset/base/210886


Log:
  Implement configuration reload on SIGHUP. This includes:
  - Load added resources.
  - Stop and forget removed resources.
  - Update modified resources in the least intrusive way, i.e. don't touch
    /dev/hast/<name> unless the path to the local component or the provider
    name was modified.
  
  Obtained from:        Wheel Systems Sp. z o.o. http://www.wheelsystems.com
  MFC after:    1 month

Modified:
  head/sbin/hastd/hastd.c
  head/sbin/hastd/hastd.h
  head/sbin/hastd/primary.c

Modified: head/sbin/hastd/hastd.c
==============================================================================
--- head/sbin/hastd/hastd.c     Thu Aug  5 19:12:35 2010        (r210885)
+++ head/sbin/hastd/hastd.c     Thu Aug  5 19:16:31 2010        (r210886)
@@ -1,5 +1,6 @@
 /*-
  * Copyright (c) 2009-2010 The FreeBSD Foundation
+ * Copyright (c) 2010 Pawel Jakub Dawidek <p...@freebsd.org>
  * All rights reserved.
  *
  * This software was developed by Pawel Jakub Dawidek under sponsorship from
@@ -57,13 +58,13 @@ __FBSDID("$FreeBSD$");
 #include "subr.h"
 
 /* Path to configuration file. */
-static const char *cfgpath = HAST_CONFIG;
+const char *cfgpath = HAST_CONFIG;
 /* Hastd configuration. */
 static struct hastd_config *cfg;
 /* Was SIGCHLD signal received? */
 static bool sigchld_received = false;
 /* Was SIGHUP signal received? */
-static bool sighup_received = false;
+bool sighup_received = false;
 /* Was SIGINT or SIGTERM signal received? */
 bool sigexit_received = false;
 /* PID file handle. */
@@ -169,12 +170,203 @@ child_exit(void)
        }
 }
 
+static bool
+resource_needs_restart(const struct hast_resource *res0,
+    const struct hast_resource *res1)
+{
+
+       assert(strcmp(res0->hr_name, res1->hr_name) == 0);
+
+       if (strcmp(res0->hr_provname, res1->hr_provname) != 0)
+               return (true);
+       if (strcmp(res0->hr_localpath, res1->hr_localpath) != 0)
+               return (true);
+       if (res0->hr_role == HAST_ROLE_INIT ||
+           res0->hr_role == HAST_ROLE_SECONDARY) {
+               if (strcmp(res0->hr_remoteaddr, res1->hr_remoteaddr) != 0)
+                       return (true);
+               if (res0->hr_replication != res1->hr_replication)
+                       return (true);
+               if (res0->hr_timeout != res1->hr_timeout)
+                       return (true);
+       }
+       return (false);
+}
+
+static bool
+resource_needs_reload(const struct hast_resource *res0,
+    const struct hast_resource *res1)
+{
+
+       assert(strcmp(res0->hr_name, res1->hr_name) == 0);
+       assert(strcmp(res0->hr_provname, res1->hr_provname) == 0);
+       assert(strcmp(res0->hr_localpath, res1->hr_localpath) == 0);
+
+       if (res0->hr_role != HAST_ROLE_PRIMARY)
+               return (false);
+
+       if (strcmp(res0->hr_remoteaddr, res1->hr_remoteaddr) != 0)
+               return (true);
+       if (res0->hr_replication != res1->hr_replication)
+               return (true);
+       if (res0->hr_timeout != res1->hr_timeout)
+               return (true);
+       return (false);
+}
+
 static void
 hastd_reload(void)
 {
+       struct hastd_config *newcfg;
+       struct hast_resource *nres, *cres, *tres;
+       uint8_t role;
+
+       pjdlog_info("Reloading configuration...");
+
+       newcfg = yy_config_parse(cfgpath, false);
+       if (newcfg == NULL)
+               goto failed;
+
+       /*
+        * Check if control address has changed.
+        */
+       if (strcmp(cfg->hc_controladdr, newcfg->hc_controladdr) != 0) {
+               if (proto_server(newcfg->hc_controladdr,
+                   &newcfg->hc_controlconn) < 0) {
+                       pjdlog_errno(LOG_ERR,
+                           "Unable to listen on control address %s",
+                           newcfg->hc_controladdr);
+                       goto failed;
+               }
+       }
+       /*
+        * Check if listen address has changed.
+        */
+       if (strcmp(cfg->hc_listenaddr, newcfg->hc_listenaddr) != 0) {
+               if (proto_server(newcfg->hc_listenaddr,
+                   &newcfg->hc_listenconn) < 0) {
+                       pjdlog_errno(LOG_ERR, "Unable to listen on address %s",
+                           newcfg->hc_listenaddr);
+                       goto failed;
+               }
+       }
+       /*
+        * Only when both control and listen sockets are successfully
+        * initialized switch them to new configuration.
+        */
+       if (newcfg->hc_controlconn != NULL) {
+               pjdlog_info("Control socket changed from %s to %s.",
+                   cfg->hc_controladdr, newcfg->hc_controladdr);
+               proto_close(cfg->hc_controlconn);
+               cfg->hc_controlconn = newcfg->hc_controlconn;
+               newcfg->hc_controlconn = NULL;
+               strlcpy(cfg->hc_controladdr, newcfg->hc_controladdr,
+                   sizeof(cfg->hc_controladdr));
+       }
+       if (newcfg->hc_listenconn != NULL) {
+               pjdlog_info("Listen socket changed from %s to %s.",
+                   cfg->hc_listenaddr, newcfg->hc_listenaddr);
+               proto_close(cfg->hc_listenconn);
+               cfg->hc_listenconn = newcfg->hc_listenconn;
+               newcfg->hc_listenconn = NULL;
+               strlcpy(cfg->hc_listenaddr, newcfg->hc_listenaddr,
+                   sizeof(cfg->hc_listenaddr));
+       }
+
+       /*
+        * Stop and remove resources that were removed from the configuration.
+        */
+       TAILQ_FOREACH_SAFE(cres, &cfg->hc_resources, hr_next, tres) {
+               TAILQ_FOREACH(nres, &newcfg->hc_resources, hr_next) {
+                       if (strcmp(cres->hr_name, nres->hr_name) == 0)
+                               break;
+               }
+               if (nres == NULL) {
+                       control_set_role(cres, HAST_ROLE_INIT);
+                       TAILQ_REMOVE(&cfg->hc_resources, cres, hr_next);
+                       pjdlog_info("Resource %s removed.", cres->hr_name);
+                       free(cres);
+               }
+       }
+       /*
+        * Move new resources to the current configuration.
+        */
+       TAILQ_FOREACH_SAFE(nres, &newcfg->hc_resources, hr_next, tres) {
+               TAILQ_FOREACH(cres, &cfg->hc_resources, hr_next) {
+                       if (strcmp(cres->hr_name, nres->hr_name) == 0)
+                               break;
+               }
+               if (cres == NULL) {
+                       TAILQ_REMOVE(&newcfg->hc_resources, nres, hr_next);
+                       TAILQ_INSERT_TAIL(&cfg->hc_resources, nres, hr_next);
+                       pjdlog_info("Resource %s added.", nres->hr_name);
+               }
+       }
+       /*
+        * Deal with modified resources.
+        * Depending on what has changed exactly we might want to perform
+        * different actions.
+        *
+        * We do full resource restart in the following situations:
+        * Resource role is INIT or SECONDARY.
+        * Resource role is PRIMARY and path to local component or provider
+        * name has changed.
+        * In case of PRIMARY, the worker process will be killed and restarted,
+        * which also means removing /dev/hast/<name> provider and
+        * recreating it.
+        *
+        * We do just reload (send SIGHUP to worker process) if we act as
+        * PRIMARY, but only remote address, replication mode and timeout
+        * has changed. For those, there is no need to restart worker process.
+        * If PRIMARY receives SIGHUP, it will reconnect if remote address or
+        * replication mode has changed or simply set new timeout if only
+        * timeout has changed.
+        */
+       TAILQ_FOREACH_SAFE(nres, &newcfg->hc_resources, hr_next, tres) {
+               TAILQ_FOREACH(cres, &cfg->hc_resources, hr_next) {
+                       if (strcmp(cres->hr_name, nres->hr_name) == 0)
+                               break;
+               }
+               assert(cres != NULL);
+               if (resource_needs_restart(cres, nres)) {
+                       pjdlog_info("Resource %s configuration was modified, restarting it.",
+                           cres->hr_name);
+                       role = cres->hr_role;
+                       control_set_role(cres, HAST_ROLE_INIT);
+                       TAILQ_REMOVE(&cfg->hc_resources, cres, hr_next);
+                       free(cres);
+                       TAILQ_REMOVE(&newcfg->hc_resources, nres, hr_next);
+                       TAILQ_INSERT_TAIL(&cfg->hc_resources, nres, hr_next);
+                       control_set_role(nres, role);
+               } else if (resource_needs_reload(cres, nres)) {
+                       pjdlog_info("Resource %s configuration was modified, reloading it.",
+                           cres->hr_name);
+                       strlcpy(cres->hr_remoteaddr, nres->hr_remoteaddr,
+                           sizeof(cres->hr_remoteaddr));
+                       cres->hr_replication = nres->hr_replication;
+                       cres->hr_timeout = nres->hr_timeout;
+                       if (cres->hr_workerpid != 0) {
+                               if (kill(cres->hr_workerpid, SIGHUP) < 0) {
+                                       pjdlog_errno(LOG_WARNING,
+                                           "Unable to send SIGHUP to worker process %u",
+                                           (unsigned int)cres->hr_workerpid);
+                               }
+                       }
+               }
+       }
 
-       /* TODO */
-       pjdlog_warning("Configuration reload is not implemented.");
+       yy_config_free(newcfg);
+       pjdlog_info("Configuration reloaded successfully.");
+       return;
+failed:
+       if (newcfg != NULL) {
+               if (newcfg->hc_controlconn != NULL)
+                       proto_close(newcfg->hc_controlconn);
+               if (newcfg->hc_listenconn != NULL)
+                       proto_close(newcfg->hc_listenconn);
+               yy_config_free(newcfg);
+       }
+       pjdlog_warning("Configuration not reloaded.");
 }
 
 static void
@@ -402,10 +594,6 @@ main_loop(void)
        fd_set rfds, wfds;
        int cfd, lfd, maxfd, ret;
 
-       cfd = proto_descriptor(cfg->hc_controlconn);
-       lfd = proto_descriptor(cfg->hc_listenconn);
-       maxfd = cfd > lfd ? cfd : lfd;
-
        for (;;) {
                if (sigchld_received) {
                        sigchld_received = false;
@@ -416,6 +604,10 @@ main_loop(void)
                        hastd_reload();
                }
 
+               cfd = proto_descriptor(cfg->hc_controlconn);
+               lfd = proto_descriptor(cfg->hc_listenconn);
+               maxfd = cfd > lfd ? cfd : lfd;
+
                /* Setup descriptors for select(2). */
                FD_ZERO(&rfds);
                FD_SET(cfd, &rfds);

Modified: head/sbin/hastd/hastd.h
==============================================================================
--- head/sbin/hastd/hastd.h     Thu Aug  5 19:12:35 2010        (r210885)
+++ head/sbin/hastd/hastd.h     Thu Aug  5 19:16:31 2010        (r210886)
@@ -39,7 +39,8 @@
 
 #include "hast.h"
 
-extern bool sigexit_received;
+extern const char *cfgpath;
+extern bool sigexit_received, sighup_received;
 extern struct pidfh *pfh;
 
 void hastd_primary(struct hast_resource *res);

Modified: head/sbin/hastd/primary.c
==============================================================================
--- head/sbin/hastd/primary.c   Thu Aug  5 19:12:35 2010        (r210885)
+++ head/sbin/hastd/primary.c   Thu Aug  5 19:16:31 2010        (r210886)
@@ -1,5 +1,6 @@
 /*-
  * Copyright (c) 2009 The FreeBSD Foundation
+ * Copyright (c) 2010 Pawel Jakub Dawidek <p...@freebsd.org>
  * All rights reserved.
  *
  * This software was developed by Pawel Jakub Dawidek under sponsorship from
@@ -65,6 +66,9 @@ __FBSDID("$FreeBSD$");
 #include "subr.h"
 #include "synch.h"
 
+/* There is only one remote component for now. */
+#define        ISREMOTE(no)    ((no) == 1)
+
 struct hio {
        /*
         * Number of components we are still waiting for.
@@ -424,6 +428,7 @@ init_environment(struct hast_resource *r
         */
        signal(SIGINT, sighandler);
        signal(SIGTERM, sighandler);
+       signal(SIGHUP, sighandler);
 }
 
 static void
@@ -1713,6 +1718,9 @@ sighandler(int sig)
        case SIGTERM:
                sigexit_received = true;
                break;
+       case SIGHUP:
+               sighup_received = true;
+               break;
        default:
                assert(!"invalid condition");
        }
@@ -1726,6 +1734,114 @@ sighandler(int sig)
                mtx_unlock(&hio_guard_lock);
 }
 
+static void
+config_reload(void)
+{
+       struct hastd_config *newcfg;
+       struct hast_resource *res;
+       unsigned int ii, ncomps;
+       int modified;
+
+       pjdlog_info("Reloading configuration...");
+
+       ncomps = HAST_NCOMPONENTS;
+
+       newcfg = yy_config_parse(cfgpath, false);
+       if (newcfg == NULL)
+               goto failed;
+
+       TAILQ_FOREACH(res, &newcfg->hc_resources, hr_next) {
+               if (strcmp(res->hr_name, gres->hr_name) == 0)
+                       break;
+       }
+       /*
+        * If resource was removed from the configuration file, resource
+        * name, provider name or path to local component was modified we
+        * shouldn't be here. This means that someone modified configuration
+        * file and sent SIGHUP to us instead of main hastd process.
+        * Log advice and ignore the signal.
+        */
+       if (res == NULL || strcmp(gres->hr_name, res->hr_name) != 0 ||
+           strcmp(gres->hr_provname, res->hr_provname) != 0 ||
+           strcmp(gres->hr_localpath, res->hr_localpath) != 0) {
+               pjdlog_warning("To reload configuration send SIGHUP to the main hastd process (pid %u).",
+                   (unsigned int)getppid());
+               goto failed;
+       }
+
+#define MODIFIED_REMOTEADDR    0x1
+#define MODIFIED_REPLICATION   0x2
+#define MODIFIED_TIMEOUT       0x4
+       modified = 0;
+       if (strcmp(gres->hr_remoteaddr, res->hr_remoteaddr) != 0) {
+               /*
+                * Don't copy res->hr_remoteaddr to gres just yet.
+                * We want remote_close() to log disconnect from the old
+                * addresses, not from the new ones.
+                */
+               modified |= MODIFIED_REMOTEADDR;
+       }
+       if (gres->hr_replication != res->hr_replication) {
+               gres->hr_replication = res->hr_replication;
+               modified |= MODIFIED_REPLICATION;
+       }
+       if (gres->hr_timeout != res->hr_timeout) {
+               gres->hr_timeout = res->hr_timeout;
+               modified |= MODIFIED_TIMEOUT;
+       }
+       /*
+        * If only timeout was modified we only need to change it without
+        * reconnecting.
+        */
+       if (modified == MODIFIED_TIMEOUT) {
+               for (ii = 0; ii < ncomps; ii++) {
+                       if (!ISREMOTE(ii))
+                               continue;
+                       rw_rlock(&hio_remote_lock[ii]);
+                       if (!ISCONNECTED(gres, ii)) {
+                               rw_unlock(&hio_remote_lock[ii]);
+                               continue;
+                       }
+                       rw_unlock(&hio_remote_lock[ii]);
+                       if (proto_timeout(gres->hr_remotein,
+                           gres->hr_timeout) < 0) {
+                               pjdlog_errno(LOG_WARNING,
+                                   "Unable to set connection timeout");
+                       }
+                       if (proto_timeout(gres->hr_remoteout,
+                           gres->hr_timeout) < 0) {
+                               pjdlog_errno(LOG_WARNING,
+                                   "Unable to set connection timeout");
+                       }
+               }
+       } else {
+               for (ii = 0; ii < ncomps; ii++) {
+                       if (!ISREMOTE(ii))
+                               continue;
+                       remote_close(gres, ii);
+               }
+               if (modified & MODIFIED_REMOTEADDR) {
+                       strlcpy(gres->hr_remoteaddr, res->hr_remoteaddr,
+                           sizeof(gres->hr_remoteaddr));
+               }
+       }
+#undef MODIFIED_REMOTEADDR
+#undef MODIFIED_REPLICATION
+#undef MODIFIED_TIMEOUT
+
+       pjdlog_info("Configuration reloaded successfully.");
+       return;
+failed:
+       if (newcfg != NULL) {
+               if (newcfg->hc_controlconn != NULL)
+                       proto_close(newcfg->hc_controlconn);
+               if (newcfg->hc_listenconn != NULL)
+                       proto_close(newcfg->hc_listenconn);
+               yy_config_free(newcfg);
+       }
+       pjdlog_warning("Configuration not reloaded.");
+}
+
 /*
  * Thread guards remote connections and reconnects when needed, handles
  * signals, etc.
@@ -1739,14 +1855,16 @@ guard_thread(void *arg)
        int timeout;
 
        ncomps = HAST_NCOMPONENTS;
-       /* The is only one remote component for now. */
-#define        ISREMOTE(no)    ((no) == 1)
 
        for (;;) {
                if (sigexit_received) {
                        primary_exitx(EX_OK,
                            "Termination signal received, exiting.");
                }
+               if (sighup_received) {
+                       sighup_received = false;
+                       config_reload();
+               }
                /*
                 * If all the connection will be fine, we will sleep until
                 * someone wakes us up.
@@ -1810,7 +1928,6 @@ guard_thread(void *arg)
                (void)cv_timedwait(&hio_guard_cond, &hio_guard_lock, timeout);
                mtx_unlock(&hio_guard_lock);
        }
-#undef ISREMOTE
        /* NOTREACHED */
        return (NULL);
 }
_______________________________________________
svn-src-all@freebsd.org mailing list
http://lists.freebsd.org/mailman/listinfo/svn-src-all
To unsubscribe, send any mail to "svn-src-all-unsubscr...@freebsd.org"

svn commit: r210886 - head/sbin/hastd

Reply via email to