Package: release.debian.org
Severity: normal
User: release.debian....@packages.debian.org
Usertags: unblock

Please unblock package sbd

The new version contains upstream fixes for some use cases and updates
the package tests to work with the Corosync/Pacemaker versions in buster.

unblock sbd/1.4.0-18-g5e3283c-1

-- System Information:
Debian Release: buster/sid
  APT prefers unstable
  APT policy: (500, 'unstable')
Architecture: amd64 (x86_64)

Kernel: Linux 4.19.0-3-amd64 (SMP w/8 CPU cores)
Locale: LANG=en_US.UTF-8, LC_CTYPE=en_US.UTF-8 (charmap=UTF-8), 
LANGUAGE=en_US.UTF-8 (charmap=UTF-8)
Shell: /bin/sh linked to /bin/bash
Init: systemd (via /run/systemd/system)
LSM: AppArmor: enabled
diff -Nru sbd-1.4.0/debian/changelog sbd-1.4.0-18-g5e3283c/debian/changelog
--- sbd-1.4.0/debian/changelog  2019-01-15 09:25:28.000000000 +0100
+++ sbd-1.4.0-18-g5e3283c/debian/changelog      2019-05-08 10:55:44.000000000 
+0200
@@ -1,3 +1,12 @@
+sbd (1.4.0-18-g5e3283c-1) unstable; urgency=medium
+
+  * New upstream version 1.4.0-18-g5e3283c (Closes: #925821)
+  * debian/sbd.lintian-overrides: update manpage line
+  * debian/patches: use /run for PIDFile location
+  * debian/tests: update for corosync v3
+
+ -- Valentin Vidic <vvi...@debian.org>  Wed, 08 May 2019 10:55:44 +0200
+
 sbd (1.4.0-1) unstable; urgency=medium
 
   * New upstream version 1.4.0
diff -Nru sbd-1.4.0/debian/patches/pidfile-in-runstatedir.patch 
sbd-1.4.0-18-g5e3283c/debian/patches/pidfile-in-runstatedir.patch
--- sbd-1.4.0/debian/patches/pidfile-in-runstatedir.patch       1970-01-01 
01:00:00.000000000 +0100
+++ sbd-1.4.0-18-g5e3283c/debian/patches/pidfile-in-runstatedir.patch   
2019-05-08 10:55:20.000000000 +0200
@@ -0,0 +1,28 @@
+Description: Use /run for PIDFile location
+ systemd complains if PIDFile uses /var/run
+Author: Valentin Vidic <vvi...@debian.org>
+Last-Update: 2019-04-26
+---
+This patch header follows DEP-3: http://dep.debian.net/deps/dep3/
+--- a/src/sbd.service.in
++++ b/src/sbd.service.in
+@@ -10,7 +10,7 @@
+ 
+ [Service]
+ Type=forking
+-PIDFile=@localstatedir@/run/sbd.pid
++PIDFile=@runstatedir@/sbd.pid
+ EnvironmentFile=-@CONFIGDIR@/sbd
+ ExecStart=@sbindir@/sbd $SBD_OPTS -p @localstatedir@/run/sbd.pid watch
+ ExecStop=@bindir@/kill -TERM $MAINPID
+--- a/src/sbd_remote.service.in
++++ b/src/sbd_remote.service.in
+@@ -8,7 +8,7 @@
+ 
+ [Service]
+ Type=forking
+-PIDFile=@localstatedir@/run/sbd.pid
++PIDFile=@runstatedir@/sbd.pid
+ EnvironmentFile=-@CONFIGDIR@/sbd
+ ExecStart=@sbindir@/sbd $SBD_OPTS -p @localstatedir@/run/sbd.pid watch
+ ExecStop=@bindir@/kill -TERM $MAINPID
diff -Nru sbd-1.4.0/debian/patches/series 
sbd-1.4.0-18-g5e3283c/debian/patches/series
--- sbd-1.4.0/debian/patches/series     1970-01-01 01:00:00.000000000 +0100
+++ sbd-1.4.0-18-g5e3283c/debian/patches/series 2019-05-08 10:55:20.000000000 
+0200
@@ -0,0 +1 @@
+pidfile-in-runstatedir.patch
diff -Nru sbd-1.4.0/debian/sbd.lintian-overrides 
sbd-1.4.0-18-g5e3283c/debian/sbd.lintian-overrides
--- sbd-1.4.0/debian/sbd.lintian-overrides      2019-01-15 09:12:00.000000000 
+0100
+++ sbd-1.4.0-18-g5e3283c/debian/sbd.lintian-overrides  2019-05-08 
10:55:01.000000000 +0200
@@ -1 +1 @@
-manpage-has-errors-from-man usr/share/man/man8/sbd.8.gz 185: warning [p 1, 
8.7i]: can't break line
+manpage-has-errors-from-man usr/share/man/man8/sbd.8.gz 189: warning [p 1, 
8.7i]: can't break line
diff -Nru sbd-1.4.0/debian/tests/control 
sbd-1.4.0-18-g5e3283c/debian/tests/control
--- sbd-1.4.0/debian/tests/control      2019-01-15 09:12:00.000000000 +0100
+++ sbd-1.4.0-18-g5e3283c/debian/tests/control  2019-05-08 10:55:28.000000000 
+0200
@@ -14,10 +14,10 @@
 Restrictions: needs-root, allow-stderr, isolation-machine
 Tests: regression
 
-Depends: @, pacemaker, crmsh
+Depends: @
 Restrictions: needs-root, isolation-machine, breaks-testbed
 Tests: fence-external
 
-Depends: @, pacemaker, crmsh, fence-agents
+Depends: @
 Restrictions: needs-root, isolation-machine, breaks-testbed
 Tests: fence-agents
diff -Nru sbd-1.4.0/debian/tests/fence-agents 
sbd-1.4.0-18-g5e3283c/debian/tests/fence-agents
--- sbd-1.4.0/debian/tests/fence-agents 2019-01-15 09:12:00.000000000 +0100
+++ sbd-1.4.0-18-g5e3283c/debian/tests/fence-agents     2019-05-08 
10:55:28.000000000 +0200
@@ -14,21 +14,24 @@
 LOOP=$(losetup --find --show $DISK)
 
 echo "=== create ==="
+hostname node1 # must match corosync for fence to work
 sbd -d $LOOP create
-echo "SBD_OPTS='-d $LOOP -W -W'" > /etc/default/sbd
+sed -i "s|^#\\?\\(SBD_DEVICE=\\).*|\\1$LOOP|" /etc/default/sbd
+sed -i "s|^\\(SBD_WATCHDOG_DEV=\\).*|\\1/dev/null|" /etc/default/sbd
 
 echo "=== cluster ==="
-service corosync start
-service pacemaker start
-sleep 60
+apt-get --yes --quiet install pacemaker crmsh fence-agents
 service sbd status
-crm status
 
-echo "=== crm ==="
-HOSTNAME=$(uname -n)
-crm configure primitive sbd stonith:fence_sbd params devices=$LOOP 
plug=$HOSTNAME sbd_path=/usr/sbin/sbd
+echo -n "Waiting for cluster to start... "
+for x in `seq 60 -1 1`; do echo -n "$x "; sleep 1; done; echo
+crm configure primitive sbd stonith:fence_sbd params devices=$LOOP plug=node1 
sbd_path=/usr/sbin/sbd
 crm configure show
 
+echo -n "Waiting for resource to start... "
+for x in `seq 10 -1 1`; do echo -n "$x "; sleep 1; done; echo
+crm status
+
 echo "=== fence ==="
 /tmp/autopkgtest-reboot-prepare fenced
-crm --force node fence $HOSTNAME
+crm --force node fence node1
diff -Nru sbd-1.4.0/debian/tests/fence-external 
sbd-1.4.0-18-g5e3283c/debian/tests/fence-external
--- sbd-1.4.0/debian/tests/fence-external       2019-01-15 09:12:00.000000000 
+0100
+++ sbd-1.4.0-18-g5e3283c/debian/tests/fence-external   2019-05-08 
10:55:28.000000000 +0200
@@ -14,20 +14,24 @@
 LOOP=$(losetup --find --show $DISK)
 
 echo "=== create ==="
+hostname node1 # must match corosync for fence to work
 sbd -d $LOOP create
-echo "SBD_OPTS='-d $LOOP -W -W'" > /etc/default/sbd
+sed -i "s|^#\\?\\(SBD_DEVICE=\\).*|\\1$LOOP|" /etc/default/sbd
+sed -i "s|^\\(SBD_WATCHDOG_DEV=\\).*|\\1/dev/null|" /etc/default/sbd
 
 echo "=== cluster ==="
-service corosync start
-service pacemaker start
-sleep 60
+apt-get --yes --quiet install pacemaker crmsh
 service sbd status
-crm status
 
-echo "=== crm ==="
+echo -n "Waiting for cluster to start... "
+for x in `seq 60 -1 1`; do echo -n "$x "; sleep 1; done; echo
 crm configure primitive sbd stonith:external/sbd params sbd_device=$LOOP
 crm configure show
 
+echo -n "Waiting for resource to start... "
+for x in `seq 10 -1 1`; do echo -n "$x "; sleep 1; done; echo
+crm status
+
 echo "=== fence ==="
 /tmp/autopkgtest-reboot-prepare fenced
-crm --force node fence $(uname -n)
+crm --force node fence node1
diff -Nru sbd-1.4.0/man/sbd.8.pod sbd-1.4.0-18-g5e3283c/man/sbd.8.pod
--- sbd-1.4.0/man/sbd.8.pod     2019-01-14 14:27:27.000000000 +0100
+++ sbd-1.4.0-18-g5e3283c/man/sbd.8.pod 2019-04-16 14:38:22.000000000 +0200
@@ -493,7 +493,7 @@
 introduce an additional single point of failure then.
 
 If the SBD device is not accessible, the daemon will fail to start and
-inhibit openais startup.
+inhibit startup of cluster services.
 
 =item Two devices
 
diff -Nru sbd-1.4.0/src/sbd-cluster.c sbd-1.4.0-18-g5e3283c/src/sbd-cluster.c
--- sbd-1.4.0/src/sbd-cluster.c 2019-01-14 14:27:27.000000000 +0100
+++ sbd-1.4.0-18-g5e3283c/src/sbd-cluster.c     2019-04-16 14:38:22.000000000 
+0200
@@ -174,6 +174,25 @@
     return TRUE;
 }
 
+static void
+cmap_destroy(void)
+{
+    if (cmap_source) {
+        g_source_destroy(cmap_source);
+        cmap_source = NULL;
+    }
+
+    if (track_handle) {
+        cmap_track_delete(cmap_handle, track_handle);
+        track_handle = 0;
+    }
+
+    if (cmap_handle) {
+        cmap_finalize(cmap_handle);
+        cmap_handle = 0;
+    }
+}
+
 static gboolean
 sbd_get_two_node(void)
 {
@@ -217,18 +236,7 @@
     return TRUE;
 
 out:
-    if (cmap_source) {
-        g_source_destroy(cmap_source);
-        cmap_source = NULL;
-    }
-    if (track_handle) {
-        cmap_track_delete(cmap_handle, track_handle);
-        track_handle = 0;
-    }
-    if (cmap_handle) {
-        cmap_finalize(cmap_handle);
-        cmap_handle = 0;
-    }
+    cmap_destroy();
 
     return FALSE;
 }
@@ -327,6 +335,12 @@
 {
     cl_log(LOG_WARNING, "Lost connection to %s", 
name_for_cluster_type(get_cluster_type()));
 
+    if (get_cluster_type() != pcmk_cluster_unknown) {
+#if SUPPORT_COROSYNC && CHECK_TWO_NODE
+        cmap_destroy();
+#endif
+    }
+
     set_servant_health(pcmk_health_unclean, LOG_ERR, "Cluster connection 
terminated");
     notify_parent();
 
diff -Nru sbd-1.4.0/src/sbd-common.c sbd-1.4.0-18-g5e3283c/src/sbd-common.c
--- sbd-1.4.0/src/sbd-common.c  2019-01-14 14:27:27.000000000 +0100
+++ sbd-1.4.0-18-g5e3283c/src/sbd-common.c      2019-04-16 14:38:22.000000000 
+0200
@@ -568,13 +568,13 @@
 #define IOPRIO_PRIO_DATA(mask)  ((mask) & IOPRIO_PRIO_MASK)
 #define IOPRIO_PRIO_VALUE(class, data)  (((class) << IOPRIO_CLASS_SHIFT) | 
data)
 
-static unsigned char
+static void
 sbd_stack_hogger(unsigned char * inbuf, int kbytes)
 {
     unsigned char buf[1024];
 
     if(kbytes <= 0) {
-        return HOG_CHAR;
+        return;
     }
 
     if (inbuf == NULL) {
@@ -584,10 +584,10 @@
     }
 
     if (kbytes > 0) {
-        return sbd_stack_hogger(buf, kbytes-1);
-    } else {
-        return buf[sizeof(buf)-1];
+        sbd_stack_hogger(buf, kbytes-1);
     }
+
+    return;
 }
 
 static void
diff -Nru sbd-1.4.0/src/sbd.h sbd-1.4.0-18-g5e3283c/src/sbd.h
--- sbd-1.4.0/src/sbd.h 2019-01-14 14:27:27.000000000 +0100
+++ sbd-1.4.0-18-g5e3283c/src/sbd.h     2019-04-16 14:38:22.000000000 +0200
@@ -54,10 +54,13 @@
 /* FIXME: should add dynamic check of SIG_XX >= SIGRTMAX */
 
 /* exit status for disk-servant */
-#define EXIT_MD_IO_FAIL             20
-#define EXIT_MD_REQUEST_RESET       21
-#define EXIT_MD_REQUEST_SHUTOFF     22
-#define EXIT_MD_REQUEST_CRASHDUMP   23
+#define EXIT_MD_SERVANT_IO_FAIL             20
+#define EXIT_MD_SERVANT_REQUEST_RESET       21
+#define EXIT_MD_SERVANT_REQUEST_SHUTOFF     22
+#define EXIT_MD_SERVANT_REQUEST_CRASHDUMP   23
+
+/* exit status for pcmk-servant */
+#define EXIT_PCMK_SERVANT_GRACEFUL_SHUTDOWN 30
 
 #define HOG_CHAR       0xff
 #define SECTOR_NAME_MAX 63
@@ -175,7 +178,7 @@
 int dump_headers(struct servants_list_item *servants);
 unsigned long get_first_msgwait(struct servants_list_item *servants);
 int messenger(const char *name, const char *msg, struct servants_list_item 
*servants);
-int servant(const char *diskname, int mode, const void* argp);
+int servant_md(const char *diskname, int mode, const void* argp);
 #endif
 
 int servant_pcmk(const char *diskname, int mode, const void* argp);
diff -Nru sbd-1.4.0/src/sbd-inquisitor.c 
sbd-1.4.0-18-g5e3283c/src/sbd-inquisitor.c
--- sbd-1.4.0/src/sbd-inquisitor.c      2019-01-14 14:27:27.000000000 +0100
+++ sbd-1.4.0-18-g5e3283c/src/sbd-inquisitor.c  2019-04-16 14:38:22.000000000 
+0200
@@ -42,19 +42,36 @@
        struct servants_list_item *newbie;
 
        if (lookup_servant_by_dev(devname)) {
-               cl_log(LOG_DEBUG, "Servant %s already exists", devname);
-               return;
+           cl_log(LOG_DEBUG, "Servant %s already exists", devname);
+           return;
        }
 
        newbie = malloc(sizeof(*newbie));
-       if (!newbie) {
-               fprintf(stderr, "malloc failed in recruit_servant.\n");
-               exit(1);
+       if (newbie) {
+           memset(newbie, 0, sizeof(*newbie));
+           newbie->devname = strdup(devname);
+           newbie->pid = pid;
+           newbie->first_start = 1;
+       }
+       if (!newbie || !newbie->devname) {
+           fprintf(stderr, "heap allocation failed in recruit_servant.\n");
+           exit(1);
+       }
+
+       /* some sanity-check on our newbie */
+       if (sbd_is_disk(newbie)) {
+           cl_log(LOG_INFO, "Monitoring %s", devname);
+           disk_count++;
+       } else if (sbd_is_pcmk(newbie) || sbd_is_cluster(newbie)) {
+           /* alive just after pcmk and cluster servants have shown up */
+           newbie->outdated = 1;
+       } else {
+           /* toss our newbie */
+           cl_log(LOG_ERR, "Refusing to recruit unrecognized servant %s", 
devname);
+           free((void *) newbie->devname);
+           free(newbie);
+           return;
        }
-       memset(newbie, 0, sizeof(*newbie));
-       newbie->devname = strdup(devname);
-       newbie->pid = pid;
-       newbie->first_start = 1;
 
        if (!s) {
                servants_leader = newbie;
@@ -65,12 +82,6 @@
        }
 
        servant_count++;
-        if(sbd_is_disk(newbie)) {
-            cl_log(LOG_INFO, "Monitoring %s", devname);
-            disk_count++;
-        } else {
-            newbie->outdated = 1;
-        }
 }
 
 int assign_servant(const char* devname, functionp_t functionp, int mode, const 
void* argp)
@@ -148,7 +159,7 @@
        if (sbd_is_disk(s)) {
 #if SUPPORT_SHARED_DISK
                DBGLOG(LOG_INFO, "Starting servant for device %s", s->devname);
-               s->pid = assign_servant(s->devname, servant, start_mode, s);
+               s->pid = assign_servant(s->devname, servant_md, start_mode, s);
 #else
                 cl_log(LOG_ERR, "Shared disk functionality not supported");
                 return;
@@ -479,19 +490,19 @@
                                        if (sbd_is_disk(s)) {
                                                if (WIFEXITED(status)) {
                                                        
switch(WEXITSTATUS(status)) {
-                                                               case 
EXIT_MD_IO_FAIL:
+                                                               case 
EXIT_MD_SERVANT_IO_FAIL:
                                                                        
DBGLOG(LOG_INFO, "Servant for %s requests to be disowned",
                                                                                
s->devname);
                                                                        break;
-                                                               case 
EXIT_MD_REQUEST_RESET:
+                                                               case 
EXIT_MD_SERVANT_REQUEST_RESET:
                                                                        
cl_log(LOG_WARNING, "%s requested a reset", s->devname);
                                                                        
do_reset();
                                                                        break;
-                                                               case 
EXIT_MD_REQUEST_SHUTOFF:
+                                                               case 
EXIT_MD_SERVANT_REQUEST_SHUTOFF:
                                                                        
cl_log(LOG_WARNING, "%s requested a shutoff", s->devname);
                                                                        
do_off();
                                                                        break;
-                                                               case 
EXIT_MD_REQUEST_CRASHDUMP:
+                                                               case 
EXIT_MD_SERVANT_REQUEST_CRASHDUMP:
                                                                        
cl_log(LOG_WARNING, "%s requested a crashdump", s->devname);
                                                                        
do_crashdump();
                                                                        break;
@@ -499,6 +510,22 @@
                                                                        break;
                                                        }
                                                }
+                                       } else if (sbd_is_pcmk(s)) {
+                                               if (WIFEXITED(status)) {
+                                                       
switch(WEXITSTATUS(status)) {
+                                                               case 
EXIT_PCMK_SERVANT_GRACEFUL_SHUTDOWN:
+                                                                       
DBGLOG(LOG_INFO, "PCMK-Servant has exited gracefully");
+                                                                       /* 
revert to state prior to pacemaker-detection */
+                                                                       
s->restarts = 0;
+                                                                       
s->restart_blocked = 0;
+                                                                       
cluster_appeared = 0;
+                                                                       
s->outdated = 1;
+                                                                       
s->t_last.tv_sec = 0;
+                                                                       break;
+                                                               default:
+                                                                       break;
+                                                       }
+                                               }
                                        }
                                        cleanup_servant_by_pid(pid);
                                }
@@ -753,54 +780,56 @@
 int
 parse_device_line(const char *line)
 {
-    int lpc = 0;
-    int last = 0;
-    int max = 0;
+    size_t lpc = 0;
+    size_t last = 0;
+    size_t max = 0;
     int found = 0;
+    bool skip_space = true;
+    int space_run = 0;
 
-    if(line) {
-        max = strlen(line);
+    if (!line) {
+        return 0;
     }
 
-    if (max <= 0) {
-        return found;
-    }
+    max = strlen(line);
 
-    cl_log(LOG_DEBUG, "Processing %d bytes: [%s]", max, line);
-    /* Skip initial whitespace */
-    for (lpc = 0; lpc <= max && isspace(line[lpc]); lpc++) {
-        last = lpc + 1;
-    }
+    cl_log(LOG_DEBUG, "Processing %d bytes: [%s]", (int) max, line);
 
-    /* Now the actual content */
     for (lpc = 0; lpc <= max; lpc++) {
-        int a_space = isspace(line[lpc]);
-
-        if (a_space && lpc < max && isspace(line[lpc + 1])) {
-            /* fast-forward to the end of the spaces */
-
-        } else if (a_space || line[lpc] == ';' || line[lpc] == 0) {
-            int rc = 1;
-            char *entry = NULL;
+        if (isspace(line[lpc])) {
+            if (skip_space) {
+                last = lpc + 1;
+            } else {
+                space_run++;
+            }
+            continue;
+        }
+        skip_space = false;
+        if (line[lpc] == ';' || line[lpc] == 0) {
+            int rc = 0;
+            char *entry = calloc(1, 1 + lpc - last);
 
-            if (lpc > last) {
-                entry = calloc(1, 1 + lpc - last);
+            if (entry) {
                 rc = sscanf(line + last, "%[^;]", entry);
+            } else {
+                fprintf(stderr, "Heap allocation failed parsing 
device-line.\n");
+                exit(1);
             }
 
-            if (entry == NULL) {
-                /* Skip */
-            } else if (rc != 1) {
-                cl_log(LOG_WARNING, "Could not parse (%d %d): %s", last, lpc, 
line + last);
+            if (rc != 1) {
+                cl_log(LOG_WARNING, "Could not parse: '%s'", line + last);
             } else {
+                entry[strlen(entry)-space_run] = '\0';
                 cl_log(LOG_DEBUG, "Adding '%s'", entry);
                 recruit_servant(entry, 0);
                 found++;
             }
 
             free(entry);
+            skip_space = true;
             last = lpc + 1;
         }
+        space_run = 0;
     }
     return found;
 }
@@ -861,7 +890,7 @@
             int devices = parse_device_line(value);
             if(devices < 1) {
                 fprintf(stderr, "Invalid device line: %s\n", value);
-               exit_status = -2;
+                exit_status = -2;
                 goto out;
             }
 #else
@@ -1059,7 +1088,8 @@
                        break;
                case 'h':
                        usage();
-                       return (0);
+                       goto out;
+                       break;
                default:
                        exit_status = -2;
                        goto out;
@@ -1212,6 +1242,9 @@
         }
         
   out:
+       if (timeout_action) {
+                               free(timeout_action);
+       }
        if (exit_status < 0) {
                if (exit_status == -2) {
                        usage();
diff -Nru sbd-1.4.0/src/sbd-md.c sbd-1.4.0-18-g5e3283c/src/sbd-md.c
--- sbd-1.4.0/src/sbd-md.c      2019-01-14 14:27:27.000000000 +0100
+++ sbd-1.4.0-18-g5e3283c/src/sbd-md.c  2019-04-16 14:38:22.000000000 +0200
@@ -162,9 +162,9 @@
 
        memset(&st->io, 0, sizeof(struct iocb));
        if (rw) {
-               io_prep_pwrite(&st->io, st->devfd, data, sector_size, 
sector_size * sector);
+               io_prep_pwrite(&st->io, st->devfd, data, sector_size, (long 
long) sector_size * sector);
        } else {
-               io_prep_pread(&st->io, st->devfd, data, sector_size, 
sector_size * sector);
+               io_prep_pread(&st->io, st->devfd, data, sector_size, (long 
long) sector_size * sector);
        }
 
        if (io_submit(st->ioctx, 1, ios) != 1) {
@@ -373,7 +373,6 @@
        struct sector_header_s  *s_header;
        struct sector_node_s    *s_node;
        struct sector_mbox_s    *s_mbox;
-       struct stat             s;
        char                    uuid[37];
        int                     i;
        int                     rc = 0;
@@ -394,10 +393,6 @@
        uuid_generate(s_header->uuid);
        uuid_unparse_lower(s_header->uuid, uuid);
 
-       fstat(st->devfd, &s);
-       /* printf("st_size = %ld, st_blksize = %ld, st_blocks = %ld\n",
-                       s.st_size, s.st_blksize, s.st_blocks); */
-
        cl_log(LOG_INFO, "Creating version %d.%d header on device %d (uuid: 
%s)",
                        s_header->version, s_header->minor_version,
                        st->devfd, uuid);
@@ -1031,7 +1026,7 @@
        return 0;
 }
 
-int servant(const char *diskname, int mode, const void* argp)
+int servant_md(const char *diskname, int mode, const void* argp)
 {
        struct sector_mbox_s *s_mbox = NULL;
        struct sector_node_s *s_node = NULL;
@@ -1046,11 +1041,6 @@
        char uuid[37];
        const struct servants_list_item *s = argp;
 
-       if (!diskname) {
-               cl_log(LOG_ERR, "Empty disk name %s.", diskname);
-               return -1;
-       }
-
        cl_log(LOG_INFO, "Servant starting for device %s", diskname);
 
        /* Block most of the signals */
@@ -1066,19 +1056,19 @@
 
        st = open_device(diskname, LOG_WARNING);
        if (!st) {
-               exit(EXIT_MD_IO_FAIL);
+               exit(EXIT_MD_SERVANT_IO_FAIL);
        }
 
        s_header = header_get(st);
        if (!s_header) {
                cl_log(LOG_ERR, "Not a valid header on %s", diskname);
-               exit(EXIT_MD_IO_FAIL);
+               exit(EXIT_MD_SERVANT_IO_FAIL);
        }
 
        if (servant_check_timeout_inconsistent(s_header) < 0) {
                cl_log(LOG_ERR, "Timeouts on %s do not match first device",
                                diskname);
-               exit(EXIT_MD_IO_FAIL);
+               exit(EXIT_MD_SERVANT_IO_FAIL);
        }
 
        if (s_header->minor_version > 0) {
@@ -1091,14 +1081,14 @@
                cl_log(LOG_ERR,
                       "No slot allocated, and automatic allocation failed for 
disk %s.",
                       diskname);
-               rc = EXIT_MD_IO_FAIL;
+               rc = EXIT_MD_SERVANT_IO_FAIL;
                goto out;
        }
        s_node = sector_alloc();
        if (slot_read(st, mbox, s_node) < 0) {
                cl_log(LOG_ERR, "Unable to read node entry on %s",
                                diskname);
-               exit(EXIT_MD_IO_FAIL);
+               exit(EXIT_MD_SERVANT_IO_FAIL);
        }
 
        cl_log(LOG_NOTICE, "Monitoring slot %d on disk %s", mbox, diskname);
@@ -1114,7 +1104,7 @@
                if (mode > 0) {
                        if (mbox_read(st, mbox, s_mbox) < 0) {
                                cl_log(LOG_ERR, "mbox read failed during 
start-up in servant.");
-                               rc = EXIT_MD_IO_FAIL;
+                               rc = EXIT_MD_SERVANT_IO_FAIL;
                                goto out;
                        }
                        if (s_mbox->cmd != SBD_MSG_EXIT &&
@@ -1130,7 +1120,7 @@
                DBGLOG(LOG_INFO, "First servant start - zeroing inbox");
                memset(s_mbox, 0, sizeof(*s_mbox));
                if (mbox_write(st, mbox, s_mbox) < 0) {
-                       rc = EXIT_MD_IO_FAIL;
+                       rc = EXIT_MD_SERVANT_IO_FAIL;
                        goto out;
                }
        }
@@ -1159,28 +1149,28 @@
                s_header_retry = header_get(st);
                if (!s_header_retry) {
                        cl_log(LOG_ERR, "No longer found a valid header on %s", 
diskname);
-                       exit(EXIT_MD_IO_FAIL);
+                       exit(EXIT_MD_SERVANT_IO_FAIL);
                }
                if (memcmp(s_header, s_header_retry, sizeof(*s_header)) != 0) {
                        cl_log(LOG_ERR, "Header on %s changed since start-up!", 
diskname);
-                       exit(EXIT_MD_IO_FAIL);
+                       exit(EXIT_MD_SERVANT_IO_FAIL);
                }
                free(s_header_retry);
 
                s_node_retry = sector_alloc();
                if (slot_read(st, mbox, s_node_retry) < 0) {
                        cl_log(LOG_ERR, "slot read failed in servant.");
-                       exit(EXIT_MD_IO_FAIL);
+                       exit(EXIT_MD_SERVANT_IO_FAIL);
                }
                if (memcmp(s_node, s_node_retry, sizeof(*s_node)) != 0) {
                        cl_log(LOG_ERR, "Node entry on %s changed since 
start-up!", diskname);
-                       exit(EXIT_MD_IO_FAIL);
+                       exit(EXIT_MD_SERVANT_IO_FAIL);
                }
                free(s_node_retry);
 
                if (mbox_read(st, mbox, s_mbox) < 0) {
                        cl_log(LOG_ERR, "mbox read failed in servant.");
-                       exit(EXIT_MD_IO_FAIL);
+                       exit(EXIT_MD_SERVANT_IO_FAIL);
                }
 
                if (s_mbox->cmd > 0) {
@@ -1195,14 +1185,14 @@
                                sigqueue(ppid, SIG_TEST, signal_value);
                                break;
                        case SBD_MSG_RESET:
-                               exit(EXIT_MD_REQUEST_RESET);
+                               exit(EXIT_MD_SERVANT_REQUEST_RESET);
                        case SBD_MSG_OFF:
-                               exit(EXIT_MD_REQUEST_SHUTOFF);
+                               exit(EXIT_MD_SERVANT_REQUEST_SHUTOFF);
                        case SBD_MSG_EXIT:
                                sigqueue(ppid, SIG_EXITREQ, signal_value);
                                break;
                        case SBD_MSG_CRASHDUMP:
-                               exit(EXIT_MD_REQUEST_CRASHDUMP);
+                               exit(EXIT_MD_SERVANT_REQUEST_CRASHDUMP);
                        default:
                                /* FIXME:
                                   An "unknown" message might result
diff -Nru sbd-1.4.0/src/sbd-pacemaker.c 
sbd-1.4.0-18-g5e3283c/src/sbd-pacemaker.c
--- sbd-1.4.0/src/sbd-pacemaker.c       2019-01-14 14:27:27.000000000 +0100
+++ sbd-1.4.0-18-g5e3283c/src/sbd-pacemaker.c   2019-04-16 14:38:22.000000000 
+0200
@@ -103,6 +103,9 @@
 
 static long last_refresh = 0;
 
+static int pcmk_clean_shutdown = 0;
+static int pcmk_shutdown = 0;
+
 static gboolean
 mon_timer_reconnect(gpointer data)
 {
@@ -128,10 +131,26 @@
 {
        if (cib) {
                cib->cmds->signoff(cib);
+               /* retrigger as last one might have been skipped */
+               mon_refresh_state(NULL);
+               if (pcmk_clean_shutdown) {
+                       /* assume a graceful pacemaker-shutdown */
+                       clean_up(EXIT_PCMK_SERVANT_GRACEFUL_SHUTDOWN);
+               }
+               /* getting here we aren't sure about the pacemaker-state
+                  so try to use the timeout to reconnect and get
+                  everything sorted out again
+                */
+               pcmk_shutdown = 0;
                set_servant_health(pcmk_health_transient, LOG_WARNING, 
"Disconnected from CIB");
                timer_id_reconnect = g_timeout_add(reconnect_msec, 
mon_timer_reconnect, NULL);
        }
        cib_connected = 0;
+       /* no sense in looking into outdated cib, trying to apply patch, ... */
+       if (current_cib) {
+               free_xml(current_cib);
+               current_cib = NULL;
+       }
        return;
 }
 
@@ -171,7 +190,7 @@
 mon_timer_notify(gpointer data)
 {
        static int counter = 0;
-       int counter_max = timeout_watchdog / timeout_loop;
+       int counter_max = timeout_watchdog / timeout_loop / 2;
 
        if (timer_id_notify > 0) {
                g_source_remove(timer_id_notify);
@@ -257,7 +276,7 @@
     static int updates = 0;
     static int ever_had_quorum = FALSE;
 
-    node_t *node = pe_find_node(data_set->nodes, local_uname);
+    node_t *node = NULL;
 
     updates++;
 
@@ -267,11 +286,15 @@
         return;
     }
 
+    node = pe_find_node(data_set->nodes, local_uname);
 
-    if (node == NULL) {
+    if ((node == NULL) || (node->details == NULL)) {
         set_servant_health(pcmk_health_unknown, LOG_WARNING, "Node state: %s 
is UNKNOWN", local_uname);
+        notify_parent();
+        return;
+    }
 
-    } else if (node->details->online == FALSE) {
+    if (node->details->online == FALSE) {
         set_servant_health(pcmk_health_unknown, LOG_WARNING, "Node state: 
OFFLINE");
 
     } else if (node->details->unclean) {
@@ -280,11 +303,6 @@
     } else if (node->details->pending) {
         set_servant_health(pcmk_health_pending, LOG_WARNING, "Node state: 
pending");
 
-#if 0
-    } else if (node->details->shutdown) {
-        set_servant_health(pcmk_health_shutdown, LOG_WARNING, "Node state: 
shutting down");
-#endif
-
     } else if (data_set->flags & pe_flag_have_quorum) {
         set_servant_health(pcmk_health_online, LOG_INFO, "Node state: online");
         ever_had_quorum = TRUE;
@@ -315,6 +333,12 @@
         }
     }
 
+    if (node->details->shutdown) {
+        pcmk_shutdown = 1;
+    }
+    if (pcmk_shutdown && !(node->details->running_rsc)) {
+        pcmk_clean_shutdown = 1;
+    }
     notify_parent();
     return;
 }
@@ -339,7 +363,7 @@
         static mainloop_timer_t *refresh_timer = NULL;
 
         if(refresh_timer == NULL) {
-            refresh_timer = mainloop_timer_add("refresh", 2000, FALSE, 
mon_trigger_refresh, NULL);
+            refresh_timer = mainloop_timer_add("refresh", reconnect_msec, 
FALSE, mon_trigger_refresh, NULL);
             refresh_trigger = mainloop_add_trigger(G_PRIORITY_LOW, 
mon_refresh_state, refresh_timer);
         }
 
@@ -369,9 +393,9 @@
        }
 
     /* Refresh
-     * - immediately if the last update was more than 5s ago
+     * - immediately if the last update was more than 1s ago
      * - every 10 updates
-     * - at most 2s after the last update
+     * - at most 1s after the last update
      */
     if (updates > 10 || (now - last_refresh) > (reconnect_msec / 1000)) {
         mon_refresh_state(refresh_timer);
diff -Nru sbd-1.4.0/src/sbd.sysconfig sbd-1.4.0-18-g5e3283c/src/sbd.sysconfig
--- sbd-1.4.0/src/sbd.sysconfig 2019-01-14 14:27:27.000000000 +0100
+++ sbd-1.4.0-18-g5e3283c/src/sbd.sysconfig     2019-04-16 14:38:22.000000000 
+0200
@@ -68,6 +68,9 @@
 # If your sbd device(s) reside on a multipath setup or iSCSI, this
 # should be the time required to detect a path failure.
 #
+# Be aware that watchdog timeout set in the on-disk metadata takes
+# precedence.
+#
 SBD_WATCHDOG_TIMEOUT=5
 
 ## Type: string

Reply via email to