Hello community,

here is the log from the commit of package booth for openSUSE:Factory checked 
in at 2014-06-16 21:25:34
++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Comparing /work/SRC/openSUSE:Factory/booth (Old)
 and      /work/SRC/openSUSE:Factory/.booth.new (New)
++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++

Package is "booth"

Changes:
--------
--- /work/SRC/openSUSE:Factory/booth/booth.changes      2014-06-07 
07:12:34.000000000 +0200
+++ /work/SRC/openSUSE:Factory/.booth.new/booth.changes 2014-06-16 
21:25:35.000000000 +0200
@@ -1,0 +2,12 @@
+Sun Jun 15 14:08:50 UTC 2014 - dmuhameda...@suse.com
+
+- raft: preserve term for status messages 
+- raft: ignore VOTE_FOR if we lost the candidate status (bnc#882209) 
+- raft: always accept heartbeats and updates from a valid leader (bnc#882209) 
+- raft: ack revokes (bnc#882057)
+- config: don't allow too long packet retry period 
+- config: don't change ticket defaults in ticket stanzas (use __defaults__) 
+- raft: don't ignore duplicate vote_for packets 
+- upstream version: 4c1e97
+
+-------------------------------------------------------------------

++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++

Other differences:
------------------
++++++ booth.tar.bz2 ++++++
diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' 
'--exclude=.svnignore' old/booth/docs/boothd.8.txt new/booth/docs/boothd.8.txt
--- old/booth/docs/boothd.8.txt 2014-06-06 10:53:11.000000000 +0200
+++ new/booth/docs/boothd.8.txt 2014-06-15 16:01:35.000000000 +0200
@@ -177,9 +177,10 @@
 *'ticket'*::
        Registers a ticket. Multiple tickets can be handled by single
        Booth instance.
-
-The next items modify per-ticket defaults. Modifications are
-inherited by tickets which follow in the configuration file.
++
+Use the special ticket name '__defaults__' to modify the
+defaults. The '__defaults__' stanza must precede all the other
+ticket specifications.
 
 All times are in seconds.
 
@@ -220,10 +221,11 @@
 +
 Default is 10. Values lower than 3 are illegal.
 +
-This counts only for a single broadcast; if ticket *renewal* runs
-into this limit (because the network was temporarily down), but
-the ticket is still valid afterwards, a new renewal run will be
-started automatically.
+Ticket *renewal*, which occurs every half expire time,
+must happen after packet resending. Hence, the total retry time
+must be shorter than half the expire time:
+
+       timeout*(retries+1) < expire/2
 
 *'before-acquire-handler'*::
        If set, this command will be called before 'boothd' tries to
diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' 
'--exclude=.svnignore' old/booth/src/booth.h new/booth/src/booth.h
--- old/booth/src/booth.h       2014-06-06 10:53:11.000000000 +0200
+++ new/booth/src/booth.h       2014-06-15 16:01:35.000000000 +0200
@@ -149,6 +149,7 @@
        OP_REQ_VOTE = CHAR2CONST('R', 'V', 'o', 't'), /* start election */
        OP_VOTE_FOR = CHAR2CONST('V', 't', 'F', 'r'), /* reply to REQ_VOTE */
        OP_HEARTBEAT= CHAR2CONST('H', 'r', 't', 'B'), /* Heartbeat */
+       OP_ACK      = CHAR2CONST('A', 'c', 'k', '.'), /* Ack for heartbeats and 
revokes */
        OP_UPDATE   = CHAR2CONST('U', 'p', 'd', 'E'), /* Update ticket */
        OP_REVOKE   = CHAR2CONST('R', 'e', 'v', 'k'), /* Revoke ticket */
        OP_REJECTED = CHAR2CONST('R', 'J', 'C', '!'),
diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' 
'--exclude=.svnignore' old/booth/src/config.c new/booth/src/config.c
--- old/booth/src/config.c      2014-06-06 10:53:11.000000000 +0200
+++ new/booth/src/config.c      2014-06-15 16:01:35.000000000 +0200
@@ -154,10 +154,6 @@
                        exit(1);
                }
 
-       log_debug("configured %s: %s, id=%d",
-               (site->type == ARBITRATOR ? "arbitrator" : "site"),
-               site->addr_string, site->site_id);
-
 out:
        return rv;
 }
@@ -214,6 +210,12 @@
        tk = booth_conf->ticket + booth_conf->ticket_count;
        booth_conf->ticket_count++;
 
+       tk->last_valid_tk = malloc(sizeof(struct ticket_config));
+       if (!tk->last_valid_tk) {
+               log_error("out of memory");
+               return -ENOMEM;
+       }
+       memset(tk->last_valid_tk, 0, sizeof(struct ticket_config));
 
        if (!check_max_len_valid(name, sizeof(tk->name))) {
                log_error("ticket name \"%s\" too long.", name);
@@ -241,6 +243,18 @@
        return 0;
 }
 
+static int validate_ticket(struct ticket_config *tk)
+{
+       if (tk->timeout*(tk->retries+1) >= tk->term_duration/2) {
+               tk_log_error("total amount of time to "
+                       "retry sending packets cannot exceed "
+                       "half of the expiry time "
+                       "(%d*(%d+1) >= %d/2)",
+                       tk->timeout, tk->retries, tk->term_duration);
+               return 0;
+       }
+       return 1;
+}
 
 /* returns number of weights, or -1 on bad input. */
 static int parse_weights(const char *input, int weights[MAX_NODES])
@@ -299,7 +313,7 @@
        int lineno = 0;
        int got_transport = 0;
        struct ticket_config defaults = { { 0 } };
-       struct ticket_config *last_ticket = NULL;
+       struct ticket_config *current_tk = NULL;
 
 
        fp = fopen(path, "r");
@@ -471,34 +485,6 @@
                        continue;
                }
 
-               if (strcmp(key, "debug") == 0) {
-                       if (type != CLIENT)
-                               debug_level = max(debug_level, atoi(val));
-                       continue;
-               }
-
-               if (strcmp(key, "ticket") == 0) {
-                       if (add_ticket(val, &last_ticket, &defaults))
-                               goto out;
-
-                       /* last_ticket is valid until another one is needed -
-                        * and then it already has the new address and
-                        * is valid again. */
-                       continue;
-               }
-
-               if (strcmp(key, "expire") == 0) {
-                       defaults.term_duration = strtol(val, &s, 0);
-                       if (*s || s == val || defaults.term_duration<10) {
-                               error = "Expected plain integer value >=10 for 
expire";
-                               goto err;
-                       }
-
-                       if (last_ticket)
-                               last_ticket->term_duration = 
defaults.term_duration;
-                       continue;
-               }
-
                if (strcmp(key, "site-user") == 0) {
                        safe_copy(booth_conf->site_user, optarg, BOOTH_NAME_LEN,
                                        "site-user");
@@ -520,67 +506,85 @@
                        continue;
                }
 
+               if (strcmp(key, "debug") == 0) {
+                       if (type != CLIENT)
+                               debug_level = max(debug_level, atoi(val));
+                       continue;
+               }
+
+               if (strcmp(key, "ticket") == 0) {
+                       if (current_tk && strcmp(current_tk->name, 
"__defaults__")) {
+                               if (!validate_ticket(current_tk)) {
+                                       goto out;
+                               }
+                       }
+                       if (!strcmp(val, "__defaults__")) {
+                               current_tk = &defaults;
+                       } else if (add_ticket(val, &current_tk, &defaults)) {
+                               goto out;
+                       }
+
+                       /* current_tk is valid until another one is needed -
+                        * and then it already has the new address and
+                        * is valid again. */
+                       continue;
+               }
+
+               if (strcmp(key, "expire") == 0) {
+                       current_tk->term_duration = strtol(val, &s, 0);
+                       if (*s || s == val || current_tk->term_duration<10) {
+                               error = "Expected plain integer value >=10 for 
expire";
+                               goto err;
+                       }
+                       continue;
+               }
 
                if (strcmp(key, "timeout") == 0) {
-                       defaults.timeout = strtol(val, &s, 0);
-                       if (*s || s == val || defaults.timeout<1) {
+                       current_tk->timeout = strtol(val, &s, 0);
+                       if (*s || s == val || current_tk->timeout<1) {
                                error = "Expected plain integer value >=1 for 
timeout";
                                goto err;
                        }
-
-                       if (last_ticket)
-                               last_ticket->timeout = defaults.timeout;
                        continue;
                }
 
                if (strcmp(key, "retries") == 0) {
-                       defaults.retries = strtol(val, &s, 0);
-                       if (*s || s == val || defaults.retries<3 || 
defaults.retries > 100) {
+                       current_tk->retries = strtol(val, &s, 0);
+                       if (*s || s == val ||
+                                       current_tk->retries<3 || 
current_tk->retries > 100) {
                                error = "Expected plain integer value in the 
range [3, 100] for retries";
                                goto err;
                        }
-
-                       if (last_ticket)
-                               last_ticket->retries = defaults.retries;
                        continue;
                }
 
                if (strcmp(key, "acquire-after") == 0) {
-                       defaults.acquire_after = strtol(val, &s, 0);
-                       if (*s || s == val || defaults.acquire_after<0) {
+                       current_tk->acquire_after = strtol(val, &s, 0);
+                       if (*s || s == val || current_tk->acquire_after<0) {
                                error = "Expected plain integer value >=1 for 
acquire-after";
                                goto err;
                        }
-
-                       if (last_ticket)
-                               last_ticket->acquire_after = 
defaults.acquire_after;
                        continue;
                }
 
                if (strcmp(key, "before-acquire-handler") == 0) {
-                       defaults.ext_verifier = strdup(val);
-                       if (*s || s == val || defaults.timeout<1) {
-                               error = "Expected plain integer value >=1 for 
timeout";
+                       if (current_tk->ext_verifier) {
+                               free(current_tk->ext_verifier);
+                       }
+                       current_tk->ext_verifier = strdup(val);
+                       if (!current_tk->ext_verifier) {
+                               error = "Out of memory";
                                goto err;
                        }
-
-                       if (last_ticket)
-                               last_ticket->ext_verifier = 
defaults.ext_verifier;
                        continue;
                }
 
-
                if (strcmp(key, "weights") == 0) {
-                       if (parse_weights(val, defaults.weight) < 0)
+                       if (parse_weights(val, current_tk->weight) < 0)
                                goto out;
-
-                       if (last_ticket)
-                               memcpy(last_ticket->weight, defaults.weight,
-                                               sizeof(last_ticket->weight));
                        continue;
                }
 
-
                error = "Unknown item";
                goto out;
        }
diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' 
'--exclude=.svnignore' old/booth/src/config.h new/booth/src/config.h
--- old/booth/src/config.h      2014-06-06 10:53:11.000000000 +0200
+++ new/booth/src/config.h      2014-06-15 16:01:35.000000000 +0200
@@ -129,6 +129,9 @@
         */
        time_t delay_commit;
 
+       /* the last request RPC we sent
+        */
+       uint32_t last_request;
        /* if we expect some acks, then set this to the id of
         * the RPC which others will send us; it is cleared once all
         * replies were received
@@ -151,11 +154,21 @@
        */
        int update_cib;
 
+       /* Is this ticket in election?
+       */
+       int in_election;
+
        /* don't log warnings unnecessarily
         */
        int expect_more_rejects;
        /** \name Needed while proposals are being done.
         * @{ */
+       /* Need to keep the previous valid ticket in case we moved to
+        * start new elections and another server asks for the ticket
+        * status. It would be wrong to send our candidate ticket.
+       */
+       struct ticket_config *last_valid_tk;
+
        /** Whom to vote for the next time.
         * Needed to push a ticket to someone else. */
 
@@ -182,7 +195,6 @@
        /** @} */
 };
 
-
 struct booth_config {
     char name[BOOTH_NAME_LEN];
 
diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' 
'--exclude=.svnignore' old/booth/src/inline-fn.h new/booth/src/inline-fn.h
--- old/booth/src/inline-fn.h   2014-06-06 10:53:11.000000000 +0200
+++ new/booth/src/inline-fn.h   2014-06-15 16:01:35.000000000 +0200
@@ -24,7 +24,6 @@
 #include <assert.h>
 #include <string.h>
 #include "config.h"
-#include "ticket.h"
 #include "transport.h"
 
 
@@ -93,6 +92,10 @@
        init_header(&msg->header, cmd, 0, 0, 0, sizeof(*msg));
 }
 
+#define my_last_term(tk) \
+       (((tk)->state == ST_CANDIDATE && (tk)->last_valid_tk->current_term) ? \
+       (tk)->last_valid_tk->current_term : (tk)->current_term)
+
 static inline void init_ticket_msg(struct boothc_ticket_msg *msg,
                int cmd, int rv, int reason,
                struct ticket_config *tk)
@@ -299,13 +302,6 @@
        tk->acks_expected = 0;
 }
 
-static inline int send_heartbeat(struct ticket_config *tk)
-{
-       expect_replies(tk, OP_HEARTBEAT);
-
-       return ticket_broadcast(tk, OP_HEARTBEAT, RLT_SUCCESS, 0);
-}
-
 static inline struct booth_site *my_vote(struct ticket_config *tk)
 {
        return tk->votes_for[ local->index ];
diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' 
'--exclude=.svnignore' old/booth/src/log.h new/booth/src/log.h
--- old/booth/src/log.h 2014-06-06 10:53:11.000000000 +0200
+++ new/booth/src/log.h 2014-06-15 16:01:35.000000000 +0200
@@ -23,6 +23,7 @@
 
 #include <heartbeat/glue_config.h>
 #include <clplumbing/cl_log.h>
+#include "inline-fn.h"
 
 #define log_debug(fmt, args...)                do { \
        if (ANYDEBUG) cl_log(LOG_DEBUG, fmt, ##args); } \
@@ -35,9 +36,13 @@
  * have the ticket named tk!)
  */
 #define tk_cl_log(sev, fmt, args...)           cl_log(sev, "%s: " fmt, 
tk->name, ##args)
+#define tk_detailed_cl_log(sev, fmt, args...) \
+       cl_log(sev, "%s (%s/%d/%d): " fmt, \
+       tk->name, state_to_string(tk->state), tk->current_term, 
term_time_left(tk), \
+       ##args)
 
 #define tk_log_debug(fmt, args...)             do { \
-       if (ANYDEBUG) tk_cl_log(LOG_DEBUG, fmt, ##args); } \
+       if (ANYDEBUG) tk_detailed_cl_log(LOG_DEBUG, fmt, ##args); } \
        while (0)
 #define tk_log_info(fmt, args...)              tk_cl_log(LOG_INFO, fmt, ##args)
 #define tk_log_warn(fmt, args...)              tk_cl_log(LOG_WARNING, fmt, 
##args)
diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' 
'--exclude=.svnignore' old/booth/src/main.c new/booth/src/main.c
--- old/booth/src/main.c        2014-06-06 10:53:11.000000000 +0200
+++ new/booth/src/main.c        2014-06-15 16:01:35.000000000 +0200
@@ -1206,16 +1206,36 @@
        return 0;
 }
 
+static int lock_fd = -1;
+
+static void server_exit(void)
+{
+       int rv;
+
+       if (lock_fd >= 0) {
+               /* We might not be able to delete it, but at least
+                * make it empty. */
+               rv = ftruncate(lock_fd, 0);
+               (void)rv;
+               unlink_lockfile(lock_fd);
+       }
+       log_info("exiting");
+}
+
+static void sig_exit_handler(int sig)
+{
+       log_info("caught signal %d", sig);
+       exit(0);
+}
 
 static int do_server(int type)
 {
-       int lock_fd = -1;
        int rv = -1;
        static char log_ent[128] = DAEMON_NAME "-";
 
        rv = setup_config(type);
        if (rv < 0)
-               goto out;
+               return rv;
 
 
        if (!local) {
@@ -1236,6 +1256,8 @@
        if (lock_fd < 0)
                return lock_fd;
 
+       atexit(server_exit);
+
        strcat(log_ent, type_to_string(local->type));
        cl_log_set_entity(log_ent);
        cl_log_enable_stderr(enable_stderr ? TRUE : FALSE);
@@ -1247,6 +1269,8 @@
                        local->site_id, local->site_id);
 
        signal(SIGUSR1, (__sighandler_t)tickets_log_info);
+       signal(SIGTERM, (__sighandler_t)sig_exit_handler);
+       signal(SIGINT, (__sighandler_t)sig_exit_handler);
 
        set_scheduler();
        set_oom_adj(-16);
@@ -1268,15 +1292,6 @@
 
        rv = loop(lock_fd);
 
-out:
-       if (lock_fd >= 0) {
-               /* We might not be able to delete it, but at least
-                * make it empty. */
-               rv = ftruncate(lock_fd, 0);
-               (void)rv;
-               unlink_lockfile(lock_fd);
-       }
-
        return rv;
 }
 
diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' 
'--exclude=.svnignore' old/booth/src/raft.c new/booth/src/raft.c
--- old/booth/src/raft.c        2014-06-06 10:53:11.000000000 +0200
+++ new/booth/src/raft.c        2014-06-15 16:01:35.000000000 +0200
@@ -71,8 +71,8 @@
                struct booth_site *leader,
                struct boothc_ticket_msg *msg)
 {
-       if (tk->current_term != ntohl(msg->ticket.term)) {
-               return tk->current_term - ntohl(msg->ticket.term);
+       if (my_last_term(tk) != ntohl(msg->ticket.term)) {
+               return my_last_term(tk) - ntohl(msg->ticket.term);
        }
        /* compare commit_index only from the leader */
        if (sender == leader) {
@@ -104,18 +104,17 @@
 
 
 static void update_ticket_from_msg(struct ticket_config *tk,
+               struct booth_site *sender,
                struct boothc_ticket_msg *msg)
 {
        int duration;
 
-       duration = tk->term_duration;
-       if (msg)
-               duration = min(duration, ntohl(msg->ticket.term_valid_for));
+       tk_log_debug("updating from %s (%d/%d)",
+               site_string(sender),
+               ntohl(msg->ticket.term), ntohl(msg->ticket.term_valid_for));
+       duration = min(tk->term_duration, ntohl(msg->ticket.term_valid_for));
        tk->term_expires = time(NULL) + duration;
-
-       if (msg) {
-               update_term_from_msg(tk, msg);
-       }
+       update_term_from_msg(tk, msg);
 }
 
 
@@ -153,7 +152,7 @@
        tk->voted_for = NULL;
 
        tk->commit_index++;
-       send_heartbeat(tk);
+       ticket_broadcast(tk, OP_HEARTBEAT, OP_ACK, RLT_SUCCESS, 0);
        ticket_activate_timeout(tk);
 }
 
@@ -262,7 +261,6 @@
                                        term, tk->current_term,
                                        ticket_leader_string(tk));
                } else {
-                       tk->leader = no_leader;
                        tk_log_debug("from %s: higher term %d vs. %d 
(election)",
                                        site_string(sender),
                                        term, tk->current_term);
@@ -308,26 +306,29 @@
               )
 {
        uint32_t term;
-       struct boothc_ticket_msg omsg;
-
 
        term = ntohl(msg->ticket.term);
-       tk_log_debug("leader: %s, have %s; term %d vs %d",
+       tk_log_debug("heartbeat from leader: %s, have %s; term %d vs %d",
                        site_string(leader), ticket_leader_string(tk),
                        term, tk->current_term);
 
+       if (term < tk->current_term) {
+               if (sender == tk->leader) {
+                       tk_log_info("trusting leader %s with a lower term (%d 
vs %d)",
+                               site_string(leader), term, tk->current_term);
+               } else if (is_owned(tk)) {
+                       tk_log_warn("different leader %s with a lower term "
+                                       "(%d vs %d), sending reject",
+                               site_string(leader), term, tk->current_term);
+                       return send_reject(sender, tk, RLT_TERM_OUTDATED);
+               }
+       }
+
        /* got heartbeat, no rejects expected anymore */
        tk->expect_more_rejects = 0;
 
-       /* if we're candidate, it may be that we got a heartbeat from
-        * a legitimate leader, so don't ignore a lower term
-        */
-       if (tk->state != ST_CANDIDATE && term < tk->current_term) {
-               tk_log_info("ignoring lower term %d vs. %d, from %s",
-                               term, tk->current_term,
-                               ticket_leader_string(tk));
-               return 0;
-       }
+       /* and certainly not in election */
+       tk->in_election = 0;
 
        /* Needed? */
        newer_term(tk, sender, leader, msg, 0);
@@ -339,8 +340,7 @@
        tk->leader = leader;
 
        /* Ack the heartbeat (we comply). */
-       init_ticket_msg(&omsg, OP_HEARTBEAT, RLT_SUCCESS, 0, tk);
-       return booth_udp_send(sender, &omsg, sizeof(omsg));
+       return send_msg(OP_ACK, tk, sender);
 }
 
 
@@ -351,23 +351,17 @@
                struct boothc_ticket_msg *msg
               )
 {
-       uint32_t term;
-
-
-       term = ntohl(msg->ticket.term);
-       tk_log_debug("leader: %s, have %s; term %d vs %d",
-                       site_string(leader), ticket_leader_string(tk),
-                       term, tk->current_term);
-
-       /* No reject. (?) */
-       if (term < tk->current_term) {
-               tk_log_info("ignoring lower term %d vs. %d, from %s",
-                               term, tk->current_term,
-                               ticket_leader_string(tk));
-               return 0;
+       if (is_owned(tk) && sender != tk->leader) {
+               tk_log_warn("different leader %s wants to update "
+                               "our ticket, sending reject",
+                       site_string(leader));
+               return send_reject(sender, tk, RLT_TERM_OUTDATED);
        }
 
-       update_ticket_from_msg(tk, msg);
+       tk_log_debug("leader %s wants to update our ticket",
+                       site_string(leader));
+
+       copy_ticket_from_msg(tk, msg);
        ticket_write(tk);
 
        /* run ticket_cron if the ticket expires */
@@ -383,7 +377,12 @@
                struct boothc_ticket_msg *msg
               )
 {
-       if (tk->leader != sender) {
+       int rv;
+
+       if (tk->state == ST_INIT && tk->leader == no_leader) {
+               /* assume that our ack got lost */
+               rv = send_msg(OP_ACK, tk, sender);
+       } else if (tk->leader != sender) {
                tk_log_error("%s wants to revoke ticket, "
                                "but it is not granted there (ignoring)",
                                site_string(sender));
@@ -400,14 +399,15 @@
                reset_ticket(tk);
                tk->leader = no_leader;
                ticket_write(tk);
+               rv = send_msg(OP_ACK, tk, sender);
        }
 
-       return 0;
+       return rv;
 }
 
 
 /* For leader. */
-static int process_HEARTBEAT(
+static int process_ACK(
                struct ticket_config *tk,
                struct booth_site *sender,
                struct booth_site *leader,
@@ -438,8 +438,8 @@
        }
 
        /* if the ticket is to be revoked, further processing is not
-        * interesting */
-       if (tk->next_state == ST_INIT)
+        * interesting (and dangerous) */
+       if (tk->next_state == ST_INIT || tk->state == ST_INIT)
                return 0;
 
        if (term == tk->current_term &&
@@ -464,14 +464,6 @@
                struct boothc_ticket_msg *msg
                )
 {
-       if (term_too_low(tk, sender, leader, msg))
-               return 0;
-
-       if (newer_term(tk, sender, leader, msg, 0)) {
-               clear_election(tk);
-       }
-
-
        /* leader wants to step down? */
        if (leader == no_leader && sender == tk->leader &&
                        (tk->state == ST_FOLLOWER || tk->state == 
ST_CANDIDATE)) {
@@ -481,14 +473,21 @@
                return new_round(tk, OR_STEPDOWN);
        }
 
-       record_vote(tk, sender, leader);
-
-
        if (tk->state != ST_CANDIDATE) {
                /* lost candidate status, somebody rejected our proposal */
+               tk_log_debug("candidate status lost, ignoring vote_for from %s",
+                       site_string(sender));
                return 0;
        }
 
+       if (term_too_low(tk, sender, leader, msg))
+               return 0;
+
+       if (newer_term(tk, sender, leader, msg, 0)) {
+               clear_election(tk);
+       }
+
+       record_vote(tk, sender, leader);
 
        /* only if all voted can we take the ticket now, otherwise
         * wait for timeout in ticket_cron */
@@ -567,7 +566,7 @@
                        become_follower(tk, msg);
                } else {
                        tk_log_warn("our ticket is outdated and revoked");
-                       update_ticket_from_msg(tk, msg);
+                       update_ticket_from_msg(tk, sender, msg);
                        tk->state = ST_INIT;
                }
                return 0;
@@ -641,7 +640,6 @@
                struct boothc_ticket_msg *msg
                )
 {
-       uint32_t term;
        int valid;
        struct boothc_ticket_msg omsg;
        cmd_result_t inappr_reason;
@@ -650,25 +648,23 @@
        if (inappr_reason)
                return send_reject(sender, tk, inappr_reason);
 
-       term = ntohl(msg->ticket.term);
-       /* Important: Ignore duplicated packets! */
        valid = term_time_left(tk);
-       if (valid &&
-                       term == tk->current_term &&
-                       sender == tk->leader) {
-               tk_log_debug("Duplicate OP_VOTE_FOR ignored.");
-               return 0;
-       }
 
        /* allow the leader to start new elections on valid tickets */
        if (sender != tk->leader && valid) {
-               tk_log_warn("election rejected, term still valid for %ds", 
valid);
+               tk_log_warn("election from %s rejected "
+                       "(we have %s as ticket owner), ticket still valid for 
%ds",
+                       site_string(sender), site_string(tk->leader), valid);
                return send_reject(sender, tk, RLT_TERM_STILL_VALID);
        }
 
        if (term_too_low(tk, sender, leader, msg))
                return 0;
 
+       /* set this, so that we know not to send status for the
+        * ticket */
+       tk->in_election = 1;
+
        /* if it's a newer term or ... */
        if (newer_term(tk, sender, leader, msg, 1)) {
                clear_election(tk);
@@ -704,15 +700,20 @@
        if (now <= tk->election_end)
                return 0;
 
-
        /* §5.2 */
        /* If there was _no_ answer, don't keep incrementing the term number
         * indefinitely. If there was no peer, there'll probably be no one
         * listening now either. However, we don't know if we were
         * invoked due to a timeout (caller does).
         */
-       if (update_term)
+       if (update_term) {
+               /* save the previous term, we may need to send out the
+                * MY_INDEX message */
+               if (tk->state != ST_CANDIDATE) {
+                       memcpy(tk->last_valid_tk, tk, sizeof(struct 
ticket_config));
+               }
                tk->current_term++;
+       }
 
        tk->term_expires = 0;
        tk->election_end = now + tk->timeout;
@@ -737,8 +738,7 @@
                tk->election_reason = reason;
        }
 
-       expect_replies(tk, OP_VOTE_FOR);
-       ticket_broadcast(tk, OP_REQ_VOTE, RLT_SUCCESS, reason);
+       ticket_broadcast(tk, OP_REQ_VOTE, OP_VOTE_FOR, RLT_SUCCESS, reason);
        ticket_activate_timeout(tk);
        return 0;
 }
@@ -818,7 +818,10 @@
 
        if (i > 0) {
                /* let them know about our newer ticket */
-               send_msg(OP_MY_INDEX, tk, sender);
+               /* but if we're voting in elections, our ticket is not
+                * valid yet, don't send it */
+               if (!tk->in_election)
+                       send_msg(OP_MY_INDEX, tk, sender);
                if (tk->state == ST_LEADER) {
                        tk_log_info("sending ticket update to %s",
                                        site_string(sender));
@@ -855,7 +858,7 @@
 
        /* their ticket is either newer or not expired, don't
         * ignore it */
-       update_ticket_from_msg(tk, msg);
+       update_ticket_from_msg(tk, sender, msg);
        tk->leader = leader;
        update_ticket_state(tk, sender);
        set_ticket_wakeup(tk);
@@ -865,7 +868,7 @@
 
 int raft_answer(
                struct ticket_config *tk,
-               struct booth_site *from,
+               struct booth_site *sender,
                struct booth_site *leader,
                struct boothc_ticket_msg *msg
               )
@@ -878,56 +881,64 @@
 
        tk_log_debug("got message %s from %s",
                        state_to_string(cmd),
-                       site_string(from));
+                       site_string(sender));
 
 
        switch (cmd) {
        case OP_REQ_VOTE:
-               rv = answer_REQ_VOTE(tk, from, leader, msg);
+               rv = answer_REQ_VOTE(tk, sender, leader, msg);
                break;
        case OP_VOTE_FOR:
-               rv = process_VOTE_FOR(tk, from, leader, msg);
+               rv = process_VOTE_FOR(tk, sender, leader, msg);
                break;
-       case OP_HEARTBEAT:
+       case OP_ACK:
                if (tk->leader == local &&
                                tk->state == ST_LEADER)
-                       rv = process_HEARTBEAT(tk, from, leader, msg);
-               else if (tk->leader != local &&
+                       rv = process_ACK(tk, sender, leader, msg);
+               break;
+       case OP_HEARTBEAT:
+               if (tk->leader != local &&
                                (tk->state == ST_INIT ||tk->state == 
ST_FOLLOWER ||
                                tk->state == ST_CANDIDATE))
-                       rv = answer_HEARTBEAT(tk, from, leader, msg);
+                       rv = answer_HEARTBEAT(tk, sender, leader, msg);
                else {
                        tk_log_warn("unexpected message %s, from %s",
                                state_to_string(cmd),
-                               site_string(from));
+                               site_string(sender));
+                       if (ticket_seems_ok(tk))
+                               send_reject(sender, tk, RLT_TERM_STILL_VALID);
                        rv = -EINVAL;
                }
                break;
        case OP_UPDATE:
-               if (tk->leader != local && tk->state == ST_FOLLOWER) {
-                       rv = process_UPDATE(tk, from, leader, msg);
+               if (tk->leader != local && tk->leader == leader &&
+                               tk->state == ST_FOLLOWER) {
+                       rv = process_UPDATE(tk, sender, leader, msg);
                } else {
                        tk_log_warn("unexpected message %s, from %s",
                                state_to_string(cmd),
-                               site_string(from));
+                               site_string(sender));
+                       if (ticket_seems_ok(tk))
+                               send_reject(sender, tk, RLT_TERM_STILL_VALID);
                        rv = -EINVAL;
                }
                break;
        case OP_REJECTED:
-               rv = process_REJECTED(tk, from, leader, msg);
+               rv = process_REJECTED(tk, sender, leader, msg);
                break;
        case OP_REVOKE:
-               rv = process_REVOKE(tk, from, leader, msg);
+               rv = process_REVOKE(tk, sender, leader, msg);
                break;
        case OP_MY_INDEX:
-               rv = process_MY_INDEX(tk, from, leader, msg);
+               rv = process_MY_INDEX(tk, sender, leader, msg);
                break;
        case OP_STATUS:
-               rv = send_msg(OP_MY_INDEX, tk, from);
+               if (!tk->in_election)
+                       rv = send_msg(OP_MY_INDEX, tk, sender);
                break;
        default:
                tk_log_error("unknown message %s, from %s",
-                       state_to_string(cmd), site_string(from));
+                       state_to_string(cmd), site_string(sender));
                rv = -EINVAL;
        }
        return rv;
diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' 
'--exclude=.svnignore' old/booth/src/ticket.c new/booth/src/ticket.c
--- old/booth/src/ticket.c      2014-06-06 10:53:11.000000000 +0200
+++ new/booth/src/ticket.c      2014-06-15 16:01:35.000000000 +0200
@@ -130,7 +130,7 @@
                        reset_ticket(tk);
                        ticket_write(tk);
                        if (start_election) {
-                               ticket_broadcast(tk, OP_VOTE_FOR, RLT_SUCCESS, OR_LOCAL_FAIL);
+                               ticket_broadcast(tk, OP_VOTE_FOR, 0, RLT_SUCCESS, OR_LOCAL_FAIL);
                        }
                }
        }
@@ -188,7 +188,8 @@
        reset_ticket(tk);
        tk->leader = no_leader;
        ticket_write(tk);
-       return ticket_broadcast(tk, OP_REVOKE, RLT_SUCCESS, OR_ADMIN);
+       ticket_activate_timeout(tk);
+       return ticket_broadcast(tk, OP_REVOKE, OP_ACK, RLT_SUCCESS, OR_ADMIN);
 }
 
 /** Ticket revoke.
@@ -313,6 +314,12 @@
 
 void update_ticket_state(struct ticket_config *tk, struct booth_site *sender)
 {
+       if (tk->state == ST_CANDIDATE) {
+               tk_log_info("learned from %s about "
+                               "newer ticket, stopping elections",
+                               site_string(sender));
+       }
+
        if (tk->leader == local || tk->is_granted) {
                /* message from a live leader with valid ticket? */
                if (sender == tk->leader && term_time_left(tk)) {
@@ -329,6 +336,9 @@
                        tk->state = ST_FOLLOWER;
                        tk->next_state = ST_FOLLOWER;
                } else {
+                       if (tk->state == ST_CANDIDATE) {
+                               tk->state = ST_FOLLOWER;
+                       }
                        tk->next_state = ST_LEADER;
                }
        } else {
@@ -372,8 +382,7 @@
                /* wait until all send their status (or the first
                 * timeout) */
                tk->start_postpone = 1;
-               expect_replies(tk, OP_MY_INDEX);
-               ticket_broadcast(tk, OP_STATUS, RLT_SUCCESS, 0);
+               ticket_broadcast(tk, OP_STATUS, OP_MY_INDEX, RLT_SUCCESS, 0);
        }
 
        return 0;
@@ -462,7 +471,8 @@
 
 
 int ticket_broadcast(struct ticket_config *tk,
-               cmd_request_t cmd, cmd_result_t res, cmd_reason_t reason)
+               cmd_request_t cmd, cmd_request_t expected_reply,
+               cmd_result_t res, cmd_reason_t reason)
 {
        struct boothc_ticket_msg msg;
 
@@ -472,6 +482,10 @@
                        ntohl(msg.ticket.term),
                        ntohl(msg.ticket.term_valid_for));
 
+       tk->last_request = cmd;
+       if (expected_reply) {
+               expect_replies(tk, expected_reply);
+       }
        return transport()->broadcast(&msg, sizeof(msg));
 }
 
@@ -569,16 +583,16 @@
        int i;
 
        if (!(tk->acks_received ^ local->bitmask)) {
-               ticket_broadcast(tk, tk->acks_expected, RLT_SUCCESS, 0);
+               ticket_broadcast(tk, tk->last_request, 0, RLT_SUCCESS, 0);
        } else {
                for (i = 0; i < booth_conf->site_count; i++) {
                        n = booth_conf->site + i;
                        if (!(tk->acks_received & n->bitmask)) {
                                tk_log_debug("resending %s to %s",
-                                               state_to_string(tk->acks_expected),
+                                               state_to_string(tk->last_request),
                                                site_string(n)
                                                );
-                               send_msg(tk->acks_expected, tk, n);
+                               send_msg(tk->last_request, tk, n);
                        }
                }
        }
@@ -598,22 +612,24 @@
        if (!majority_of_bits(tk, tk->acks_received)) {
                ack_cnt = count_bits(tk->acks_received) - 1;
                if (!ack_cnt) {
-                       tk_log_warn("no answers to heartbeat (try #%d), "
+                       tk_log_warn("no answers to our request (try #%d), "
                        "we are alone",
                        tk->retry_number);
                } else {
-                       tk_log_warn("not enough answers to heartbeat (try #%d): "
+                       tk_log_warn("not enough answers to our request (try #%d): "
                        "only got %d answers",
                        tk->retry_number,
                        ack_cnt);
                }
        } else {
                log_lost_servers(tk);
-               /* we have the majority, update the ticket, at
-                * least the local copy if we're still not
-                * allowed to commit
-                */
-               leader_update_ticket(tk);
+               if (is_owned(tk)) {
+                       /* we have the majority, update the ticket, at
+                        * least the local copy if we're still not
+                        * allowed to commit
+                        */
+                       leader_update_ticket(tk);
+               }
        }
 
        resend_msg(tk);
@@ -647,7 +663,8 @@
                no_resends(tk);
        }
 
-       if (tk->next_state) {
+       /* wanting to be follower is not much of an ambition */
+       if (tk->next_state && tk->next_state != ST_FOLLOWER) {
                switch(tk->next_state) {
                case ST_LEADER:
                        if (tk->state == ST_LEADER) {
@@ -663,7 +680,6 @@
                default:
                        break;
                }
-               tk->next_state = 0;
                tk->start_postpone = 0;
                goto out;
        }
@@ -681,15 +697,17 @@
                }
 
                tk->lost_leader = tk->leader;
-               tk->next_state = 0;
                /* Couldn't renew in time - ticket lost. */
                new_round(tk, OR_TKT_LOST);
-               return;
+               goto out;
        }
 
        switch(tk->state) {
        case ST_INIT:
-               /* init state, nothing to do */
+               /* init state, handle resends for ticket revoke */
+               if (tk->acks_expected) {
+                       handle_resends(tk);
+               }
                break;
 
        case ST_FOLLOWER:
@@ -709,7 +727,7 @@
                } else {
                        /* this is ticket renewal, run local test */
                        if (!test_external_prog(tk, 1)) {
-                               send_heartbeat(tk);
+                               ticket_broadcast(tk, OP_HEARTBEAT, OP_ACK, RLT_SUCCESS, 0);
                                ticket_activate_timeout(tk);
                        }
                }
@@ -720,6 +738,7 @@
        }
 
 out:
+       tk->next_state = 0;
        if (tk->update_cib)
                ticket_write(tk);
 }
@@ -940,19 +959,26 @@
 {
        struct boothc_ticket_msg msg;
 
+       tk_log_debug("sending reject to %s",
+                       site_string(dest));
        init_ticket_msg(&msg, OP_REJECTED, code, 0, tk);
        return booth_udp_send(dest, &msg, sizeof(msg));
 }
 
 int send_msg (
                int cmd,
-               struct ticket_config *tk,
+               struct ticket_config *current_tk,
                struct booth_site *dest
               )
 {
+       struct ticket_config *tk = current_tk;
        struct boothc_ticket_msg msg;
 
        if (cmd == OP_MY_INDEX) {
+               if (current_tk->state == ST_CANDIDATE &&
+                               current_tk->last_valid_tk->current_term) {
+                       tk = current_tk->last_valid_tk;
+               }
                tk_log_info("sending status to %s",
                                site_string(dest));
        }
diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/booth/src/ticket.h new/booth/src/ticket.h
--- old/booth/src/ticket.h      2014-06-06 10:53:11.000000000 +0200
+++ new/booth/src/ticket.h      2014-06-15 16:01:35.000000000 +0200
@@ -72,7 +72,7 @@
 char *state_to_string(uint32_t state_ho);
 int send_reject(struct booth_site *dest, struct ticket_config *tk, cmd_result_t code);
 int send_msg (int cmd, struct ticket_config *tk, struct booth_site *dest);
-int ticket_broadcast(struct ticket_config *tk, cmd_request_t cmd, cmd_result_t res, cmd_reason_t reason);
+int ticket_broadcast(struct ticket_config *tk, cmd_request_t cmd, cmd_request_t expected_reply, cmd_result_t res, cmd_reason_t reason);
 
 int leader_update_ticket(struct ticket_config *tk);
 
diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/booth/test/live_test.sh new/booth/test/live_test.sh
--- old/booth/test/live_test.sh 2014-06-06 10:53:11.000000000 +0200
+++ new/booth/test/live_test.sh 2014-06-15 16:01:35.000000000 +0200
@@ -16,6 +16,7 @@
 shift 1
 logf=test_booth.log
 iprules=/usr/share/booth/tests/test/booth_path
+netif=eth0
 : ${HA_LOGFACILITY:="syslog"}
 
 is_function() {
@@ -96,6 +97,10 @@
        done
        return $rc
 }
+dump_conf() {
+       echo "test configuration file $cnf:"
+       grep -v '^#' $cnf | grep -v '^[[:space:]]*$' | sed "s/^/$cnf: /"
+}
 forall() {
        local h rc=0
        for h in $sites $arbitrators; do
@@ -104,6 +109,14 @@
        done
        return $rc
 }
+forall_sites() {
+       local h rc=0
+       for h in $sites; do
+               ssh $h $@
+               rc=$((rc|$?))
+       done
+       return $rc
+}
 forall_fun() {
        local h rc=0 f=$1
        for h in $sites $arbitrators; do
@@ -168,6 +181,17 @@
        sleep $T_timeout
 }
 
+# tc netem, simulate packet loss, wan, etc
+netem_delay() {
+       tc qdisc add dev $netif root netem delay $1ms $(($1/10))ms
+}
+netem_loss() {
+       tc qdisc add dev $netif root netem loss $1%
+}
+netem_reset() {
+       tc qdisc del dev $netif root netem
+}
+
 cib_status() {
        local h=$1 stat
        stat=`ssh $h crm_ticket -L |
@@ -233,7 +257,10 @@
 
 booth_where_granted() {
        local grantee ticket_line
-       ticket_line=`run_arbitrator 1 booth list | grep $tkt`
+       # we don't know which sites could be stopped, so run booth
+       # list on all of them (at least one should have booth
+       # running)
+       ticket_line=`forall_sites booth list | grep $tkt | sort -u | head -1`
        grantee=`echo "$ticket_line" | sed 's/.*leader: //;s/,.*//'`
        echo $grantee
        [ "$grantee" = "none" ] && return
@@ -318,6 +345,8 @@
 
 sites=`get_servers site < $cnf`
 arbitrators=`get_servers arbitrator < $cnf`
+site_cnt=`echo $sites | wc -w`
+arbitrator_cnt=`echo $arbitrators | wc -w`
 eval `get_tkt_settings`
 
 [ -z "$sites" ] && {
@@ -339,6 +368,8 @@
 # the tests
 #
 
+## TEST: grant ##
+
 # just a grant
 test_grant() {
        run_site 1 booth revoke $tkt >/dev/null
@@ -350,6 +381,32 @@
        check_consistency `get_site 1`
 }
 
+## TEST: grant_noarb ##
+
+# just a grant with no arbitrators
+test_grant_noarb() {
+       run_site 1 booth revoke $tkt >/dev/null
+       wait_timeout
+       local h
+       for h in $arbitrators; do
+               stop_arbitrator $h
+       done >/dev/null 2>&1
+       sleep 1
+       run_site 1 booth grant $tkt >/dev/null
+       wait_timeout
+}
+check_grant_noarb() {
+       check_consistency `get_site 1`
+}
+recover_grant_noarb() {
+       local h
+       for h in $arbitrators; do
+               start_arbitrator $h
+       done >/dev/null 2>&1
+}
+
+## TEST: revoke ##
+
 # just a revoke
 test_revoke() {
        run_site 1 booth revoke $tkt >/dev/null
@@ -363,6 +420,8 @@
        check_consistency
 }
 
+## TEST: grant_elsewhere ##
+
 # just a grant to another site
 test_grant_elsewhere() {
        run_site 1 booth revoke $tkt >/dev/null
@@ -374,6 +433,8 @@
        check_consistency `get_site 2`
 }
 
+## TEST: grant_site_lost ##
+
 # grant with one site lost
 test_grant_site_lost() {
        run_site 1 booth revoke $tkt >/dev/null
@@ -392,7 +453,9 @@
        start_site `get_site 2`
 }
 
-# grant with one site lost
+## TEST: simultaneous_start_even ##
+
+# simultaneous start of even number of members
 test_simultaneous_start_even() {
        local serv
        run_site 1 booth revoke $tkt >/dev/null
@@ -407,14 +470,40 @@
        for serv in $arbitrators; do
                start_arbitrator $serv &
        done
-       wait_timeout
+       wait_half_exp
        start_site `get_site 1`
        wait_timeout
+       wait_timeout
 }
 check_simultaneous_start_even() {
        check_consistency `get_site 2`
 }
 
+## TEST: slow_start_granted ##
+
+# slow start
+test_slow_start_granted() {
+       run_site 1 booth revoke $tkt >/dev/null
+       wait_timeout
+       run_site 1 booth grant $tkt >/dev/null
+       wait_timeout
+       stop_booth
+       wait_timeout
+       for serv in $sites; do
+               start_site $serv
+               wait_timeout
+       done
+       for serv in $arbitrators; do
+               start_arbitrator $serv
+               wait_timeout
+       done
+}
+check_slow_start_granted() {
+       check_consistency `get_site 1`
+}
+
+## TEST: restart_granted ##
+
 # restart with ticket granted
 test_restart_granted() {
        run_site 1 booth revoke $tkt >/dev/null
@@ -428,6 +517,8 @@
        check_consistency `get_site 1`
 }
 
+## TEST: restart_granted_nocib ##
+
 # restart with ticket granted (but cib empty)
 test_restart_granted_nocib() {
        run_site 1 booth revoke $tkt >/dev/null
@@ -443,6 +534,8 @@
        check_consistency `get_site 1`
 }
 
+## TEST: notgranted ##
+
 # restart with ticket not granted
 test_restart_notgranted() {
        run_site 1 booth revoke $tkt >/dev/null
@@ -458,6 +551,8 @@
        check_consistency `get_site 1`
 }
 
+## TEST: failover ##
+
 # ticket failover
 test_failover() {
        run_site 1 booth revoke $tkt >/dev/null
@@ -476,6 +571,8 @@
        start_site `get_site 1`
 }
 
+## TEST: split_leader ##
+
 # split brain (leader alone)
 test_split_leader() {
        run_site 1 booth revoke $tkt >/dev/null
@@ -496,6 +593,8 @@
        run_site 1 $iprules start  >/dev/null
 }
 
+## TEST: split_follower ##
+
 # split brain (follower alone)
 test_split_follower() {
        run_site 1 booth revoke $tkt >/dev/null
@@ -512,6 +611,8 @@
        check_consistency `get_site 1`
 }
 
+## TEST: split_edge ##
+
 # split brain (leader alone)
 test_split_edge() {
        run_site 1 booth revoke $tkt >/dev/null
@@ -527,6 +628,8 @@
        check_consistency any
 }
 
+## TEST: external_prog_failed ##
+
 # external test prog failed
 test_external_prog_failed() {
        run_site 1 booth revoke $tkt >/dev/null
@@ -548,6 +651,25 @@
        [ -n `get_rsc` ]
 }
 
+# packet loss 80%
+test_loss_80() {
+       run_site 1 booth revoke $tkt >/dev/null
+       wait_timeout
+       run_site 1 booth grant $tkt >/dev/null
+       sleep 1
+       netem_loss 80
+       wait_exp
+       wait_exp
+       wait_exp
+       netem_reset
+}
+check_loss_80() {
+       check_consistency `get_site 1`
+}
+applicable_loss_80() {
+       which tc > /dev/null 2>&1
+}
+
 sync_conf || exit
 restart_booth
 all_booth_status || {
@@ -555,10 +677,12 @@
        all_booth_status || exit
 }
 
+dump_conf >&2
+
 TESTS="$@"
 
-: ${TESTS:="grant grant_elsewhere grant_site_lost revoke
-simultaneous_start_even
+: ${TESTS:="grant grant_noarb grant_elsewhere grant_site_lost revoke
+simultaneous_start_even slow_start_granted
 restart_granted restart_granted_nocib restart_notgranted
 failover split_leader split_follower split_edge
 external_prog_failed"}

-- 
To unsubscribe, e-mail: opensuse-commit+unsubscr...@opensuse.org
For additional commands, e-mail: opensuse-commit+h...@opensuse.org

Reply via email to