Hello community, here is the log from the commit of package booth for openSUSE:Factory checked in at 2014-06-07 07:12:33 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ Comparing /work/SRC/openSUSE:Factory/booth (Old) and /work/SRC/openSUSE:Factory/.booth.new (New) ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Package is "booth" Changes: -------- --- /work/SRC/openSUSE:Factory/booth/booth.changes 2014-05-27 11:52:00.000000000 +0200 +++ /work/SRC/openSUSE:Factory/.booth.new/booth.changes 2014-06-07 07:12:34.000000000 +0200 @@ -1,0 +2,12 @@ +Fri Jun 6 09:04:17 UTC 2014 - dmuhameda...@suse.com + +- raft: don't reset acks prematurely (bnc#881508) +- test: add more tests to live_test.sh +- raft: on server step down reset the ticket state too +- service-runnable: remove crm_simulate's tmp shadow (bnc#879934) +- raft: invalidate ticket if the leader wants to step down +- raft: delay revoke if there's another operation in progress +- main: enable coredumps +- upstream version: 465245 + +------------------------------------------------------------------- ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ Other differences: ------------------ ++++++ booth.tar.bz2 ++++++ diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/booth/docs/boothd.8.txt new/booth/docs/boothd.8.txt --- old/booth/docs/boothd.8.txt 2014-05-26 13:08:22.000000000 +0200 +++ new/booth/docs/boothd.8.txt 2014-06-06 10:53:11.000000000 +0200 @@ -167,6 +167,13 @@ Booth needs at least three members for normal operation. Odd number of members provides more redundancy. +*'site-user'*, *'site-group'*, *'arbitrator-user'*, *'arbitrator-group'*:: + These define the credentials 'boothd' will be running with. ++ +On a (Pacemaker) site the booth process will have to call 'crm_ticket', so the +default is to use 'hacluster':'haclient'; for an arbitrator this user and group +might not exist, so there we default to 'nobody':'nobody'. + *'ticket'*:: Registers a ticket. Multiple tickets can be handled by single Booth instance. @@ -218,15 +225,6 @@ the ticket is still valid afterwards, a new renewal run will be started automatically. - -*'site-user'*, *'site-group'*, *'arbitrator-user'*, *'arbitrator-group'*:: - These define the credentials 'boothd' will be running with. 
-+ -On a (Pacemaker) site the booth process will have to call 'crm_ticket', so the -default is to use 'hacluster':'haclient'; for an arbitrator this user and group -might not exists, so there we default to 'nobody':'nobody'. - - *'before-acquire-handler'*:: If set, this command will be called before 'boothd' tries to acquire or renew a ticket. On exit code other than 0, diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/booth/script/service-runnable new/booth/script/service-runnable --- old/booth/script/service-runnable 2014-05-26 13:08:22.000000000 +0200 +++ new/booth/script/service-runnable 2014-06-06 10:53:11.000000000 +0200 @@ -7,12 +7,14 @@ set -e service="${1:?Need a resource name as first argument.}" +tmpshadow=`mktemp booth-check.XXXXXX` +trap "rm -f $tmpshadow" EXIT # We expect an output like # p_dummy (ocf::pacemaker:Dummy): Started geo-rz2-a -if crm_simulate --ticket-grant "$BOOTH_TICKET" --simulate --live-check | +if crm_simulate -O $tmpshadow --ticket-grant "$BOOTH_TICKET" --simulate --live-check | sed -n '/^Revised cluster status:/,$p' | egrep "^[[:space:]]+$service[[:space:]]+\(.*\):[[:space:]]+Started ([^[:space:]]+) *$" >/dev/null then diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/booth/src/config.c new/booth/src/config.c --- old/booth/src/config.c 2014-05-26 13:08:22.000000000 +0200 +++ new/booth/src/config.c 2014-06-06 10:53:11.000000000 +0200 @@ -46,7 +46,7 @@ p = realloc(booth_conf->ticket, sizeof(struct ticket_config) * want); - if (!booth_conf) { + if (!p) { log_error("can't alloc more tickets"); return -ENOMEM; } diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/booth/src/inline-fn.h new/booth/src/inline-fn.h --- old/booth/src/inline-fn.h 2014-05-26 13:08:22.000000000 +0200 +++ new/booth/src/inline-fn.h 2014-06-06 10:53:11.000000000 +0200 @@ -297,7 +297,6 @@ { tk->retry_number = 0; tk->acks_expected = 0; - 
tk->acks_received = 0; } static inline int send_heartbeat(struct ticket_config *tk) diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/booth/src/main.c new/booth/src/main.c --- old/booth/src/main.c 2014-05-26 13:08:22.000000000 +0200 +++ new/booth/src/main.c 2014-06-06 10:53:11.000000000 +0200 @@ -34,6 +34,8 @@ #include <sys/poll.h> #include <pacemaker/crm/services.h> #include <clplumbing/setproctitle.h> +#include <sys/prctl.h> +#include <clplumbing/coredumps.h> #include <fcntl.h> #include <string.h> #include <assert.h> @@ -1240,7 +1242,6 @@ cl_log_set_facility(HA_LOG_FACILITY); cl_inherit_logging_environment(0); - log_info("BOOTH %s daemon is starting, node id is 0x%08X (%d).", type_to_string(local->type), local->site_id, local->site_id); @@ -1259,6 +1260,11 @@ if (rv) return rv; + if (cl_enable_coredumps(TRUE) < 0){ + cl_log(LOG_ERR, "enabling core dump failed"); + } + cl_cdtocoredir(); + prctl(PR_SET_DUMPABLE, (unsigned long)TRUE, 0UL, 0UL, 0UL); rv = loop(lock_fd); diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/booth/src/raft.c new/booth/src/raft.c --- old/booth/src/raft.c 2014-05-26 13:08:22.000000000 +0200 +++ new/booth/src/raft.c 2014-06-06 10:53:11.000000000 +0200 @@ -232,8 +232,6 @@ } else if (new_leader) { tk_log_info("ticket granted at %s", site_string(new_leader)); - become_follower(tk, NULL); - set_ticket_wakeup(tk); } else { tk_log_info("nobody won elections, new elections"); new_election(tk, NULL, is_tie(tk), OR_AGAIN); @@ -439,6 +437,10 @@ return 0; } + /* if the ticket is to be revoked, further processing is not + * interesting */ + if (tk->next_state == ST_INIT) + return 0; if (term == tk->current_term && leader == tk->leader) { @@ -475,6 +477,7 @@ (tk->state == ST_FOLLOWER || tk->state == ST_CANDIDATE)) { tk_log_info("%s wants to give the ticket away", site_string(tk->leader)); + time(&tk->term_expires); return new_round(tk, OR_STEPDOWN); } diff -urN 
'--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/booth/src/ticket.c new/booth/src/ticket.c --- old/booth/src/ticket.c 2014-05-26 13:08:22.000000000 +0200 +++ new/booth/src/ticket.c 2014-06-06 10:53:11.000000000 +0200 @@ -127,7 +127,8 @@ * Just send a VOTE_FOR message, so the * others can start elections. */ if (leader_and_valid(tk)) { - disown_ticket(tk); + reset_ticket(tk); + ticket_write(tk); if (start_election) { ticket_broadcast(tk, OP_VOTE_FOR, RLT_SUCCESS, OR_LOCAL_FAIL); } @@ -180,9 +181,7 @@ } -/** Ticket revoke. - * Only to be started from the leader. */ -int do_revoke_ticket(struct ticket_config *tk) +static int start_revoke_ticket(struct ticket_config *tk) { tk_log_info("revoking ticket"); @@ -192,6 +191,19 @@ return ticket_broadcast(tk, OP_REVOKE, RLT_SUCCESS, OR_ADMIN); } +/** Ticket revoke. + * Only to be started from the leader. */ +int do_revoke_ticket(struct ticket_config *tk) +{ + if (tk->acks_expected) { + tk_log_info("delay ticket revoke until the current operation finishes"); + tk->next_state = ST_INIT; + return 0; + } else { + return start_revoke_ticket(tk); + } +} + int list_ticket(char **pdata, unsigned int *len) { @@ -270,7 +282,7 @@ valid = (tk->term_expires >= time(NULL)); - if (tk->is_granted || tk->leader == local) { + if (tk->leader == local) { where_granted = "granted here"; } else { snprintf(buff, sizeof(buff), "granted to %s", @@ -304,9 +316,15 @@ if (tk->leader == local || tk->is_granted) { /* message from a live leader with valid ticket? 
*/ if (sender == tk->leader && term_time_left(tk)) { - tk_log_info("ticket was granted here, " - "but it's live at %s (revoking here)", - site_string(sender)); + if (tk->is_granted) { + tk_log_warn("ticket was granted here, " + "but it's live at %s (revoking here)", + site_string(sender)); + } else { + tk_log_info("ticket live at %s", + site_string(sender)); + } + disown_ticket(tk); ticket_write(tk); tk->state = ST_FOLLOWER; tk->next_state = ST_FOLLOWER; @@ -630,12 +648,20 @@ } if (tk->next_state) { - if (tk->next_state == ST_LEADER) { + switch(tk->next_state) { + case ST_LEADER: if (tk->state == ST_LEADER) { new_round(tk, OR_SPLIT); } else { reacquire_ticket(tk); } + break; + case ST_INIT: + no_resends(tk); + start_revoke_ticket(tk); + break; + default: + break; } tk->next_state = 0; tk->start_postpone = 0; @@ -770,6 +796,7 @@ /* got an ack! */ tk->acks_received |= sender->bitmask; + if (cmd == OP_HEARTBEAT) tk_log_debug("got ACK from %s, %d/%d agree.", site_string(sender), count_bits(tk->acks_received), diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/booth/test/live_test.sh new/booth/test/live_test.sh --- old/booth/test/live_test.sh 2014-05-26 13:08:22.000000000 +0200 +++ new/booth/test/live_test.sh 2014-06-06 10:53:11.000000000 +0200 @@ -16,6 +16,7 @@ shift 1 logf=test_booth.log iprules=/usr/share/booth/tests/test/booth_path +: ${HA_LOGFACILITY:="syslog"} is_function() { test z"`command -v $1`" = z"$1" @@ -56,6 +57,16 @@ booth_status() { test "`get_stat_fld $1 booth_state`" = "started" } +stop_booth() { + local h + for h in $sites; do + stop_site $h + done >/dev/null 2>&1 + for h in $arbitrators; do + stop_arbitrator $h + done >/dev/null 2>&1 + wait_timeout +} start_booth() { local h for h in $sites; do @@ -117,8 +128,7 @@ ssh $h $@ } get_site() { - local n=$1 h - shift 1 + local n=$1 echo $sites | awk '{print $'$n'}' } @@ -126,7 +136,16 @@ grep "^$1" | sed -n 's/.*="//;s/"//p' } +get_rsc() { + awk 
'/before-acquire-handler/{print $NF}' $cnf +} +break_external_prog() { + echo "location __pref_booth_live_test `get_rsc` rule -inf: defined #uname" | run_site 1 crm configure +} +repair_external_prog() { + run_site $1 crm configure delete __pref_booth_live_test +} get_tkt_settings() { awk ' n && /^ / && /expire|timeout/ { @@ -206,7 +225,7 @@ CIB check failed CIB grantee: $cib_grantee booth grantee: $booth_grantee -expected grantee: $booth_grantee +expected grantee: $exp_grantee EOF fi return $rc @@ -217,6 +236,7 @@ ticket_line=`run_arbitrator 1 booth list | grep $tkt` grantee=`echo "$ticket_line" | sed 's/.*leader: //;s/,.*//'` echo $grantee + [ "$grantee" = "none" ] && return ! ssh $grantee booth list | grep -q "$tkt.*pending" } check_booth_consistency() { @@ -245,25 +265,45 @@ run_site 1 booth revoke $tkt >/dev/null wait_timeout } -test_booth_status() { +all_booth_status() { forall_fun booth_status } +can_run_test() { + if is_function applicable_$1; then + if ! applicable_$1; then + echo "(not applicable, skipping)" + return 1 + fi + fi + if ! is_function test_$1 || ! is_function check_$1; then + echo "(test missing)" + return 1 + fi +} runtest() { - local start_ts end_ts rc + local start_ts end_ts rc booth_status local start_time end_time + TEST=$1 start_time=`date` start_ts=`date +%s` echo -n "Testing: $1... " + can_run_test $1 || return 0 + logger -p $HA_LOGFACILITY.info "starting booth test $1 ..." test_$1 && check_$1 rc=$? end_time=`date` end_ts=`date +%s` + logger -p $HA_LOGFACILITY.info "finished booth test $1 (exit code $rc)" is_function recover_$1 && recover_$1 - if [ $rc -eq 0 ]; then + all_booth_status + booth_status=$? + if [ $rc -eq 0 -a $booth_status -eq 0 ]; then echo OK else echo "FAIL (running hb_report ... 
$1.tar.bz2; see also $logf)" + [ $booth_status -ne 0 ] && + echo "unexpected: some booth daemons not running" echo "running hb_report" >&2 hb_report -f "`date -d @$((start_ts-5))`" \ -t "`date -d @$((end_ts+60))`" \ @@ -352,6 +392,29 @@ start_site `get_site 2` } +# grant with one site lost +test_simultaneous_start_even() { + local serv + run_site 1 booth revoke $tkt >/dev/null + wait_timeout + run_site 2 booth grant $tkt >/dev/null + wait_timeout + stop_booth + wait_timeout + for serv in $(echo $sites | sed "s/`get_site 1` //"); do + start_site $serv & + done + for serv in $arbitrators; do + start_arbitrator $serv & + done + wait_timeout + start_site `get_site 1` + wait_timeout +} +check_simultaneous_start_even() { + check_consistency `get_site 2` +} + # restart with ticket granted test_restart_granted() { run_site 1 booth revoke $tkt >/dev/null @@ -371,7 +434,9 @@ wait_timeout run_site 1 booth grant $tkt >/dev/null wait_timeout - restart_site `get_site 1` + stop_site_clean `get_site 1` || return 1 + wait_timeout + start_site `get_site 1` wait_timeout } check_restart_granted_nocib() { @@ -406,6 +471,8 @@ } check_failover() { check_consistency any +} +recover_failover() { start_site `get_site 1` } @@ -460,19 +527,41 @@ check_consistency any } +# external test prog failed +test_external_prog_failed() { + run_site 1 booth revoke $tkt >/dev/null + wait_timeout + run_site 1 booth grant $tkt >/dev/null + sleep 1 + break_external_prog 1 + wait_half_exp + wait_timeout +} +check_external_prog_failed() { + check_consistency any && + [ `booth_where_granted` != `get_site 1` ] +} +recover_external_prog_failed() { + repair_external_prog 1 +} +applicable_external_prog_failed() { + [ -n `get_rsc` ] +} + sync_conf || exit restart_booth -test_booth_status || { +all_booth_status || { reset_booth - test_booth_status || exit + all_booth_status || exit } TESTS="$@" : ${TESTS:="grant grant_elsewhere grant_site_lost revoke +simultaneous_start_even restart_granted restart_granted_nocib 
restart_notgranted -failover -split_leader split_follower split_edge"} +failover split_leader split_follower split_edge +external_prog_failed"} for t in $TESTS; do runtest $t -- To unsubscribe, e-mail: opensuse-commit+unsubscr...@opensuse.org For additional commands, e-mail: opensuse-commit+h...@opensuse.org