If the current server has never known a leader, its status should be "disconnected" from the cluster. Without this patch, when a server in a cluster is restarted, it appears as connected before it has successfully reconnected to the cluster, which is wrong.
Signed-off-by: Han Zhou <hz...@ovn.org> --- ovsdb/raft.c | 10 ++++++++-- tests/ovsdb-cluster.at | 35 +++++++++++++++++++++++++++++++++++ 2 files changed, 43 insertions(+), 2 deletions(-) diff --git a/ovsdb/raft.c b/ovsdb/raft.c index 4789bc4..6cd7b00 100644 --- a/ovsdb/raft.c +++ b/ovsdb/raft.c @@ -298,6 +298,11 @@ struct raft { bool had_leader; /* There has been leader elected since last election initiated. This is to help setting candidate_retrying. */ + + /* For all. */ + bool ever_had_leader; /* There has been leader elected since the raft + is initialized, meaning it is ever + connected. */ }; /* All Raft structures. */ @@ -1024,7 +1029,8 @@ raft_is_connected(const struct raft *raft) && !raft->joining && !raft->leaving && !raft->left - && !raft->failed); + && !raft->failed + && raft->ever_had_leader); VLOG_DBG("raft_is_connected: %s\n", ret? "true": "false"); return ret; } @@ -2519,7 +2525,7 @@ static void raft_set_leader(struct raft *raft, const struct uuid *sid) { raft->leader_sid = *sid; - raft->had_leader = true; + raft->ever_had_leader = raft->had_leader = true; raft->candidate_retrying = false; } diff --git a/tests/ovsdb-cluster.at b/tests/ovsdb-cluster.at index 5b6188b..0aa4564 100644 --- a/tests/ovsdb-cluster.at +++ b/tests/ovsdb-cluster.at @@ -179,6 +179,41 @@ AT_KEYWORDS([ovsdb server negative unix cluster disconnect]) ovsdb_test_cluster_disconnect 5 leader yes AT_CLEANUP +AT_SETUP([OVSDB cluster - initial status should be disconnected]) +AT_KEYWORDS([ovsdb server negative unix cluster disconnect]) + +n=3 +schema_name=`ovsdb-tool schema-name $abs_srcdir/idltest.ovsschema` +ordinal_schema > schema +AT_CHECK([ovsdb-tool '-vPATTERN:console:%c|%p|%m' create-cluster s1.db $abs_srcdir/idltest.ovsschema unix:s1.raft], [0], [], [stderr]) +cid=`ovsdb-tool db-cid s1.db` +schema_name=`ovsdb-tool schema-name $abs_srcdir/idltest.ovsschema` +for i in `seq 2 $n`; do + AT_CHECK([ovsdb-tool join-cluster s$i.db $schema_name unix:s$i.raft unix:s1.raft]) +done + 
+on_exit 'kill `cat *.pid`' +for i in `seq $n`; do + AT_CHECK([ovsdb-server -v -vconsole:off -vsyslog:off --detach --no-chdir --log-file=s$i.log --pidfile=s$i.pid --unixctl=s$i --remote=punix:s$i.ovsdb s$i.db]) +done +for i in `seq $n`; do + AT_CHECK([ovsdb_client_wait unix:s$i.ovsdb $schema_name connected]) +done + +# Stop all servers, and start the s1 only, to test initial connection status +# when there is no leader yet. +for i in `seq 1 $n`; do + OVS_APP_EXIT_AND_WAIT_BY_TARGET([`pwd`/s$i], [s$i.pid]) +done +i=1 +AT_CHECK([ovsdb-server -v -vconsole:off -vsyslog:off --detach --no-chdir --log-file=s$i.log --pidfile=s$i.pid --unixctl=s$i --remote=punix:s$i.ovsdb s$i.db]) + +# The initial status should be disconnected. So wait should fail. +AT_CHECK([ovsdb_client_wait --timeout=1 unix:s$i.ovsdb $schema_name connected], [142], [ignore], [ignore]) +OVS_APP_EXIT_AND_WAIT_BY_TARGET([`pwd`/s$i], [s$i.pid]) + +AT_CLEANUP + AT_BANNER([OVSDB cluster election timer change]) -- 2.1.0 _______________________________________________ dev mailing list d...@openvswitch.org https://mail.openvswitch.org/mailman/listinfo/ovs-dev