This is an automated email from the ASF dual-hosted git repository. bschuchardt pushed a commit to branch feature/GEODE-3780 in repository https://gitbox.apache.org/repos/asf/geode.git
The following commit(s) were added to refs/heads/feature/GEODE-3780 by this push: new 38b75a9 GEODE-3780 suspected member is never watched again after passing final check 38b75a9 is described below commit 38b75a90b2164c0dfd3deb8ef21b059befc9168b Author: Bruce Schuchardt <bschucha...@pivotal.io> AuthorDate: Tue Aug 14 13:23:56 2018 -0700 GEODE-3780 suspected member is never watched again after passing final check Changes to address Darrel's review comments --- .../internal/membership/gms/ServiceConfig.java | 27 ++++------------------ .../membership/gms/fd/GMSHealthMonitor.java | 13 +++++++---- .../membership/gms/membership/GMSJoinLeave.java | 4 +++- 3 files changed, 15 insertions(+), 29 deletions(-) diff --git a/geode-core/src/main/java/org/apache/geode/distributed/internal/membership/gms/ServiceConfig.java b/geode-core/src/main/java/org/apache/geode/distributed/internal/membership/gms/ServiceConfig.java index 395e94d..09e2ed8 100644 --- a/geode-core/src/main/java/org/apache/geode/distributed/internal/membership/gms/ServiceConfig.java +++ b/geode-core/src/main/java/org/apache/geode/distributed/internal/membership/gms/ServiceConfig.java @@ -27,11 +27,12 @@ public class ServiceConfig { public static final long MEMBER_REQUEST_COLLECTION_INTERVAL = Long.getLong(DistributionConfig.GEMFIRE_PREFIX + "member-request-collection-interval", 300); + /** in a small cluster we might want to involve all members in operations */ + public static final int SMALL_CLUSTER_SIZE = 9; + /** various settings from Geode configuration */ private final long joinTimeout; private final int[] membershipPortRange; - private final int udpRecvBufferSize; - private final int udpSendBufferSize; private final long memberTimeout; private Integer lossThreshold; private final Integer memberWeight; @@ -79,12 +80,8 @@ public class ServiceConfig { return networkPartitionDetectionEnabled; } - public void setNetworkPartitionDetectionEnabled(boolean enabled) { - this.networkPartitionDetectionEnabled = enabled; - } - public boolean areLocatorsPreferredAsCoordinators() { - boolean locatorsAreCoordinators = false; + boolean locatorsAreCoordinators; if (networkPartitionDetectionEnabled) { locatorsAreCoordinators = true; @@ -139,24 +136,8 @@ public class ServiceConfig { membershipPortRange = theConfig.getMembershipPortRange(); - udpRecvBufferSize = DistributionConfig.DEFAULT_UDP_RECV_BUFFER_SIZE_REDUCED; - udpSendBufferSize = theConfig.getUdpSendBufferSize(); - memberTimeout = theConfig.getMemberTimeout(); - // The default view-ack timeout in 7.0 is 12347 ms but is adjusted based on the member-timeout. - // We don't want a longer timeout than 12437 because new members will likely time out trying to - // connect because their join timeouts are set to expect a shorter period - int ackCollectionTimeout = theConfig.getMemberTimeout() * 2 * 12437 / 10000; - if (ackCollectionTimeout < 1500) { - ackCollectionTimeout = 1500; - } else if (ackCollectionTimeout > 12437) { - ackCollectionTimeout = 12437; - } - ackCollectionTimeout = Integer - .getInteger(DistributionConfig.GEMFIRE_PREFIX + "VIEW_ACK_TIMEOUT", ackCollectionTimeout) - .intValue(); - lossThreshold = Integer.getInteger(DistributionConfig.GEMFIRE_PREFIX + "network-partition-threshold", 51); if (lossThreshold < 51) diff --git a/geode-core/src/main/java/org/apache/geode/distributed/internal/membership/gms/fd/GMSHealthMonitor.java b/geode-core/src/main/java/org/apache/geode/distributed/internal/membership/gms/fd/GMSHealthMonitor.java index d66155b..f31a0c3 100644 --- a/geode-core/src/main/java/org/apache/geode/distributed/internal/membership/gms/fd/GMSHealthMonitor.java +++ b/geode-core/src/main/java/org/apache/geode/distributed/internal/membership/gms/fd/GMSHealthMonitor.java @@ -66,6 +66,7 @@ import org.apache.geode.distributed.internal.InternalDistributedSystem; import org.apache.geode.distributed.internal.membership.InternalDistributedMember; import org.apache.geode.distributed.internal.membership.NetView; import org.apache.geode.distributed.internal.membership.gms.GMSMember; +import org.apache.geode.distributed.internal.membership.gms.ServiceConfig; import org.apache.geode.distributed.internal.membership.gms.Services; import org.apache.geode.distributed.internal.membership.gms.interfaces.HealthMonitor; import org.apache.geode.distributed.internal.membership.gms.interfaces.MessageHandler; @@ -1001,8 +1002,8 @@ public class GMSHealthMonitor implements HealthMonitor, MessageHandler { @Override public void memberSuspected(InternalDistributedMember initiator, InternalDistributedMember suspect, String reason) { - suspectedMemberIds.putIfAbsent(suspect, currentView); synchronized (suspectRequestsInView) { + suspectedMemberIds.putIfAbsent(suspect, currentView); Collection<SuspectRequest> requests = suspectRequestsInView.get(currentView); boolean found = false; if (requests == null) { @@ -1022,9 +1023,10 @@ public class GMSHealthMonitor implements HealthMonitor, MessageHandler { } private void memberUnsuspected(InternalDistributedMember mbr) { - logger.info("No longer suspecting {}", mbr); - suspectedMemberIds.remove(mbr); synchronized (suspectRequestsInView) { + if (suspectedMemberIds.remove(mbr) != null) { + logger.info("No longer suspecting {}", mbr); + } Collection<SuspectRequest> suspectRequests = suspectRequestsInView.get(currentView); if (suspectRequests != null) { Collection<SuspectRequest> removals = new ArrayList<>(suspectRequests.size()); @@ -1361,7 +1363,7 @@ public class GMSHealthMonitor implements HealthMonitor, MessageHandler { private void sendSuspectRequest(final List<SuspectRequest> requests) { logger.debug("Sending suspect request for members {}", requests); List<InternalDistributedMember> recipients; - if (currentView.size() > 9) { + if (currentView.size() > ServiceConfig.SMALL_CLUSTER_SIZE) { HashSet<InternalDistributedMember> filter = new HashSet<>(); for (Enumeration<InternalDistributedMember> e = suspectedMemberIds.keys(); e .hasMoreElements();) { @@ -1370,7 +1372,8 @@ public class GMSHealthMonitor implements HealthMonitor, MessageHandler { filter.addAll( requests.stream().map(SuspectRequest::getSuspectMember).collect(Collectors.toList())); recipients = - currentView.getPreferredCoordinators(filter, services.getJoinLeave().getMemberID(), 10); + currentView.getPreferredCoordinators(filter, services.getJoinLeave().getMemberID(), + ServiceConfig.SMALL_CLUSTER_SIZE + 1); } else { recipients = currentView.getMembers(); } diff --git a/geode-core/src/main/java/org/apache/geode/distributed/internal/membership/gms/membership/GMSJoinLeave.java b/geode-core/src/main/java/org/apache/geode/distributed/internal/membership/gms/membership/GMSJoinLeave.java index 45b17bf..7b257ef 100644 --- a/geode-core/src/main/java/org/apache/geode/distributed/internal/membership/gms/membership/GMSJoinLeave.java +++ b/geode-core/src/main/java/org/apache/geode/distributed/internal/membership/gms/membership/GMSJoinLeave.java @@ -65,6 +65,7 @@ import org.apache.geode.distributed.internal.membership.NetMember; import org.apache.geode.distributed.internal.membership.NetView; import org.apache.geode.distributed.internal.membership.gms.GMSMember; import org.apache.geode.distributed.internal.membership.gms.GMSUtil; +import org.apache.geode.distributed.internal.membership.gms.ServiceConfig; import org.apache.geode.distributed.internal.membership.gms.Services; import org.apache.geode.distributed.internal.membership.gms.interfaces.JoinLeave; import org.apache.geode.distributed.internal.membership.gms.interfaces.MessageHandler; @@ -1660,7 +1661,8 @@ public class GMSJoinLeave implements JoinLeave, MessageHandler { processRemoveRequest(msg); if (!this.isCoordinator) { msg.resetRecipients(); - msg.setRecipients(v.getPreferredCoordinators(Collections.emptySet(), localAddress, 10)); + msg.setRecipients(v.getPreferredCoordinators(Collections.emptySet(), localAddress, + ServiceConfig.SMALL_CLUSTER_SIZE + 1)); services.getMessenger().send(msg); } } else {