This is an automated email from the ASF dual-hosted git repository.
bschuchardt pushed a commit to branch develop
in repository https://gitbox.apache.org/repos/asf/geode.git
The following commit(s) were added to refs/heads/develop by this push:
new efcfa93 Revert "GEODE-6244 Healthy member kicked out by sick member"
efcfa93 is described below
commit efcfa9390c77814f7654a37e08fb76b42b782b8d
Author: Bruce Schuchardt <[email protected]>
AuthorDate: Thu Jan 17 15:40:50 2019 -0800
Revert "GEODE-6244 Healthy member kicked out by sick member"
There are unit test failures caused by what I thought were innocuous changes
This reverts commit f4b8cf2f8dbcb98b541b24238b50b4066ff136a8.
---
.../cache/query/dunit/QueryUsingPoolDUnitTest.java | 1 -
.../gms/fd/GMSHealthMonitorJUnitTest.java | 35 +----------
.../distributed/internal/membership/NetView.java | 34 +++++-----
.../membership/gms/fd/GMSHealthMonitor.java | 72 ++++++----------------
.../membership/gms/membership/GMSJoinLeave.java | 3 +-
.../membership/gms/mgr/GMSMembershipManager.java | 2 +-
.../tier/sockets/command/ExecuteFunction66.java | 2 +-
7 files changed, 40 insertions(+), 109 deletions(-)
diff --git
a/geode-core/src/distributedTest/java/org/apache/geode/cache/query/dunit/QueryUsingPoolDUnitTest.java
b/geode-core/src/distributedTest/java/org/apache/geode/cache/query/dunit/QueryUsingPoolDUnitTest.java
index f1e3ace..ab2423a 100644
---
a/geode-core/src/distributedTest/java/org/apache/geode/cache/query/dunit/QueryUsingPoolDUnitTest.java
+++
b/geode-core/src/distributedTest/java/org/apache/geode/cache/query/dunit/QueryUsingPoolDUnitTest.java
@@ -105,7 +105,6 @@ public class QueryUsingPoolDUnitTest extends
JUnit4CacheTestCase {
disconnectAllFromDS();
IgnoredException.addIgnoredException("Connection reset");
IgnoredException.addIgnoredException("Socket input is shutdown");
- IgnoredException.addIgnoredException("Connection refused");
}
@Override
diff --git
a/geode-core/src/integrationTest/java/org/apache/geode/distributed/internal/membership/gms/fd/GMSHealthMonitorJUnitTest.java
b/geode-core/src/integrationTest/java/org/apache/geode/distributed/internal/membership/gms/fd/GMSHealthMonitorJUnitTest.java
index f8f8f7f..8001aed 100644
---
a/geode-core/src/integrationTest/java/org/apache/geode/distributed/internal/membership/gms/fd/GMSHealthMonitorJUnitTest.java
+++
b/geode-core/src/integrationTest/java/org/apache/geode/distributed/internal/membership/gms/fd/GMSHealthMonitorJUnitTest.java
@@ -150,7 +150,7 @@ public class GMSHealthMonitorJUnitTest {
when(stopper.isCancelInProgress()).thenReturn(false);
if (mockMembers == null) {
- mockMembers = new ArrayList<>();
+ mockMembers = new ArrayList<InternalDistributedMember>();
for (int i = 0; i < 7; i++) {
InternalDistributedMember mbr = new
InternalDistributedMember("localhost", 8888 + i);
@@ -615,39 +615,6 @@ public class GMSHealthMonitorJUnitTest {
assertTrue(gmsHealthMonitor.isSuspectMember(memberToCheck));
}
- /**
- * a failed availablility check should initiate suspect processing
- */
- @Test
- public void testFailedCheckIfAvailableDoesNotRemoveMember() {
- NetView v = installAView();
-
- setFailureDetectionPorts(v);
-
- InternalDistributedMember memberToCheck =
gmsHealthMonitor.getNextNeighbor();
- boolean available = gmsHealthMonitor.checkIfAvailable(memberToCheck, "Not
responding", false);
- assertFalse(available);
- verify(joinLeave, never()).remove(isA(InternalDistributedMember.class),
isA(String.class));
- assertFalse(gmsHealthMonitor.isSuspectMember(memberToCheck));
- }
-
-
- /**
- * Same test as above but with request to initiate removal
- */
- @Test
- public void testFailedCheckIfAvailableRemovesMember() {
- NetView v = installAView();
-
- setFailureDetectionPorts(v);
-
- InternalDistributedMember memberToCheck =
gmsHealthMonitor.getNextNeighbor();
- boolean available = gmsHealthMonitor.checkIfAvailable(memberToCheck, "Not
responding", true);
- assertFalse(available);
- verify(joinLeave).remove(isA(InternalDistributedMember.class),
isA(String.class));
- }
-
-
@Test
public void testShutdown() {
diff --git
a/geode-core/src/main/java/org/apache/geode/distributed/internal/membership/NetView.java
b/geode-core/src/main/java/org/apache/geode/distributed/internal/membership/NetView.java
index 6e304e7..c9ed332 100644
---
a/geode-core/src/main/java/org/apache/geode/distributed/internal/membership/NetView.java
+++
b/geode-core/src/main/java/org/apache/geode/distributed/internal/membership/NetView.java
@@ -512,23 +512,35 @@ public class NetView implements DataSerializableFixedID {
InternalDistributedMember lead = getLeadMember();
StringBuilder sb = new StringBuilder(200);
-
sb.append("View[").append(creator).append('|').append(viewId).append("]\nmembers:
[");
+ sb.append("View[").append(creator).append('|').append(viewId).append("]
members: [");
+ boolean first = true;
for (InternalDistributedMember mbr : this.members) {
- sb.append("\n").append(mbr);
+ if (!first)
+ sb.append(", ");
+ sb.append(mbr);
if (mbr == lead) {
sb.append("{lead}");
}
+ first = false;
}
if (!this.shutdownMembers.isEmpty()) {
- sb.append("]\nshutdown: [");
+ sb.append("] shutdown: [");
+ first = true;
for (InternalDistributedMember mbr : this.shutdownMembers) {
- sb.append("\n").append(mbr);
+ if (!first)
+ sb.append(", ");
+ sb.append(mbr);
+ first = false;
}
}
if (!this.crashedMembers.isEmpty()) {
- sb.append("]\ncrashed: [");
+ sb.append("] crashed: [");
+ first = true;
for (InternalDistributedMember mbr : this.crashedMembers) {
- sb.append("\n").append(mbr);
+ if (!first)
+ sb.append(", ");
+ sb.append(mbr);
+ first = false;
}
}
// sb.append("] fd ports: [");
@@ -628,14 +640,4 @@ public class NetView implements DataSerializableFixedID {
public int getDSFID() {
return NETVIEW;
}
-
-
- /**
- * This will alter the NetView to throw exceptions if an attempt is made to
- * add or remove members from the view. Do this to avoid concurrent
- * modification exceptions.
- */
- public void makeImmutable() {
- this.members = Collections.unmodifiableList(members);
- }
}
diff --git
a/geode-core/src/main/java/org/apache/geode/distributed/internal/membership/gms/fd/GMSHealthMonitor.java
b/geode-core/src/main/java/org/apache/geode/distributed/internal/membership/gms/fd/GMSHealthMonitor.java
index cf6e9e5..45bd9c1 100644
---
a/geode-core/src/main/java/org/apache/geode/distributed/internal/membership/gms/fd/GMSHealthMonitor.java
+++
b/geode-core/src/main/java/org/apache/geode/distributed/internal/membership/gms/fd/GMSHealthMonitor.java
@@ -437,7 +437,10 @@ public class GMSHealthMonitor implements HealthMonitor,
MessageHandler {
if (services.getJoinLeave().isMemberLeaving(mbr)) {
return;
}
- sendSuspectRequest(Collections.singletonList(new SuspectRequest(mbr,
reason)));
+ SuspectRequest sr = new SuspectRequest(mbr, reason);
+ List<SuspectRequest> sl = new ArrayList<>();
+ sl.add(sr);
+ sendSuspectRequest(sl);
}
/**
@@ -579,8 +582,7 @@ public class GMSHealthMonitor implements HealthMonitor,
MessageHandler {
return false;
}
} catch (SocketTimeoutException e) {
- logger.debug("Availability check TCP/IP connection timed out for suspect
member {}",
- suspectMember);
+ logger.debug("Final check TCP/IP connection timed out for suspect member
{}", suspectMember);
return false;
} catch (IOException e) {
logger.trace("Unexpected exception", e);
@@ -1102,9 +1104,6 @@ public class GMSHealthMonitor implements HealthMonitor,
MessageHandler {
NetView cv = currentView;
- logger.info("Received suspect message {} with current view {}",
incomingRequest,
- cv == null ? "<no view>" : cv.getViewId());
-
if (cv == null) {
return;
}
@@ -1139,9 +1138,7 @@ public class GMSHealthMonitor implements HealthMonitor,
MessageHandler {
}
}
- logger.debug(
- "Processing suspect requests {}\nproposed view is currently {}\nwith
coordinator {}",
- suspectRequests, cv, cv.getCoordinator());
+ logger.debug("Processing suspect requests {}", suspectRequests);
if (cv.getCoordinator().equals(localAddress)) {
// This process is the membership coordinator and should perform a final
check
logSuspectRequests(incomingRequest, sender);
@@ -1156,26 +1153,13 @@ public class GMSHealthMonitor implements HealthMonitor,
MessageHandler {
synchronized (suspectRequestsInView) {
recordSuspectRequests(suspectRequests, cv);
Set<SuspectRequest> suspectsInView = suspectRequestsInView.get(cv);
- logger.info("Current suspects are {}", suspectsInView);
+ logger.debug("Current suspects for view #{} are {}", cv.getViewId(),
suspectsInView);
for (final SuspectRequest sr : suspectsInView) {
check.remove(sr.getSuspectMember());
membersToCheck.add(sr);
}
}
- List membersLeaving = new ArrayList();
- for (InternalDistributedMember member : cv.getMembers()) {
- if (services.getJoinLeave().isMemberLeaving(member)) {
- membersLeaving.add(member);
- }
- }
- if (!membersLeaving.isEmpty()) {
- logger.info("Current leave requests are {}", membersLeaving);
- check.removeAll(membersLeaving);
- }
- logger.info(
- "Proposed view with suspects & leaving members removed is {}\nwith
coordinator {}\nmy address is {}",
- check,
- check.getCoordinator(), localAddress);
+ logger.trace("Trial view with suspects removed is {}\nmy address is {}",
check, localAddress);
InternalDistributedMember coordinator = check.getCoordinator();
if (coordinator != null && coordinator.equals(localAddress)) {
@@ -1233,7 +1217,7 @@ public class GMSHealthMonitor implements HealthMonitor,
MessageHandler {
}
final String reason = sr.getReason();
- logger.debug("Scheduling availability check for member {}; reason={}",
mbr, reason);
+ logger.debug("Scheduling final check for member {}; reason={}", mbr,
reason);
// its a coordinator
checkExecutor.execute(() -> {
try {
@@ -1248,7 +1232,7 @@ public class GMSHealthMonitor implements HealthMonitor,
MessageHandler {
}
protected boolean inlineCheckIfAvailable(final InternalDistributedMember
initiator,
- final NetView cv, boolean forceRemovalIfCheckFails, final
InternalDistributedMember mbr,
+ final NetView cv, boolean initiateRemoval, final
InternalDistributedMember mbr,
final String reason) {
if (services.getJoinLeave().isMemberLeaving(mbr)) {
@@ -1257,7 +1241,7 @@ public class GMSHealthMonitor implements HealthMonitor,
MessageHandler {
boolean failed = false;
- logger.info("Performing availability check for suspect member {}
reason={}", mbr, reason);
+ logger.info("Performing final check for suspect member {} reason={}", mbr,
reason);
membersInFinalCheck.add(mbr);
setNextNeighbor(currentView, mbr);
@@ -1294,36 +1278,17 @@ public class GMSHealthMonitor implements HealthMonitor,
MessageHandler {
if (!pinged && !isStopping) {
TimeStamp ts = memberTimeStamps.get(mbr);
if (ts == null || ts.getTime() < startTime) {
- logger.info("Availability check failed for member {}", mbr);
- // if the final check fails & this VM is the coordinator we don't
need to do another final
- // check
- if (forceRemovalIfCheckFails) {
+ logger.info("Final check failed for member {}", mbr);
+ if (initiateRemoval) {
logger.info("Requesting removal of suspect member {}", mbr);
services.getJoinLeave().remove(mbr, reason);
- // make sure it is still suspected
- memberSuspected(localAddress, mbr, reason);
- } else {
- // if this node can survive an availability check then initiate
suspicion about
- // the node that failed the availability check
- if (doTCPCheckMember(localAddress, this.socketPort)) {
- membersInFinalCheck.remove(mbr);
- // tell peers about this member and then perform another
availability check
- memberSuspected(localAddress, mbr, reason);
- initiateSuspicion(mbr, reason);
- SuspectMembersMessage suspectMembersMessage =
- new
SuspectMembersMessage(Collections.singletonList(localAddress),
- Collections
- .singletonList(new SuspectRequest(mbr, "failed
availability check")));
- suspectMembersMessage.setSender(localAddress);
- logger.info("Performing local processing on suspect request");
- processSuspectMembersRequest(suspectMembersMessage);
- }
}
+ // make sure it is still suspected
+ memberSuspected(localAddress, mbr, reason);
failed = true;
} else {
logger.info(
- "Availability check failed but detected recent message traffic
for suspect member "
- + mbr);
+ "Final check failed but detected recent message traffic for
suspect member " + mbr);
}
}
@@ -1335,7 +1300,7 @@ public class GMSHealthMonitor implements HealthMonitor,
MessageHandler {
services.getMessenger().send(message);
}
- logger.info("Availability check passed for suspect member " + mbr);
+ logger.info("Final check passed for suspect member " + mbr);
}
} finally {
if (!failed) {
@@ -1373,7 +1338,6 @@ public class GMSHealthMonitor implements HealthMonitor,
MessageHandler {
recipients = currentView.getMembers();
}
- logger.info("Sending suspect messages to {}", recipients);
SuspectMembersMessage smm = new SuspectMembersMessage(recipients,
requests);
Set<InternalDistributedMember> failedRecipients;
try {
@@ -1384,7 +1348,7 @@ public class GMSHealthMonitor implements HealthMonitor,
MessageHandler {
}
if (failedRecipients != null && failedRecipients.size() > 0) {
- logger.info("Unable to send suspect message to {}", failedRecipients);
+ logger.info("Unable to send suspect message to {}", recipients);
}
}
diff --git
a/geode-core/src/main/java/org/apache/geode/distributed/internal/membership/gms/membership/GMSJoinLeave.java
b/geode-core/src/main/java/org/apache/geode/distributed/internal/membership/gms/membership/GMSJoinLeave.java
index 39c03cb..5693fbb 100644
---
a/geode-core/src/main/java/org/apache/geode/distributed/internal/membership/gms/membership/GMSJoinLeave.java
+++
b/geode-core/src/main/java/org/apache/geode/distributed/internal/membership/gms/membership/GMSJoinLeave.java
@@ -923,7 +923,7 @@ public class GMSJoinLeave implements JoinLeave,
MessageHandler {
getViewCreator().markViewCreatorForShutdown();
this.isCoordinator = false;
}
- installView(new NetView(view, view.getViewId()));
+ installView(view);
}
if (recips.isEmpty()) {
@@ -1397,7 +1397,6 @@ public class GMSJoinLeave implements JoinLeave,
MessageHandler {
}
logger.info("received new view: {}\nold view is: {}", newView,
currentView);
- newView.makeImmutable();
if (currentView == null && !this.isJoined) {
boolean found = false;
diff --git
a/geode-core/src/main/java/org/apache/geode/distributed/internal/membership/gms/mgr/GMSMembershipManager.java
b/geode-core/src/main/java/org/apache/geode/distributed/internal/membership/gms/mgr/GMSMembershipManager.java
index fedbe58..6ccfcb8 100644
---
a/geode-core/src/main/java/org/apache/geode/distributed/internal/membership/gms/mgr/GMSMembershipManager.java
+++
b/geode-core/src/main/java/org/apache/geode/distributed/internal/membership/gms/mgr/GMSMembershipManager.java
@@ -1671,7 +1671,7 @@ public class GMSMembershipManager implements
MembershipManager, Manager {
@Override
public boolean verifyMember(DistributedMember mbr, String reason) {
return mbr != null && memberExists(mbr)
- && this.services.getHealthMonitor().checkIfAvailable(mbr, reason,
false);
+ && this.services.getHealthMonitor().checkIfAvailable(mbr, reason,
true);
}
/**
diff --git
a/geode-core/src/main/java/org/apache/geode/internal/cache/tier/sockets/command/ExecuteFunction66.java
b/geode-core/src/main/java/org/apache/geode/internal/cache/tier/sockets/command/ExecuteFunction66.java
index 9a04143..20d95bd 100644
---
a/geode-core/src/main/java/org/apache/geode/internal/cache/tier/sockets/command/ExecuteFunction66.java
+++
b/geode-core/src/main/java/org/apache/geode/internal/cache/tier/sockets/command/ExecuteFunction66.java
@@ -164,7 +164,7 @@ public class ExecuteFunction66 extends BaseCommand {
functionObject = internalFunctionExecutionService.getFunction((String)
function);
if (functionObject == null) {
String message = String
- .format("Function named %s is not registered to
FunctionService", function);
+ .format("Function named %s is not registered to FunctionService
for %s", function);
logger.warn("{}: {}", serverConnection.getName(), message);
sendError(hasResult, clientMessage, message, serverConnection);
return;