This is an automated email from the ASF dual-hosted git repository. echobravo pushed a commit to branch revert/1.13 in repository https://gitbox.apache.org/repos/asf/geode.git
commit 2249dd163948be472b72dd0d0204b6a5952fc181 Author: Ernest Burghardt <eburgha...@pivotal.io> AuthorDate: Tue Jun 22 13:50:05 2021 -0500 Revert "GEODE-7861: Improve error reporting in GMSJoinLeave.join() (#5839)" This reverts commit 53d32c325c808d6e965a45cdf36aca1a71db2183. --- .../apache/geode/distributed/LocatorDUnitTest.java | 3 +- .../gms/membership/GMSJoinLeaveJUnitTest.java | 100 +++++---------------- .../internal/membership/gms/GMSMembership.java | 7 +- .../membership/gms/interfaces/JoinLeave.java | 12 +-- .../membership/gms/membership/GMSJoinLeave.java | 76 ++++++---------- 5 files changed, 55 insertions(+), 143 deletions(-) diff --git a/geode-core/src/distributedTest/java/org/apache/geode/distributed/LocatorDUnitTest.java b/geode-core/src/distributedTest/java/org/apache/geode/distributed/LocatorDUnitTest.java index e0f8966..d3c1733 100644 --- a/geode-core/src/distributedTest/java/org/apache/geode/distributed/LocatorDUnitTest.java +++ b/geode-core/src/distributedTest/java/org/apache/geode/distributed/LocatorDUnitTest.java @@ -1002,8 +1002,7 @@ public class LocatorDUnitTest implements Serializable { } catch (GemFireConfigException ex) { String s = ex.getMessage(); - assertThat(s.contains("Could not contact any of the locators")) - .isTrue(); + assertThat(s.contains("Locator does not exist")).isTrue(); } } diff --git a/geode-membership/src/integrationTest/java/org/apache/geode/distributed/internal/membership/gms/membership/GMSJoinLeaveJUnitTest.java b/geode-membership/src/integrationTest/java/org/apache/geode/distributed/internal/membership/gms/membership/GMSJoinLeaveJUnitTest.java index fd15db6..3a86a1e 100644 --- a/geode-membership/src/integrationTest/java/org/apache/geode/distributed/internal/membership/gms/membership/GMSJoinLeaveJUnitTest.java +++ b/geode-membership/src/integrationTest/java/org/apache/geode/distributed/internal/membership/gms/membership/GMSJoinLeaveJUnitTest.java @@ -22,14 +22,12 @@ import static org.junit.Assert.assertEquals; import static org.junit.Assert.assertFalse; import static org.junit.Assert.assertNotNull; import static org.junit.Assert.assertTrue; -import static org.mockito.ArgumentMatchers.any; import static org.mockito.ArgumentMatchers.anyBoolean; import static org.mockito.ArgumentMatchers.anyInt; import static org.mockito.ArgumentMatchers.eq; import static org.mockito.Matchers.isA; import static org.mockito.Mockito.mock; import static org.mockito.Mockito.never; -import static org.mockito.Mockito.spy; import static org.mockito.Mockito.timeout; import static org.mockito.Mockito.times; import static org.mockito.Mockito.verify; @@ -117,18 +115,11 @@ public class GMSJoinLeaveJUnitTest { public void initMocks(boolean enableNetworkPartition, boolean useTestGMSJoinLeave) throws Exception { - String locator = "localhost[12345]"; - initMocks(enableNetworkPartition, useTestGMSJoinLeave, locator, locator); - } - - public void initMocks(boolean enableNetworkPartition, boolean useTestGMSJoinLeave, - String locators, String startLocator) - throws Exception { mockConfig = mock(MembershipConfig.class); when(mockConfig.isNetworkPartitionDetectionEnabled()).thenReturn(enableNetworkPartition); when(mockConfig.getSecurityUDPDHAlgo()).thenReturn(""); - when(mockConfig.getStartLocator()).thenReturn(startLocator); - when(mockConfig.getLocators()).thenReturn(locators); + when(mockConfig.getStartLocator()).thenReturn("localhost[12345]"); + when(mockConfig.getLocators()).thenReturn("localhost[12345]"); when(mockConfig.getMcastPort()).thenReturn(0); when(mockConfig.getMemberTimeout()).thenReturn(2000L); @@ -1432,7 +1423,14 @@ public class GMSJoinLeaveJUnitTest { @Test public void testCoordinatorFindRequestSuccess() throws Exception { initMocks(false); - mockRequestToServer(isA(HostAndPort.class)); + HashSet<MemberIdentifier> registrants = new HashSet<>(); + registrants.add(mockMembers[0]); + FindCoordinatorResponse fcr = new FindCoordinatorResponse(mockMembers[0], mockMembers[0], false, + null, registrants, false, true, null); + + when(locatorClient.requestToServer(isA(HostAndPort.class), + isA(FindCoordinatorRequest.class), anyInt(), anyBoolean())) + .thenReturn(fcr); boolean foundCoordinator = gmsJoinLeave.findCoordinator(); assertTrue(gmsJoinLeave.searchState.toString(), foundCoordinator); @@ -1443,80 +1441,22 @@ public class GMSJoinLeaveJUnitTest { public void testCoordinatorFindRequestFailure() throws Exception { try { initMocks(false); - mockRequestToServer(eq(new HostAndPort("localhost", 12346))); + HashSet<MemberIdentifier> registrants = new HashSet<>(); + registrants.add(mockMembers[0]); + FindCoordinatorResponse fcr = new FindCoordinatorResponse(mockMembers[0], mockMembers[0], + false, null, registrants, false, true, null); GMSMembershipView view = createView(); JoinResponseMessage jrm = new JoinResponseMessage(mockMembers[0], view, 0); gmsJoinLeave.setJoinResponseMessage(jrm); - assertThatThrownBy(gmsJoinLeave::join) - .isInstanceOf(MembershipConfigurationException.class); - } finally { - } - } - - @Test - public void testJoinFailureWhenSleepInterrupted() throws Exception { - initMocks(false); - mockRequestToServer(isA(HostAndPort.class)); - - when(mockConfig.getMemberTimeout()).thenReturn(100L); - when(mockConfig.getJoinTimeout()).thenReturn(1000L); - - GMSJoinLeave spyGmsJoinLeave = spy(gmsJoinLeave); - when(spyGmsJoinLeave.pauseIfThereIsNoCoordinator(-1, GMSJoinLeave.JOIN_RETRY_SLEEP)) - .thenThrow(new InterruptedException()); - - assertThatThrownBy(spyGmsJoinLeave::join) - .isInstanceOf(MembershipConfigurationException.class) - .hasMessageContaining("Retry sleep interrupted"); - } - - @Test - public void testJoinFailureWhenTimeout() throws Exception { - initMocks(false); - mockRequestToServer(isA(HostAndPort.class)); - - assertThatThrownBy(() -> gmsJoinLeave.join()) - .isInstanceOf(MembershipConfigurationException.class) - .hasMessageContaining("Operation timed out"); - } - - @Test - public void testPauseIfThereIsNoCoordinator() throws InterruptedException { - locatorClient = mock(TcpClient.class); - gmsJoinLeave = new GMSJoinLeave(locatorClient); - assertThat(gmsJoinLeave.pauseIfThereIsNoCoordinator(-1, GMSJoinLeave.JOIN_RETRY_SLEEP)) - .isFalse(); - assertThat(gmsJoinLeave.pauseIfThereIsNoCoordinator(1, GMSJoinLeave.JOIN_RETRY_SLEEP)).isTrue(); - } - - @Test - public void testJoinFailureWhenNoLocator() throws Exception { - final String locator1 = "locator1[12345]"; - final String locator2 = "locator2[54321]"; - locatorClient = mock(TcpClient.class); + when(locatorClient.requestToServer(eq(new HostAndPort("localhost", 12346)), + isA(FindCoordinatorRequest.class), anyInt(), anyBoolean())) + .thenReturn(fcr); - initMocks(false, false, locator1 + ',' + locator2, locator1); - when(locatorClient.requestToServer(any(), any(), anyInt(), anyBoolean())) - .thenThrow(IOException.class); - - assertThatThrownBy(gmsJoinLeave::join) - .isInstanceOf(MembershipConfigurationException.class) - .hasMessageContaining( - "Could not contact any of the locators: [HostAndPort[locator1:12345], HostAndPort[locator2:54321]]") - .hasCauseInstanceOf(IOException.class); - } - - private void mockRequestToServer(HostAndPort hostAndPort) - throws IOException, ClassNotFoundException { - HashSet<MemberIdentifier> registrants = new HashSet<>(); - registrants.add(mockMembers[0]); + assertFalse("Should not be able to join ", gmsJoinLeave.join()); + } finally { - FindCoordinatorResponse fcr = new FindCoordinatorResponse(mockMembers[0], mockMembers[0], false, - null, registrants, false, true, null); - when(locatorClient.requestToServer(hostAndPort, - isA(FindCoordinatorRequest.class), anyInt(), anyBoolean())) - .thenReturn(fcr); + } } private void waitForViewAndFinalCheckInProgress(int viewId) throws InterruptedException { diff --git a/geode-membership/src/main/java/org/apache/geode/distributed/internal/membership/gms/GMSMembership.java b/geode-membership/src/main/java/org/apache/geode/distributed/internal/membership/gms/GMSMembership.java index 2d88312..e1d1b55 100644 --- a/geode-membership/src/main/java/org/apache/geode/distributed/internal/membership/gms/GMSMembership.java +++ b/geode-membership/src/main/java/org/apache/geode/distributed/internal/membership/gms/GMSMembership.java @@ -571,7 +571,12 @@ public class GMSMembership<ID extends MemberIdentifier> implements Membership<ID this.isJoining = true; // added for bug #44373 // connect - services.getJoinLeave().join(); + boolean ok = services.getJoinLeave().join(); + + if (!ok) { + throw new MembershipConfigurationException("Unable to join the distributed system. " + + "Operation either timed out, was stopped or Locator does not exist."); + } MembershipView<ID> initialView = createGeodeView(services.getJoinLeave().getView()); latestView = new MembershipView<>(initialView, initialView.getViewId()); diff --git a/geode-membership/src/main/java/org/apache/geode/distributed/internal/membership/gms/interfaces/JoinLeave.java b/geode-membership/src/main/java/org/apache/geode/distributed/internal/membership/gms/interfaces/JoinLeave.java index 1880a26..7228162 100755 --- a/geode-membership/src/main/java/org/apache/geode/distributed/internal/membership/gms/interfaces/JoinLeave.java +++ b/geode-membership/src/main/java/org/apache/geode/distributed/internal/membership/gms/interfaces/JoinLeave.java @@ -16,7 +16,6 @@ package org.apache.geode.distributed.internal.membership.gms.interfaces; import org.apache.geode.distributed.internal.membership.api.MemberIdentifier; import org.apache.geode.distributed.internal.membership.api.MemberStartupException; -import org.apache.geode.distributed.internal.membership.api.MembershipConfigurationException; import org.apache.geode.distributed.internal.membership.gms.GMSMembershipView; /** @@ -27,15 +26,10 @@ import org.apache.geode.distributed.internal.membership.gms.GMSMembershipView; public interface JoinLeave<ID extends MemberIdentifier> extends Service<ID> { /** - * joins the distributed system. - * - * @throws MemberStartupException if there was a problem joining the cluster after membership - * configuration has - * completed. - * @throws MembershipConfigurationException if operation either timed out, was stopped or locator - * does not exist. + * joins the distributed system and returns true if successful, false if not. Throws + * MemberStartupException and MemberConfigurationException */ - void join() throws MemberStartupException; + boolean join() throws MemberStartupException; /** * leaves the distributed system. Should be invoked before stop() diff --git a/geode-membership/src/main/java/org/apache/geode/distributed/internal/membership/gms/membership/GMSJoinLeave.java b/geode-membership/src/main/java/org/apache/geode/distributed/internal/membership/gms/membership/GMSJoinLeave.java index 7328ae3..ff5c619 100644 --- a/geode-membership/src/main/java/org/apache/geode/distributed/internal/membership/gms/membership/GMSJoinLeave.java +++ b/geode-membership/src/main/java/org/apache/geode/distributed/internal/membership/gms/membership/GMSJoinLeave.java @@ -273,13 +273,11 @@ public class GMSJoinLeave<ID extends MemberIdentifier> implements JoinLeave<ID> int lastFindCoordinatorInViewId = -1000; final Set<FindCoordinatorResponse<ID>> responses = new HashSet<>(); public int responsesExpected; - Exception lastLocatorException; void cleanup() { alreadyTried.clear(); possibleCoordinator = null; view = null; - lastLocatorException = null; synchronized (responses) { responses.clear(); } @@ -317,14 +315,14 @@ public class GMSJoinLeave<ID extends MemberIdentifier> implements JoinLeave<ID> * @return true if successful, false if not */ @Override - public void join() throws MemberStartupException { + public boolean join() throws MemberStartupException { try { if (Boolean.getBoolean(BYPASS_DISCOVERY_PROPERTY)) { synchronized (viewInstallationLock) { becomeCoordinator(); } - return; + return true; } SearchState<ID> state = searchState; @@ -357,11 +355,11 @@ public class GMSJoinLeave<ID extends MemberIdentifier> implements JoinLeave<ID> synchronized (viewInstallationLock) { becomeCoordinator(); } - return; + return true; } } else { if (attemptToJoin()) { - return; + return true; } if (this.isStopping) { break; @@ -385,45 +383,40 @@ public class GMSJoinLeave<ID extends MemberIdentifier> implements JoinLeave<ID> break; } } - if (found && !state.hasContactedAJoinedLocator) { - try { - if (pauseIfThereIsNoCoordinator(state.possibleCoordinator.getVmViewId(), retrySleep)) { + try { + if (found && !state.hasContactedAJoinedLocator) { + // if locators are restarting they may be handing out IDs from a stale view that + // we should go through quickly. Otherwise we should sleep a bit to let failure + // detection select a new coordinator + if (state.possibleCoordinator.getVmViewId() < 0) { + logger.debug("sleeping for {} before making another attempt to find the coordinator", + retrySleep); + Thread.sleep(retrySleep); + } else { // since we were given a coordinator that couldn't be used we should keep trying tries = 0; giveupTime = System.currentTimeMillis() + timeout; } - } catch (InterruptedException e) { - Thread.currentThread().interrupt(); - throw new MembershipConfigurationException( - "Retry sleep interrupted. Giving up on joining the distributed system."); } + } catch (InterruptedException e) { + logger.debug("retry sleep interrupted - giving up on joining the distributed system"); + return false; } } // for if (!this.isJoined) { logger.debug("giving up attempting to join the distributed system after " + (System.currentTimeMillis() - startTime) + "ms"); + } - // to preserve old behavior we need to throw a MemberStartupException if - // unable to contact any of the locators - if (state.hasContactedAJoinedLocator) { - throw new MemberStartupException("Unable to join the distributed system in " - + (System.currentTimeMillis() - startTime) + "ms"); - } - - if (state.locatorsContacted == 0) { - throw new MembershipConfigurationException( - "Unable to join the distributed system. Could not contact any of the locators: " - + locators, - state.lastLocatorException); - } - - if (System.currentTimeMillis() > giveupTime) { - throw new MembershipConfigurationException( - "Unable to join the distributed system. Operation timed out"); - } + // to preserve old behavior we need to throw a MemberStartupException if + // unable to contact any of the locators + if (!this.isJoined && state.hasContactedAJoinedLocator) { + throw new MemberStartupException("Unable to join the distributed system in " + + (System.currentTimeMillis() - startTime) + "ms"); } - return; + + return this.isJoined; } finally { // notify anyone waiting on the address to be completed if (this.isJoined) { @@ -435,24 +428,6 @@ public class GMSJoinLeave<ID extends MemberIdentifier> implements JoinLeave<ID> } } - boolean pauseIfThereIsNoCoordinator(int viewId, long retrySleep) - throws InterruptedException { - // if locators are restarting they may be handing out IDs from a stale view that - // we should go through quickly. Otherwise we should sleep a bit to let failure - // detection select a new coordinator - if (viewId < 0) { - // the process hasn't finished joining the cluster. - logger.debug("sleeping for {} before making another attempt to find the coordinator", - retrySleep); - Thread.sleep(retrySleep); - } else { - // the member has joined the cluster. - return true; - } - - return false; - } - /** * send a join request and wait for a reply. Process the reply. This may throw a * MemberStartupException or an exception from the authenticator, if present. @@ -1224,7 +1199,6 @@ public class GMSJoinLeave<ID extends MemberIdentifier> implements JoinLeave<ID> } catch (IOException | ClassNotFoundException problem) { logger.info("Unable to contact locator " + laddr + ": " + problem); logger.debug("Exception thrown when contacting a locator", problem); - state.lastLocatorException = problem; if (state.locatorsContacted == 0 && System.currentTimeMillis() < giveUpTime) { try { Thread.sleep(FIND_LOCATOR_RETRY_SLEEP);