Repository: incubator-geode Updated Branches: refs/heads/feature/GEODE-77 c61fe3466 -> 7fd67b883
GEODE-77: improve restart locator error handling on corrupted state file Project: http://git-wip-us.apache.org/repos/asf/incubator-geode/repo Commit: http://git-wip-us.apache.org/repos/asf/incubator-geode/commit/7fd67b88 Tree: http://git-wip-us.apache.org/repos/asf/incubator-geode/tree/7fd67b88 Diff: http://git-wip-us.apache.org/repos/asf/incubator-geode/diff/7fd67b88 Branch: refs/heads/feature/GEODE-77 Commit: 7fd67b8836b1c0ff95034a9a6f25901e805354f2 Parents: c61fe34 Author: Qihong Chen <qc...@pivotal.io> Authored: Wed Aug 19 15:12:16 2015 -0700 Committer: Qihong Chen <qc...@pivotal.io> Committed: Wed Aug 19 15:13:06 2015 -0700 ---------------------------------------------------------------------- gemfire-assembly/build.gradle | 2 +- .../distributed/internal/InternalLocator.java | 7 +- .../membership/gms/locator/GMSLocator.java | 66 ++++++--------- .../gemfire/internal/i18n/LocalizedStrings.java | 2 +- .../gms/locator/GMSLocatorJUnitTest.java | 87 ++++++++++++++++++++ 5 files changed, 118 insertions(+), 46 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/incubator-geode/blob/7fd67b88/gemfire-assembly/build.gradle ---------------------------------------------------------------------- diff --git a/gemfire-assembly/build.gradle b/gemfire-assembly/build.gradle index 7e7dcb3..be655e2 100755 --- a/gemfire-assembly/build.gradle +++ b/gemfire-assembly/build.gradle @@ -112,7 +112,7 @@ def cp = { it.contains('spring-shell') || it.contains('snappy-java') || it.contains('hbase') || - it.contains('jgroups') + it.contains('jgroups') || it.contains('netty') }.join(' ') } http://git-wip-us.apache.org/repos/asf/incubator-geode/blob/7fd67b88/gemfire-core/src/main/java/com/gemstone/gemfire/distributed/internal/InternalLocator.java ---------------------------------------------------------------------- diff --git a/gemfire-core/src/main/java/com/gemstone/gemfire/distributed/internal/InternalLocator.java b/gemfire-core/src/main/java/com/gemstone/gemfire/distributed/internal/InternalLocator.java index f649713..32dee46 100644 --- a/gemfire-core/src/main/java/com/gemstone/gemfire/distributed/internal/InternalLocator.java +++ b/gemfire-core/src/main/java/com/gemstone/gemfire/distributed/internal/InternalLocator.java @@ -24,6 +24,7 @@ import java.util.concurrent.Future; import java.util.concurrent.TimeUnit; import java.util.concurrent.atomic.AtomicBoolean; +import com.gemstone.gemfire.InternalGemFireException; import org.apache.logging.log4j.Logger; import com.gemstone.gemfire.CancelException; @@ -1066,7 +1067,7 @@ public class InternalLocator extends Locator implements ConnectListener { ThreadGroup group = LoggingThreadGroup.createThreadGroup("Locator restart thread group"); this.restartThread = new Thread(group, "Location services restart thread") { public void run() { - boolean restarted; + boolean restarted = false; try { restarted = attemptReconnect(); logger.info("attemptReconnect returned {}", restarted); @@ -1074,6 +1075,10 @@ public class InternalLocator extends Locator implements ConnectListener { logger.info("attempt to restart location services was interrupted", e); } catch (IOException e) { logger.info("attempt to restart location services terminated", e); + } finally { + if (! restarted) { + stoppedForReconnect = false; + } } InternalLocator.this.restartThread = null; } http://git-wip-us.apache.org/repos/asf/incubator-geode/blob/7fd67b88/gemfire-core/src/main/java/com/gemstone/gemfire/distributed/internal/membership/gms/locator/GMSLocator.java ---------------------------------------------------------------------- diff --git a/gemfire-core/src/main/java/com/gemstone/gemfire/distributed/internal/membership/gms/locator/GMSLocator.java b/gemfire-core/src/main/java/com/gemstone/gemfire/distributed/internal/membership/gms/locator/GMSLocator.java index a988dec..dd4ac51 100755 --- a/gemfire-core/src/main/java/com/gemstone/gemfire/distributed/internal/membership/gms/locator/GMSLocator.java +++ b/gemfire-core/src/main/java/com/gemstone/gemfire/distributed/internal/membership/gms/locator/GMSLocator.java @@ -14,6 +14,7 @@ import java.util.HashSet; import java.util.List; import java.util.Set; +import com.gemstone.gemfire.InternalGemFireException; import org.apache.logging.log4j.Logger; import com.gemstone.gemfire.DataSerializer; @@ -30,16 +31,16 @@ import com.gemstone.gemfire.distributed.internal.membership.gms.Services; import com.gemstone.gemfire.distributed.internal.membership.gms.interfaces.Locator; import com.gemstone.gemfire.distributed.internal.membership.gms.mgr.GMSMembershipManager; import com.gemstone.gemfire.distributed.internal.tcpserver.TcpClient; -import com.gemstone.gemfire.distributed.internal.tcpserver.TcpHandler; import com.gemstone.gemfire.distributed.internal.tcpserver.TcpServer; import com.gemstone.gemfire.internal.Version; import com.gemstone.gemfire.internal.VersionedObjectInput; -import com.gemstone.gemfire.internal.i18n.LocalizedStrings; import com.gemstone.gemfire.internal.logging.LogService; +import static com.gemstone.gemfire.internal.i18n.LocalizedStrings.LOCATOR_UNABLE_TO_RECOVER_VIEW; + public class GMSLocator implements Locator, NetLocator { - private static final int LOCATOR_FILE_STAMP = 0x7b8cf741; + /* package */ static final int LOCATOR_FILE_STAMP = 0x7b8cf741; private static final Logger logger = LogService.getLogger(); @@ -89,7 +90,7 @@ public class GMSLocator implements Locator, NetLocator { } @Override - public void init(TcpServer server) { + public void init(TcpServer server) throws InternalGemFireException { recover(); } @@ -236,7 +237,7 @@ public class GMSLocator implements Locator, NetLocator { setMembershipManager(((InternalDistributedSystem)ds).getDM().getMembershipManager()); } - public void recover() { + public void recover() throws InternalGemFireException { if (!recoverFromOthers()) { recoverFromFile(viewFile); } @@ -265,61 +266,40 @@ public class GMSLocator implements Locator, NetLocator { logger.info("Peer locator recovered initial membership of {}", view); return true; } - } catch (IOException e) { - // ignore - } catch (ClassNotFoundException e) { - // hmm - odd response? - } + } catch (IOException | ClassNotFoundException ignore) {} return false; } - - private boolean recoverFromFile(File file) { + + /* package */ boolean recoverFromFile(File file) throws InternalGemFireException { if (!file.exists()) { return false; } + logger.info("Peer locator recovering from " + file.getAbsolutePath()); - FileInputStream fis = null; - try { - fis = new FileInputStream(file); - ObjectInput ois = new ObjectInputStream(fis); - - int magic = ois.readInt(); - if (magic != LOCATOR_FILE_STAMP) { + try (ObjectInput ois = new ObjectInputStream(new FileInputStream(file))) { + if (ois.readInt() != LOCATOR_FILE_STAMP) { return false; } - + int version = ois.readInt(); Version geodeVersion = Version.fromOrdinalNoThrow((short)version, false); - if (geodeVersion != null && version != Version.CURRENT_ORDINAL) { + if (geodeVersion != null && version == Version.CURRENT_ORDINAL) { logger.info("Peer locator found that persistent view was written with {}", geodeVersion); - ois = new VersionedObjectInput(ois, geodeVersion); - } else { - return false; - } - - this.view = DataSerializer.readObject(ois); - - logger.info("Initial membership is " + view); - return true; - } - catch (Exception e) { - e.printStackTrace(); - logger.warn(LocalizedStrings.Locator_unable_to_recover_view_0 - .toLocalizedString(file), e); - try { - fis.close(); - } catch (IOException ex) { - // ignore + ObjectInput ois2 = new VersionedObjectInput(ois, geodeVersion); + this.view = DataSerializer.readObject(ois2); + logger.info("Initial membership is " + view); + return true; } + return false; + } catch (Exception e) { + String msg = LOCATOR_UNABLE_TO_RECOVER_VIEW.toLocalizedString(file.toString()); + logger.warn(msg, e); if (!file.delete() && file.exists()) { logger.warn("Peer locator was unable to recover from or delete " + file); this.viewFile = null; } + throw new InternalGemFireException(msg, e); } - finally { - try { fis.close(); } catch (IOException e) { } - } - return false; } } http://git-wip-us.apache.org/repos/asf/incubator-geode/blob/7fd67b88/gemfire-core/src/main/java/com/gemstone/gemfire/internal/i18n/LocalizedStrings.java ---------------------------------------------------------------------- diff --git a/gemfire-core/src/main/java/com/gemstone/gemfire/internal/i18n/LocalizedStrings.java b/gemfire-core/src/main/java/com/gemstone/gemfire/internal/i18n/LocalizedStrings.java index c4d9c58..f146834 100644 --- a/gemfire-core/src/main/java/com/gemstone/gemfire/internal/i18n/LocalizedStrings.java +++ b/gemfire-core/src/main/java/com/gemstone/gemfire/internal/i18n/LocalizedStrings.java @@ -2125,7 +2125,7 @@ public class LocalizedStrings extends ParentLocalizedStrings { public static final StringId MinimumSystemRequirements_NOT_MET = new StringId(6604, "Minimum system requirements not met. Unexpected behavior may result in additional errors."); public static final StringId MinimumSystemRequirements_JAVA_VERSION = new StringId(6605, "Java version older than {0}."); - public static final StringId Locator_unable_to_recover_view_0 = new StringId(6606, "Unable to recover previous membership view from {0}"); + public static final StringId LOCATOR_UNABLE_TO_RECOVER_VIEW = new StringId(6606, "Unable to recover previous membership view from {0}"); public static final StringId Network_partition_detected = new StringId(6607, "Exiting due to possible network partition event due to loss of {0} cache processes: {1}"); http://git-wip-us.apache.org/repos/asf/incubator-geode/blob/7fd67b88/gemfire-core/src/test/java/com/gemstone/gemfire/distributed/internal/membership/gms/locator/GMSLocatorJUnitTest.java ---------------------------------------------------------------------- diff --git a/gemfire-core/src/test/java/com/gemstone/gemfire/distributed/internal/membership/gms/locator/GMSLocatorJUnitTest.java b/gemfire-core/src/test/java/com/gemstone/gemfire/distributed/internal/membership/gms/locator/GMSLocatorJUnitTest.java new file mode 100644 index 0000000..e79dcbc --- /dev/null +++ b/gemfire-core/src/test/java/com/gemstone/gemfire/distributed/internal/membership/gms/locator/GMSLocatorJUnitTest.java @@ -0,0 +1,87 @@ +package com.gemstone.gemfire.distributed.internal.membership.gms.locator; + +import com.gemstone.gemfire.DataSerializer; +import com.gemstone.gemfire.InternalGemFireException; +import com.gemstone.gemfire.distributed.internal.membership.NetView; +import com.gemstone.gemfire.internal.Version; +import org.junit.After; +import org.junit.Before; +import org.junit.Test; +import org.junit.experimental.categories.Category; +import com.gemstone.gemfire.test.junit.categories.UnitTest; + +import java.io.File; +import java.io.FileOutputStream; +import java.io.ObjectOutputStream; + +import static org.junit.Assert.*; + +@Category(UnitTest.class) +public class GMSLocatorJUnitTest { + + File tempStateFile = null; + GMSLocator locator = null; + + @Before + public void setUp() throws Exception { + tempStateFile = File.createTempFile("tempLocator-", ".dat", new File("/tmp")); + locator = new GMSLocator(null, tempStateFile, null, false, false); + // System.out.println("temp state file: " + tempStateFile); + } + + @After + public void tearDown() throws Exception { + if (tempStateFile.exists()) { + tempStateFile.delete(); + } + } + + private void populateStateFile(File file, int fileStamp, int ordinal, Object object) throws Exception { + try (ObjectOutputStream oos = new ObjectOutputStream(new FileOutputStream(file))) { + oos.writeInt(fileStamp); + oos.writeInt(ordinal); + DataSerializer.writeObject(object, oos); + } + } + + @Test + public void testRecoverFromFileWithNonExistFile() throws Exception { + tempStateFile.delete(); + assertFalse(tempStateFile.exists()); + assertFalse(locator.recoverFromFile(tempStateFile)); + } + + @Test + public void testRecoverFromFileWithNormalFile() throws Exception { + NetView view = new NetView(); + populateStateFile(tempStateFile, GMSLocator.LOCATOR_FILE_STAMP, Version.CURRENT_ORDINAL, view); + assertTrue(locator.recoverFromFile(tempStateFile)); + } + + @Test + public void testRecoverFromFileWithWrongFileStamp() throws Exception { + // add 1 to file stamp to make it invalid + populateStateFile(tempStateFile, GMSLocator.LOCATOR_FILE_STAMP + 1, Version.CURRENT_ORDINAL, 1); + assertFalse(locator.recoverFromFile(tempStateFile)); + } + + @Test + public void testRecoverFromFileWithWrongOrdinal() throws Exception { + // add 1 to ordinal to make it wrong + populateStateFile(tempStateFile, GMSLocator.LOCATOR_FILE_STAMP, Version.CURRENT_ORDINAL + 1, 1); + assertFalse(locator.recoverFromFile(tempStateFile)); + } + + @Test + public void testRecoverFromFileWithInvalidViewObject() throws Exception { + populateStateFile(tempStateFile, GMSLocator.LOCATOR_FILE_STAMP, Version.CURRENT_ORDINAL, 1); + try { + locator.recoverFromFile(tempStateFile); + fail("should catch InternalGemFileException"); + } catch (InternalGemFireException e) { + assertTrue(e.getMessage().startsWith("Unable to recover previous membership view from")); + } + } + +} +