Repository: geode
Updated Branches:
  refs/heads/feature/GEODE-2497 [created] 8d45ca227


GEODE-2497 surprise members are never timed out during startup

Moved the creation of the timer to GMSMembershipManager.started()

Removed write-lock in timer-creation method since it's only called from
one place now

Altered the way that the timer-creation method finds the
InternalDistributedSystem.  The old way of using getAnyInstance() was
the primary source of the problem since it returns null until startup
is completed.

Altered the surprise-member unit test to ensure that it's using the
timer and not relying on installation of a new membership view to clean
things up.

Altered the surprise-member unit test to run faster.  It now completes in
under 10 seconds.


Project: http://git-wip-us.apache.org/repos/asf/geode/repo
Commit: http://git-wip-us.apache.org/repos/asf/geode/commit/8d45ca22
Tree: http://git-wip-us.apache.org/repos/asf/geode/tree/8d45ca22
Diff: http://git-wip-us.apache.org/repos/asf/geode/diff/8d45ca22

Branch: refs/heads/feature/GEODE-2497
Commit: 8d45ca22737282abe279d3c863478f904f2e1926
Parents: dbea592
Author: Bruce Schuchardt <bschucha...@pivotal.io>
Authored: Fri Feb 17 10:17:21 2017 -0800
Committer: Bruce Schuchardt <bschucha...@pivotal.io>
Committed: Fri Feb 17 10:22:25 2017 -0800

----------------------------------------------------------------------
 .../gms/mgr/GMSMembershipManager.java           |  75 +++++-------
 .../internal/DistributionManagerDUnitTest.java  | 117 +++++++++----------
 2 files changed, 89 insertions(+), 103 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/geode/blob/8d45ca22/geode-core/src/main/java/org/apache/geode/distributed/internal/membership/gms/mgr/GMSMembershipManager.java
----------------------------------------------------------------------
diff --git 
a/geode-core/src/main/java/org/apache/geode/distributed/internal/membership/gms/mgr/GMSMembershipManager.java
 
b/geode-core/src/main/java/org/apache/geode/distributed/internal/membership/gms/mgr/GMSMembershipManager.java
index cf17025..6cefe4e 100644
--- 
a/geode-core/src/main/java/org/apache/geode/distributed/internal/membership/gms/mgr/GMSMembershipManager.java
+++ 
b/geode-core/src/main/java/org/apache/geode/distributed/internal/membership/gms/mgr/GMSMembershipManager.java
@@ -608,7 +608,6 @@ public class GMSMembershipManager implements 
MembershipManager, Manager {
       }
       try {
         listener.viewInstalled(latestView);
-        startCleanupTimer();
       } catch (DistributedSystemDisconnectedException se) {
       }
     } finally {
@@ -616,6 +615,10 @@ public class GMSMembershipManager implements 
MembershipManager, Manager {
     }
   }
 
+  public boolean isCleanupTimerStarted() {
+    return this.cleanupTimer != null;
+  }
+
   /**
    * the timer used to perform periodic tasks
    * 
@@ -767,7 +770,9 @@ public class GMSMembershipManager implements 
MembershipManager, Manager {
   }
 
   @Override
-  public void started() {}
+  public void started() {
+    startCleanupTimer();
+  }
 
 
   /** this is invoked by JoinLeave when there is a loss of quorum in the 
membership system */
@@ -942,12 +947,6 @@ public class GMSMembershipManager implements 
MembershipManager, Manager {
         surpriseMembers.remove(member);
       } else {
 
-        // Now that we're sure the member is new, add them.
-        // make sure the surprise-member cleanup task is running
-        if (this.cleanupTimer == null) {
-          startCleanupTimer();
-        } // cleanupTimer == null
-
         // Ensure that the member is accounted for in the view
         // Conjure up a new view including the new member. This is necessary
         // because we are about to tell the listener about a new member, so
@@ -978,43 +977,33 @@ public class GMSMembershipManager implements 
MembershipManager, Manager {
 
   /** starts periodic task to perform cleanup chores such as expire surprise 
members */
   private void startCleanupTimer() {
-    latestViewWriteLock.lock();
-    try {
-      if (this.cleanupTimer != null) {
-        return;
-      }
-      DistributedSystem ds = InternalDistributedSystem.getAnyInstance();
-      if (ds != null && ds.isConnected()) {
-        this.cleanupTimer = new SystemTimer(ds, true);
-        SystemTimer.SystemTimerTask st = new SystemTimer.SystemTimerTask() {
-          @Override
-          public void run2() {
-            latestViewWriteLock.lock();
-            try {
-              long oldestAllowed = System.currentTimeMillis() - 
surpriseMemberTimeout;
-              for (Iterator it = surpriseMembers.entrySet().iterator(); 
it.hasNext();) {
-                Map.Entry entry = (Map.Entry) it.next();
-                Long birthtime = (Long) entry.getValue();
-                if (birthtime.longValue() < oldestAllowed) {
-                  it.remove();
-                  InternalDistributedMember m = (InternalDistributedMember) 
entry.getKey();
-                  logger.info(LocalizedMessage.create(
-                      
LocalizedStrings.GroupMembershipService_MEMBERSHIP_EXPIRING_MEMBERSHIP_OF_SURPRISE_MEMBER_0,
-                      m));
-                  removeWithViewLock(m, true,
-                      "not seen in membership view in " + 
surpriseMemberTimeout + "ms");
-                }
-              }
-            } finally {
-              latestViewWriteLock.unlock();
+    DistributedSystem ds = this.dcReceiver.getDM().getSystem();
+    this.cleanupTimer = new SystemTimer(ds, true);
+    SystemTimer.SystemTimerTask st = new SystemTimer.SystemTimerTask() {
+      @Override
+      public void run2() {
+        latestViewWriteLock.lock();
+        try {
+          long oldestAllowed = System.currentTimeMillis() - 
surpriseMemberTimeout;
+          for (Iterator it = surpriseMembers.entrySet().iterator(); 
it.hasNext();) {
+            Map.Entry entry = (Map.Entry) it.next();
+            Long birthtime = (Long) entry.getValue();
+            if (birthtime.longValue() < oldestAllowed) {
+              it.remove();
+              InternalDistributedMember m = (InternalDistributedMember) 
entry.getKey();
+              logger.info(LocalizedMessage.create(
+                  
LocalizedStrings.GroupMembershipService_MEMBERSHIP_EXPIRING_MEMBERSHIP_OF_SURPRISE_MEMBER_0,
+                  m));
+              removeWithViewLock(m, true,
+                  "not seen in membership view in " + surpriseMemberTimeout + 
"ms");
             }
           }
-        };
-        this.cleanupTimer.scheduleAtFixedRate(st, surpriseMemberTimeout, 
surpriseMemberTimeout / 3);
-      } // ds != null && ds.isConnected()
-    } finally {
-      latestViewWriteLock.unlock();
-    }
+        } finally {
+          latestViewWriteLock.unlock();
+        }
+      }
+    };
+    this.cleanupTimer.scheduleAtFixedRate(st, surpriseMemberTimeout, 
surpriseMemberTimeout / 3);
   }
 
   /**

http://git-wip-us.apache.org/repos/asf/geode/blob/8d45ca22/geode-core/src/test/java/org/apache/geode/distributed/internal/DistributionManagerDUnitTest.java
----------------------------------------------------------------------
diff --git 
a/geode-core/src/test/java/org/apache/geode/distributed/internal/DistributionManagerDUnitTest.java
 
b/geode-core/src/test/java/org/apache/geode/distributed/internal/DistributionManagerDUnitTest.java
index ccce013..2b405fa 100644
--- 
a/geode-core/src/test/java/org/apache/geode/distributed/internal/DistributionManagerDUnitTest.java
+++ 
b/geode-core/src/test/java/org/apache/geode/distributed/internal/DistributionManagerDUnitTest.java
@@ -159,77 +159,74 @@ public class DistributionManagerDUnitTest extends 
JUnit4DistributedTestCase {
    **/
   @Test
   public void testSurpriseMemberHandling() {
-    VM vm0 = Host.getHost(0).getVM(0);
-
-    InternalDistributedSystem sys = getSystem();
-    MembershipManager mgr = MembershipManagerHelper.getMembershipManager(sys);
 
+    System.setProperty(DistributionConfig.GEMFIRE_PREFIX + 
"surprise-member-timeout", "3000");
     try {
-      InternalDistributedMember mbr =
-          new InternalDistributedMember(NetworkUtils.getIPLiteral(), 12345);
+      InternalDistributedSystem sys = getSystem();
+      MembershipManager mgr = 
MembershipManagerHelper.getMembershipManager(sys);
+      assertTrue(((GMSMembershipManager) mgr).isCleanupTimerStarted());
 
-      // first make sure we can't add this as a surprise member (bug #44566)
-
-      // if the view number isn't being recorded correctly the test will pass 
but the
-      // functionality is broken
-      Assert.assertTrue("expected view ID to be greater than zero", 
mgr.getView().getViewId() > 0);
-
-      int oldViewId = mbr.getVmViewId();
-      mbr.setVmViewId((int) mgr.getView().getViewId() - 1);
-      org.apache.geode.test.dunit.LogWriterUtils.getLogWriter()
-          .info("current membership view is " + mgr.getView());
-      org.apache.geode.test.dunit.LogWriterUtils.getLogWriter()
-          .info("created ID " + mbr + " with view ID " + mbr.getVmViewId());
-      sys.getLogWriter()
-          .info("<ExpectedException action=add>attempt to add old 
member</ExpectedException>");
-      sys.getLogWriter()
-          .info("<ExpectedException action=add>Removing shunned GemFire 
node</ExpectedException>");
       try {
-        boolean accepted = mgr.addSurpriseMember(mbr);
-        Assert.assertTrue("member with old ID was not rejected (bug #44566)", 
!accepted);
-      } finally {
+        InternalDistributedMember mbr =
+            new InternalDistributedMember(NetworkUtils.getIPLiteral(), 12345);
+
+        // first make sure we can't add this as a surprise member (bug #44566)
+
+        // if the view number isn't being recorded correctly the test will 
pass but the
+        // functionality is broken
+        Assert.assertTrue("expected view ID to be greater than zero",
+            mgr.getView().getViewId() > 0);
+
+        int oldViewId = mbr.getVmViewId();
+        mbr.setVmViewId((int) mgr.getView().getViewId() - 1);
+        org.apache.geode.test.dunit.LogWriterUtils.getLogWriter()
+            .info("current membership view is " + mgr.getView());
+        org.apache.geode.test.dunit.LogWriterUtils.getLogWriter()
+            .info("created ID " + mbr + " with view ID " + mbr.getVmViewId());
         sys.getLogWriter()
-            .info("<ExpectedException action=remove>attempt to add old 
member</ExpectedException>");
+            .info("<ExpectedException action=add>attempt to add old 
member</ExpectedException>");
         sys.getLogWriter().info(
-            "<ExpectedException action=remove>Removing shunned GemFire 
node</ExpectedException>");
-      }
-      mbr.setVmViewId(oldViewId);
-
-      // now forcibly add it as a surprise member and show that it is reaped
-      long gracePeriod = 5000;
-      long startTime = System.currentTimeMillis();
-      long timeout = ((GMSMembershipManager) mgr).getSurpriseMemberTimeout();
-      long birthTime = startTime - timeout + gracePeriod;
-      MembershipManagerHelper.addSurpriseMember(sys, mbr, birthTime);
-      assertTrue("Member was not a surprise member", 
mgr.isSurpriseMember(mbr));
-
-      // force a real view change
-      SerializableRunnable connectDisconnect = new SerializableRunnable() {
-        public void run() {
-          getSystem().disconnect();
+            "<ExpectedException action=add>Removing shunned GemFire 
node</ExpectedException>");
+        try {
+          boolean accepted = mgr.addSurpriseMember(mbr);
+          Assert.assertTrue("member with old ID was not rejected (bug 
#44566)", !accepted);
+        } finally {
+          sys.getLogWriter().info(
+              "<ExpectedException action=remove>attempt to add old 
member</ExpectedException>");
+          sys.getLogWriter().info(
+              "<ExpectedException action=remove>Removing shunned GemFire 
node</ExpectedException>");
         }
-      };
-      vm0.invoke(connectDisconnect);
-
-      if (birthTime < (System.currentTimeMillis() - timeout)) {
-        return; // machine is too busy and we didn't get enough CPU to perform 
more assertions
-      }
-      assertTrue("Member was incorrectly removed from surprise member set",
-          mgr.isSurpriseMember(mbr));
+        mbr.setVmViewId(oldViewId);
+
+        // now forcibly add it as a surprise member and show that it is reaped
+        long gracePeriod = 5000;
+        long startTime = System.currentTimeMillis();
+        long timeout = ((GMSMembershipManager) mgr).getSurpriseMemberTimeout();
+        long birthTime = startTime - timeout + gracePeriod;
+        MembershipManagerHelper.addSurpriseMember(sys, mbr, birthTime);
+        assertTrue("Member was not a surprise member", 
mgr.isSurpriseMember(mbr));
+
+        if (birthTime < (System.currentTimeMillis() - timeout)) {
+          return; // machine is too busy and we didn't get enough CPU to 
perform more assertions
+        }
+        assertTrue("Member was incorrectly removed from surprise member set",
+            mgr.isSurpriseMember(mbr));
 
-      try {
-        Thread.sleep(gracePeriod);
-      } catch (InterruptedException e) {
-        fail("test was interrupted", e);
-      }
+        try {
+          Thread.sleep((timeout / 3) + gracePeriod);
+        } catch (InterruptedException e) {
+          fail("test was interrupted", e);
+        }
 
-      vm0.invoke(connectDisconnect);
-      assertTrue("Member was not removed from surprise member set", 
!mgr.isSurpriseMember(mbr));
+        assertTrue("Member was not removed from surprise member set", 
!mgr.isSurpriseMember(mbr));
 
-    } finally {
-      if (sys != null && sys.isConnected()) {
-        sys.disconnect();
+      } finally {
+        if (sys != null && sys.isConnected()) {
+          sys.disconnect();
+        }
       }
+    } finally {
+      System.getProperties().remove(DistributionConfig.GEMFIRE_PREFIX + 
"surprise-member-timeout");
     }
   }
 

Reply via email to