[50/50] [abbrv] geode git commit: GEODE-3055: The old primary's the shadow bucket is not initialized when rebalance remove it. Thus the new primary candidate can never initialize from it. The fix is t

2017-07-28 Thread zhouxj
GEODE-3055: The old primary's shadow bucket is not
initialized when rebalance removes it. Thus the new primary candidate can
never initialize from it. The fix is to wait until a new primary exists before
removing the old primary's bucket during rebalance.

When a colocated child bucket fails to initialize, remove the leader
bucket as well, since all of these buckets should be created atomically.


Project: http://git-wip-us.apache.org/repos/asf/geode/repo
Commit: http://git-wip-us.apache.org/repos/asf/geode/commit/1504e0f2
Tree: http://git-wip-us.apache.org/repos/asf/geode/tree/1504e0f2
Diff: http://git-wip-us.apache.org/repos/asf/geode/diff/1504e0f2

Branch: refs/heads/feature/GEM-1483
Commit: 1504e0f28e0f36a87eb8bd81a2026c1c7163a18d
Parents: 36665ae
Author: zhouxh 
Authored: Thu Jun 8 14:52:56 2017 -0700
Committer: zhouxh 
Committed: Fri Jul 28 17:51:47 2017 -0700

--
 .../internal/cache/PRHARedundancyProvider.java  |  4 +-
 .../cache/PartitionedRegionDataStore.java   | 73 +++-
 2 files changed, 73 insertions(+), 4 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/geode/blob/1504e0f2/geode-core/src/main/java/org/apache/geode/internal/cache/PRHARedundancyProvider.java
--
diff --git 
a/geode-core/src/main/java/org/apache/geode/internal/cache/PRHARedundancyProvider.java
 
b/geode-core/src/main/java/org/apache/geode/internal/cache/PRHARedundancyProvider.java
index f8e2108..c584b90 100644
--- 
a/geode-core/src/main/java/org/apache/geode/internal/cache/PRHARedundancyProvider.java
+++ 
b/geode-core/src/main/java/org/apache/geode/internal/cache/PRHARedundancyProvider.java
@@ -1798,9 +1798,9 @@ public class PRHARedundancyProvider {
   public void scheduleCreateMissingBuckets() {
 if (this.prRegion.getColocatedWith() != null
 && ColocationHelper.isColocationComplete(this.prRegion)) {
-  Runnable task = new CreateMissingBucketsTask(this);
+  Runnable task = new 
CreateMissingBucketsTask(PRHARedundancyProvider.this);
   final InternalResourceManager resourceManager =
-  this.prRegion.getGemFireCache().getInternalResourceManager();
+  prRegion.getGemFireCache().getInternalResourceManager();
   resourceManager.getExecutor().execute(task);
 }
   }

http://git-wip-us.apache.org/repos/asf/geode/blob/1504e0f2/geode-core/src/main/java/org/apache/geode/internal/cache/PartitionedRegionDataStore.java
--
diff --git 
a/geode-core/src/main/java/org/apache/geode/internal/cache/PartitionedRegionDataStore.java
 
b/geode-core/src/main/java/org/apache/geode/internal/cache/PartitionedRegionDataStore.java
index 0318c75..3050054 100644
--- 
a/geode-core/src/main/java/org/apache/geode/internal/cache/PartitionedRegionDataStore.java
+++ 
b/geode-core/src/main/java/org/apache/geode/internal/cache/PartitionedRegionDataStore.java
@@ -758,6 +758,17 @@ public class PartitionedRegionDataStore implements 
HasCachePerfStats {
   }
   this.getPartitionedRegion().checkReadiness();
   throw e;
+} catch (RuntimeException validationException) {
+  // GEODE-3055
+  PartitionedRegion leader = 
ColocationHelper.getLeaderRegion(this.partitionedRegion);
+  boolean isLeader = leader.equals(this.partitionedRegion);
+  if (!isLeader) {
+leader.getDataStore().removeBucket(bucketId, true);
+logger.info("For bucket " + bucketId + ", failed to create cololcated 
child bucket for "
++ this.partitionedRegion.getFullPath() + ", removed leader region "
++ leader.getFullPath() + " bucket.");
+  }
+  throw validationException;
 }
 // Determine the size of the bucket (the Region in this case is mirrored,
 // get initial image has populated the bucket, compute the size of the
@@ -1417,6 +1428,39 @@ public class PartitionedRegionDataStore implements 
HasCachePerfStats {
 }
   }
 
+  public boolean isRemotePrimaryReadyForColocatedChildren(int bucketId) {
+boolean isRemotePrimaryReady = true;
+InternalDistributedMember myId =
+
this.partitionedRegion.getDistributionManager().getDistributionManagerId();
+
+List colocatedChildPRs =
+ColocationHelper.getColocatedChildRegions(this.partitionedRegion);
+if (colocatedChildPRs != null) {
+  for (PartitionedRegion pr : colocatedChildPRs) {
+InternalDistributedMember primaryChild = pr.getBucketPrimary(bucketId);
+if (logger.isDebugEnabled()) {
+  logger.debug("Checking colocated child bucket " + pr + ", bucketId=" 
+ bucketId
+  + ", primary is " + primaryChild);
+}
+if (primaryChild == null || myId.equals(primaryChild)) {
+  if (logger.isDebugEnabled()) {
+

[50/50] [abbrv] geode git commit: GEODE-3055: The old primary's the shadow bucket is not initialized when rebalance remove it. Thus the new primary candidate can never initialize from it. The fix is t

2017-07-21 Thread zhouxj
GEODE-3055: The old primary's shadow bucket is not
initialized when rebalance removes it. Thus the new primary candidate can
never initialize from it. The fix is to wait until a new primary exists before
removing the old primary's bucket during rebalance.


Project: http://git-wip-us.apache.org/repos/asf/geode/repo
Commit: http://git-wip-us.apache.org/repos/asf/geode/commit/40fb5fdf
Tree: http://git-wip-us.apache.org/repos/asf/geode/tree/40fb5fdf
Diff: http://git-wip-us.apache.org/repos/asf/geode/diff/40fb5fdf

Branch: refs/heads/feature/GEM-1483
Commit: 40fb5fdf9e7053de7b1f1df0fc1adfd3fe78546b
Parents: 66cd31a
Author: zhouxh 
Authored: Thu Jun 8 14:52:56 2017 -0700
Committer: zhouxh 
Committed: Thu Jul 20 23:40:43 2017 -0700

--
 .../cache/PartitionedRegionDataStore.java   | 58 +++-
 1 file changed, 56 insertions(+), 2 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/geode/blob/40fb5fdf/geode-core/src/main/java/org/apache/geode/internal/cache/PartitionedRegionDataStore.java
--
diff --git 
a/geode-core/src/main/java/org/apache/geode/internal/cache/PartitionedRegionDataStore.java
 
b/geode-core/src/main/java/org/apache/geode/internal/cache/PartitionedRegionDataStore.java
index 0318c75..bf708a9 100644
--- 
a/geode-core/src/main/java/org/apache/geode/internal/cache/PartitionedRegionDataStore.java
+++ 
b/geode-core/src/main/java/org/apache/geode/internal/cache/PartitionedRegionDataStore.java
@@ -1417,6 +1417,39 @@ public class PartitionedRegionDataStore implements 
HasCachePerfStats {
 }
   }
 
+  public boolean isRemotePrimaryReadyForColocatedChildren(int bucketId) {
+boolean isRemotePrimaryReady = true;
+InternalDistributedMember myId =
+
this.partitionedRegion.getDistributionManager().getDistributionManagerId();
+
+List colocatedChildPRs =
+ColocationHelper.getColocatedChildRegions(this.partitionedRegion);
+if (colocatedChildPRs != null) {
+  for (PartitionedRegion pr : colocatedChildPRs) {
+InternalDistributedMember primaryChild = pr.getBucketPrimary(bucketId);
+if (logger.isDebugEnabled()) {
+  logger.debug("Checking colocated child bucket " + pr + ", bucketId=" 
+ bucketId
+  + ", primary is " + primaryChild);
+}
+if (primaryChild == null || myId.equals(primaryChild)) {
+  if (logger.isDebugEnabled()) {
+logger.debug("Colocated bucket region " + pr + " " + bucketId
++ " does not have a remote primary yet. Not to remove.");
+  }
+  return false;
+} else {
+  if (logger.isDebugEnabled()) {
+logger
+.debug(pr + " bucketId=" + bucketId + " has remote primary, 
checking its children");
+  }
+  isRemotePrimaryReady = isRemotePrimaryReady
+  && 
pr.getDataStore().isRemotePrimaryReadyForColocatedChildren(bucketId);
+}
+  }
+}
+return isRemotePrimaryReady;
+  }
+
   /**
* Removes a redundant bucket hosted by this data store. The rebalancer 
invokes this method
* directly or sends this member a message to invoke it.
@@ -1471,7 +1504,11 @@ public class PartitionedRegionDataStore implements 
HasCachePerfStats {
 
   }
 
+  PartitionedRegion leader = 
ColocationHelper.getLeaderRegion(this.partitionedRegion);
+  boolean isLeader = leader.equals(this.partitionedRegion);
   BucketAdvisor bucketAdvisor = bucketRegion.getBucketAdvisor();
+  InternalDistributedMember myId =
+  
this.partitionedRegion.getDistributionManager().getDistributionManagerId();
   Lock writeLock = bucketAdvisor.getActiveWriteLock();
 
   // Fix for 43613 - don't remove the bucket
@@ -1484,6 +1521,25 @@ public class PartitionedRegionDataStore implements 
HasCachePerfStats {
   return false;
 }
 
+if (isLeader) {
+  if (!isRemotePrimaryReadyForColocatedChildren(bucketId)) {
+return false;
+  }
+
+  InternalDistributedMember primary = bucketAdvisor.getPrimary();
+  if (primary == null || myId.equals(primary)) {
+if (logger.isDebugEnabled()) {
+  logger.debug("Bucket region " + bucketRegion
+  + " does not have a remote primary yet. Not to remove.");
+}
+return false;
+  }
+
+  if (logger.isDebugEnabled()) {
+logger.debug("Bucket region " + bucketRegion + " has primary at " 
+ primary);
+  }
+}
+
 // recurse down to each tier of children to remove first
 removeBucketForColocatedChildren(bucketId, forceRemovePrimary);
 
@@ -1513,8 +1569,6 @@ public class PartitionedRegionDataStore implements 
HasCachePerfStats {
   // because