This is an automated email from the ASF dual-hosted git repository.

jiajunwang pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/helix.git


The following commit(s) were added to refs/heads/master by this push:
     new 67f4937  Don't output the best possible state calculate result if the 
result is not valid. (#1359)
67f4937 is described below

commit 67f493760aa6d598dd0242996fe9b79c37085726
Author: Jiajun Wang <[email protected]>
AuthorDate: Tue Sep 15 10:42:16 2020 -0700

    Don't output the best possible state calculate result if the result is not 
valid. (#1359)
    
    The invalid result may mislead the following stages in the pipeline and 
cause problems. For example, this change fixes the issue that resource monitor 
rebalance state may be inaccurately reported.
---
 .../stages/BestPossibleStateCalcStage.java          | 21 ++++++++++++---------
 .../integration/TestAlertingRebalancerFailure.java  | 20 ++++++++++++++------
 2 files changed, 26 insertions(+), 15 deletions(-)

diff --git 
a/helix-core/src/main/java/org/apache/helix/controller/stages/BestPossibleStateCalcStage.java
 
b/helix-core/src/main/java/org/apache/helix/controller/stages/BestPossibleStateCalcStage.java
index bca7a37..1c0d63a 100644
--- 
a/helix-core/src/main/java/org/apache/helix/controller/stages/BestPossibleStateCalcStage.java
+++ 
b/helix-core/src/main/java/org/apache/helix/controller/stages/BestPossibleStateCalcStage.java
@@ -347,36 +347,39 @@ public class BestPossibleStateCalcStage extends 
AbstractBaseStage {
       LogUtil.logError(logger, _eventId, "Error computing assignment for 
resource " + resourceName
           + ". no rebalancer found. rebalancer: " + rebalancer + " 
mappingCalculator: "
           + mappingCalculator);
-    }
-
-    if (rebalancer != null && mappingCalculator != null) {
-      ResourceAssignment partitionStateAssignment = null;
+    } else {
       try {
         HelixManager manager = 
event.getAttribute(AttributeName.helixmanager.name());
         rebalancer.init(manager);
         idealState =
             rebalancer.computeNewIdealState(resourceName, idealState, 
currentStateOutput, cache);
 
-        output.setPreferenceLists(resourceName, 
idealState.getPreferenceLists());
+        // Check if calculation is done successfully
+        if (!checkBestPossibleStateCalculation(idealState)) {
+          LogUtil.logWarn(logger, _eventId,
+              "The calculated idealState is not valid, resource: " + 
resourceName);
+          return false;
+        }
 
         // Use the internal MappingCalculator interface to compute the final 
assignment
         // The next release will support rebalancers that compute the mapping 
from start to finish
-        partitionStateAssignment = mappingCalculator
+        ResourceAssignment partitionStateAssignment = mappingCalculator
             .computeBestPossiblePartitionState(cache, idealState, resource, 
currentStateOutput);
 
         if (partitionStateAssignment == null) {
           LogUtil.logWarn(logger, _eventId,
-              "PartitionStateAssignment is null, resource: " + resourceName);
+              "The calculated partitionStateAssignment is null, resource: " + 
resourceName);
           return false;
         }
 
+        // Set the calculated result to the output.
+        output.setPreferenceLists(resourceName, 
idealState.getPreferenceLists());
         for (Partition partition : resource.getPartitions()) {
           Map<String, String> newStateMap = 
partitionStateAssignment.getReplicaMap(partition);
           output.setState(resourceName, partition, newStateMap);
         }
 
-        // Check if calculation is done successfully
-        return checkBestPossibleStateCalculation(idealState);
+        return true;
       } catch (HelixException e) {
         // No eligible instance is found.
         LogUtil.logError(logger, _eventId, e.getMessage());
diff --git 
a/helix-core/src/test/java/org/apache/helix/integration/TestAlertingRebalancerFailure.java
 
b/helix-core/src/test/java/org/apache/helix/integration/TestAlertingRebalancerFailure.java
index be5ea37..4caba32 100644
--- 
a/helix-core/src/test/java/org/apache/helix/integration/TestAlertingRebalancerFailure.java
+++ 
b/helix-core/src/test/java/org/apache/helix/integration/TestAlertingRebalancerFailure.java
@@ -43,6 +43,7 @@ import org.apache.helix.model.ClusterConfig;
 import org.apache.helix.model.IdealState;
 import org.apache.helix.model.IdealState.RebalanceMode;
 import org.apache.helix.model.InstanceConfig;
+import org.apache.helix.model.builder.FullAutoModeISBuilder;
 import org.apache.helix.monitoring.mbeans.MonitorDomainNames;
 import org.apache.helix.monitoring.mbeans.ResourceMonitor;
 import 
org.apache.helix.tools.ClusterVerifiers.BestPossibleExternalViewVerifier;
@@ -118,18 +119,25 @@ public class TestAlertingRebalancerFailure extends 
ZkStandAloneCMTestBase {
 
   @Test
   public void testParticipantUnavailable() throws Exception {
-    _gSetupTool.addResourceToCluster(CLUSTER_NAME, testDb, 5,
-        BuiltInStateModelDefinitions.MasterSlave.name(), 
RebalanceMode.FULL_AUTO.name());
+    IdealState idealState = new FullAutoModeISBuilder(testDb)
+        .setStateModel(BuiltInStateModelDefinitions.MasterSlave.name())
+        
.setStateModelFactoryName("DEFAULT").setNumPartitions(5).setNumReplica(3)
+        .setRebalancerMode(IdealState.RebalanceMode.FULL_AUTO)
+        
.setRebalancerClass("org.apache.helix.controller.rebalancer.DelayedAutoRebalancer")
+        .setRebalanceStrategy(
+            
"org.apache.helix.controller.rebalancer.strategy.CrushEdRebalanceStrategy").build();
+
+    _gSetupTool.addResourceToCluster(CLUSTER_NAME, testDb, idealState);
     _gSetupTool.rebalanceStorageCluster(CLUSTER_NAME, testDb, 3);
-    ZkHelixClusterVerifier verifier = new 
BestPossibleExternalViewVerifier.Builder(CLUSTER_NAME)
-        .setZkClient(_gZkClient).setResources(new 
HashSet<>(Collections.singleton(testDb))).build();
+    ZkHelixClusterVerifier verifier =
+        new 
BestPossibleExternalViewVerifier.Builder(CLUSTER_NAME).setZkClient(_gZkClient)
+            .setResources(new 
HashSet<>(Collections.singleton(testDb))).build();
     Assert.assertTrue(verifier.verifyByPolling());
 
     // disable then enable the resource to ensure no rebalancing error is 
generated during this
     // process
     _gSetupTool.dropResourceFromCluster(CLUSTER_NAME, testDb);
-    _gSetupTool.addResourceToCluster(CLUSTER_NAME, testDb, 5,
-        BuiltInStateModelDefinitions.MasterSlave.name(), 
RebalanceMode.FULL_AUTO.name());
+    _gSetupTool.addResourceToCluster(CLUSTER_NAME, testDb, idealState);
     _gSetupTool.rebalanceStorageCluster(CLUSTER_NAME, testDb, 3);
     Assert.assertTrue(verifier.verifyByPolling());
 

Reply via email to