This is an automated email from the ASF dual-hosted git repository.

xyuanlu pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/helix.git


The following commit(s) were added to refs/heads/master by this push:
     new def71b5da Fix dropping error state partition for customized resource 
(#3024)
def71b5da is described below

commit def71b5da18b15bdb23468da66177e556aa0d36f
Author: xyuanlu <[email protected]>
AuthorDate: Thu May 1 10:36:03 2025 -0700

    Fix dropping error state partition for customized resource (#3024)
    
    Fix dropping error state partition for customized resource.
    Helix should always keep the error state replica in EV.
---
 .../controller/rebalancer/CustomRebalancer.java    |  4 +--
 .../helix/integration/TestErrorReplicaPersist.java | 31 ++++++++++++++++++++--
 2 files changed, 30 insertions(+), 5 deletions(-)

diff --git 
a/helix-core/src/main/java/org/apache/helix/controller/rebalancer/CustomRebalancer.java
 
b/helix-core/src/main/java/org/apache/helix/controller/rebalancer/CustomRebalancer.java
index 939d94aed..4b7a44f16 100644
--- 
a/helix-core/src/main/java/org/apache/helix/controller/rebalancer/CustomRebalancer.java
+++ 
b/helix-core/src/main/java/org/apache/helix/controller/rebalancer/CustomRebalancer.java
@@ -129,13 +129,11 @@ public class CustomRebalancer extends 
AbstractRebalancer<ResourceControllerDataP
 
     Map<String, LiveInstance> assignableLiveInstancesMap = 
cache.getAssignableLiveInstances();
     for (String instance : idealStateMap.keySet()) {
-      boolean notInErrorState = currentStateMap != null
-          && 
!HelixDefinedState.ERROR.toString().equals(currentStateMap.get(instance));
       boolean enabled = !disabledInstancesForPartition.contains(instance) && 
isResourceEnabled;
 
       // Note: if instance is not live, the mapping for that instance will not 
show up in
       // BestPossibleMapping (and ExternalView)
-      if (assignableLiveInstancesMap.containsKey(instance) && notInErrorState) 
{
+      if (assignableLiveInstancesMap.containsKey(instance)){
         if (enabled) {
           instanceStateMap.put(instance, idealStateMap.get(instance));
         } else {
diff --git 
a/helix-core/src/test/java/org/apache/helix/integration/TestErrorReplicaPersist.java
 
b/helix-core/src/test/java/org/apache/helix/integration/TestErrorReplicaPersist.java
index 5c7fb867b..b1c9c0807 100644
--- 
a/helix-core/src/test/java/org/apache/helix/integration/TestErrorReplicaPersist.java
+++ 
b/helix-core/src/test/java/org/apache/helix/integration/TestErrorReplicaPersist.java
@@ -19,9 +19,11 @@ package org.apache.helix.integration;
  * under the License.
  */
 
+import java.util.Collections;
 import java.util.Date;
 
 import org.apache.helix.ConfigAccessor;
+import org.apache.helix.HelixDataAccessor;
 import org.apache.helix.HelixRollbackException;
 import org.apache.helix.NotificationContext;
 import org.apache.helix.TestHelper;
@@ -30,7 +32,11 @@ import 
org.apache.helix.integration.common.ZkStandAloneCMTestBase;
 import org.apache.helix.integration.manager.ClusterControllerManager;
 import org.apache.helix.integration.manager.MockParticipantManager;
 import org.apache.helix.integration.rebalancer.TestAutoRebalance;
+import org.apache.helix.manager.zk.ZKHelixDataAccessor;
+import org.apache.helix.manager.zk.ZkBaseDataAccessor;
 import org.apache.helix.model.ClusterConfig;
+import org.apache.helix.model.ExternalView;
+import org.apache.helix.model.IdealState;
 import org.apache.helix.model.MasterSlaveSMD;
 import org.apache.helix.model.Message;
 import org.apache.helix.participant.StateMachineEngine;
@@ -41,6 +47,7 @@ import org.apache.helix.participant.statemachine.Transition;
 import org.apache.helix.tools.ClusterStateVerifier;
 import 
org.apache.helix.tools.ClusterVerifiers.BestPossibleExternalViewVerifier;
 import org.apache.helix.tools.ClusterVerifiers.HelixClusterVerifier;
+import org.apache.helix.zookeeper.datamodel.ZNRecord;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 import org.testng.Assert;
@@ -60,6 +67,11 @@ public class TestErrorReplicaPersist extends 
ZkStandAloneCMTestBase {
     _gSetupTool.addCluster(CLUSTER_NAME, true);
     createResourceWithDelayedRebalance(CLUSTER_NAME, TEST_DB, 
MasterSlaveSMD.name, _PARTITIONS,
         _replica, _replica - 1, 1800000, 
CrushEdRebalanceStrategy.class.getName());
+
+    // Create customized resource
+    _gSetupTool.addResourceToCluster(CLUSTER_NAME, "db-customized", 64, 
MasterSlaveSMD.name,
+        IdealState.RebalanceMode.CUSTOMIZED.name());
+
     for (int i = 0; i < numNode; i++) {
       String storageNodeName = PARTICIPANT_PREFIX + "_" + (START_PORT + i);
       _gSetupTool.addInstanceToCluster(CLUSTER_NAME, storageNodeName);
@@ -83,6 +95,8 @@ public class TestErrorReplicaPersist extends 
ZkStandAloneCMTestBase {
     _controller = new ClusterControllerManager(ZK_ADDR, CLUSTER_NAME, 
controllerName);
     _controller.syncStart();
 
+    _gSetupTool.rebalanceStorageCluster(CLUSTER_NAME, "db-customized", 
_replica);
+
     boolean result = ClusterStateVerifier.verifyByZkCallback(
         new TestAutoRebalance.ExternalViewBalancedVerifier(_gZkClient, 
CLUSTER_NAME, TEST_DB));
 
@@ -103,7 +117,7 @@ public class TestErrorReplicaPersist extends 
ZkStandAloneCMTestBase {
     ClusterConfig clusterConfig = 
configAccessor.getClusterConfig(CLUSTER_NAME);
     clusterConfig.setErrorPartitionThresholdForLoadBalance(100000);
     configAccessor.setClusterConfig(CLUSTER_NAME, clusterConfig);
-    
+
     for (int i = 0; i < (NODE_NR + 1) / 2; i++) {
       _participants[i].syncStop();
       Thread.sleep(2000);
@@ -116,8 +130,10 @@ public class TestErrorReplicaPersist extends 
ZkStandAloneCMTestBase {
       participant.syncStart();
       _participants[i] = participant;
     }
+    // verify full auto converged
     HelixClusterVerifier verifier =
-        new 
BestPossibleExternalViewVerifier.Builder(CLUSTER_NAME).setZkClient(_gZkClient)
+        new 
BestPossibleExternalViewVerifier.Builder(CLUSTER_NAME).setZkClient(_gZkClient).setResources(
+                Collections.singleton(TEST_DB))
             
.setWaitTillVerify(TestHelper.DEFAULT_REBALANCE_PROCESSING_WAIT_TIME)
             .build();
     Assert.assertTrue(((BestPossibleExternalViewVerifier) 
verifier).verifyByPolling());
@@ -127,6 +143,17 @@ public class TestErrorReplicaPersist extends 
ZkStandAloneCMTestBase {
     }
 
     Assert.assertTrue(((BestPossibleExternalViewVerifier) 
verifier).verifyByPolling());
+
+    // verify customized external view has 3 replica for each partition
+    HelixDataAccessor accessor =
+        new ZKHelixDataAccessor(CLUSTER_NAME, new 
ZkBaseDataAccessor<ZNRecord>(_gZkClient));
+    ExternalView externalView =
+        accessor
+            .getProperty( accessor.keyBuilder().externalView("db-customized"));
+    Assert.assertEquals(externalView.getRecord().getMapFields().size(), 64);
+    for(String partition : externalView.getRecord().getMapFields().keySet()) {
+      
Assert.assertEquals(externalView.getRecord().getMapField(partition).size(), 3);
+    }
   }
 
 

Reply via email to