This is an automated email from the ASF dual-hosted git repository.

jxue pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/helix.git


The following commit(s) were added to refs/heads/master by this push:
     new 6609c3773 [apache/helix] -- Added detail in the Exception message for 
WAGED rebalance (hard constraint) failures. (#2829)
6609c3773 is described below

commit 6609c377322bd7f5fceb035fc299001a8583eaea
Author: Himanshu Kandwal <himanshuk...@gmail.com>
AuthorDate: Wed Jul 17 10:53:15 2024 -0700

    [apache/helix] -- Added detail in the Exception message for WAGED rebalance 
(hard constraint) failures. (#2829)
    
     We are enabling constraint level logging in the case when WAGED algorithm 
is not able to find the placement for a replica. The logging will be controlled 
via a flag which is enabled when the failure criteria is met, else is disabled.
---
 .../constraints/ConstraintBasedAlgorithm.java      | 25 +++++++++++
 .../constraints/FaultZoneAwareConstraint.java      |  6 ++-
 .../waged/constraints/HardConstraint.java          | 10 +++++
 .../waged/constraints/NodeCapacityConstraint.java  |  6 ++-
 .../NodeMaxPartitionLimitConstraint.java           |  6 ++-
 .../constraints/ReplicaActivateConstraint.java     |  4 +-
 .../SamePartitionOnInstanceConstraint.java         |  4 +-
 .../waged/constraints/ValidGroupTagConstraint.java |  4 +-
 .../rebalancer/waged/model/ClusterContext.java     |  4 ++
 .../constraints/TestConstraintBasedAlgorithm.java  | 48 ++++++++++++++++++++--
 10 files changed, 104 insertions(+), 13 deletions(-)

diff --git 
a/helix-core/src/main/java/org/apache/helix/controller/rebalancer/waged/constraints/ConstraintBasedAlgorithm.java
 
b/helix-core/src/main/java/org/apache/helix/controller/rebalancer/waged/constraints/ConstraintBasedAlgorithm.java
index 60d43764a..e7510987a 100644
--- 
a/helix-core/src/main/java/org/apache/helix/controller/rebalancer/waged/constraints/ConstraintBasedAlgorithm.java
+++ 
b/helix-core/src/main/java/org/apache/helix/controller/rebalancer/waged/constraints/ConstraintBasedAlgorithm.java
@@ -22,6 +22,7 @@ package 
org.apache.helix.controller.rebalancer.waged.constraints;
 import java.util.ArrayList;
 import java.util.Collection;
 import java.util.HashMap;
+import java.util.HashSet;
 import java.util.List;
 import java.util.Map;
 import java.util.Objects;
@@ -30,6 +31,7 @@ import java.util.Set;
 import java.util.concurrent.ConcurrentHashMap;
 import java.util.stream.Collectors;
 
+import com.google.common.annotations.VisibleForTesting;
 import com.google.common.collect.Maps;
 import org.apache.helix.HelixRebalanceException;
 import org.apache.helix.controller.rebalancer.waged.RebalanceAlgorithm;
@@ -136,11 +138,16 @@ class ConstraintBasedAlgorithm implements 
RebalanceAlgorithm {
     }).collect(Collectors.toList());
 
     if (candidateNodes.isEmpty()) {
+      LOG.info("Found no eligible candidate nodes. Enabling hard constraint 
level logging for cluster: {}", clusterContext.getClusterName());
+      enableFullLoggingForCluster();
       optimalAssignment.recordAssignmentFailure(replica,
           Maps.transformValues(hardConstraintFailures, 
this::convertFailureReasons));
       return Optional.empty();
     }
 
+    LOG.info("Disabling hard constraint level logging for cluster: {}", 
clusterContext.getClusterName());
+    removeFullLoggingForCluster();
+
     return candidateNodes.parallelStream().map(node -> new 
HashMap.SimpleEntry<>(node,
         getAssignmentNormalizedScore(node, replica, clusterContext)))
         .max((nodeEntry1, nodeEntry2) -> {
@@ -179,6 +186,24 @@ class ConstraintBasedAlgorithm implements 
RebalanceAlgorithm {
         .collect(Collectors.toList());
   }
 
+  /**
+   * Enables logging of failures in all hard constraints
+   */
+  private void enableFullLoggingForCluster() {
+    for (HardConstraint hardConstraint : _hardConstraints) {
+      hardConstraint.setEnableLogging(true);
+    }
+  }
+
+  /**
+   * Removes the cluster from full logging in all hard constraints (if added 
previously)
+   */
+  private void removeFullLoggingForCluster() {
+    for (HardConstraint hardConstraint : _hardConstraints) {
+      hardConstraint.setEnableLogging(false);
+    }
+  }
+
   private static class AssignableReplicaWithScore implements 
Comparable<AssignableReplicaWithScore> {
     private final AssignableReplica _replica;
     private float _score = 0;
diff --git 
a/helix-core/src/main/java/org/apache/helix/controller/rebalancer/waged/constraints/FaultZoneAwareConstraint.java
 
b/helix-core/src/main/java/org/apache/helix/controller/rebalancer/waged/constraints/FaultZoneAwareConstraint.java
index e0992b5d4..459e35853 100644
--- 
a/helix-core/src/main/java/org/apache/helix/controller/rebalancer/waged/constraints/FaultZoneAwareConstraint.java
+++ 
b/helix-core/src/main/java/org/apache/helix/controller/rebalancer/waged/constraints/FaultZoneAwareConstraint.java
@@ -42,8 +42,10 @@ class FaultZoneAwareConstraint extends HardConstraint {
         
clusterContext.getPartitionsForResourceAndFaultZone(replica.getResourceName(), 
node.getFaultZone());
 
     if 
(partitionsForResourceAndFaultZone.contains(replica.getPartitionName())) {
-      LOG.debug("A fault zone cannot contain more than 1 replica of same 
partition. Found replica for partition: {}",
-          replica.getPartitionName());
+      if (enableLogging) {
+        LOG.info("A fault zone cannot contain more than 1 replica of same 
partition. Found replica for partition: {}",
+            replica.getPartitionName());
+      }
       return false;
     }
     return true;
diff --git 
a/helix-core/src/main/java/org/apache/helix/controller/rebalancer/waged/constraints/HardConstraint.java
 
b/helix-core/src/main/java/org/apache/helix/controller/rebalancer/waged/constraints/HardConstraint.java
index a0e2ff8c4..281adae34 100644
--- 
a/helix-core/src/main/java/org/apache/helix/controller/rebalancer/waged/constraints/HardConstraint.java
+++ 
b/helix-core/src/main/java/org/apache/helix/controller/rebalancer/waged/constraints/HardConstraint.java
@@ -29,6 +29,8 @@ import 
org.apache.helix.controller.rebalancer.waged.model.ClusterContext;
  */
 abstract class HardConstraint {
 
+  protected boolean enableLogging = false;
+
   /**
    * Check if the replica could be assigned to the node
    * @return True if the proposed assignment is valid; False otherwise
@@ -44,4 +46,12 @@ abstract class HardConstraint {
   String getDescription() {
     return getClass().getName();
   }
+
+  /**
+   * Sets the flag to enable constraint level logging
+   */
+  public void setEnableLogging(boolean enableLogging) {
+    this.enableLogging = enableLogging;
+  }
+
 }
diff --git 
a/helix-core/src/main/java/org/apache/helix/controller/rebalancer/waged/constraints/NodeCapacityConstraint.java
 
b/helix-core/src/main/java/org/apache/helix/controller/rebalancer/waged/constraints/NodeCapacityConstraint.java
index 056d7f842..a686037de 100644
--- 
a/helix-core/src/main/java/org/apache/helix/controller/rebalancer/waged/constraints/NodeCapacityConstraint.java
+++ 
b/helix-core/src/main/java/org/apache/helix/controller/rebalancer/waged/constraints/NodeCapacityConstraint.java
@@ -40,8 +40,10 @@ class NodeCapacityConstraint extends HardConstraint {
     for (String key : replicaCapacity.keySet()) {
       if (nodeCapacity.containsKey(key)) {
         if (nodeCapacity.get(key) < replicaCapacity.get(key)) {
-          LOG.debug("Node has insufficient capacity for: {}. Left available: 
{}, Required: {}",
-                  key, nodeCapacity.get(key), replicaCapacity.get(key));
+          if (enableLogging) {
+            LOG.info("Node has insufficient capacity for: {}. Left available: 
{}, Required: {}",
+                key, nodeCapacity.get(key), replicaCapacity.get(key));
+          }
           return false;
         }
       }
diff --git 
a/helix-core/src/main/java/org/apache/helix/controller/rebalancer/waged/constraints/NodeMaxPartitionLimitConstraint.java
 
b/helix-core/src/main/java/org/apache/helix/controller/rebalancer/waged/constraints/NodeMaxPartitionLimitConstraint.java
index deedf4aa5..08b2ee19e 100644
--- 
a/helix-core/src/main/java/org/apache/helix/controller/rebalancer/waged/constraints/NodeMaxPartitionLimitConstraint.java
+++ 
b/helix-core/src/main/java/org/apache/helix/controller/rebalancer/waged/constraints/NodeMaxPartitionLimitConstraint.java
@@ -48,8 +48,10 @@ class NodeMaxPartitionLimitConstraint extends HardConstraint 
{
         || assignedPartitionsByResourceSize < resourceMaxPartitionsPerInstance;
 
     if (!exceedResourceMaxPartitionLimit) {
-      LOG.debug("Cannot exceed the max number of partitions per resource ({}) 
limitation on node. Assigned replica count: {}",
-          resourceMaxPartitionsPerInstance, assignedPartitionsByResourceSize);
+      if (enableLogging) {
+        LOG.info("Cannot exceed the max number of partitions per resource ({}) 
limitation on node. Assigned replica count: {}",
+            resourceMaxPartitionsPerInstance, 
assignedPartitionsByResourceSize);
+      }
       return false;
     }
     return true;
diff --git 
a/helix-core/src/main/java/org/apache/helix/controller/rebalancer/waged/constraints/ReplicaActivateConstraint.java
 
b/helix-core/src/main/java/org/apache/helix/controller/rebalancer/waged/constraints/ReplicaActivateConstraint.java
index 13e380a0a..efee2a219 100644
--- 
a/helix-core/src/main/java/org/apache/helix/controller/rebalancer/waged/constraints/ReplicaActivateConstraint.java
+++ 
b/helix-core/src/main/java/org/apache/helix/controller/rebalancer/waged/constraints/ReplicaActivateConstraint.java
@@ -38,7 +38,9 @@ class ReplicaActivateConstraint extends HardConstraint {
     List<String> disabledPartitions = 
node.getDisabledPartitionsMap().get(replica.getResourceName());
 
     if (disabledPartitions != null && 
disabledPartitions.contains(replica.getPartitionName())) {
-      LOG.debug("Cannot assign the inactive replica: {}", 
replica.getPartitionName());
+      if (enableLogging) {
+        LOG.info("Cannot assign the inactive replica: {}", 
replica.getPartitionName());
+      }
       return false;
     }
     return true;
diff --git 
a/helix-core/src/main/java/org/apache/helix/controller/rebalancer/waged/constraints/SamePartitionOnInstanceConstraint.java
 
b/helix-core/src/main/java/org/apache/helix/controller/rebalancer/waged/constraints/SamePartitionOnInstanceConstraint.java
index e6de9989c..3dd7c65bb 100644
--- 
a/helix-core/src/main/java/org/apache/helix/controller/rebalancer/waged/constraints/SamePartitionOnInstanceConstraint.java
+++ 
b/helix-core/src/main/java/org/apache/helix/controller/rebalancer/waged/constraints/SamePartitionOnInstanceConstraint.java
@@ -37,7 +37,9 @@ class SamePartitionOnInstanceConstraint extends 
HardConstraint {
     Set<String> assignedPartitionsByResource = 
node.getAssignedPartitionsByResource(replica.getResourceName());
 
     if (assignedPartitionsByResource.contains(replica.getPartitionName())) {
-      LOG.debug("Same partition ({}) of different states cannot co-exist in 
one instance", replica.getPartitionName());
+      if (enableLogging) {
+        LOG.info("Same partition ({}) of different states cannot co-exist in 
one instance", replica.getPartitionName());
+      }
       return false;
     }
     return true;
diff --git 
a/helix-core/src/main/java/org/apache/helix/controller/rebalancer/waged/constraints/ValidGroupTagConstraint.java
 
b/helix-core/src/main/java/org/apache/helix/controller/rebalancer/waged/constraints/ValidGroupTagConstraint.java
index b1e257c87..d0609e005 100644
--- 
a/helix-core/src/main/java/org/apache/helix/controller/rebalancer/waged/constraints/ValidGroupTagConstraint.java
+++ 
b/helix-core/src/main/java/org/apache/helix/controller/rebalancer/waged/constraints/ValidGroupTagConstraint.java
@@ -38,7 +38,9 @@ class ValidGroupTagConstraint extends HardConstraint {
     }
 
     if 
(!node.getInstanceTags().contains(replica.getResourceInstanceGroupTag())) {
-      LOG.debug("Instance doesn't have the tag of the replica ({})", 
replica.getResourceInstanceGroupTag());
+      if (enableLogging) {
+        LOG.info("Instance doesn't have the tag of the replica ({})", 
replica.getResourceInstanceGroupTag());
+      }
       return false;
     }
     return true;
diff --git 
a/helix-core/src/main/java/org/apache/helix/controller/rebalancer/waged/model/ClusterContext.java
 
b/helix-core/src/main/java/org/apache/helix/controller/rebalancer/waged/model/ClusterContext.java
index 1b3ad2a87..32debca65 100644
--- 
a/helix-core/src/main/java/org/apache/helix/controller/rebalancer/waged/model/ClusterContext.java
+++ 
b/helix-core/src/main/java/org/apache/helix/controller/rebalancer/waged/model/ClusterContext.java
@@ -183,6 +183,10 @@ public class ClusterContext {
     return _clusterCapacityMap;
   }
 
+  public String getClusterName() {
+    return _clusterName;
+  }
+
   public Set<String> getPartitionsForResourceAndFaultZone(String resourceName, 
String faultZoneId) {
     return _assignmentForFaultZoneMap.getOrDefault(faultZoneId, 
Collections.emptyMap())
         .getOrDefault(resourceName, Collections.emptySet());
diff --git 
a/helix-core/src/test/java/org/apache/helix/controller/rebalancer/waged/constraints/TestConstraintBasedAlgorithm.java
 
b/helix-core/src/test/java/org/apache/helix/controller/rebalancer/waged/constraints/TestConstraintBasedAlgorithm.java
index 0a75002f1..1b7b1b944 100644
--- 
a/helix-core/src/test/java/org/apache/helix/controller/rebalancer/waged/constraints/TestConstraintBasedAlgorithm.java
+++ 
b/helix-core/src/test/java/org/apache/helix/controller/rebalancer/waged/constraints/TestConstraintBasedAlgorithm.java
@@ -19,12 +19,13 @@ package 
org.apache.helix.controller.rebalancer.waged.constraints;
  * under the License.
  */
 
-import com.google.common.collect.ImmutableList;
-import com.google.common.collect.ImmutableMap;
 import java.io.IOException;
 import java.util.HashMap;
 import java.util.Map;
 import java.util.Set;
+
+import com.google.common.collect.ImmutableList;
+import com.google.common.collect.ImmutableMap;
 import org.apache.helix.HelixRebalanceException;
 import org.apache.helix.controller.rebalancer.waged.model.AssignableReplica;
 import org.apache.helix.controller.rebalancer.waged.model.ClusterModel;
@@ -33,14 +34,20 @@ import 
org.apache.helix.controller.rebalancer.waged.model.OptimalAssignment;
 import org.testng.Assert;
 import org.testng.annotations.Test;
 
+import static org.mockito.ArgumentMatchers.eq;
 import static org.mockito.Matchers.any;
+import static org.mockito.Mockito.atLeastOnce;
 import static org.mockito.Mockito.mock;
+import static org.mockito.Mockito.never;
+import static org.mockito.Mockito.times;
+import static org.mockito.Mockito.verify;
 import static org.mockito.Mockito.when;
 
 
 public class TestConstraintBasedAlgorithm {
-  @Test(expectedExceptions = HelixRebalanceException.class)
-  public void testCalculateNoValidAssignment() throws IOException, 
HelixRebalanceException {
+
+  @Test
+  public void testCalculateNoValidAssignment() throws IOException {
     HardConstraint mockHardConstraint = mock(HardConstraint.class);
     SoftConstraint mockSoftConstraint = mock(SoftConstraint.class);
     when(mockHardConstraint.isAssignmentValid(any(), any(), 
any())).thenReturn(false);
@@ -49,7 +56,40 @@ public class TestConstraintBasedAlgorithm {
         new ConstraintBasedAlgorithm(ImmutableList.of(mockHardConstraint),
             ImmutableMap.of(mockSoftConstraint, 1f));
     ClusterModel clusterModel = new 
ClusterModelTestHelper().getDefaultClusterModel();
+    try {
+      algorithm.calculate(clusterModel);
+    } catch (HelixRebalanceException ex) {
+      Assert.assertEquals(ex.getFailureType(), 
HelixRebalanceException.Type.FAILED_TO_CALCULATE);
+    }
+
+    verify(mockHardConstraint, times(1)).setEnableLogging(eq(true));
+    verify(mockHardConstraint, times(1)).isAssignmentValid(any(), any(), 
any());
+  }
+
+  @Test
+  public void testCalculateNoValidAssignmentFirstAndThenRecovery() throws 
IOException, HelixRebalanceException {
+    HardConstraint mockHardConstraint = mock(HardConstraint.class);
+    SoftConstraint mockSoftConstraint = mock(SoftConstraint.class);
+    when(mockHardConstraint.isAssignmentValid(any(), any(), any()))
+        .thenReturn(false) // hard constraint fails
+        .thenReturn(true); // hard constraint recovers
+    when(mockSoftConstraint.getAssignmentNormalizedScore(any(), any(), 
any())).thenReturn(1.0);
+    ConstraintBasedAlgorithm algorithm =
+        new ConstraintBasedAlgorithm(ImmutableList.of(mockHardConstraint),
+            ImmutableMap.of(mockSoftConstraint, 1f));
+    ClusterModel clusterModel = new 
ClusterModelTestHelper().getDefaultClusterModel();
+    try {
+      algorithm.calculate(clusterModel);
+    } catch (HelixRebalanceException ex) {
+      Assert.assertEquals(ex.getFailureType(), 
HelixRebalanceException.Type.FAILED_TO_CALCULATE);
+    }
+
+    verify(mockHardConstraint, times(1)).setEnableLogging(eq(true));
+    verify(mockHardConstraint, times(1)).isAssignmentValid(any(), any(), 
any());
+
+    // calling again for recovery (no exception)
     algorithm.calculate(clusterModel);
+    verify(mockHardConstraint, atLeastOnce()).setEnableLogging(eq(false));
   }
 
   @Test

Reply via email to