This is an automated email from the ASF dual-hosted git repository.
jackylee-ch pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/gluten.git
The following commit(s) were added to refs/heads/main by this push:
new 54bac26b9c [GLUTEN-12350][CORE] Place ConsistentHash virtual nodes by
Node.key() instead of toString() (#12351)
54bac26b9c is described below
commit 54bac26b9cdd535fff95df6590189d9da7576766
Author: YangJie <[email protected]>
AuthorDate: Fri Jun 26 16:48:12 2026 +0800
[GLUTEN-12350][CORE] Place ConsistentHash virtual nodes by Node.key()
instead of toString() (#12351)
---
.../org/apache/gluten/hash/ConsistentHash.java | 62 ++++++++++++++--------
.../org/apache/gluten/hash/ConsistentHashTest.java | 48 +++++++++++++++++
2 files changed, 88 insertions(+), 22 deletions(-)
diff --git
a/gluten-core/src/main/java/org/apache/gluten/hash/ConsistentHash.java
b/gluten-core/src/main/java/org/apache/gluten/hash/ConsistentHash.java
index e3edb34451..b432049645 100644
--- a/gluten-core/src/main/java/org/apache/gluten/hash/ConsistentHash.java
+++ b/gluten-core/src/main/java/org/apache/gluten/hash/ConsistentHash.java
@@ -199,28 +199,34 @@ public class ConsistentHash<T extends
ConsistentHash.Node> {
}
private boolean add(T node) {
- boolean added = false;
- if (node != null && !nodes.containsKey(node)) {
- Set<Partition<T>> partitions =
- IntStream.range(0, replicate)
- .mapToObj(idx -> new Partition<T>(node, idx))
- .collect(Collectors.toSet());
- nodes.put(node, partitions);
-
- // allocate slot.
- for (Partition<T> partition : partitions) {
- long slot;
- int seed = 0;
- do {
- slot = this.hasher.hash(partition.getPartitionKey(), seed++);
- } while (ring.containsKey(slot));
-
- partition.setSlot(slot);
- ring.put(slot, partition);
- }
- added = true;
+ if (node == null) {
+ return false;
+ }
+ // Validate the key before any map lookup: nodes.containsKey() invokes the
node's
+ // hashCode()/equals(), which may themselves dereference key() and NPE on
a null key, masking
+ // the intended fail-fast here.
+ Preconditions.checkArgument(node.key() != null, "Node key must not be
null: %s", node);
+ if (nodes.containsKey(node)) {
+ return false;
}
- return added;
+ Set<Partition<T>> partitions =
+ IntStream.range(0, replicate)
+ .mapToObj(idx -> new Partition<T>(node, idx))
+ .collect(Collectors.toSet());
+ nodes.put(node, partitions);
+
+ // allocate slot.
+ for (Partition<T> partition : partitions) {
+ long slot;
+ int seed = 0;
+ do {
+ slot = this.hasher.hash(partition.getPartitionKey(), seed++);
+ } while (ring.containsKey(slot));
+
+ partition.setSlot(slot);
+ ring.put(slot, partition);
+ }
+ return true;
}
public static class Partition<T extends ConsistentHash.Node> {
@@ -228,15 +234,21 @@ public class ConsistentHash<T extends
ConsistentHash.Node> {
private final int index;
+ // Hash the node by its logical key() so the ring is stable and
reproducible across JVMs (avoid
+ // node.toString(), which may fall back to the identity hash). Computed
once: the key is
+ // immutable and re-read on every collision-retry in add().
+ private final String partitionKey;
+
private long slot;
public Partition(T node, int index) {
this.node = node;
this.index = index;
+ this.partitionKey = node.key() + ":" + index;
}
public String getPartitionKey() {
- return String.format("%s:%d", node, index);
+ return partitionKey;
}
public T getNode() {
@@ -254,6 +266,12 @@ public class ConsistentHash<T extends ConsistentHash.Node>
{
/** Base interface for the node in the ring. */
public interface Node {
+ /**
+ * A stable, non-null identifier for this node. The ring hashes each
virtual node by this key,
+ * so it MUST be deterministic across JVMs and process restarts (e.g.
derived from a host or
+ * executor id) and consistent with {@link Object#equals}: two equal nodes
must return the same
+ * key. Returning identity- or {@code toString()}-based values breaks ring
stability.
+ */
String key();
}
diff --git
a/gluten-core/src/test/java/org/apache/gluten/hash/ConsistentHashTest.java
b/gluten-core/src/test/java/org/apache/gluten/hash/ConsistentHashTest.java
index 7b27594202..875fc46082 100644
--- a/gluten-core/src/test/java/org/apache/gluten/hash/ConsistentHashTest.java
+++ b/gluten-core/src/test/java/org/apache/gluten/hash/ConsistentHashTest.java
@@ -20,6 +20,8 @@ import org.junit.Assert;
import org.junit.Before;
import org.junit.Test;
+import java.util.ArrayList;
+import java.util.List;
import java.util.Objects;
import java.util.Set;
import java.util.stream.Collectors;
@@ -115,6 +117,52 @@ public class ConsistentHashTest {
}
}
+ @Test
+ public void testRingHashesOnNodeKeyNotToString() {
+ // A node whose key() differs from toString(). The ring must hash
partitions by key() so the
+ // distribution is stable/reproducible; relying on toString() would
silently break any Node
+ // that overrides only the contractual key().
+ final List<String> hashedKeys = new ArrayList<>();
+ ConsistentHash.Hasher recordingHasher =
+ (key, seed) -> {
+ hashedKeys.add(key);
+ return key.hashCode() + seed;
+ };
+ ConsistentHash<ConsistentHash.Node> ring = new ConsistentHash<>(REPLICAS,
recordingHasher);
+
+ ConsistentHash.Node node =
+ new ConsistentHash.Node() {
+ @Override
+ public String key() {
+ return "logical-key";
+ }
+
+ @Override
+ public String toString() {
+ return "DISPLAY-ONLY";
+ }
+ };
+ ring.addNode(node);
+
+ // Every partition key fed to the hasher must derive from key(), never
from toString().
+ Assert.assertEquals(REPLICAS, hashedKeys.size());
+ Assert.assertTrue(hashedKeys.stream().allMatch(k ->
k.startsWith("logical-key:")));
+ Assert.assertTrue(hashedKeys.stream().noneMatch(k ->
k.contains("DISPLAY-ONLY")));
+ }
+
+ @Test(expected = IllegalArgumentException.class)
+ public void testAddNodeRejectsNullKey() {
+ // The ring hashes on key(); a null key would silently collapse distinct
nodes onto identical
+ // hash inputs, so it must fail fast.
+ consistentHash.addNode(
+ new ConsistentHash.Node() {
+ @Override
+ public String key() {
+ return null;
+ }
+ });
+ }
+
private static class HostNode implements ConsistentHash.Node {
private final String host;
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]