This is an automated email from the ASF dual-hosted git repository.
jiajunwang pushed a commit to branch helix-0.9.x
in repository https://gitbox.apache.org/repos/asf/helix.git
The following commit(s) were added to refs/heads/helix-0.9.x by this push:
new 75b9688 Fix flaky test testGetChildrenOnLargeNumChildren (#1194)
75b9688 is described below
commit 75b9688c7e089f6e5b06a75237e6fa84a4d995cc
Author: Huizhi Lu <[email protected]>
AuthorDate: Wed Aug 5 10:56:55 2020 -0700
Fix flaky test testGetChildrenOnLargeNumChildren (#1194)
testGetChildrenOnLargeNumChildren becomes flaky after more commits are
checked in because reflection doesn't work as expected. This commit fixes it
by replacing reflection with creating 110K children for the test.
---
.../helix/manager/zk/zookeeper/ZkClient.java | 10 +--
.../apache/helix/manager/zk/TestRawZkClient.java | 75 +++++++++-------------
2 files changed, 33 insertions(+), 52 deletions(-)
diff --git
a/helix-core/src/main/java/org/apache/helix/manager/zk/zookeeper/ZkClient.java
b/helix-core/src/main/java/org/apache/helix/manager/zk/zookeeper/ZkClient.java
index 89b3c7a..6f7c8ef 100644
---
a/helix-core/src/main/java/org/apache/helix/manager/zk/zookeeper/ZkClient.java
+++
b/helix-core/src/main/java/org/apache/helix/manager/zk/zookeeper/ZkClient.java
@@ -77,8 +77,9 @@ public class ZkClient implements Watcher {
// If number of children exceeds this limit, getChildren() should not retry
on connection loss.
// This is a workaround for exiting retry on connection loss because of
large number of children.
+ // 100K is specific for helix messages which use UUID, making packet length
just below 4 MB.
// TODO: remove it once we have a better way to exit retry for this case
- private static final int NUM_CHILDREN_LIMIT;
+ private static final int NUM_CHILDREN_LIMIT = 100 * 1000;
private final IZkConnection _connection;
private final long _operationRetryTimeoutInMillis;
@@ -96,13 +97,6 @@ public class ZkClient implements Watcher {
private PathBasedZkSerializer _pathBasedZkSerializer;
private ZkClientMonitor _monitor;
- static {
- // 100K is specific for helix messages which use UUID, making packet
length just below 4 MB.
- // Set it here for unit test to use reflection to change value
- // because compilers optimize constants by replacing them inline.
- NUM_CHILDREN_LIMIT = 100 * 1000;
- }
-
private class IZkDataListenerEntry {
final IZkDataListener _dataListener;
final boolean _prefetchData;
diff --git
a/helix-core/src/test/java/org/apache/helix/manager/zk/TestRawZkClient.java
b/helix-core/src/test/java/org/apache/helix/manager/zk/TestRawZkClient.java
index 9cea4c2..1cc2f2f 100644
--- a/helix-core/src/test/java/org/apache/helix/manager/zk/TestRawZkClient.java
+++ b/helix-core/src/test/java/org/apache/helix/manager/zk/TestRawZkClient.java
@@ -27,6 +27,7 @@ import org.apache.helix.SystemPropertyKeys;
import org.apache.helix.TestHelper;
import org.apache.helix.ZNRecord;
import org.apache.helix.ZkUnitTestBase;
+import org.apache.helix.common.ZkTestBase;
import org.apache.helix.manager.zk.zookeeper.ZkConnection;
import org.apache.helix.monitoring.mbeans.MBeanRegistrar;
import org.apache.helix.monitoring.mbeans.MonitorDomainNames;
@@ -44,8 +45,8 @@ import org.testng.annotations.Test;
import javax.management.MBeanServer;
import javax.management.ObjectName;
import java.lang.management.ManagementFactory;
-import java.lang.reflect.Field;
-import java.lang.reflect.Modifier;
+import java.util.ArrayList;
+import java.util.List;
import java.util.Random;
import java.util.UUID;
import java.util.concurrent.ExecutorService;
@@ -413,66 +414,52 @@ public class TestRawZkClient extends ZkUnitTestBase {
}
}
-
/*
* Tests getChildren() when there are an excessive number of children and
connection loss happens,
* the operation should terminate and exit retry loop.
*/
- @Test
+ @Test(timeOut = 30 * 1000L)
public void testGetChildrenOnLargeNumChildren() throws Exception {
- // Default packetLen is 4M. It is static final and initialized
- // when first zkClient is created.
- // So we could not just set "jute.maxbuffer" to change the value.
- // Reflection is needed to change the value.
- // Remove "final" modifier
- Field modifiersField = Field.class.getDeclaredField("modifiers");
- boolean isModifierAccessible = modifiersField.isAccessible();
- modifiersField.setAccessible(true);
-
- Field packetLenField = ClientCnxn.class.getDeclaredField("packetLen");
- Field childrenLimitField =
-
org.apache.helix.manager.zk.zookeeper.ZkClient.class.getDeclaredField("NUM_CHILDREN_LIMIT");
- modifiersField.setInt(packetLenField, packetLenField.getModifiers() &
~Modifier.FINAL);
- modifiersField.setInt(childrenLimitField,
childrenLimitField.getModifiers() & ~Modifier.FINAL);
-
- boolean isPacketLenAccessible = packetLenField.isAccessible();
- packetLenField.setAccessible(true);
- int originPacketLen = packetLenField.getInt(null);
- // Keep 150 bytes for successfully creating each child node.
- packetLenField.set(null, 150);
-
- boolean isChildrenLimitAccessible = childrenLimitField.isAccessible();
- childrenLimitField.setAccessible(true);
- int originChildrenLimit = childrenLimitField.getInt(null);
- childrenLimitField.set(null, 2);
-
- String path = "/" + TestHelper.getTestMethodName();
- // Create 5 children to make packet length of children exceed 150 bytes
+ final String methodName = TestHelper.getTestMethodName();
+ System.out.println("Start test: " + methodName);
+ // Create 110K children to make packet length of children exceed 4 MB
// and cause connection loss for getChildren() operation
- for (int i = 0; i < 5; i++) {
- _zkClient.createPersistent(path + "/" + UUID.randomUUID().toString(),
true);
+ String path = "/" + methodName;
+
+ _zkClient.createPersistent(path);
+
+ for (int i = 0; i < 110; i++) {
+ List<Op> ops = new ArrayList<>(1000);
+ for (int j = 0; j < 1000; j++) {
+ String childPath = path + "/" + UUID.randomUUID().toString();
+ // Create ephemeral nodes so closing zkClient deletes them for cleanup
+ ops.add(
+ Op.create(childPath, new byte[0], ZooDefs.Ids.OPEN_ACL_UNSAFE,
CreateMode.EPHEMERAL));
+ }
+ // Reduce total creation time by batch creating znodes
+ _zkClient.multi(ops);
}
try {
_zkClient.getChildren(path);
- Assert.fail("Should not successfully get children.");
+ Assert.fail("Should not successfully get children because of connection
loss.");
} catch (ZkException expected) {
Assert.assertEquals(expected.getMessage(),
"org.apache.zookeeper.KeeperException$MarshallingErrorException:
"
+ "KeeperErrorCode = MarshallingError");
} finally {
- packetLenField.set(null, originPacketLen);
- packetLenField.setAccessible(isPacketLenAccessible);
-
- childrenLimitField.set(null, originChildrenLimit);
- childrenLimitField.setAccessible(isChildrenLimitAccessible);
-
- modifiersField.setAccessible(isModifierAccessible);
+ // Delete children ephemeral znodes
+ _zkClient.close();
+ _zkClient = new ZkClient(ZkTestBase.ZK_ADDR);
Assert.assertTrue(TestHelper.verify(() -> {
- _zkClient.deleteRecursively(path);
- return !_zkClient.exists(path);
+ try {
+ return _zkClient.delete(path);
+ } catch (ZkException e) {
+ return false;
+ }
}, TestHelper.WAIT_DURATION));
}
+ System.out.println("End test: " + methodName);
}
}