This is an automated email from the ASF dual-hosted git repository.
zuston pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/uniffle.git
The following commit(s) were added to refs/heads/master by this push:
new c826cf5a1 improvement(test): Clean temp dir and allocate real ports
for MiniDFSCluster (#2425)
c826cf5a1 is described below
commit c826cf5a13f30dc5b30d0b4462b9e1d5f806f101
Author: Guoyuan <[email protected]>
AuthorDate: Thu Apr 3 10:09:03 2025 +0800
improvement(test): Clean temp dir and allocate real ports for
MiniDFSCluster (#2425)
### What changes were proposed in this pull request?
This PR improves the `KerberizedHadoop` test base class by addressing two
main issues:
1. **Temporary Directory Cleanup**:
Previously created temporary directories (e.g., `tempDir`,
`kerberizedDfsBaseDir`) were not deleted after the test execution, which could
lead to filesystem clutter. This PR ensures that they are properly cleaned up
via JVM shutdown hooks registered when the directories are created.
2. **Port Allocation Simplification and Safety**:
Replaced the manual port selection logic (`findAvailablePorts`) with
dynamic port allocation by configuring HDFS to use port `0`. This avoids the
race condition where manually selected ports may be taken by other processes
before being bound.
### Why are the changes needed?
- To avoid polluting the `/tmp` filesystem with leftover directories.
- To make the test setup more robust and avoid flaky failures caused by
port conflicts.
- To reduce unnecessary complexity and improve maintainability.
### Does this PR introduce _any_ user-facing change?
No.
### How was this patch tested?
- Existing tests using `KerberizedHadoop` were run multiple times to ensure
stability.
- Verified that no temporary directories remain after tests.
- Verified that port binding is successful and dynamic allocation works as
expected.
---
.../apache/uniffle/common/KerberizedHadoop.java | 73 +++++++++++-----------
1 file changed, 37 insertions(+), 36 deletions(-)
diff --git
a/common/src/test/java/org/apache/uniffle/common/KerberizedHadoop.java
b/common/src/test/java/org/apache/uniffle/common/KerberizedHadoop.java
index 71a09d26b..4da690825 100644
--- a/common/src/test/java/org/apache/uniffle/common/KerberizedHadoop.java
+++ b/common/src/test/java/org/apache/uniffle/common/KerberizedHadoop.java
@@ -24,13 +24,11 @@ import java.io.OutputStreamWriter;
import java.io.Serializable;
import java.lang.reflect.Method;
import java.net.BindException;
-import java.net.ServerSocket;
import java.nio.charset.StandardCharsets;
import java.nio.file.Files;
import java.nio.file.Path;
import java.security.PrivilegedExceptionAction;
-import java.util.ArrayList;
-import java.util.List;
+import java.util.Comparator;
import java.util.Properties;
import java.util.Set;
import java.util.stream.Collectors;
@@ -100,6 +98,9 @@ public class KerberizedHadoop implements Serializable {
tempDir = Files.createTempDirectory("tempDir").toFile().toPath();
kerberizedDfsBaseDir =
Files.createTempDirectory("kerberizedDfsBaseDir").toFile().toPath();
+ registerShutdownCleanup(tempDir);
+ registerShutdownCleanup(kerberizedDfsBaseDir);
+
startKDC();
try {
startKerberizedDFS();
@@ -198,27 +199,23 @@ public class KerberizedHadoop implements Serializable {
this.kerberizedDfsCluster =
RetryUtils.retry(
() -> {
- List<Integer> ports = findAvailablePorts(5);
- LOGGER.info("Find available ports: {}", ports);
-
- hdfsConf.set("dfs.datanode.ipc.address", "0.0.0.0:" +
ports.get(0));
- hdfsConf.set("dfs.datanode.address", "0.0.0.0:" + ports.get(1));
- hdfsConf.set("dfs.datanode.http.address", "0.0.0.0:" +
ports.get(2));
- hdfsConf.set("dfs.datanode.http.address", "0.0.0.0:" +
ports.get(3));
-
- return ugi.doAs(
- new PrivilegedExceptionAction<MiniDFSCluster>() {
-
- @Override
- public MiniDFSCluster run() throws Exception {
- return new MiniDFSCluster.Builder(hdfsConf)
- .nameNodePort(ports.get(4))
- .numDataNodes(1)
- .clusterId("kerberized-cluster-1")
- .checkDataNodeAddrConfig(true)
- .build();
- }
- });
+ hdfsConf.set("dfs.datanode.ipc.address", "0.0.0.0:0");
+ hdfsConf.set("dfs.datanode.address", "0.0.0.0:0");
+ hdfsConf.set("dfs.datanode.http.address", "0.0.0.0:0");
+
+ MiniDFSCluster cluster =
ugi.doAs((PrivilegedExceptionAction<MiniDFSCluster>) () ->
+ new MiniDFSCluster.Builder(hdfsConf)
+ .numDataNodes(1)
+ .clusterId("kerberized-cluster-1")
+ .checkDataNodeAddrConfig(true)
+ .format(true)
+ .build()
+ );
+
+ LOGGER.info("NameNode: {}",
cluster.getNameNode().getHttpAddress());
+ LOGGER.info("DataNode: {}",
cluster.getDataNodes().get(0).getXferAddress());
+
+ return cluster;
},
1000L,
5,
@@ -263,21 +260,25 @@ public class KerberizedHadoop implements Serializable {
UserGroupInformation.reset();
}
- private List<Integer> findAvailablePorts(int num) throws IOException {
- List<ServerSocket> sockets = new ArrayList<>();
- List<Integer> ports = new ArrayList<>();
-
- for (int i = 0; i < num; i++) {
- ServerSocket socket = new ServerSocket(0);
- ports.add(socket.getLocalPort());
- sockets.add(socket);
+ private void registerShutdownCleanup(Path dir) {
+ if (dir != null) {
+ Runtime.getRuntime().addShutdownHook(new Thread(() -> {
+ try {
+ deleteDirectory(dir);
+ } catch (IOException e) {
+ LOGGER.warn("Failed to delete temp dir on shutdown: {}", dir, e);
+ }
+ }));
}
+ }
- for (ServerSocket socket : sockets) {
- socket.close();
+ private void deleteDirectory(Path dir) throws IOException {
+ if (dir != null && Files.exists(dir)) {
+ Files.walk(dir)
+ .sorted(Comparator.reverseOrder())
+ .map(Path::toFile)
+ .forEach(File::delete);
}
-
- return ports;
}
public String getSchemeAndAuthorityPrefix() {