This is an automated email from the ASF dual-hosted git repository.
ethanfeng pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/celeborn.git
The following commit(s) were added to refs/heads/main by this push:
new dfb18072e [CELEBORN-932][FOLLOWUP] Remove
StatusSystem#handleWorkerRemove from RegisterWorker to avoid duplicated
behavior in RegisterWorker
dfb18072e is described below
commit dfb18072e261f4853afd7e71d4f650d93e4d3193
Author: SteNicholas <[email protected]>
AuthorDate: Wed Sep 18 14:53:04 2024 +0800
[CELEBORN-932][FOLLOWUP] Remove StatusSystem#handleWorkerRemove from
RegisterWorker to avoid duplicated behavior in RegisterWorker
### What changes were proposed in this pull request?
Remove `StatusSystem#handleWorkerRemove` from `RegisterWorker` to avoid
duplicated behavior in `RegisterWorker`.
### Why are the changes needed?
`RegisterWorker` has already been improved to cover the behavior of
`StatusSystem#handleWorkerRemove`. Therefore, it is recommended to remove
`StatusSystem#handleWorkerRemove` from `RegisterWorker` to avoid duplicated
behavior in `RegisterWorker`.
### Does this PR introduce _any_ user-facing change?
No.
### How was this patch tested?
CI.
Closes #2731 from SteNicholas/CELEBORN-932.
Authored-by: SteNicholas <[email protected]>
Signed-off-by: mingji <[email protected]>
---
.../master/clustermeta/IMetadataHandler.java | 3 ---
.../clustermeta/SingleMasterMetaManager.java | 6 ------
.../master/clustermeta/ha/HAMasterMetaManager.java | 23 ----------------------
.../deploy/master/clustermeta/ha/MetaHandler.java | 1 +
.../celeborn/service/deploy/master/Master.scala | 5 +----
5 files changed, 2 insertions(+), 36 deletions(-)
diff --git
a/master/src/main/java/org/apache/celeborn/service/deploy/master/clustermeta/IMetadataHandler.java
b/master/src/main/java/org/apache/celeborn/service/deploy/master/clustermeta/IMetadataHandler.java
index 2c3a596f1..e9dcb3191 100644
---
a/master/src/main/java/org/apache/celeborn/service/deploy/master/clustermeta/IMetadataHandler.java
+++
b/master/src/main/java/org/apache/celeborn/service/deploy/master/clustermeta/IMetadataHandler.java
@@ -47,9 +47,6 @@ public interface IMetadataHandler {
void handleWorkerLost(
String host, int rpcPort, int pushPort, int fetchPort, int
replicatePort, String requestId);
- void handleWorkerRemove(
- String host, int rpcPort, int pushPort, int fetchPort, int
replicatePort, String requestId);
-
void handleRemoveWorkersUnavailableInfo(List<WorkerInfo> unavailableWorkers,
String requestId);
void handleWorkerHeartbeat(
diff --git
a/master/src/main/java/org/apache/celeborn/service/deploy/master/clustermeta/SingleMasterMetaManager.java
b/master/src/main/java/org/apache/celeborn/service/deploy/master/clustermeta/SingleMasterMetaManager.java
index fd79eaa84..2adde50b6 100644
---
a/master/src/main/java/org/apache/celeborn/service/deploy/master/clustermeta/SingleMasterMetaManager.java
+++
b/master/src/main/java/org/apache/celeborn/service/deploy/master/clustermeta/SingleMasterMetaManager.java
@@ -88,12 +88,6 @@ public class SingleMasterMetaManager extends
AbstractMetaManager {
updateWorkerLostMeta(host, rpcPort, pushPort, fetchPort, replicatePort);
}
- @Override
- public void handleWorkerRemove(
- String host, int rpcPort, int pushPort, int fetchPort, int
replicatePort, String requestId) {
- updateWorkerRemoveMeta(host, rpcPort, pushPort, fetchPort, replicatePort);
- }
-
@Override
public void handleRemoveWorkersUnavailableInfo(
List<WorkerInfo> unavailableWorkers, String requestId) {
diff --git
a/master/src/main/java/org/apache/celeborn/service/deploy/master/clustermeta/ha/HAMasterMetaManager.java
b/master/src/main/java/org/apache/celeborn/service/deploy/master/clustermeta/ha/HAMasterMetaManager.java
index 15183d8ba..738fe6eb6 100644
---
a/master/src/main/java/org/apache/celeborn/service/deploy/master/clustermeta/ha/HAMasterMetaManager.java
+++
b/master/src/main/java/org/apache/celeborn/service/deploy/master/clustermeta/ha/HAMasterMetaManager.java
@@ -199,29 +199,6 @@ public class HAMasterMetaManager extends
AbstractMetaManager {
}
}
- @Override
- public void handleWorkerRemove(
- String host, int rpcPort, int pushPort, int fetchPort, int
replicatePort, String requestId) {
- try {
- ratisServer.submitRequest(
- ResourceRequest.newBuilder()
- .setCmdType(Type.WorkerRemove)
- .setRequestId(requestId)
- .setWorkerRemoveRequest(
- ResourceProtos.WorkerRemoveRequest.newBuilder()
- .setHost(host)
- .setRpcPort(rpcPort)
- .setPushPort(pushPort)
- .setFetchPort(fetchPort)
- .setReplicatePort(replicatePort)
- .build())
- .build());
- } catch (CelebornRuntimeException e) {
- LOG.error("Handle worker lost for {} failed!", host, e);
- throw e;
- }
- }
-
@Override
public void handleRemoveWorkersUnavailableInfo(
List<WorkerInfo> unavailableWorkers, String requestId) {
diff --git
a/master/src/main/java/org/apache/celeborn/service/deploy/master/clustermeta/ha/MetaHandler.java
b/master/src/main/java/org/apache/celeborn/service/deploy/master/clustermeta/ha/MetaHandler.java
index 77d68c598..bb16c1904 100644
---
a/master/src/main/java/org/apache/celeborn/service/deploy/master/clustermeta/ha/MetaHandler.java
+++
b/master/src/main/java/org/apache/celeborn/service/deploy/master/clustermeta/ha/MetaHandler.java
@@ -156,6 +156,7 @@ public class MetaHandler {
break;
case WorkerRemove:
+ // TODO: Remove `WorkerRemove` in 0.7.x version to guarantee upgrade
compatibility.
host = request.getWorkerRemoveRequest().getHost();
rpcPort = request.getWorkerRemoveRequest().getRpcPort();
pushPort = request.getWorkerRemoveRequest().getPushPort();
diff --git
a/master/src/main/scala/org/apache/celeborn/service/deploy/master/Master.scala
b/master/src/main/scala/org/apache/celeborn/service/deploy/master/Master.scala
index 90a0e6db5..4c65e9816 100644
---
a/master/src/main/scala/org/apache/celeborn/service/deploy/master/Master.scala
+++
b/master/src/main/scala/org/apache/celeborn/service/deploy/master/Master.scala
@@ -771,9 +771,6 @@ private[celeborn] class Master(
if (statusSystem.workers.contains(workerToRegister)) {
logWarning(s"Receive RegisterWorker while worker" +
s" ${workerToRegister.toString()} already exists, re-register.")
- // TODO: remove `WorkerRemove` because we have improve register logic to
cover `WorkerRemove`
- statusSystem.handleWorkerRemove(host, rpcPort, pushPort, fetchPort,
replicatePort, requestId)
- val newRequestId = MasterClient.genRequestId()
statusSystem.handleRegisterWorker(
host,
rpcPort,
@@ -784,7 +781,7 @@ private[celeborn] class Master(
networkLocation,
disks,
userResourceConsumption,
- newRequestId)
+ requestId)
context.reply(RegisterWorkerResponse(true, "Worker in snapshot,
re-register."))
} else if (statusSystem.workerLostEvents.contains(workerToRegister)) {
logWarning(s"Receive RegisterWorker while worker $workerToRegister " +