This is an automated email from the ASF dual-hosted git repository.

zhouky pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/incubator-celeborn.git


The following commit(s) were added to refs/heads/main by this push:
     new 1109e2c8f [CELEBORN-803][FOLLOWUP] Make ```rpcAskTimeout``` default to 60s
1109e2c8f is described below

commit 1109e2c8f4468ab6d19632a6f3e7afba89e9b913
Author: zky.zhoukeyong <[email protected]>
AuthorDate: Mon Jul 17 23:53:52 2023 +0800

    [CELEBORN-803][FOLLOWUP] Make ```rpcAskTimeout``` default to 60s
    
    ### What changes were proposed in this pull request?
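    Raise the default of ```celeborn.rpc.askTimeout``` from 30s to 60s (as title), and raise the default of ```celeborn.client.requestCommitFiles.maxRetries``` from 3 to 4.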
    
    ### Why are the changes needed?
    The timeout of ```RpcEndpointRef.ask``` is controlled by ```celeborn.rpc.askTimeout```,
    so ```celeborn.rpc.askTimeout``` also needs to be increased to extend the timeout for committing files.
    
    ### Does this PR introduce _any_ user-facing change?
    No.
    
    ### How was this patch tested?
    Passes GA and manual test.
    
    Closes #1725 from waitinfuture/803-fu.
    
    Authored-by: zky.zhoukeyong <[email protected]>
    Signed-off-by: zky.zhoukeyong <[email protected]>
---
 common/src/main/scala/org/apache/celeborn/common/CelebornConf.scala | 4 ++--
 docs/configuration/client.md                                        | 2 +-
 docs/configuration/network.md                                       | 2 +-
 3 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/common/src/main/scala/org/apache/celeborn/common/CelebornConf.scala b/common/src/main/scala/org/apache/celeborn/common/CelebornConf.scala
index bedf5f3b4..56e1d3895 100644
--- a/common/src/main/scala/org/apache/celeborn/common/CelebornConf.scala
+++ b/common/src/main/scala/org/apache/celeborn/common/CelebornConf.scala
@@ -1289,7 +1289,7 @@ object CelebornConf extends Logging {
       .doc("Timeout for RPC ask operations. " +
         "It's recommended to set at least `240s` when `HDFS` is enabled in `celeborn.storage.activeTypes`")
       .timeConf(TimeUnit.MILLISECONDS)
-      .createWithDefaultString("30s")
+      .createWithDefaultString("60s")
 
   val RPC_DISPATCHER_THREADS: OptionalConfigEntry[Int] =
     buildConf("celeborn.rpc.dispatcher.threads")
@@ -3191,7 +3191,7 @@ object CelebornConf extends Logging {
       .version("0.3.0")
       .intConf
       .checkValue(v => v > 0, "value must be positive")
-      .createWithDefault(3)
+      .createWithDefault(4)
 
   val CLIENT_COMMIT_IGNORE_EXCLUDED_WORKERS: ConfigEntry[Boolean] =
     buildConf("celeborn.client.commitFiles.ignoreExcludedWorker")
diff --git a/docs/configuration/client.md b/docs/configuration/client.md
index 2a92c35e3..df6799e33 100644
--- a/docs/configuration/client.md
+++ b/docs/configuration/client.md
@@ -62,7 +62,7 @@ license: |
 | celeborn.client.push.timeout | 120s | Timeout for a task to push data rpc message. This value should better be more than twice of `celeborn.<module>.push.timeoutCheck.interval` | 0.3.0 | 
 | celeborn.client.registerShuffle.maxRetries | 3 | Max retry times for client to register shuffle. | 0.3.0 | 
 | celeborn.client.registerShuffle.retryWait | 3s | Wait time before next retry if register shuffle failed. | 0.3.0 | 
-| celeborn.client.requestCommitFiles.maxRetries | 3 | Max retry times for requestCommitFiles RPC. | 0.3.0 | 
+| celeborn.client.requestCommitFiles.maxRetries | 4 | Max retry times for requestCommitFiles RPC. | 0.3.0 | 
 | celeborn.client.reserveSlots.maxRetries | 3 | Max retry times for client to reserve slots. | 0.3.0 | 
 | celeborn.client.reserveSlots.rackware.enabled | false | Whether need to place different replicates on different racks when allocating slots. | 0.3.0 | 
 | celeborn.client.reserveSlots.retryWait | 3s | Wait time before next retry if reserve slots failed. | 0.3.0 | 
diff --git a/docs/configuration/network.md b/docs/configuration/network.md
index cc66b985b..52b39b483 100644
--- a/docs/configuration/network.md
+++ b/docs/configuration/network.md
@@ -44,7 +44,7 @@ license: |
 | celeborn.network.memory.allocator.verbose.metric | false | Weather to enable verbose metric for pooled allocator. | 0.3.0 | 
 | celeborn.network.timeout | 240s | Default timeout for network operations. | 0.2.0 | 
 | celeborn.port.maxRetries | 1 | When port is occupied, we will retry for max retry times. | 0.2.0 | 
-| celeborn.rpc.askTimeout | 30s | Timeout for RPC ask operations. It's recommended to set at least `240s` when `HDFS` is enabled in `celeborn.storage.activeTypes` | 0.2.0 | 
+| celeborn.rpc.askTimeout | 60s | Timeout for RPC ask operations. It's recommended to set at least `240s` when `HDFS` is enabled in `celeborn.storage.activeTypes` | 0.2.0 | 
 | celeborn.rpc.connect.threads | 64 |  | 0.2.0 | 
 | celeborn.rpc.dispatcher.threads | &lt;undefined&gt; | Threads number of message dispatcher event loop | 0.3.0 | 
 | celeborn.rpc.io.threads | &lt;undefined&gt; | Netty IO thread number of NettyRpcEnv to handle RPC request. The default threads number is the number of runtime available processors. | 0.2.0 | 

Reply via email to