This is an automated email from the ASF dual-hosted git repository.

angerszhuuuu pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/incubator-celeborn.git


The following commit(s) were added to refs/heads/main by this push:
     new 0bf841006 [CELEBORN-663][REFACTOR] Refine RssInputStream logs to help admin debug
0bf841006 is described below

commit 0bf841006aa7d97dd14cdc1ffba04f371a6b31fe
Author: Angerszhuuuu <[email protected]>
AuthorDate: Mon Jun 12 18:49:41 2023 +0800

    [CELEBORN-663][REFACTOR] Refine RssInputStream logs to help admin debug
    
    ### What changes were proposed in this pull request?
    Refine RssInputStream logs to indicate the failed location's info to help admin debug
    
    ### Why are the changes needed?
    
    ### Does this PR introduce _any_ user-facing change?
    
    ### How was this patch tested?
    
    Closes #1576 from AngersZhuuuu/CELEBORN-663.
    
    Authored-by: Angerszhuuuu <[email protected]>
    Signed-off-by: Angerszhuuuu <[email protected]>
---
 .../celeborn/client/read/RssInputStream.java       | 25 ++++++++++++++++------
 1 file changed, 18 insertions(+), 7 deletions(-)

diff --git a/client/src/main/java/org/apache/celeborn/client/read/RssInputStream.java b/client/src/main/java/org/apache/celeborn/client/read/RssInputStream.java
index 1e0b26b52..874445083 100644
--- a/client/src/main/java/org/apache/celeborn/client/read/RssInputStream.java
+++ b/client/src/main/java/org/apache/celeborn/client/read/RssInputStream.java
@@ -240,21 +240,23 @@ public abstract class RssInputStream extends InputStream {
             }
             location = location.getPeer();
             logger.warn(
-                "CreatePartitionReader failed {}/{} times, change to peer",
+                "CreatePartitionReader failed {}/{} times for location {}, change to peer",
                 fetchChunkRetryCnt,
                 fetchChunkMaxRetry,
+                location,
                 e);
           } else {
             logger.warn(
-                "CreatePartitionReader failed {}/{} times, retry the same location",
+                "CreatePartitionReader failed {}/{} times for location {}, retry the same location",
                 fetchChunkRetryCnt,
                 fetchChunkMaxRetry,
+                location,
                 e);
             Uninterruptibles.sleepUninterruptibly(retryWaitMs, TimeUnit.MILLISECONDS);
           }
         }
       }
-      throw new CelebornIOException("createPartitionReader failed!");
+      throw new CelebornIOException("createPartitionReader failed! " + location);
     }
 
     private ByteBuf getNextChunk() throws IOException {
@@ -267,13 +269,18 @@ public abstract class RssInputStream extends InputStream {
           if (fetchChunkRetryCnt == fetchChunkMaxRetry) {
            logger.warn("Fetch chunk fail exceeds max retry {}", fetchChunkRetryCnt, e);
             throw new CelebornIOException(
-                "Fetch chunk failed for " + fetchChunkRetryCnt + " times", e);
+                "Fetch chunk failed for "
+                    + fetchChunkRetryCnt
+                    + " times for location "
+                    + currentReader.getLocation(),
+                e);
           } else {
             if (currentReader.getLocation().getPeer() != null) {
               logger.warn(
-                  "Fetch chunk failed {}/{} times, change to peer",
+                  "Fetch chunk failed {}/{} times for location {}, change to peer",
                   fetchChunkRetryCnt,
                   fetchChunkMaxRetry,
+                  currentReader.getLocation(),
                   e);
              // fetchChunkRetryCnt % 2 == 0 means both replicas have been tried,
               // so sleep before next try.
@@ -283,14 +290,18 @@ public abstract class RssInputStream extends InputStream {
              currentReader = createReaderWithRetry(currentReader.getLocation().getPeer());
             } else {
               logger.warn(
-                  "Fetch chunk failed {}/{} times", fetchChunkRetryCnt, fetchChunkMaxRetry, e);
+                  "Fetch chunk failed {}/{} times for location {}",
+                  fetchChunkRetryCnt,
+                  fetchChunkMaxRetry,
+                  currentReader.getLocation(),
+                  e);
              Uninterruptibles.sleepUninterruptibly(retryWaitMs, TimeUnit.MILLISECONDS);
              currentReader = createReaderWithRetry(currentReader.getLocation());
             }
           }
         }
       }
-      throw new CelebornIOException("Fetch chunk failed!");
+      throw new CelebornIOException("Fetch chunk failed! " + currentReader.getLocation());
     }
 
     private PartitionReader createReader(

Reply via email to