This is an automated email from the ASF dual-hosted git repository.
nicholasjiang pushed a commit to branch branch-0.6
in repository https://gitbox.apache.org/repos/asf/celeborn.git
The following commit(s) were added to refs/heads/branch-0.6 by this push:
new 37751ce33 [CELEBORN-2123] Add log for commit file size
37751ce33 is described below
commit 37751ce3382bf84ee5e8f04f0ee218292e0d62a9
Author: xxx <[email protected]>
AuthorDate: Mon Aug 25 17:18:32 2025 +0800
[CELEBORN-2123] Add log for commit file size
### What changes were proposed in this pull request?
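Add a debug log that records the size of each successfully committed file.
To support it, `PartitionDataWriter#getFilePath` now returns an empty string
when there is no disk file info, and an unused `fileInfo` local is removed
from `Controller`.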
### Why are the changes needed?
Logging the file size of successfully committed files enables offline
analysis of file write sizes, assessment of the proportion of small files,
and optimizations based on file size data.
### Does this PR introduce _any_ user-facing change?
No.
### How was this patch tested?
CI.
Closes #3444 from xy2953396112/CELEBORN-2123.
Authored-by: xxx <[email protected]>
Signed-off-by: SteNicholas <[email protected]>
(cherry picked from commit 1d6299717f014584b6505a67638ac1d3c978893e)
Signed-off-by: SteNicholas <[email protected]>
---
.../service/deploy/worker/storage/PartitionDataWriter.java | 6 +++++-
.../org/apache/celeborn/service/deploy/worker/Controller.scala | 8 ++------
2 files changed, 7 insertions(+), 7 deletions(-)
diff --git
a/worker/src/main/java/org/apache/celeborn/service/deploy/worker/storage/PartitionDataWriter.java
b/worker/src/main/java/org/apache/celeborn/service/deploy/worker/storage/PartitionDataWriter.java
index 35dfdc331..5fbb6e976 100644
---
a/worker/src/main/java/org/apache/celeborn/service/deploy/worker/storage/PartitionDataWriter.java
+++
b/worker/src/main/java/org/apache/celeborn/service/deploy/worker/storage/PartitionDataWriter.java
@@ -101,7 +101,11 @@ public class PartitionDataWriter implements DeviceObserver
{
}
public String getFilePath() {
- return getDiskFileInfo().getFilePath();
+ DiskFileInfo diskFileInfo = getDiskFileInfo();
+ if (diskFileInfo != null) {
+ return diskFileInfo.getFilePath();
+ }
+ return "";
}
public void incrementPendingWrites() {
diff --git
a/worker/src/main/scala/org/apache/celeborn/service/deploy/worker/Controller.scala
b/worker/src/main/scala/org/apache/celeborn/service/deploy/worker/Controller.scala
index 6d8802f36..cd2485b0a 100644
---
a/worker/src/main/scala/org/apache/celeborn/service/deploy/worker/Controller.scala
+++
b/worker/src/main/scala/org/apache/celeborn/service/deploy/worker/Controller.scala
@@ -334,17 +334,13 @@ private[deploy] class Controller(
waitMapPartitionRegionFinished(fileWriter,
shuffleCommitTimeout)
val bytes = fileWriter.close()
if (bytes > 0L) {
+ logDebug(
+ s"Commit file $shuffleKey $uniqueId
${fileWriter.getFilePath} success fileSize: $bytes")
if (fileWriter.getStorageInfo == null) {
// Only HDFS can be null, means that this partition
location is deleted.
logDebug(s"Location $uniqueId is deleted.")
} else {
val storageInfo = fileWriter.getStorageInfo
- val fileInfo =
- if (null != fileWriter.getDiskFileInfo) {
- fileWriter.getDiskFileInfo
- } else {
- fileWriter.getMemoryFileInfo
- }
committedStorageInfos.put(uniqueId, storageInfo)
if (fileWriter.getMapIdBitMap != null) {
committedMapIdBitMap.put(uniqueId,
fileWriter.getMapIdBitMap)