morningman commented on code in PR #58759:
URL: https://github.com/apache/doris/pull/58759#discussion_r2697744167
##########
fe/fe-core/src/main/java/org/apache/doris/datasource/FileQueryScanNode.java:
##########
@@ -633,4 +633,58 @@ protected long getRealFileSplitSize(long blockSize) {
}
return realSplitSize;
}
+
+ /**
+ * Estimate the total number of splits based on file sizes and split size,
+ * and adjust the split size if the estimated total exceeds the limit.
+ *
+ * @param fileSizes list of file sizes in bytes
+ * @param baseSplitSize the base split size to use (from
getRealFileSplitSize)
+ * @return the adjusted split size that ensures total split count doesn't
exceed maxFileSplitsNum
+ */
+ protected long adjustSplitSizeForTotalLimit(List<Long> fileSizes, long
baseSplitSize) {
+ int maxFileSplitsNum = sessionVariable.getMaxFileSplitsNum();
+ if (maxFileSplitsNum <= 0 || fileSizes.isEmpty()) {
+ return baseSplitSize;
+ }
+
+ // Estimate total split count with current split size
+ long estimatedTotalSplits = 0;
+ for (long fileSize : fileSizes) {
+ if (fileSize > 0) {
+ // Estimate splits for this file: ceil(fileSize / splitSize)
+ long splitsForFile = (fileSize + baseSplitSize - 1) /
baseSplitSize;
+ estimatedTotalSplits += splitsForFile;
+ }
+ }
+
+ // If estimated total is within limit, use the base split size
+ if (estimatedTotalSplits <= maxFileSplitsNum) {
+ return baseSplitSize;
+ }
+
+ // Calculate total file size
+ long totalFileSize = 0;
+ for (long fileSize : fileSizes) {
Review Comment:
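You iterate over `fileSizes` twice.
Only one pass is needed: the total file size can be accumulated in the same loop that estimates the split count.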
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]