This is an automated email from the ASF dual-hosted git repository.

pratik pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/pinot.git


The following commit(s) were added to refs/heads/master by this push:
     new 82bf63c737 Remove default use of outputSegmentMaxSize in 
UpsertCompactMerge task (#14766)
82bf63c737 is described below

commit 82bf63c7371e328fc5d5cd00c34e7351ebe6888d
Author: Pratik Tibrewal <[email protected]>
AuthorDate: Tue Jan 7 21:10:46 2025 +0530

    Remove default use of outputSegmentMaxSize in UpsertCompactMerge task 
(#14766)
---
 .../apache/pinot/core/common/MinionConstants.java  |  5 -----
 .../UpsertCompactMergeTaskGenerator.java           | 23 +++++++++++++++-------
 2 files changed, 16 insertions(+), 12 deletions(-)

diff --git 
a/pinot-core/src/main/java/org/apache/pinot/core/common/MinionConstants.java 
b/pinot-core/src/main/java/org/apache/pinot/core/common/MinionConstants.java
index 24db1f9ede..9fef661075 100644
--- a/pinot-core/src/main/java/org/apache/pinot/core/common/MinionConstants.java
+++ b/pinot-core/src/main/java/org/apache/pinot/core/common/MinionConstants.java
@@ -287,11 +287,6 @@ public class MinionConstants {
      */
     public static final String OUTPUT_SEGMENT_MAX_SIZE_KEY = 
"outputSegmentMaxSize";
 
-    /**
-     * default output segment size
-     */
-    public static final String DEFAULT_OUTPUT_SEGMENT_MAX_SIZE = "200MB";
-
     /**
      * default maximum number of segments to process in a single task
      */
diff --git 
a/pinot-plugins/pinot-minion-tasks/pinot-minion-builtin-tasks/src/main/java/org/apache/pinot/plugin/minion/tasks/upsertcompactmerge/UpsertCompactMergeTaskGenerator.java
 
b/pinot-plugins/pinot-minion-tasks/pinot-minion-builtin-tasks/src/main/java/org/apache/pinot/plugin/minion/tasks/upsertcompactmerge/UpsertCompactMergeTaskGenerator.java
index dd7bf28353..b15bff8698 100644
--- 
a/pinot-plugins/pinot-minion-tasks/pinot-minion-builtin-tasks/src/main/java/org/apache/pinot/plugin/minion/tasks/upsertcompactmerge/UpsertCompactMergeTaskGenerator.java
+++ 
b/pinot-plugins/pinot-minion-tasks/pinot-minion-builtin-tasks/src/main/java/org/apache/pinot/plugin/minion/tasks/upsertcompactmerge/UpsertCompactMergeTaskGenerator.java
@@ -182,7 +182,8 @@ public class UpsertCompactMergeTaskGenerator extends 
BaseTaskGenerator {
       Set<String> alreadyMergedSegments = 
getAlreadyMergedSegments(allSegments);
 
       SegmentSelectionResult segmentSelectionResult =
-          processValidDocIdsMetadata(taskConfigs, candidateSegmentsMap, 
validDocIdsMetadataList, alreadyMergedSegments);
+          processValidDocIdsMetadata(tableNameWithType, taskConfigs, 
candidateSegmentsMap, validDocIdsMetadataList,
+              alreadyMergedSegments);
 
       if (!segmentSelectionResult.getSegmentsForDeletion().isEmpty()) {
         pinotHelixResourceManager.deleteSegments(tableNameWithType, 
segmentSelectionResult.getSegmentsForDeletion(),
@@ -229,8 +230,8 @@ public class UpsertCompactMergeTaskGenerator extends 
BaseTaskGenerator {
   }
 
   @VisibleForTesting
-  public static SegmentSelectionResult processValidDocIdsMetadata(Map<String, 
String> taskConfigs,
-      Map<String, SegmentZKMetadata> candidateSegmentsMap,
+  public static SegmentSelectionResult processValidDocIdsMetadata(String 
tableNameWithType,
+      Map<String, String> taskConfigs, Map<String, SegmentZKMetadata> 
candidateSegmentsMap,
       Map<String, List<ValidDocIdsMetadataInfo>> validDocIdsMetadataInfoMap, 
Set<String> alreadyMergedSegments) {
     Map<Integer, List<SegmentMergerMetadata>> segmentsEligibleForCompactMerge 
= new HashMap<>();
     Set<String> segmentsForDeletion = new HashSet<>();
@@ -245,14 +246,22 @@ public class UpsertCompactMergeTaskGenerator extends 
BaseTaskGenerator {
     long maxNumSegments = Long.parseLong(
         
taskConfigs.getOrDefault(MinionConstants.UpsertCompactMergeTask.MAX_NUM_SEGMENTS_PER_TASK_KEY,
             
String.valueOf(MinionConstants.UpsertCompactMergeTask.DEFAULT_MAX_NUM_SEGMENTS_PER_TASK)));
+
     // default to Long.MAX_VALUE to avoid size-based compaction by default
     long outputSegmentMaxSizeInBytes = Long.MAX_VALUE;
     try {
-      outputSegmentMaxSizeInBytes = DataSizeUtils.toBytes(
-          
taskConfigs.getOrDefault(MinionConstants.UpsertCompactMergeTask.OUTPUT_SEGMENT_MAX_SIZE_KEY,
-              
MinionConstants.UpsertCompactMergeTask.DEFAULT_OUTPUT_SEGMENT_MAX_SIZE));
+      if 
(taskConfigs.containsKey(MinionConstants.UpsertCompactMergeTask.OUTPUT_SEGMENT_MAX_SIZE_KEY))
 {
+        String configuredOutputSegmentMaxSize =
+            
taskConfigs.get(MinionConstants.UpsertCompactMergeTask.OUTPUT_SEGMENT_MAX_SIZE_KEY);
+        LOGGER.info("Configured outputSegmentMaxSizeInByte: {} for {}", 
configuredOutputSegmentMaxSize,
+            tableNameWithType);
+        outputSegmentMaxSizeInBytes = 
DataSizeUtils.toBytes(configuredOutputSegmentMaxSize);
+      } else {
+        LOGGER.info("No configured outputSegmentMaxSizeInByte for {}, 
defaulting to Long.MAX_VALUE", tableNameWithType);
+      }
     } catch (Exception e) {
-      LOGGER.warn("Invalid value for outputSegmentMaxSizeInBytes, defaulting 
to Long.MAX_VALUE", e);
+      LOGGER.warn("Invalid value outputSegmentMaxSizeInBytes configured for 
{}, defaulting to Long.MAX_VALUE",
+          tableNameWithType, e);
     }
 
     for (String segmentName : validDocIdsMetadataInfoMap.keySet()) {


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to