This is an automated email from the ASF dual-hosted git repository.
pratik pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/pinot.git
The following commit(s) were added to refs/heads/master by this push:
new 82bf63c737 Remove default use of outputSegmentMaxSize in
UpsertCompactMerge task (#14766)
82bf63c737 is described below
commit 82bf63c7371e328fc5d5cd00c34e7351ebe6888d
Author: Pratik Tibrewal <[email protected]>
AuthorDate: Tue Jan 7 21:10:46 2025 +0530
Remove default use of outputSegmentMaxSize in UpsertCompactMerge task
(#14766)
---
.../apache/pinot/core/common/MinionConstants.java | 5 -----
.../UpsertCompactMergeTaskGenerator.java | 23 +++++++++++++++-------
2 files changed, 16 insertions(+), 12 deletions(-)
diff --git
a/pinot-core/src/main/java/org/apache/pinot/core/common/MinionConstants.java
b/pinot-core/src/main/java/org/apache/pinot/core/common/MinionConstants.java
index 24db1f9ede..9fef661075 100644
--- a/pinot-core/src/main/java/org/apache/pinot/core/common/MinionConstants.java
+++ b/pinot-core/src/main/java/org/apache/pinot/core/common/MinionConstants.java
@@ -287,11 +287,6 @@ public class MinionConstants {
*/
public static final String OUTPUT_SEGMENT_MAX_SIZE_KEY =
"outputSegmentMaxSize";
- /**
- * default output segment size
- */
- public static final String DEFAULT_OUTPUT_SEGMENT_MAX_SIZE = "200MB";
-
/**
* default maximum number of segments to process in a single task
*/
diff --git
a/pinot-plugins/pinot-minion-tasks/pinot-minion-builtin-tasks/src/main/java/org/apache/pinot/plugin/minion/tasks/upsertcompactmerge/UpsertCompactMergeTaskGenerator.java
b/pinot-plugins/pinot-minion-tasks/pinot-minion-builtin-tasks/src/main/java/org/apache/pinot/plugin/minion/tasks/upsertcompactmerge/UpsertCompactMergeTaskGenerator.java
index dd7bf28353..b15bff8698 100644
---
a/pinot-plugins/pinot-minion-tasks/pinot-minion-builtin-tasks/src/main/java/org/apache/pinot/plugin/minion/tasks/upsertcompactmerge/UpsertCompactMergeTaskGenerator.java
+++
b/pinot-plugins/pinot-minion-tasks/pinot-minion-builtin-tasks/src/main/java/org/apache/pinot/plugin/minion/tasks/upsertcompactmerge/UpsertCompactMergeTaskGenerator.java
@@ -182,7 +182,8 @@ public class UpsertCompactMergeTaskGenerator extends
BaseTaskGenerator {
Set<String> alreadyMergedSegments =
getAlreadyMergedSegments(allSegments);
SegmentSelectionResult segmentSelectionResult =
- processValidDocIdsMetadata(taskConfigs, candidateSegmentsMap,
validDocIdsMetadataList, alreadyMergedSegments);
+ processValidDocIdsMetadata(tableNameWithType, taskConfigs,
candidateSegmentsMap, validDocIdsMetadataList,
+ alreadyMergedSegments);
if (!segmentSelectionResult.getSegmentsForDeletion().isEmpty()) {
pinotHelixResourceManager.deleteSegments(tableNameWithType,
segmentSelectionResult.getSegmentsForDeletion(),
@@ -229,8 +230,8 @@ public class UpsertCompactMergeTaskGenerator extends
BaseTaskGenerator {
}
@VisibleForTesting
- public static SegmentSelectionResult processValidDocIdsMetadata(Map<String,
String> taskConfigs,
- Map<String, SegmentZKMetadata> candidateSegmentsMap,
+ public static SegmentSelectionResult processValidDocIdsMetadata(String
tableNameWithType,
+ Map<String, String> taskConfigs, Map<String, SegmentZKMetadata>
candidateSegmentsMap,
Map<String, List<ValidDocIdsMetadataInfo>> validDocIdsMetadataInfoMap,
Set<String> alreadyMergedSegments) {
Map<Integer, List<SegmentMergerMetadata>> segmentsEligibleForCompactMerge
= new HashMap<>();
Set<String> segmentsForDeletion = new HashSet<>();
@@ -245,14 +246,22 @@ public class UpsertCompactMergeTaskGenerator extends
BaseTaskGenerator {
long maxNumSegments = Long.parseLong(
taskConfigs.getOrDefault(MinionConstants.UpsertCompactMergeTask.MAX_NUM_SEGMENTS_PER_TASK_KEY,
String.valueOf(MinionConstants.UpsertCompactMergeTask.DEFAULT_MAX_NUM_SEGMENTS_PER_TASK)));
+
// default to Long.MAX_VALUE to avoid size-based compaction by default
long outputSegmentMaxSizeInBytes = Long.MAX_VALUE;
try {
- outputSegmentMaxSizeInBytes = DataSizeUtils.toBytes(
-
taskConfigs.getOrDefault(MinionConstants.UpsertCompactMergeTask.OUTPUT_SEGMENT_MAX_SIZE_KEY,
-
MinionConstants.UpsertCompactMergeTask.DEFAULT_OUTPUT_SEGMENT_MAX_SIZE));
+ if
(taskConfigs.containsKey(MinionConstants.UpsertCompactMergeTask.OUTPUT_SEGMENT_MAX_SIZE_KEY))
{
+ String configuredOutputSegmentMaxSize =
+
taskConfigs.get(MinionConstants.UpsertCompactMergeTask.OUTPUT_SEGMENT_MAX_SIZE_KEY);
+ LOGGER.info("Configured outputSegmentMaxSizeInByte: {} for {}",
configuredOutputSegmentMaxSize,
+ tableNameWithType);
+ outputSegmentMaxSizeInBytes =
DataSizeUtils.toBytes(configuredOutputSegmentMaxSize);
+ } else {
+ LOGGER.info("No configured outputSegmentMaxSizeInByte for {},
defaulting to Long.MAX_VALUE", tableNameWithType);
+ }
} catch (Exception e) {
- LOGGER.warn("Invalid value for outputSegmentMaxSizeInBytes, defaulting
to Long.MAX_VALUE", e);
+ LOGGER.warn("Invalid value outputSegmentMaxSizeInBytes configured for
{}, defaulting to Long.MAX_VALUE",
+ tableNameWithType, e);
}
for (String segmentName : validDocIdsMetadataInfoMap.keySet()) {
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]