somandal commented on code in PR #16341:
URL: https://github.com/apache/pinot/pull/16341#discussion_r2257158163
##########
pinot-controller/src/main/java/org/apache/pinot/controller/helix/core/realtime/PinotLLCRealtimeSegmentManager.java:
##########
@@ -2552,7 +2552,7 @@ public void
repairSegmentsInErrorStateForPauselessConsumption(TableConfig tableC
}
}
- private boolean allowRepairOfErrorSegments(boolean
repairErrorSegmentsForPartialUpsertOrDedup,
+ public boolean allowRepairOfErrorSegments(boolean
repairErrorSegmentsForPartialUpsertOrDedup,
Review Comment:
done
##########
pinot-controller/src/main/java/org/apache/pinot/controller/helix/core/rebalance/DefaultRebalancePreChecker.java:
##########
@@ -352,35 +351,49 @@ private RebalancePreCheckerResult
checkRebalanceConfig(RebalanceConfig rebalance
List<String> segmentsToMove =
SegmentAssignmentUtils.getSegmentsToMove(currentAssignment, targetAssignment);
int numReplicas = Integer.MAX_VALUE;
- if (rebalanceConfig.isDowntime() ||
PauselessConsumptionUtils.isPauselessEnabled(tableConfig)) {
+ String peerSegmentDownloadScheme =
tableConfig.getValidationConfig().getPeerSegmentDownloadScheme();
+ if (rebalanceConfig.isDowntime() || peerSegmentDownloadScheme != null) {
for (String segment : segmentsToMove) {
numReplicas = Math.min(targetAssignment.get(segment).size(),
numReplicas);
}
}
+ // For non-peer download enabled tables, warn if downtime is enabled but
numReplicas > 1. Should only use
+ // downtime=true for such tables if downtime is indeed acceptable whereas
for numReplicas = 1, rebalance cannot
+ // be done without downtime
if (rebalanceConfig.isDowntime()) {
if (!segmentsToMove.isEmpty() && numReplicas > 1) {
pass = false;
warnings.add("Number of replicas (" + numReplicas + ") is greater than
1, downtime is not recommended.");
}
}
- // It was revealed a risk of data loss for pauseless tables during
rebalance, when downtime=true or
- // minAvailableReplicas=0 -- If a segment is being moved and has not yet
uploaded to deep store, premature
- // deletion could cause irrecoverable data loss. This pre-check added as a
workaround to warn the potential risk.
- // TODO: Get to the root cause of the issue and revisit this pre-check.
- if (PauselessConsumptionUtils.isPauselessEnabled(tableConfig)) {
+ // Peer download enabled tables may have data loss during rebalance, when
downtime=true or minAvailableReplicas=0.
+ // The scenario plays out as follows:
+ // 1. If the newly built consuming segment is cannot be uploaded to deep
store, it may set up the download URI
Review Comment:
done
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]