This is an automated email from the ASF dual-hosted git repository. nehapawar pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/pinot.git
The following commit(s) were added to refs/heads/master by this push: new eb2b941a16 make it more configurable to wait for external view to converge with ideal states (#10112) eb2b941a16 is described below commit eb2b941a1637287f8b489db36e8bfe2146ef89b7 Author: Xiaobing <61892277+klsi...@users.noreply.github.com> AuthorDate: Thu Jan 12 09:50:29 2023 -0800 make it more configurable to wait for external view to converge with ideal states (#10112) added two configs to make it more configurable on how often to check if and how long to wait if external view converges with ideal states --- .../apache/pinot/controller/ControllerConf.java | 15 ++++++++ .../api/resources/PinotTableRestletResource.java | 10 +++++- .../helix/core/rebalance/TableRebalancer.java | 41 +++++++++++++--------- .../helix/core/relocation/SegmentRelocator.java | 18 ++++++++-- .../pinot/spi/utils/RebalanceConfigConstants.java | 8 +++++ .../org/apache/pinot/tools/MultiDirQuickstart.java | 2 ++ .../apache/pinot/tools/PinotTableRebalancer.java | 6 +++- .../tools/admin/command/RebalanceTableCommand.java | 13 ++++++- 8 files changed, 91 insertions(+), 22 deletions(-) diff --git a/pinot-controller/src/main/java/org/apache/pinot/controller/ControllerConf.java b/pinot-controller/src/main/java/org/apache/pinot/controller/ControllerConf.java index 015b1e90de..825abcc783 100644 --- a/pinot-controller/src/main/java/org/apache/pinot/controller/ControllerConf.java +++ b/pinot-controller/src/main/java/org/apache/pinot/controller/ControllerConf.java @@ -32,6 +32,7 @@ import org.apache.pinot.common.protocols.SegmentCompletionProtocol; import org.apache.pinot.spi.env.PinotConfiguration; import org.apache.pinot.spi.filesystem.LocalPinotFS; import org.apache.pinot.spi.utils.CommonConstants; +import org.apache.pinot.spi.utils.RebalanceConfigConstants; import org.apache.pinot.spi.utils.TimeUtils; import static org.apache.pinot.spi.utils.CommonConstants.Controller.CONFIG_OF_CONTROLLER_METRICS_PREFIX; @@ -196,6 +197,10 @@ public class ControllerConf extends PinotConfiguration { "controller.segmentRelocator.initialDelayInSeconds"; public static final String SEGMENT_RELOCATOR_ENABLE_LOCAL_TIER_MIGRATION = "controller.segmentRelocator.enableLocalTierMigration"; + public static final String SEGMENT_RELOCATOR_EXTERNAL_VIEW_STABILIZATION_TIMEOUT_IN_MS = + "controller.segmentRelocator.externalViewStabilizationTimeoutInMs"; + public static final String SEGMENT_RELOCATOR_EXTERNAL_VIEW_CHECK_INTERVAL_IN_MS = + "controller.segmentRelocator.externalViewCheckIntervalInMs"; // The flag to indicate if controller periodic job will fix the missing LLC segment deep store copy. // Default value is false. @@ -912,6 +917,16 @@ public class ControllerConf extends PinotConfiguration { return getProperty(ControllerPeriodicTasksConf.SEGMENT_RELOCATOR_ENABLE_LOCAL_TIER_MIGRATION, false); } + public long getSegmentRelocatorExternalViewCheckIntervalInMs() { + return getProperty(ControllerPeriodicTasksConf.SEGMENT_RELOCATOR_EXTERNAL_VIEW_CHECK_INTERVAL_IN_MS, + RebalanceConfigConstants.DEFAULT_EXTERNAL_VIEW_CHECK_INTERVAL_IN_MS); + } + + public long getSegmentRelocatorExternalViewStabilizationTimeoutInMs() { + return getProperty(ControllerPeriodicTasksConf.SEGMENT_RELOCATOR_EXTERNAL_VIEW_STABILIZATION_TIMEOUT_IN_MS, + RebalanceConfigConstants.DEFAULT_EXTERNAL_VIEW_STABILIZATION_TIMEOUT_IN_MS); + } + public long getPeriodicTaskInitialDelayInSeconds() { return ControllerPeriodicTasksConf.getRandomInitialDelayInSeconds(); } diff --git a/pinot-controller/src/main/java/org/apache/pinot/controller/api/resources/PinotTableRestletResource.java b/pinot-controller/src/main/java/org/apache/pinot/controller/api/resources/PinotTableRestletResource.java index 54f9b57250..0ec2fae86a 100644 --- a/pinot-controller/src/main/java/org/apache/pinot/controller/api/resources/PinotTableRestletResource.java +++ b/pinot-controller/src/main/java/org/apache/pinot/controller/api/resources/PinotTableRestletResource.java @@ -641,7 +641,11 @@ public class PinotTableRestletResource { + "number of replicas allowed to be unavailable if value is negative") @DefaultValue("1") @QueryParam("minAvailableReplicas") int minAvailableReplicas, @ApiParam( value = "Whether to use best-efforts to rebalance (not fail the rebalance when the no-downtime contract cannot " - + "be achieved)") @DefaultValue("false") @QueryParam("bestEfforts") boolean bestEfforts) { + + "be achieved)") @DefaultValue("false") @QueryParam("bestEfforts") boolean bestEfforts, @ApiParam( + value = "How often to check if external view converges with ideal states") @DefaultValue("1000") + @QueryParam("externalViewCheckIntervalInMs") long externalViewCheckIntervalInMs, + @ApiParam(value = "How long to wait till external view converges with ideal states") @DefaultValue("3600000") + @QueryParam("externalViewStabilizationTimeoutInMs") long externalViewStabilizationTimeoutInMs) { String tableNameWithType = constructTableNameWithType(tableName, tableTypeStr); @@ -653,6 +657,10 @@ public class PinotTableRestletResource { rebalanceConfig.addProperty(RebalanceConfigConstants.DOWNTIME, downtime); rebalanceConfig.addProperty(RebalanceConfigConstants.MIN_REPLICAS_TO_KEEP_UP_FOR_NO_DOWNTIME, minAvailableReplicas); rebalanceConfig.addProperty(RebalanceConfigConstants.BEST_EFFORTS, bestEfforts); + rebalanceConfig.addProperty(RebalanceConfigConstants.EXTERNAL_VIEW_CHECK_INTERVAL_IN_MS, + externalViewCheckIntervalInMs); + rebalanceConfig.addProperty(RebalanceConfigConstants.EXTERNAL_VIEW_STABILIZATION_TIMEOUT_IN_MS, + externalViewStabilizationTimeoutInMs); try { if (dryRun || downtime) { diff --git a/pinot-controller/src/main/java/org/apache/pinot/controller/helix/core/rebalance/TableRebalancer.java b/pinot-controller/src/main/java/org/apache/pinot/controller/helix/core/rebalance/TableRebalancer.java index 4808b53bcc..4a1bc13b52 100644 --- a/pinot-controller/src/main/java/org/apache/pinot/controller/helix/core/rebalance/TableRebalancer.java +++ b/pinot-controller/src/main/java/org/apache/pinot/controller/helix/core/rebalance/TableRebalancer.java @@ -113,11 +113,6 @@ import org.slf4j.LoggerFactory; */ public class TableRebalancer { private static final Logger LOGGER = LoggerFactory.getLogger(TableRebalancer.class); - - // TODO: make them configurable - private static final long EXTERNAL_VIEW_CHECK_INTERVAL_MS = 1_000L; // 1 second - private static final long EXTERNAL_VIEW_STABILIZATION_MAX_WAIT_MS = 60 * 60_000L; // 1 hour - private final HelixManager _helixManager; private final HelixDataAccessor _helixDataAccessor; @@ -148,11 +143,19 @@ public class TableRebalancer { tableConfig.getRoutingConfig().getInstanceSelectorType()); boolean bestEfforts = rebalanceConfig.getBoolean(RebalanceConfigConstants.BEST_EFFORTS, RebalanceConfigConstants.DEFAULT_BEST_EFFORTS); + long externalViewCheckIntervalInMs = + rebalanceConfig.getLong(RebalanceConfigConstants.EXTERNAL_VIEW_CHECK_INTERVAL_IN_MS, + RebalanceConfigConstants.DEFAULT_EXTERNAL_VIEW_CHECK_INTERVAL_IN_MS); + long externalViewStabilizationTimeoutInMs = + rebalanceConfig.getLong(RebalanceConfigConstants.EXTERNAL_VIEW_STABILIZATION_TIMEOUT_IN_MS, + RebalanceConfigConstants.DEFAULT_EXTERNAL_VIEW_STABILIZATION_TIMEOUT_IN_MS); LOGGER.info( "Start rebalancing table: {} with dryRun: {}, reassignInstances: {}, includeConsuming: {}, bootstrap: {}, " - + "downtime: {}, minReplicasToKeepUpForNoDowntime: {}, enableStrictReplicaGroup: {}, bestEfforts: {}", + + "downtime: {}, minReplicasToKeepUpForNoDowntime: {}, enableStrictReplicaGroup: {}, bestEfforts: {}, " + + "externalViewCheckIntervalInMs: {}, externalViewStabilizationTimeoutInMs: {}", tableNameWithType, dryRun, reassignInstances, includeConsuming, bootstrap, downtime, - minReplicasToKeepUpForNoDowntime, enableStrictReplicaGroup, bestEfforts); + minReplicasToKeepUpForNoDowntime, enableStrictReplicaGroup, bestEfforts, externalViewCheckIntervalInMs, + externalViewStabilizationTimeoutInMs); // Validate table config try { @@ -301,15 +304,19 @@ public class TableRebalancer { minAvailableReplicas = Math.max(numReplicas + minReplicasToKeepUpForNoDowntime, 0); } - LOGGER.info("Rebalancing table: {} with minAvailableReplicas: {}, enableStrictReplicaGroup: {}, bestEfforts: {}", - tableNameWithType, minAvailableReplicas, enableStrictReplicaGroup, bestEfforts); + LOGGER.info("Rebalancing table: {} with minAvailableReplicas: {}, enableStrictReplicaGroup: {}, bestEfforts: {}, " + + "externalViewCheckIntervalInMs: {}, externalViewStabilizationTimeoutInMs: {}", tableNameWithType, + minAvailableReplicas, enableStrictReplicaGroup, bestEfforts, externalViewCheckIntervalInMs, + externalViewStabilizationTimeoutInMs); int expectedVersion = currentIdealState.getRecord().getVersion(); while (true) { // Wait for ExternalView to converge before updating the next IdealState Set<String> segmentsToMove = SegmentAssignmentUtils.getSegmentsToMove(currentAssignment, targetAssignment); IdealState idealState; try { - idealState = waitForExternalViewToConverge(tableNameWithType, bestEfforts, segmentsToMove); + idealState = + waitForExternalViewToConverge(tableNameWithType, bestEfforts, segmentsToMove, externalViewCheckIntervalInMs, + externalViewStabilizationTimeoutInMs); } catch (Exception e) { LOGGER.warn("Caught exception while waiting for ExternalView to converge for table: {}, aborting the rebalance", tableNameWithType, e); @@ -544,12 +551,13 @@ public class TableRebalancer { } private IdealState waitForExternalViewToConverge(String tableNameWithType, boolean bestEfforts, - Set<String> segmentsToMonitor) + Set<String> segmentsToMonitor, long externalViewCheckIntervalInMs, long externalViewStabilizationTimeoutInMs) throws InterruptedException, TimeoutException { - long endTimeMs = System.currentTimeMillis() + EXTERNAL_VIEW_STABILIZATION_MAX_WAIT_MS; + long endTimeMs = System.currentTimeMillis() + externalViewStabilizationTimeoutInMs; IdealState idealState; do { + LOGGER.debug("Start to check if ExternalView converges to IdealStates"); idealState = _helixDataAccessor.getProperty(_helixDataAccessor.keyBuilder().idealStates(tableNameWithType)); // IdealState might be null if table got deleted, throwing exception to abort the rebalance Preconditions.checkState(idealState != null, "Failed to find the IdealState"); @@ -564,16 +572,17 @@ public class TableRebalancer { return idealState; } } - - Thread.sleep(EXTERNAL_VIEW_CHECK_INTERVAL_MS); + LOGGER.debug("ExternalView has not converged to IdealStates. Retry after: {}ms", externalViewCheckIntervalInMs); + Thread.sleep(externalViewCheckIntervalInMs); } while (System.currentTimeMillis() < endTimeMs); if (bestEfforts) { LOGGER.warn("ExternalView has not converged within: {}ms for table: {}, continuing the rebalance (best-efforts)", - EXTERNAL_VIEW_STABILIZATION_MAX_WAIT_MS, tableNameWithType); + externalViewStabilizationTimeoutInMs, tableNameWithType); return idealState; } else { - throw new TimeoutException("Timeout while waiting for ExternalView to converge"); + throw new TimeoutException(String.format("ExternalView has not converged within: %d ms for table: %s", + externalViewStabilizationTimeoutInMs, tableNameWithType)); } } diff --git a/pinot-controller/src/main/java/org/apache/pinot/controller/helix/core/relocation/SegmentRelocator.java b/pinot-controller/src/main/java/org/apache/pinot/controller/helix/core/relocation/SegmentRelocator.java index cf8b878b52..4f0366e5d6 100644 --- a/pinot-controller/src/main/java/org/apache/pinot/controller/helix/core/relocation/SegmentRelocator.java +++ b/pinot-controller/src/main/java/org/apache/pinot/controller/helix/core/relocation/SegmentRelocator.java @@ -71,7 +71,9 @@ public class SegmentRelocator extends ControllerPeriodicTask<Void> { private final ExecutorService _executorService; private final HttpConnectionManager _connectionManager; private final boolean _enableLocalTierMigration; - private final int _timeoutMs; + private final int _serverAdminRequestTimeoutMs; + private final long _externalViewCheckIntervalInMs; + private final long _externalViewStabilizationTimeoutInMs; public SegmentRelocator(PinotHelixResourceManager pinotHelixResourceManager, LeadControllerManager leadControllerManager, ControllerConf config, ControllerMetrics controllerMetrics, @@ -82,7 +84,13 @@ public class SegmentRelocator extends ControllerPeriodicTask<Void> { _executorService = executorService; _connectionManager = connectionManager; _enableLocalTierMigration = config.enableSegmentRelocatorLocalTierMigration(); - _timeoutMs = config.getServerAdminRequestTimeoutSeconds() * 1000; + _serverAdminRequestTimeoutMs = config.getServerAdminRequestTimeoutSeconds() * 1000; + long taskIntervalInMs = config.getSegmentRelocatorFrequencyInSeconds() * 1000L; + // Best effort to let inner part of the task run no longer than the task interval, although not enforced strictly. + _externalViewCheckIntervalInMs = + Math.min(taskIntervalInMs, config.getSegmentRelocatorExternalViewCheckIntervalInMs()); + _externalViewStabilizationTimeoutInMs = + Math.min(taskIntervalInMs, config.getSegmentRelocatorExternalViewStabilizationTimeoutInMs()); } @Override @@ -118,6 +126,10 @@ public class SegmentRelocator extends ControllerPeriodicTask<Void> { // Allow at most one replica unavailable during relocation Configuration rebalanceConfig = new BaseConfiguration(); rebalanceConfig.addProperty(RebalanceConfigConstants.MIN_REPLICAS_TO_KEEP_UP_FOR_NO_DOWNTIME, -1); + rebalanceConfig.addProperty(RebalanceConfigConstants.EXTERNAL_VIEW_CHECK_INTERVAL_IN_MS, + _externalViewCheckIntervalInMs); + rebalanceConfig.addProperty(RebalanceConfigConstants.EXTERNAL_VIEW_STABILIZATION_TIMEOUT_IN_MS, + _externalViewStabilizationTimeoutInMs); // Run rebalance asynchronously _executorService.submit(() -> { try { @@ -188,7 +200,7 @@ public class SegmentRelocator extends ControllerPeriodicTask<Void> { try { TableTierReader.TableTierDetails tableTiers = new TableTierReader(_executorService, _connectionManager, _pinotHelixResourceManager).getTableTierDetails( - tableNameWithType, null, _timeoutMs, true); + tableNameWithType, null, _serverAdminRequestTimeoutMs, true); triggerLocalTierMigration(tableNameWithType, tableTiers, _pinotHelixResourceManager.getHelixZkManager().getMessagingService()); LOGGER.info("Migrated segments of table: {} to new tiers on hosting servers", tableNameWithType); diff --git a/pinot-spi/src/main/java/org/apache/pinot/spi/utils/RebalanceConfigConstants.java b/pinot-spi/src/main/java/org/apache/pinot/spi/utils/RebalanceConfigConstants.java index c6fbadcde5..cf4290b7d5 100644 --- a/pinot-spi/src/main/java/org/apache/pinot/spi/utils/RebalanceConfigConstants.java +++ b/pinot-spi/src/main/java/org/apache/pinot/spi/utils/RebalanceConfigConstants.java @@ -57,4 +57,12 @@ public class RebalanceConfigConstants { // - ExternalView has not converged within the maximum wait time -> continue to the next stage public static final String BEST_EFFORTS = "bestEfforts"; public static final boolean DEFAULT_BEST_EFFORTS = false; + + // The check on external view can be very costly when the table has very large ideal and external states, i.e. when + // having a huge number of segments. These two configs help reduce the cpu load on controllers, e.g. by doing the + // check less frequently and bail out sooner to rebalance at best effort if configured so. + public static final String EXTERNAL_VIEW_CHECK_INTERVAL_IN_MS = "externalViewCheckIntervalInMs"; + public static final long DEFAULT_EXTERNAL_VIEW_CHECK_INTERVAL_IN_MS = 1_000L; // 1 second + public static final String EXTERNAL_VIEW_STABILIZATION_TIMEOUT_IN_MS = "externalViewStabilizationTimeoutInMs"; + public static final long DEFAULT_EXTERNAL_VIEW_STABILIZATION_TIMEOUT_IN_MS = 60 * 60_000L; // 1 hour } diff --git a/pinot-tools/src/main/java/org/apache/pinot/tools/MultiDirQuickstart.java b/pinot-tools/src/main/java/org/apache/pinot/tools/MultiDirQuickstart.java index dc2ef5c166..cf63203368 100644 --- a/pinot-tools/src/main/java/org/apache/pinot/tools/MultiDirQuickstart.java +++ b/pinot-tools/src/main/java/org/apache/pinot/tools/MultiDirQuickstart.java @@ -43,6 +43,8 @@ public class MultiDirQuickstart extends Quickstart { properties.put("controller.segment.relocator.frequencyPeriod", "60s"); properties.put("controller.segmentRelocator.initialDelayInSeconds", "10"); properties.put("controller.segmentRelocator.enableLocalTierMigration", "true"); + properties.put("controller.segmentRelocator.externalViewCheckIntervalInMs", "2000"); + properties.put("controller.segmentRelocator.externalViewStabilizationTimeoutInMs", "60000"); /* * One can also set `dataDir` as part of tierConfigs in TableConfig to overwrite the instance configs (or set as * cluster configs), but it's recommended to use instance (or cluster) configs for consistency across tables. diff --git a/pinot-tools/src/main/java/org/apache/pinot/tools/PinotTableRebalancer.java b/pinot-tools/src/main/java/org/apache/pinot/tools/PinotTableRebalancer.java index 365bd0a8bb..7e4006f9ea 100644 --- a/pinot-tools/src/main/java/org/apache/pinot/tools/PinotTableRebalancer.java +++ b/pinot-tools/src/main/java/org/apache/pinot/tools/PinotTableRebalancer.java @@ -36,7 +36,7 @@ public class PinotTableRebalancer extends PinotZKChanger { public PinotTableRebalancer(String zkAddress, String clusterName, boolean dryRun, boolean reassignInstances, boolean includeConsuming, boolean bootstrap, boolean downtime, int minReplicasToKeepUpForNoDowntime, - boolean bestEffort) { + boolean bestEffort, long externalViewCheckIntervalInMs, long externalViewStabilizationTimeoutInMs) { super(zkAddress, clusterName); _rebalanceConfig.addProperty(RebalanceConfigConstants.DRY_RUN, dryRun); _rebalanceConfig.addProperty(RebalanceConfigConstants.REASSIGN_INSTANCES, reassignInstances); @@ -46,6 +46,10 @@ public class PinotTableRebalancer extends PinotZKChanger { _rebalanceConfig.addProperty(RebalanceConfigConstants.MIN_REPLICAS_TO_KEEP_UP_FOR_NO_DOWNTIME, minReplicasToKeepUpForNoDowntime); _rebalanceConfig.addProperty(RebalanceConfigConstants.BEST_EFFORTS, bestEffort); + _rebalanceConfig.addProperty(RebalanceConfigConstants.EXTERNAL_VIEW_CHECK_INTERVAL_IN_MS, + externalViewCheckIntervalInMs); + _rebalanceConfig.addProperty(RebalanceConfigConstants.EXTERNAL_VIEW_STABILIZATION_TIMEOUT_IN_MS, + externalViewStabilizationTimeoutInMs); } public RebalanceResult rebalance(String tableNameWithType) { diff --git a/pinot-tools/src/main/java/org/apache/pinot/tools/admin/command/RebalanceTableCommand.java b/pinot-tools/src/main/java/org/apache/pinot/tools/admin/command/RebalanceTableCommand.java index 6f15c8632b..258c66cea2 100644 --- a/pinot-tools/src/main/java/org/apache/pinot/tools/admin/command/RebalanceTableCommand.java +++ b/pinot-tools/src/main/java/org/apache/pinot/tools/admin/command/RebalanceTableCommand.java @@ -20,6 +20,7 @@ package org.apache.pinot.tools.admin.command; import org.apache.pinot.controller.helix.core.rebalance.RebalanceResult; import org.apache.pinot.spi.utils.JsonUtils; +import org.apache.pinot.spi.utils.RebalanceConfigConstants; import org.apache.pinot.tools.Command; import org.apache.pinot.tools.PinotTableRebalancer; import org.slf4j.Logger; @@ -77,6 +78,15 @@ public class RebalanceTableCommand extends AbstractBaseAdminCommand implements C + " cannot be achieved, false by default)") private boolean _bestEfforts = false; + @CommandLine.Option(names = {"-externalViewCheckIntervalInMs"}, + description = "How often to check if external view converges with ideal view") + private long _externalViewCheckIntervalInMs = RebalanceConfigConstants.DEFAULT_EXTERNAL_VIEW_CHECK_INTERVAL_IN_MS; + + @CommandLine.Option(names = {"-externalViewStabilizationTimeoutInMs"}, + description = "How long to wait till external view converges with ideal view") + private long _externalViewStabilizationTimeoutInMs = + RebalanceConfigConstants.DEFAULT_EXTERNAL_VIEW_STABILIZATION_TIMEOUT_IN_MS; + @CommandLine.Option(names = {"-help", "-h", "--h", "--help"}, help = true, description = "Print this message") private boolean _help = false; @@ -94,7 +104,8 @@ public class RebalanceTableCommand extends AbstractBaseAdminCommand implements C throws Exception { PinotTableRebalancer tableRebalancer = new PinotTableRebalancer(_zkAddress, _clusterName, _dryRun, _reassignInstances, _includeConsuming, _bootstrap, - _downtime, _minAvailableReplicas, _bestEfforts); + _downtime, _minAvailableReplicas, _bestEfforts, _externalViewCheckIntervalInMs, + _externalViewStabilizationTimeoutInMs); RebalanceResult rebalanceResult = tableRebalancer.rebalance(_tableNameWithType); LOGGER .info("Got rebalance result: {} for table: {}", JsonUtils.objectToString(rebalanceResult), _tableNameWithType); --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@pinot.apache.org For additional commands, e-mail: commits-h...@pinot.apache.org