[ https://issues.apache.org/jira/browse/FLINK-9159?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=16563581#comment-16563581 ]
ASF GitHub Bot commented on FLINK-9159: --------------------------------------- asfgit closed pull request #6406: [FLINK-9159][runtime] Sanity check default timeout values URL: https://github.com/apache/flink/pull/6406 This is a PR merged from a forked repository. As GitHub hides the original diff on merge, it is displayed below for the sake of provenance: As this is a foreign pull request (from a fork), the diff is supplied below (as it won't show otherwise due to GitHub magic): diff --git a/docs/_includes/generated/mesos_configuration.html b/docs/_includes/generated/mesos_configuration.html index cd0ae2432e3..c514c86af3e 100644 --- a/docs/_includes/generated/mesos_configuration.html +++ b/docs/_includes/generated/mesos_configuration.html @@ -9,7 +9,7 @@ <tbody> <tr> <td><h5>mesos.failover-timeout</h5></td> - <td style="word-wrap: break-word;">600</td> + <td style="word-wrap: break-word;">604800</td> <td>The failover timeout in seconds for the Mesos scheduler, after which running tasks are automatically shut down.</td> </tr> <tr> diff --git a/docs/_includes/generated/resource_manager_configuration.html b/docs/_includes/generated/resource_manager_configuration.html index 1b82e51b4ef..9243fcd3cb9 100644 --- a/docs/_includes/generated/resource_manager_configuration.html +++ b/docs/_includes/generated/resource_manager_configuration.html @@ -32,5 +32,10 @@ <td style="word-wrap: break-word;">0</td> <td>Defines the network port to connect to for communication with the resource manager. By default, the port of the JobManager, because the same ActorSystem is used. Its not possible to use this configuration key to define port ranges.</td> </tr> + <tr> + <td><h5>resourcemanager.taskmanager-timeout</h5></td> + <td style="word-wrap: break-word;">30000</td> + <td>The timeout for an idle task manager to be released.</td> + </tr> </tbody> </table> diff --git a/docs/_includes/generated/slot_manager_configuration.html b/docs/_includes/generated/slot_manager_configuration.html deleted file mode 100644 index 1517a395f27..00000000000 --- a/docs/_includes/generated/slot_manager_configuration.html +++ /dev/null @@ -1,21 +0,0 @@ -<table class="table table-bordered"> - <thead> - <tr> - <th class="text-left" style="width: 20%">Key</th> - <th class="text-left" style="width: 15%">Default</th> - <th class="text-left" style="width: 65%">Description</th> - </tr> - </thead> - <tbody> - <tr> - <td><h5>slotmanager.request-timeout</h5></td> - <td style="word-wrap: break-word;">600000</td> - <td>The timeout for a slot request to be discarded.</td> - </tr> - <tr> - <td><h5>slotmanager.taskmanager-timeout</h5></td> - <td style="word-wrap: break-word;">30000</td> - <td>The timeout for an idle task manager to be released.</td> - </tr> - </tbody> -</table> diff --git a/docs/ops/config.md b/docs/ops/config.md index 1e6be19e724..fd0df0c2370 100644 --- a/docs/ops/config.md +++ b/docs/ops/config.md @@ -170,12 +170,6 @@ You have to configure `jobmanager.archive.fs.dir` in order to archive terminated {% include generated/history_server_configuration.html %} -### Slot Manager - -The configuration keys in this section are relevant for the SlotManager running in the ResourceManager - -{% include generated/slot_manager_configuration.html %} - ## Legacy - `mode`: Execution mode of Flink. Possible values are `legacy` and `new`. In order to start the legacy components, you have to specify `legacy` (DEFAULT: `new`). diff --git a/flink-core/src/main/java/org/apache/flink/configuration/JobManagerOptions.java b/flink-core/src/main/java/org/apache/flink/configuration/JobManagerOptions.java index f78ed9d367d..2bb5732cb42 100644 --- a/flink-core/src/main/java/org/apache/flink/configuration/JobManagerOptions.java +++ b/flink-core/src/main/java/org/apache/flink/configuration/JobManagerOptions.java @@ -145,11 +145,17 @@ .defaultValue(60L * 60L) .withDescription("The time in seconds after which a completed job expires and is purged from the job store."); + /** + * The timeout in milliseconds for requesting a slot from Slot Pool. + */ public static final ConfigOption<Long> SLOT_REQUEST_TIMEOUT = key("slot.request.timeout") .defaultValue(5L * 60L * 1000L) .withDescription("The timeout in milliseconds for requesting a slot from Slot Pool."); + /** + * The timeout in milliseconds for a idle slot in Slot Pool. + */ public static final ConfigOption<Long> SLOT_IDLE_TIMEOUT = key("slot.idle.timeout") // default matches heartbeat.timeout so that sticky allocation is not lost on timeouts for local recovery diff --git a/flink-core/src/main/java/org/apache/flink/configuration/ResourceManagerOptions.java b/flink-core/src/main/java/org/apache/flink/configuration/ResourceManagerOptions.java index 4ce49813bc5..5a203e3372b 100644 --- a/flink-core/src/main/java/org/apache/flink/configuration/ResourceManagerOptions.java +++ b/flink-core/src/main/java/org/apache/flink/configuration/ResourceManagerOptions.java @@ -19,16 +19,12 @@ package org.apache.flink.configuration; import org.apache.flink.annotation.PublicEvolving; -import org.apache.flink.annotation.docs.ConfigGroup; -import org.apache.flink.annotation.docs.ConfigGroups; +import org.apache.flink.configuration.description.Description; /** * The set of configuration options relating to the ResourceManager. */ @PublicEvolving -@ConfigGroups(groups = { - @ConfigGroup(name = "SlotManager", keyPrefix = "slotmanager") -}) public class ResourceManagerOptions { /** @@ -72,20 +68,35 @@ /** * The timeout for a slot request to be discarded, in milliseconds. + * @deprecated Use {@link JobManagerOptions#SLOT_REQUEST_TIMEOUT}. */ + @Deprecated public static final ConfigOption<Long> SLOT_REQUEST_TIMEOUT = ConfigOptions .key("slotmanager.request-timeout") - .defaultValue(600000L) + .defaultValue(-1L) .withDescription("The timeout for a slot request to be discarded."); /** * The timeout for an idle task manager to be released, in milliseconds. + * @deprecated Use {@link #TASK_MANAGER_TIMEOUT}. */ - public static final ConfigOption<Long> TASK_MANAGER_TIMEOUT = ConfigOptions + @Deprecated + public static final ConfigOption<Long> SLOT_MANAGER_TASK_MANAGER_TIMEOUT = ConfigOptions .key("slotmanager.taskmanager-timeout") .defaultValue(30000L) .withDescription("The timeout for an idle task manager to be released."); + /** + * The timeout for an idle task manager to be released, in milliseconds. + */ + public static final ConfigOption<Long> TASK_MANAGER_TIMEOUT = ConfigOptions + .key("resourcemanager.taskmanager-timeout") + .defaultValue(30000L) + .withDeprecatedKeys(SLOT_MANAGER_TASK_MANAGER_TIMEOUT.key()) + .withDescription(Description.builder() + .text("The timeout for an idle task manager to be released.") + .build()); + /** * Prefix for passing custom environment variables to Flink's master process. * For example for passing LD_LIBRARY_PATH as an env variable to the AppMaster, set: diff --git a/flink-docs/README.md b/flink-docs/README.md index 61624fad16d..0a574286574 100644 --- a/flink-docs/README.md +++ b/flink-docs/README.md @@ -28,7 +28,7 @@ The `RestAPIDocGenerator` can be used to generate a full reference of the REST A To integrate a new endpoint into the generator 1. Add a new `DocumentingRestEndpoint` class to `RestAPIDocGenerator` that extends the new endpoint class 2. Add another call to `createHtmlFile` in `RestAPIDocGenerator#main` -3. Regenerate the documentation by running `mvn package -Dgenerate-rest-docs -pl flink-docs -am -nsu` +3. Regenerate the documentation by running `mvn package -Dgenerate-rest-docs -pl flink-docs -am -nsu -DskipTests` 4. Integrate the generated file into the REST API documentation by adding `{% include generated/<file-name>.html %}` to the corresponding markdown file. The documentation must be regenerated whenever @@ -41,7 +41,7 @@ The `ConfigOptionsDocGenerator` can be use to generate a reference of `ConfigOpt To integrate an `*Options` class from another package, add another module-package argument pair to `ConfigOptionsDocGenerator#LOCATIONS`. -The files can be generated by running `mvn package -Dgenerate-config-docs -pl flink-docs -am -nsu`, and can be integrated into the documentation using `{% include generated/<file-name>.html %}`. +The files can be generated by running `mvn package -Dgenerate-config-docs -pl flink-docs -am -nsu -DskipTests`, and can be integrated into the documentation using `{% include generated/<file-name>.html %}`. The documentation must be regenerated whenever * an `*Options` class was added or removed diff --git a/flink-mesos/src/main/java/org/apache/flink/mesos/configuration/MesosOptions.java b/flink-mesos/src/main/java/org/apache/flink/mesos/configuration/MesosOptions.java index 753923fc35e..7046605f514 100644 --- a/flink-mesos/src/main/java/org/apache/flink/mesos/configuration/MesosOptions.java +++ b/flink-mesos/src/main/java/org/apache/flink/mesos/configuration/MesosOptions.java @@ -72,7 +72,7 @@ */ public static final ConfigOption<Integer> FAILOVER_TIMEOUT_SECONDS = key("mesos.failover-timeout") - .defaultValue(600) + .defaultValue(60 * 60 * 24 * 7) .withDescription("The failover timeout in seconds for the Mesos scheduler, after which running tasks are" + " automatically shut down."); diff --git a/flink-runtime/src/main/java/org/apache/flink/runtime/resourcemanager/slotmanager/SlotManagerConfiguration.java b/flink-runtime/src/main/java/org/apache/flink/runtime/resourcemanager/slotmanager/SlotManagerConfiguration.java index 2f8751a2203..3c806120587 100644 --- a/flink-runtime/src/main/java/org/apache/flink/runtime/resourcemanager/slotmanager/SlotManagerConfiguration.java +++ b/flink-runtime/src/main/java/org/apache/flink/runtime/resourcemanager/slotmanager/SlotManagerConfiguration.java @@ -21,10 +21,14 @@ import org.apache.flink.api.common.time.Time; import org.apache.flink.configuration.AkkaOptions; import org.apache.flink.configuration.Configuration; +import org.apache.flink.configuration.JobManagerOptions; import org.apache.flink.configuration.ResourceManagerOptions; import org.apache.flink.util.ConfigurationException; import org.apache.flink.util.Preconditions; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + import scala.concurrent.duration.Duration; /** @@ -32,6 +36,8 @@ */ public class SlotManagerConfiguration { + private static final Logger LOGGER = LoggerFactory.getLogger(SlotManagerConfiguration.class); + private final Time taskManagerRequestTimeout; private final Time slotRequestTimeout; private final Time taskManagerTimeout; @@ -68,11 +74,24 @@ public static SlotManagerConfiguration fromConfiguration(Configuration configura "value " + AkkaOptions.ASK_TIMEOUT + '.', e); } - final Time slotRequestTimeout = Time.milliseconds( - configuration.getLong(ResourceManagerOptions.SLOT_REQUEST_TIMEOUT)); + final Time slotRequestTimeout = getSlotRequestTimeout(configuration); final Time taskManagerTimeout = Time.milliseconds( configuration.getLong(ResourceManagerOptions.TASK_MANAGER_TIMEOUT)); return new SlotManagerConfiguration(rpcTimeout, slotRequestTimeout, taskManagerTimeout); } + + private static Time getSlotRequestTimeout(final Configuration configuration) { + final long legacySlotRequestTimeoutMs = configuration.getLong(ResourceManagerOptions.SLOT_REQUEST_TIMEOUT); + final long slotRequestTimeoutMs; + if (legacySlotRequestTimeoutMs == ResourceManagerOptions.SLOT_REQUEST_TIMEOUT.defaultValue()) { + slotRequestTimeoutMs = configuration.getLong(JobManagerOptions.SLOT_REQUEST_TIMEOUT); + } else { + LOGGER.warn("Config key {} is deprecated; use {} instead.", + ResourceManagerOptions.SLOT_REQUEST_TIMEOUT, + JobManagerOptions.SLOT_REQUEST_TIMEOUT); + slotRequestTimeoutMs = legacySlotRequestTimeoutMs; + } + return Time.milliseconds(slotRequestTimeoutMs); + } } diff --git a/flink-runtime/src/test/java/org/apache/flink/runtime/resourcemanager/slotmanager/SlotManagerConfigurationTest.java b/flink-runtime/src/test/java/org/apache/flink/runtime/resourcemanager/slotmanager/SlotManagerConfigurationTest.java new file mode 100644 index 00000000000..255ed6b4114 --- /dev/null +++ b/flink-runtime/src/test/java/org/apache/flink/runtime/resourcemanager/slotmanager/SlotManagerConfigurationTest.java @@ -0,0 +1,66 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.flink.runtime.resourcemanager.slotmanager; + +import org.apache.flink.configuration.Configuration; +import org.apache.flink.configuration.JobManagerOptions; +import org.apache.flink.configuration.ResourceManagerOptions; + +import org.junit.Test; + +import static org.hamcrest.CoreMatchers.equalTo; +import static org.hamcrest.CoreMatchers.is; +import static org.junit.Assert.assertThat; + +/** + * Tests for {@link SlotManagerConfiguration}. + */ +public class SlotManagerConfigurationTest { + + /** + * Tests that {@link SlotManagerConfiguration#getSlotRequestTimeout()} returns the value + * configured under key {@link JobManagerOptions#SLOT_REQUEST_TIMEOUT}. + */ + @Test + public void testSetSlotRequestTimeout() throws Exception { + final long slotIdleTimeout = 42; + + final Configuration configuration = new Configuration(); + configuration.setLong(JobManagerOptions.SLOT_REQUEST_TIMEOUT, slotIdleTimeout); + final SlotManagerConfiguration slotManagerConfiguration = SlotManagerConfiguration.fromConfiguration(configuration); + + assertThat(slotManagerConfiguration.getSlotRequestTimeout().toMilliseconds(), is(equalTo(slotIdleTimeout))); + } + + /** + * Tests that {@link ResourceManagerOptions#SLOT_REQUEST_TIMEOUT} is preferred over + * {@link JobManagerOptions#SLOT_REQUEST_TIMEOUT} if set. + */ + @Test + public void testPreferLegacySlotRequestTimeout() throws Exception { + final long legacySlotIdleTimeout = 42; + + final Configuration configuration = new Configuration(); + configuration.setLong(ResourceManagerOptions.SLOT_REQUEST_TIMEOUT, legacySlotIdleTimeout); + configuration.setLong(JobManagerOptions.SLOT_REQUEST_TIMEOUT, 300000L); + final SlotManagerConfiguration slotManagerConfiguration = SlotManagerConfiguration.fromConfiguration(configuration); + + assertThat(slotManagerConfiguration.getSlotRequestTimeout().toMilliseconds(), is(equalTo(legacySlotIdleTimeout))); + } +} ---------------------------------------------------------------- This is an automated message from the Apache Git Service. To respond to the message, please log on GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org > Sanity check default timeout values > ----------------------------------- > > Key: FLINK-9159 > URL: https://issues.apache.org/jira/browse/FLINK-9159 > Project: Flink > Issue Type: Bug > Components: Distributed Coordination > Affects Versions: 1.5.0 > Reporter: Till Rohrmann > Assignee: Gary Yao > Priority: Blocker > Labels: flip-6, pull-request-available > Fix For: 1.5.3, 1.6.0 > > > Check that the default timeout values for resource release are sanely chosen. -- This message was sent by Atlassian JIRA (v7.6.3#76005)