This is an automated email from the ASF dual-hosted git repository. dongjoon pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/spark.git
The following commit(s) were added to refs/heads/master by this push: new 4f56e3852b92 [SPARK-45785][CORE] Support `spark.deploy.appNumberModulo` to rotate app number 4f56e3852b92 is described below commit 4f56e3852b9275a0097384305e3966eda49c045d Author: Dongjoon Hyun <dh...@apple.com> AuthorDate: Fri Nov 3 22:21:36 2023 -0700 [SPARK-45785][CORE] Support `spark.deploy.appNumberModulo` to rotate app number ### What changes were proposed in this pull request? This PR aims to support rotating the app number by introducing a new configuration, `spark.deploy.appNumberModulo`. ### Why are the changes needed? Historically, Apache Spark's App ID has a style, `app-yyyyMMddHHmmss-1234`. Since the 3rd part, `1234`, is a simple sequentially incremented number without any rotation, the generated IDs are like the following. ``` app-yyyyMMddHHmmss-0000 app-yyyyMMddHHmmss-0001 ... app-yyyyMMddHHmmss-9999 app-yyyyMMddHHmmss-10000 ``` If we support rotation by modulo 10000, it will keep 4 digits. ``` app-yyyyMMddHHmmss-0000 app-yyyyMMddHHmmss-0001 ... app-yyyyMMddHHmmss-9999 app-yyyyMMddHHmmss-0000 ``` Please note that the second part changes every second. In general, modulo by 10000 is enough to generate unique AppIDs. The following is an example of using modulo 1000. You can tune further by using the `spark.deploy.appIdPattern` configuration. ``` $ SPARK_MASTER_OPTS="-Dspark.deploy.appNumberModulo=1000 -Dspark.master.rest.enabled=true" sbin/start-master.sh ``` <img width="220" alt="Screenshot 2023-11-03 at 5 56 17 PM" src="https://github.com/apache/spark/assets/9700541/ad1f14c2-49ff-4fa7-b702-923b94d54e29"> ### Does this PR introduce _any_ user-facing change? No. ### How was this patch tested? Pass the CIs with the newly added test case. ### Was this patch authored or co-authored using generative AI tooling? No. Closes #43654 from dongjoon-hyun/SPARK-45785. 
Authored-by: Dongjoon Hyun <dh...@apple.com> Signed-off-by: Dongjoon Hyun <dh...@apple.com> --- .../src/main/scala/org/apache/spark/deploy/master/Master.scala | 4 ++++ .../main/scala/org/apache/spark/internal/config/Deploy.scala | 10 ++++++++++ .../scala/org/apache/spark/deploy/master/MasterSuite.scala | 9 +++++++++ 3 files changed, 23 insertions(+) diff --git a/core/src/main/scala/org/apache/spark/deploy/master/Master.scala b/core/src/main/scala/org/apache/spark/deploy/master/Master.scala index d5de1366ac05..63d981c5fde8 100644 --- a/core/src/main/scala/org/apache/spark/deploy/master/Master.scala +++ b/core/src/main/scala/org/apache/spark/deploy/master/Master.scala @@ -79,6 +79,7 @@ private[deploy] class Master( private val addressToApp = new HashMap[RpcAddress, ApplicationInfo] private val completedApps = new ArrayBuffer[ApplicationInfo] private var nextAppNumber = 0 + private val moduloAppNumber = conf.get(APP_NUMBER_MODULO).getOrElse(0) private val drivers = new HashSet[DriverInfo] private val completedDrivers = new ArrayBuffer[DriverInfo] @@ -1156,6 +1157,9 @@ private[deploy] class Master( private def newApplicationId(submitDate: Date): String = { val appId = appIdPattern.format(createDateFormat.format(submitDate), nextAppNumber) nextAppNumber += 1 + if (moduloAppNumber > 0) { + nextAppNumber %= moduloAppNumber + } appId } diff --git a/core/src/main/scala/org/apache/spark/internal/config/Deploy.scala b/core/src/main/scala/org/apache/spark/internal/config/Deploy.scala index c6ccf9550bc9..906ec0fc9973 100644 --- a/core/src/main/scala/org/apache/spark/internal/config/Deploy.scala +++ b/core/src/main/scala/org/apache/spark/internal/config/Deploy.scala @@ -82,6 +82,16 @@ private[spark] object Deploy { .checkValue(_ > 0, "The maximum number of running drivers should be positive.") .createWithDefault(Int.MaxValue) + val APP_NUMBER_MODULO = ConfigBuilder("spark.deploy.appNumberModulo") + .doc("The modulo for app number. 
By default, the next of `app-yyyyMMddHHmmss-9999` is " + + "`app-yyyyMMddHHmmss-10000`. If we have 10000 as modulo, it will be " + + "`app-yyyyMMddHHmmss-0000`. In most cases, the prefix `app-yyyyMMddHHmmss` is increased " + + "already during creating 10000 applications.") + .version("4.0.0") + .intConf + .checkValue(_ >= 1000, "The modulo for app number should be greater than or equal to 1000.") + .createOptional + val DRIVER_ID_PATTERN = ConfigBuilder("spark.deploy.driverIdPattern") .doc("The pattern for driver ID generation based on Java `String.format` method. " + "The default value is `driver-%s-%04d` which represents the existing driver id string " + diff --git a/core/src/test/scala/org/apache/spark/deploy/master/MasterSuite.scala b/core/src/test/scala/org/apache/spark/deploy/master/MasterSuite.scala index e8615cdbdd55..4f8457f930e4 100644 --- a/core/src/test/scala/org/apache/spark/deploy/master/MasterSuite.scala +++ b/core/src/test/scala/org/apache/spark/deploy/master/MasterSuite.scala @@ -1266,6 +1266,15 @@ class MasterSuite extends SparkFunSuite }.getMessage assert(m.contains("Whitespace is not allowed")) } + + test("SPARK-45785: Rotate app num with modulo operation") { + val conf = new SparkConf().set(APP_ID_PATTERN, "%2$d").set(APP_NUMBER_MODULO, 1000) + val master = makeMaster(conf) + val submitDate = new Date() + (0 to 2000).foreach { i => + assert(master.invokePrivate(_newApplicationId(submitDate)) === s"${i % 1000}") + } + } } private class FakeRecoveryModeFactory(conf: SparkConf, ser: serializer.Serializer) --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org