This is an automated email from the ASF dual-hosted git repository. github-merge-queue[bot] pushed a commit to branch gh-readonly-queue/main/pr-5659-6becb8596df32058f69473c39f4a9028f149954e in repository https://gitbox.apache.org/repos/asf/texera.git
commit 7ae2374bead9361735f5538b6501337d3ea32c56 Author: Xinyuan Lin <[email protected]> AuthorDate: Sat Jun 13 11:52:29 2026 -0700 test(config): add unit test coverage for ConfigParserUtil (#5659) ### What changes were proposed in this PR? Pin behavior of `ConfigParserUtil.parseSizeStringToBytes` — the size-string parser used by `StorageConfig` for S3 multipart sizing. The `common/config` module had no test infrastructure before this PR (no `src/test` directory existed); this PR adds the directory and configures the standard ScalaTest dependency the way the other backend modules do. | Spec | Source class | Tests | | --- | --- | --- | | `ConfigParserUtilSpec` | `ConfigParserUtil` | 16 | Spec file name follows the `<srcClassName>Spec.scala` one-to-one convention. **Behavior pinned** | Surface | Contract | | --- | --- | | `1KB` / `1MB` / `1GB` | parses to `1024L` / `1024 * 1024L` / `1024 * 1024 * 1024L` | | Multi-digit values (`100MB`, `1024KB`, `128GB`) | scales correctly by the unit multiplier | | `5GB` | preserves `Long` precision (result exceeds `Int.MaxValue`) | | `0010KB` | parses to `10 * 1024L` (decimal-only, no octal interpretation) | | Missing unit (`100`) | throws `IllegalArgumentException` with diagnostic | | Unsupported unit (`5TB`) | throws `IllegalArgumentException` | | Empty string | throws `IllegalArgumentException` | | Lowercase unit (`5mb`) | throws `IllegalArgumentException` (regex is anchored to `[KMG]B`) | | Embedded whitespace (`5 MB`) | throws `IllegalArgumentException` | | Non-numeric value (`abcMB`) | throws `IllegalArgumentException` | | Unit-only input (`MB`) | throws `IllegalArgumentException` | | Return type | `Long` (compile-time enforced) | **Build-config change** Adds `org.scalatest %% scalatest % 3.2.15 % Test` to `common/config/build.sbt`. The version matches the other backend modules (`common/workflow-operator`, `common/dao`, `amber`). Scope is `Test` so the dependency does not leak into the production classpath. ### Any related issues, documentation, discussions? Closes #5655. ### How was this PR tested? Pure unit-test addition (plus the build-config tweak above); verified locally with: - `sbt "Config/testOnly org.apache.texera.amber.util.ConfigParserUtilSpec"` — 16 tests, all green - `sbt scalafmtCheckAll` — clean - CI to confirm ### Was this PR authored or co-authored using generative AI tooling? Generated-by: Claude Code (Opus 4.7 [1M context]) --- .github/workflows/build.yml | 4 +- common/config/build.sbt | 3 +- .../texera/amber/util/ConfigParserUtilSpec.scala | 132 +++++++++++++++++++++ 3 files changed, 136 insertions(+), 3 deletions(-) diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index 3c019ccabc..aa2822e187 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -270,8 +270,7 @@ jobs: # module jacoco.xml that the codecov upload step picks up. # `WorkflowExecutionService/jacoco` only runs that project's # Test config (sbt's `test` task does not transit dependsOn), - # so common modules' tests are listed explicitly here. Modules - # with no tests (Config) are skipped. + # so common modules' tests are listed explicitly here. # # AMBER_TEST_FILTER=skip-integration tells amber/build.sbt to # exclude @org.apache.texera.amber.tags.IntegrationTest specs; @@ -281,6 +280,7 @@ jobs: run: | sbt "DAO/jacoco" \ "Auth/jacoco" \ + "Config/jacoco" \ "PyBuilder/jacoco" \ "WorkflowCore/jacoco" \ "WorkflowOperator/jacoco" \ diff --git a/common/config/build.sbt b/common/config/build.sbt index bb561de4f3..f6a8aa0568 100644 --- a/common/config/build.sbt +++ b/common/config/build.sbt @@ -50,5 +50,6 @@ Compile / scalacOptions ++= Seq( // Core Dependencies libraryDependencies ++= Seq( - "com.typesafe" % "config" % "1.4.6" // For configuration management + "com.typesafe" % "config" % "1.4.6", // For configuration management + "org.scalatest" %% "scalatest" % "3.2.15" % Test // ScalaTest (for unit tests) ) \ No newline at end of file diff --git a/common/config/src/test/scala/org/apache/texera/amber/util/ConfigParserUtilSpec.scala b/common/config/src/test/scala/org/apache/texera/amber/util/ConfigParserUtilSpec.scala new file mode 100644 index 0000000000..495dae148d --- /dev/null +++ b/common/config/src/test/scala/org/apache/texera/amber/util/ConfigParserUtilSpec.scala @@ -0,0 +1,132 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.texera.amber.util + +import org.scalatest.flatspec.AnyFlatSpec + +class ConfigParserUtilSpec extends AnyFlatSpec { + + // --------------------------------------------------------------------------- + // Unit-multiplier round-trips + // --------------------------------------------------------------------------- + + "ConfigParserUtil.parseSizeStringToBytes" should "parse `1KB` to 1024L" in { + assert(ConfigParserUtil.parseSizeStringToBytes("1KB") == 1024L) + } + + it should "parse `1MB` to 1024 * 1024 bytes" in { + assert(ConfigParserUtil.parseSizeStringToBytes("1MB") == 1024L * 1024) + } + + it should "parse `1GB` to 1024 * 1024 * 1024 bytes" in { + assert(ConfigParserUtil.parseSizeStringToBytes("1GB") == 1024L * 1024 * 1024) + } + + // --------------------------------------------------------------------------- + // Multi-digit values + // --------------------------------------------------------------------------- + + it should "scale multi-digit values by the unit multiplier (100MB → 104857600 bytes)" in { + assert(ConfigParserUtil.parseSizeStringToBytes("100MB") == 100L * 1024L * 1024L) + } + + it should "preserve Long precision for large GB values (5GB)" in { + // 5 * 1024^3 = 5_368_709_120 — exceeds Int.MaxValue, so Long math is required. + val expected = 5L * 1024L * 1024L * 1024L + assert(ConfigParserUtil.parseSizeStringToBytes("5GB") == expected) + assert(expected > Int.MaxValue.toLong, "the GB result must exceed Int.MaxValue") + } + + it should "parse multi-digit KB values" in { + assert(ConfigParserUtil.parseSizeStringToBytes("1024KB") == 1024L * 1024L) + } + + it should "parse multi-digit GB values" in { + assert(ConfigParserUtil.parseSizeStringToBytes("128GB") == 128L * 1024L * 1024L * 1024L) + } + + it should "parse leading-zero values without octal interpretation (`0010KB` == 10KB)" in { + // The regex captures `\d+`; the value is parsed via `String.toLong`, + // which treats decimal-only (no octal). Pin this so a future refactor + // that switched to Integer.decode (which would octal-interpret) is + // caught. + assert(ConfigParserUtil.parseSizeStringToBytes("0010KB") == 10L * 1024L) + } + + // --------------------------------------------------------------------------- + // Malformed input — IllegalArgumentException + // --------------------------------------------------------------------------- + + it should "throw IllegalArgumentException when the unit suffix is missing" in { + val ex = intercept[IllegalArgumentException] { + ConfigParserUtil.parseSizeStringToBytes("100") + } + assert(ex.getMessage.contains("Invalid")) + } + + it should "throw IllegalArgumentException for an unsupported unit (e.g. TB)" in { + intercept[IllegalArgumentException] { + ConfigParserUtil.parseSizeStringToBytes("5TB") + } + } + + it should "throw IllegalArgumentException on the empty string" in { + intercept[IllegalArgumentException] { + ConfigParserUtil.parseSizeStringToBytes("") + } + } + + it should + "throw IllegalArgumentException for lowercase units (regex is anchored to [KMG]B)" in { + // Pin case sensitivity — the regex `[KMG]B` requires uppercase + // letters. `5mb` does NOT match and should fail with the diagnostic. + intercept[IllegalArgumentException] { + ConfigParserUtil.parseSizeStringToBytes("5mb") + } + } + + it should "throw IllegalArgumentException when the value and unit are separated by whitespace" in { + // `(\d+)([KMG]B)` is unanchored on the outside but adjacent on the + // inside; whitespace between value and unit breaks the regex. + intercept[IllegalArgumentException] { + ConfigParserUtil.parseSizeStringToBytes("5 MB") + } + } + + it should "throw IllegalArgumentException when the value is non-numeric (e.g. `abcMB`)" in { + intercept[IllegalArgumentException] { + ConfigParserUtil.parseSizeStringToBytes("abcMB") + } + } + + it should "throw IllegalArgumentException when only the unit is supplied (e.g. `MB`)" in { + intercept[IllegalArgumentException] { + ConfigParserUtil.parseSizeStringToBytes("MB") + } + } + + // --------------------------------------------------------------------------- + // Return type — Long (not Int) + // --------------------------------------------------------------------------- + + it should "return a `Long` (compile-time enforced)" in { + val v: Long = ConfigParserUtil.parseSizeStringToBytes("1MB") + assert(v == 1024L * 1024L) + } +}
