This is an automated email from the ASF dual-hosted git repository.

Yicong-Huang pushed a commit to branch release/v1.1.0-incubating
in repository https://gitbox.apache.org/repos/asf/texera.git


The following commit(s) were added to refs/heads/release/v1.1.0-incubating by 
this push:
     new d263f7d174 refactor(ci): split scala job into amber + platform matrix 
(#4632)
d263f7d174 is described below

commit d263f7d174957beacf627d5092b554f556a249d7
Author: Yicong Huang <[email protected]>
AuthorDate: Sat May 2 23:16:34 2026 +0000

    refactor(ci): split scala job into amber + platform matrix (#4632)
    
    ## What changes were proposed in this PR?
    
    - `.github/workflows/build.yml`:
      - Replace the `scala` job with `amber`. It runs the cross-cutting Scala
        lints (`scalafmtCheckAll`, `scalafixAll --check`) once on behalf of
        every Scala module, builds `WorkflowExecutionService/dist`,
        license-checks the amber dist against `amber/LICENSE-binary-java`,
        and runs amber tests via `WorkflowExecutionService/jacoco`.
      - New `platform` job: a `strategy.matrix.include` over the five
        non-amber Scala services (config-service, access-control-service,
        file-service, computing-unit-managing-service,
        workflow-compiling-service). Each entry runs `sbt "<Service>/dist"
        "<Service>/test"` and license-checks its own dist `lib/` against
        `<service>/LICENSE-binary` in isolation. This is now possible
        because per-module LICENSE-binary files were introduced in #4668.
      - `run_scala` input replaced by `run_amber` + `run_platform`.
    
    - `.github/labeler.yml`:
      - New `platform` label for the five platform service dirs.
      - `service` label removed. The two things it carried go elsewhere:
        `pyright-language-service/**` is left uncategorized (no test stack
        today), and the root-level Scala build/lint config (`build.sbt`,
        `project/**`, `.scalafix.conf`, `.scalafmt.conf`) joins the `common`
        glob — `common` already maps to amber + platform, which is correct
        for changes that affect every Scala module.
    
    - `.github/workflows/required-checks.yml`:
      - Precheck now emits `run_amber` + `run_platform` instead of
        `run_scala`.
      - LABEL_STACKS routes the new label set. Build and backport callers
        pass the new inputs through.
    
    ### Label → stack matrix
    
    | Label | frontend | amber | platform | python | agent-service |
    |---|:-:|:-:|:-:|:-:|:-:|
    | `frontend` | ✓ | | | | |
    | `engine` | | ✓ | | ✓ | |
    | `python` | | ✓ | | ✓ | |
    | `platform` | | | ✓ | | |
    | `common` | | ✓ | ✓ | | |
    | `ddl-change` | | ✓ | ✓ | | |
    | `agent-service` | | | | | ✓ |
    | `ci` | ✓ | ✓ | ✓ | ✓ | ✓ |
    | `docs`, `dev`, `dependencies`, `feature`, `fix`, `refactor`, `release/*` | | | | | |
    
    The selected stacks are the union across all PR labels. PRs that pick up
    only no-stack labels (e.g. docs-only, dev-only) skip every build stack.
    Push and `workflow_dispatch` events run every stack unconditionally.
    
    ### Why per-service license check is now possible
    
    Before #4668 there was a single repo-wide `LICENSE-binary` covering the
    union of all service jars. Splitting the license check per service would
    have made every per-service check fail — each lib is a strict subset of
    the union, so the script would report STALE jars (claimed in the union,
    not in this service). #4668 ships per-module `LICENSE-binary` files in
    each module's own directory (`config-service/LICENSE-binary`,
    `amber/LICENSE-binary-java`, etc.), so each service's dist `lib/` is now
    validated against its own ground-truth file via `check_binary_deps.py
    --license-binary <module>/LICENSE-binary`.
    
    ## Any related issues, documentation, discussions?
    
    Closes #4631. Builds on #4668 (per-module LICENSE-binary files) and
    #4640 (LABEL_STACKS gating).
    
    ## How was this PR tested?
    
    YAML parses locally for all three modified files. The change is also
    being exercised by this PR's own CI run: the amber job runs
    unconditionally; the platform matrix runs because the `platform` and
    `ci` labels are present.
    
    ## Was this PR authored or co-authored using generative AI tooling?
    
    Generated-by: Claude Opus 4.7 (Claude Code)
    
    (backported from commit 6cf43225a5b21e5d324344b3021e222028a2acac)
---
 .github/labeler.yml                   |  15 ++-
 .github/workflows/build.yml           | 181 +++++++++++++++++++++++++---------
 .github/workflows/required-checks.yml |  69 +++++++------
 3 files changed, 184 insertions(+), 81 deletions(-)

diff --git a/.github/labeler.yml b/.github/labeler.yml
index defbdc9011..1cfd4eef74 100644
--- a/.github/labeler.yml
+++ b/.github/labeler.yml
@@ -23,22 +23,21 @@ common:
   - changed-files:
       - any-glob-to-any-file:
           - 'common/**'
+          # Root-level Scala build / lint config: a change to any of these
+          # affects every Scala stack (amber + the platform services).
+          - 'build.sbt'
+          - 'project/**'
+          - '.scalafix.conf'
+          - '.scalafmt.conf'
 
-service:
+platform:
   - changed-files:
       - any-glob-to-any-file:
           - 'access-control-service/**'
           - 'computing-unit-managing-service/**'
           - 'config-service/**'
           - 'file-service/**'
-          - 'pyright-language-service/**'
           - 'workflow-compiling-service/**'
-          # Root-level scala build / lint config: a change to any of these
-          # affects the scala stack, but no component label catches them.
-          - 'build.sbt'
-          - 'project/**'
-          - '.scalafix.conf'
-          - '.scalafmt.conf'
 
 agent-service:
   - changed-files:
diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml
index 45569e311c..9bda7c3ddf 100644
--- a/.github/workflows/build.yml
+++ b/.github/workflows/build.yml
@@ -40,7 +40,11 @@ on:
         required: false
         type: boolean
         default: true
-      run_scala:
+      run_amber:
+        required: false
+        type: boolean
+        default: true
+      run_platform:
         required: false
         type: boolean
         default: true
@@ -121,9 +125,14 @@ jobs:
           flags: frontend
           fail_ci_if_error: false
 
-  scala:
-    if: ${{ inputs.run_scala }}
-    name: ${{ format('scala{0} ({1}, 11)', inputs.job_name_suffix, matrix.os) 
}}
+  amber:
+    # The amber job runs the cross-cutting Scala lints (scalafmtCheckAll,
+    # scalafixAll --check) once on behalf of every Scala module, then builds
+    # and tests just the WorkflowExecutionService dist. Per-service builds
+    # and tests for the platform services live in the `platform` matrix
+    # below. License-binary checks are scoped to the amber dist.
+    if: ${{ inputs.run_amber }}
+    name: ${{ format('amber{0} ({1}, 11)', inputs.job_name_suffix, matrix.os) 
}}
     strategy:
       matrix:
         os: [ubuntu-22.04]
@@ -179,51 +188,31 @@ jobs:
       - uses: coursier/cache-action@90c37294538be80a558fd665531fcdc2b467b475 # 
v8.1.0
         with:
           extraSbtFiles: '["*.sbt", "project/**.{scala,sbt}", 
"project/build.properties" ]'
-      - name: Lint and build distributable bundles
-        # Single sbt invocation runs scalafmt -> scalafix -> per-module dist
-        # in order; sbt exits at the first failing command (fail-fast). Each
-        # command is a separate sbt arg, not joined with ';', so a dist
-        # failure aborts the rest. scalafix triggers compile (and JOOQ
-        # codegen), which the dist commands then reuse incrementally.
+      - name: Lint and build amber distributable bundle
+        # Single sbt invocation: scalafmt -> scalafix -> amber dist.
+        # scalafmtCheckAll and scalafixAll cover every Scala module, so the
+        # platform matrix below skips them. scalafix triggers compile (and
+        # JOOQ codegen), which the dist command then reuses incrementally.
         run: |
           sbt scalafmtCheckAll \
               "scalafixAll --check" \
-              ConfigService/dist \
-              AccessControlService/dist \
-              FileService/dist \
-              ComputingUnitManagingService/dist \
-              WorkflowCompilingService/dist \
               WorkflowExecutionService/dist
-      - name: Unzip dists and check binary licenses
-        # Unzips every service's dist bundle, runs the binding LICENSE-binary
-        # check, then runs the advisory per-dep audit. The audit always runs
-        # (mirroring the previous 'if: always()' on its own step) and never
-        # fails the step; the binding check's exit code drives the result.
+      - name: Unzip amber dist and check binary licenses
+        # Per-module LICENSE-binary files live at the repo root after #4668;
+        # the amber JVM dist is checked against amber/LICENSE-binary-java.
+        # The audit always runs (mirroring the previous 'if: always()' on its
+        # own step) and never fails the step; the binding check's exit code
+        # drives it.
         run: |
           set -euo pipefail
           mkdir -p /tmp/dists
-          for zip in \
-            config-service/target/universal/config-service-*.zip \
-            
access-control-service/target/universal/access-control-service-*.zip \
-            file-service/target/universal/file-service-*.zip \
-            
computing-unit-managing-service/target/universal/computing-unit-managing-service-*.zip
 \
-            
workflow-compiling-service/target/universal/workflow-compiling-service-*.zip \
-            amber/target/universal/amber-*.zip; do
-              unzip -q "$zip" -d /tmp/dists/
-          done
-
-          lib_paths=(
-            /tmp/dists/config-service-*/lib
-            /tmp/dists/access-control-service-*/lib
-            /tmp/dists/file-service-*/lib
-            /tmp/dists/computing-unit-managing-service-*/lib
-            /tmp/dists/workflow-compiling-service-*/lib
-            /tmp/dists/amber-*/lib
-          )
+          unzip -q amber/target/universal/amber-*.zip -d /tmp/dists/
 
           check_exit=0
-          ./bin/licensing/check_binary_deps.py --ignore-transitive-version jar 
"${lib_paths[@]}" || check_exit=$?
-          ./bin/licensing/audit_jar_licenses.py "${lib_paths[@]}" || true
+          ./bin/licensing/check_binary_deps.py --ignore-transitive-version jar 
\
+            --license-binary amber/LICENSE-binary-java \
+            /tmp/dists/amber-*/lib || check_exit=$?
+          ./bin/licensing/audit_jar_licenses.py /tmp/dists/amber-*/lib || true
           exit "$check_exit"
       - name: Install dependencies
         # Only the backend test step needs the python deps; install just
@@ -240,17 +229,121 @@ jobs:
         run: |
           echo "api.version=1.52" >> ~/.docker-java.properties
           cat ~/.docker-java.properties
-      - name: Run backend tests
+      - name: Run amber and common module tests with coverage
         # 'jacoco' runs tests under sbt-jacoco's JVM agent and emits per-
         # module jacoco.xml that the codecov upload step picks up.
-        run: sbt jacoco
-      - name: Upload scala coverage to Codecov
+        # `WorkflowExecutionService/jacoco` only runs that project's tests
+        # (sbt's `test` task does not transit dependsOn), so common
+        # modules' tests are listed explicitly here. Modules with no
+        # tests (Auth, Config) are skipped.
+        run: |
+          sbt "DAO/jacoco" \
+              "PyBuilder/jacoco" \
+              "WorkflowCore/jacoco" \
+              "WorkflowOperator/jacoco" \
+              "WorkflowExecutionService/jacoco"
+      - name: Upload amber and common coverage to Codecov
         if: always()
         uses: codecov/codecov-action@75cd11691c0faa626561e295848008c8a7dddffe 
# v5.5.4
         with:
           token: ${{ secrets.CODECOV_TOKEN }}
           files: ./**/target/scala-2.13/jacoco/report/jacoco.xml
-          flags: scala
+          flags: amber
+          fail_ci_if_error: false
+
+  platform:
+    # Per-service build, test, and license check for the non-amber Scala
+    # services. Each matrix entry runs its own dist + test in isolation
+    # against per-module LICENSE-binary (#4668). scalafmt / scalafix already
+    # cover every module in the amber job above, so this matrix skips them.
+    if: ${{ inputs.run_platform }}
+    name: ${{ format('platform{0} ({1})', inputs.job_name_suffix, 
matrix.service) }}
+    runs-on: ubuntu-22.04
+    strategy:
+      fail-fast: false
+      matrix:
+        include:
+          - service: config-service
+            sbt_project: ConfigService
+          - service: access-control-service
+            sbt_project: AccessControlService
+          - service: file-service
+            sbt_project: FileService
+          - service: computing-unit-managing-service
+            sbt_project: ComputingUnitManagingService
+          - service: workflow-compiling-service
+            sbt_project: WorkflowCompilingService
+    env:
+      JAVA_OPTS: -Xms2048M -Xmx2048M -Xss6M -XX:ReservedCodeCacheSize=256M 
-Dfile.encoding=UTF-8
+      JVM_OPTS: -Xms2048M -Xmx2048M -Xss6M -XX:ReservedCodeCacheSize=256M 
-Dfile.encoding=UTF-8
+    services:
+      # Each platform service transitively depends on DAO, which runs JOOQ
+      # code generation at compile time and needs the live texera schema.
+      postgres:
+        image: postgres
+        env:
+          POSTGRES_PASSWORD: postgres
+        ports:
+          - 5432:5432
+        options: >-
+          --health-cmd="pg_isready -U postgres"
+          --health-interval=10s
+          --health-timeout=5s
+          --health-retries=5
+    steps:
+      - name: Checkout
+        uses: actions/checkout@v5
+        with:
+          ref: ${{ inputs.checkout_ref || github.sha }}
+          fetch-depth: 0
+      - name: Prepare backport workspace
+        if: ${{ inputs.backport_target_branch != '' }}
+        working-directory: ${{ github.workspace }}
+        run: bash ./.github/scripts/prepare-backport-checkout.sh "${{ 
inputs.backport_target_branch }}" "${{ inputs.backport_commit_range }}"
+      - name: Setup JDK
+        uses: actions/setup-java@v5
+        with:
+          distribution: "temurin"
+          java-version: 11
+      - name: Setup sbt launcher
+        uses: sbt/setup-sbt@508b753e53cb6095967669e0911487d2b9bc9f41 # v1.1.22
+      - uses: coursier/cache-action@90c37294538be80a558fd665531fcdc2b467b475 # 
v8.1.0
+        with:
+          extraSbtFiles: '["*.sbt", "project/**.{scala,sbt}", 
"project/build.properties" ]'
+      - name: Create Databases
+        run: |
+          psql -h localhost -U postgres -f sql/texera_ddl.sql
+          psql -h localhost -U postgres -f sql/iceberg_postgres_catalog.sql
+          psql -h localhost -U postgres -f sql/texera_lakefs.sql
+        env:
+          PGPASSWORD: postgres
+      - name: Build dist and run ${{ matrix.service }} tests with coverage
+        # Single sbt invocation so dist + test share compiled state. Use
+        # `jacoco` so the codecov upload step has a report to pick up.
+        run: sbt "${{ matrix.sbt_project }}/dist" "${{ matrix.sbt_project 
}}/jacoco"
+      - name: Unzip ${{ matrix.service }} dist and check binary licenses
+        # Each platform service has its own LICENSE-binary at the repo root
+        # after #4668; check this service's dist against just its own file.
+        run: |
+          set -euo pipefail
+          mkdir -p /tmp/dists
+          unzip -q ${{ matrix.service }}/target/universal/${{ matrix.service 
}}-*.zip -d /tmp/dists/
+
+          check_exit=0
+          ./bin/licensing/check_binary_deps.py jar \
+            --license-binary ${{ matrix.service }}/LICENSE-binary \
+            /tmp/dists/${{ matrix.service }}-*/lib || check_exit=$?
+          ./bin/licensing/audit_jar_licenses.py /tmp/dists/${{ matrix.service 
}}-*/lib || true
+          exit "$check_exit"
+      - name: Upload ${{ matrix.service }} coverage to Codecov
+        # Per-service flag so each matrix entry has its own Codecov view
+        # rather than being merged into one umbrella `platform` flag.
+        if: always()
+        uses: codecov/codecov-action@75cd11691c0faa626561e295848008c8a7dddffe 
# v5.5.4
+        with:
+          token: ${{ secrets.CODECOV_TOKEN }}
+          files: ./${{ matrix.service 
}}/target/scala-2.13/jacoco/report/jacoco.xml
+          flags: ${{ matrix.service }}
           fail_ci_if_error: false
 
   python:
diff --git a/.github/workflows/required-checks.yml 
b/.github/workflows/required-checks.yml
index 1429521985..9046682bd7 100644
--- a/.github/workflows/required-checks.yml
+++ b/.github/workflows/required-checks.yml
@@ -46,11 +46,12 @@ jobs:
   #   - On PR events, wait for the Pull Request Labeler workflow to finish so
   #     the labels it applies (frontend, docs, dev, ...) are available, then
   #     gate run_* outputs on those labels.
-  #   - run_frontend / run_scala / run_python / run_agent_service: gate the
-  #     main build stacks. Each labeler-applied label maps to the stacks it
-  #     requires (LABEL_STACKS below); the run set is the union across all
-  #     PR labels. Empty union (e.g. docs-only / dev-only PRs) skips every
-  #     stack. Push and workflow_dispatch events run every stack.
+  #   - run_frontend / run_amber / run_platform / run_python /
+  #     run_agent_service: gate the main build stacks. Each labeler-applied
+  #     label maps to the stacks it requires (LABEL_STACKS below); the run
+  #     set is the union across all PR labels. Empty union (e.g. docs-only
+  #     / dev-only PRs) skips every stack. Push and workflow_dispatch
+  #     events run every stack.
   #   - backport_targets: JSON array of release/* labels currently on the PR.
   #     Drives the backport matrix; empty array means no backport runs.
   precheck:
@@ -58,7 +59,8 @@ jobs:
     runs-on: ubuntu-latest
     outputs:
       run_frontend: ${{ steps.decide.outputs.run_frontend }}
-      run_scala: ${{ steps.decide.outputs.run_scala }}
+      run_amber: ${{ steps.decide.outputs.run_amber }}
+      run_platform: ${{ steps.decide.outputs.run_platform }}
       run_python: ${{ steps.decide.outputs.run_python }}
       run_agent_service: ${{ steps.decide.outputs.run_agent_service }}
       backport_targets: ${{ steps.decide.outputs.backport_targets }}
@@ -113,31 +115,36 @@ jobs:
             // labeler matches lives under a component dir and is already
             // covered by that component's label.
             //
-            //   label                | frontend | scala | python | 
agent-service
-            //   
---------------------|----------|-------|--------|--------------
-            //   frontend             |    x     |       |        |
-            //   python               |          |   x   |   x    |
-            //   engine               |          |   x   |   x    |
-            //   service              |          |   x   |        |
-            //   agent-service        |          |       |        |     x
-            //   common               |          |   x   |        |
-            //   ddl-change           |          |   x   |        |
-            //   ci                   |    x     |   x   |   x    |     x
-            //   docs / dev / deps /  |          |       |        |
-            //   release/* / branch   |          |       |        |
+            //   label          | frontend | amber | platform | python | 
agent-service
+            //   
---------------|----------|-------|----------|--------|--------------
+            //   frontend       |    x     |       |          |        |
+            //   python         |          |   x   |          |   x    |
+            //   engine         |          |   x   |          |   x    |
+            //   platform       |          |       |    x     |        |
+            //   agent-service  |          |       |          |        |     x
+            //   common         |          |   x   |    x     |        |  
(also catches
+            //                                                            root 
scala
+            //                                                            
build/lint
+            //                                                            
config)
+            //   ddl-change     |          |   x   |    x     |        |
+            //   ci             |    x     |   x   |    x     |   x    |     x
+            //   docs / dev /   |          |       |          |        |
+            //   deps / release/|          |       |          |        |
+            //   * / branch     |          |       |          |        |
             const LABEL_STACKS = {
               frontend: ["frontend"],
-              python: ["scala", "python"],     // pyamber drives scala 
integration tests too
-              engine: ["scala", "python"],     // amber/** spans both
-              service: ["scala"],              // scala-side services; 
agent-service is its own label
+              python: ["amber", "python"],          // pyamber drives amber 
integration tests too
+              engine: ["amber", "python"],          // amber/** spans both
+              platform: ["platform"],               // platform services
               "agent-service": ["agent-service"],
-              common: ["scala"],
-              "ddl-change": ["scala"],
-              ci: ["frontend", "scala", "python", "agent-service"],
+              common: ["amber", "platform"],        // common/** + root scala 
build/lint
+              "ddl-change": ["amber", "platform"],
+              ci: ["frontend", "amber", "platform", "python", "agent-service"],
             };
 
             let runFrontend = true;
-            let runScala = true;
+            let runAmber = true;
+            let runPlatform = true;
             let runPython = true;
             let runAgentService = true;
 
@@ -149,7 +156,8 @@ jobs:
                 }
               }
               runFrontend = stacks.has("frontend");
-              runScala = stacks.has("scala");
+              runAmber = stacks.has("amber");
+              runPlatform = stacks.has("platform");
               runPython = stacks.has("python");
               runAgentService = stacks.has("agent-service");
               core.info(
@@ -158,7 +166,8 @@ jobs:
             }
 
             core.setOutput("run_frontend", runFrontend ? "true" : "false");
-            core.setOutput("run_scala", runScala ? "true" : "false");
+            core.setOutput("run_amber", runAmber ? "true" : "false");
+            core.setOutput("run_platform", runPlatform ? "true" : "false");
             core.setOutput("run_python", runPython ? "true" : "false");
             core.setOutput("run_agent_service", runAgentService ? "true" : 
"false");
 
@@ -211,7 +220,8 @@ jobs:
     uses: ./.github/workflows/build.yml
     with:
       run_frontend: ${{ needs.precheck.outputs.run_frontend == 'true' }}
-      run_scala: ${{ needs.precheck.outputs.run_scala == 'true' }}
+      run_amber: ${{ needs.precheck.outputs.run_amber == 'true' }}
+      run_platform: ${{ needs.precheck.outputs.run_platform == 'true' }}
       run_python: ${{ needs.precheck.outputs.run_python == 'true' }}
       run_agent_service: ${{ needs.precheck.outputs.run_agent_service == 
'true' }}
     secrets: inherit
@@ -230,7 +240,8 @@ jobs:
       backport_commit_range: ${{ format('{0}..{1}', 
github.event.pull_request.base.sha, github.event.pull_request.head.sha) }}
       job_name_suffix: ""
       run_frontend: ${{ needs.precheck.outputs.run_frontend == 'true' }}
-      run_scala: ${{ needs.precheck.outputs.run_scala == 'true' }}
+      run_amber: ${{ needs.precheck.outputs.run_amber == 'true' }}
+      run_platform: ${{ needs.precheck.outputs.run_platform == 'true' }}
       run_python: ${{ needs.precheck.outputs.run_python == 'true' }}
       run_agent_service: ${{ needs.precheck.outputs.run_agent_service == 
'true' }}
     secrets: inherit

Reply via email to