This is an automated email from the ASF dual-hosted git repository.
Yicong-Huang pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/texera.git
The following commit(s) were added to refs/heads/main by this push:
new 6cf43225a5 refactor(ci): split scala job into amber + platform matrix
(#4632)
6cf43225a5 is described below
commit 6cf43225a5b21e5d324344b3021e222028a2acac
Author: Yicong Huang <[email protected]>
AuthorDate: Sat May 2 16:16:14 2026 -0700
refactor(ci): split scala job into amber + platform matrix (#4632)
## What changes were proposed in this PR?
- `.github/workflows/build.yml`:
- Replace the `scala` job with `amber`. It runs the cross-cutting Scala
lints (`scalafmtCheckAll`, `scalafixAll --check`) once on behalf of
every Scala module, builds `WorkflowExecutionService/dist`,
license-checks the amber dist against `amber/LICENSE-binary-java`, and
runs the amber and common-module tests under `jacoco` (`DAO`, `PyBuilder`,
`WorkflowCore`, `WorkflowOperator`, `WorkflowExecutionService`).
- New `platform` job: a `strategy.matrix.include` over the five
non-amber Scala services (config-service, access-control-service,
file-service, computing-unit-managing-service,
workflow-compiling-service). Each entry runs `sbt "<Service>/dist"
"<Service>/jacoco"` and license-checks its own dist `lib/` against
`<service>/LICENSE-binary` in isolation. This is now possible because
per-module LICENSE-binary files were introduced in #4668.
- `run_scala` input replaced by `run_amber` + `run_platform`.
- `.github/labeler.yml`:
- New `platform` label for the five platform service dirs.
- `service` label removed. The two things it carried go elsewhere:
`pyright-language-service/**` is left uncategorized (no test stack
today), and the root-level Scala build/lint config (`build.sbt`,
`project/**`, `.scalafix.conf`, `.scalafmt.conf`) joins the `common`
glob — `common` already maps to amber + platform, which is correct for
changes that affect every Scala module.
- `.github/workflows/required-checks.yml`:
- Precheck now emits `run_amber` + `run_platform` instead of
`run_scala`.
- LABEL_STACKS routes the new label set. Build and backport callers pass
the new inputs through.
### Label → stack matrix
| Label | frontend | amber | platform | python | agent-service |
|---|:-:|:-:|:-:|:-:|:-:|
| `frontend` | ✓ | | | | |
| `engine` | | ✓ | | ✓ | |
| `python` | | ✓ | | ✓ | |
| `platform` | | | ✓ | | |
| `common` | | ✓ | ✓ | | |
| `ddl-change` | | ✓ | ✓ | | |
| `agent-service` | | | | | ✓ |
| `ci` | ✓ | ✓ | ✓ | ✓ | ✓ |
| `docs`, `dev`, `dependencies`, `feature`, `fix`, `refactor`, `release/*` | | | | | |
The selected stacks are the union across all PR labels. PRs that pick up
only no-stack labels (e.g. docs-only, dev-only) skip every build stack.
Push and `workflow_dispatch` events run every stack unconditionally.
### Why per-service license check is now possible
Before #4668 there was a single repo-wide `LICENSE-binary` covering the
union of all service jars. Splitting the license check per service would
have made every per-service check fail — each lib is a strict subset of
the union, so the script would report STALE jars (claimed in the union,
not in this service). #4668 ships per-module `LICENSE-binary` files, one
in each module's top-level directory (`config-service/LICENSE-binary`,
`amber/LICENSE-binary-java`, etc.), so each service's dist `lib/` is now
validated against its own ground-truth file via `check_binary_deps.py
--license-binary <module>/LICENSE-binary`.
## Any related issues, documentation, discussions?
Closes #4631. Builds on #4668 (per-module LICENSE-binary files) and
#4640 (LABEL_STACKS gating).
## How was this PR tested?
YAML parses locally for all three modified files. Currently exercising
on this PR's CI run: amber job runs unconditionally; platform matrix
runs because the `platform` and `ci` labels are present.
## Was this PR authored or co-authored using generative AI tooling?
Generated-by: Claude Opus 4.7 (Claude Code)
---
.github/labeler.yml | 15 ++-
.github/workflows/build.yml | 181 +++++++++++++++++++++++++---------
.github/workflows/required-checks.yml | 69 +++++++------
3 files changed, 184 insertions(+), 81 deletions(-)
diff --git a/.github/labeler.yml b/.github/labeler.yml
index defbdc9011..1cfd4eef74 100644
--- a/.github/labeler.yml
+++ b/.github/labeler.yml
@@ -23,22 +23,21 @@ common:
- changed-files:
- any-glob-to-any-file:
- 'common/**'
+ # Root-level Scala build / lint config: a change to any of these
+ # affects every Scala stack (amber + the platform services).
+ - 'build.sbt'
+ - 'project/**'
+ - '.scalafix.conf'
+ - '.scalafmt.conf'
-service:
+platform:
- changed-files:
- any-glob-to-any-file:
- 'access-control-service/**'
- 'computing-unit-managing-service/**'
- 'config-service/**'
- 'file-service/**'
- - 'pyright-language-service/**'
- 'workflow-compiling-service/**'
- # Root-level scala build / lint config: a change to any of these
- # affects the scala stack, but no component label catches them.
- - 'build.sbt'
- - 'project/**'
- - '.scalafix.conf'
- - '.scalafmt.conf'
agent-service:
- changed-files:
diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml
index 45569e311c..9bda7c3ddf 100644
--- a/.github/workflows/build.yml
+++ b/.github/workflows/build.yml
@@ -40,7 +40,11 @@ on:
required: false
type: boolean
default: true
- run_scala:
+ run_amber:
+ required: false
+ type: boolean
+ default: true
+ run_platform:
required: false
type: boolean
default: true
@@ -121,9 +125,14 @@ jobs:
flags: frontend
fail_ci_if_error: false
- scala:
- if: ${{ inputs.run_scala }}
- name: ${{ format('scala{0} ({1}, 11)', inputs.job_name_suffix, matrix.os)
}}
+ amber:
+ # The amber job runs the cross-cutting Scala lints (scalafmtCheckAll,
+ # scalafixAll --check) once on behalf of every Scala module, then builds
+ # and tests just the WorkflowExecutionService dist. Per-service builds
+ # and tests for the platform services live in the `platform` matrix
+ # below. License-binary checks are scoped to the amber dist.
+ if: ${{ inputs.run_amber }}
+ name: ${{ format('amber{0} ({1}, 11)', inputs.job_name_suffix, matrix.os)
}}
strategy:
matrix:
os: [ubuntu-22.04]
@@ -179,51 +188,31 @@ jobs:
- uses: coursier/cache-action@90c37294538be80a558fd665531fcdc2b467b475 #
v8.1.0
with:
extraSbtFiles: '["*.sbt", "project/**.{scala,sbt}",
"project/build.properties" ]'
- - name: Lint and build distributable bundles
- # Single sbt invocation runs scalafmt -> scalafix -> per-module dist
- # in order; sbt exits at the first failing command (fail-fast). Each
- # command is a separate sbt arg, not joined with ';', so a dist
- # failure aborts the rest. scalafix triggers compile (and JOOQ
- # codegen), which the dist commands then reuse incrementally.
+ - name: Lint and build amber distributable bundle
+ # Single sbt invocation: scalafmt -> scalafix -> amber dist.
+ # scalafmtCheckAll and scalafixAll cover every Scala module, so the
+ # platform matrix below skips them. scalafix triggers compile (and
+ # JOOQ codegen), which the dist command then reuses incrementally.
run: |
sbt scalafmtCheckAll \
"scalafixAll --check" \
- ConfigService/dist \
- AccessControlService/dist \
- FileService/dist \
- ComputingUnitManagingService/dist \
- WorkflowCompilingService/dist \
WorkflowExecutionService/dist
- - name: Unzip dists and check binary licenses
- # Unzips every service's dist bundle, runs the binding LICENSE-binary
- # check, then runs the advisory per-dep audit. The audit always runs
- # (mirroring the previous 'if: always()' on its own step) and never
- # fails the step; the binding check's exit code drives the result.
+ - name: Unzip amber dist and check binary licenses
+ # Per-module LICENSE-binary files live at the repo root after #4668;
+ # the amber JVM dist is checked against amber/LICENSE-binary-java.
+ # The audit always runs (mirroring the previous 'if: always()' on its
+ # own step) and never fails the step; the binding check's exit code
+ # drives it.
run: |
set -euo pipefail
mkdir -p /tmp/dists
- for zip in \
- config-service/target/universal/config-service-*.zip \
-
access-control-service/target/universal/access-control-service-*.zip \
- file-service/target/universal/file-service-*.zip \
-
computing-unit-managing-service/target/universal/computing-unit-managing-service-*.zip
\
-
workflow-compiling-service/target/universal/workflow-compiling-service-*.zip \
- amber/target/universal/amber-*.zip; do
- unzip -q "$zip" -d /tmp/dists/
- done
-
- lib_paths=(
- /tmp/dists/config-service-*/lib
- /tmp/dists/access-control-service-*/lib
- /tmp/dists/file-service-*/lib
- /tmp/dists/computing-unit-managing-service-*/lib
- /tmp/dists/workflow-compiling-service-*/lib
- /tmp/dists/amber-*/lib
- )
+ unzip -q amber/target/universal/amber-*.zip -d /tmp/dists/
check_exit=0
- ./bin/licensing/check_binary_deps.py --ignore-transitive-version jar
"${lib_paths[@]}" || check_exit=$?
- ./bin/licensing/audit_jar_licenses.py "${lib_paths[@]}" || true
+ ./bin/licensing/check_binary_deps.py --ignore-transitive-version jar
\
+ --license-binary amber/LICENSE-binary-java \
+ /tmp/dists/amber-*/lib || check_exit=$?
+ ./bin/licensing/audit_jar_licenses.py /tmp/dists/amber-*/lib || true
exit "$check_exit"
- name: Install dependencies
# Only the backend test step needs the python deps; install just
@@ -240,17 +229,121 @@ jobs:
run: |
echo "api.version=1.52" >> ~/.docker-java.properties
cat ~/.docker-java.properties
- - name: Run backend tests
+ - name: Run amber and common module tests with coverage
# 'jacoco' runs tests under sbt-jacoco's JVM agent and emits per-
# module jacoco.xml that the codecov upload step picks up.
- run: sbt jacoco
- - name: Upload scala coverage to Codecov
+ # `WorkflowExecutionService/jacoco` only runs that project's tests
+ # (sbt's `test` task does not transit dependsOn), so common
+ # modules' tests are listed explicitly here. Modules with no
+ # tests (Auth, Config) are skipped.
+ run: |
+ sbt "DAO/jacoco" \
+ "PyBuilder/jacoco" \
+ "WorkflowCore/jacoco" \
+ "WorkflowOperator/jacoco" \
+ "WorkflowExecutionService/jacoco"
+ - name: Upload amber and common coverage to Codecov
if: always()
uses: codecov/codecov-action@75cd11691c0faa626561e295848008c8a7dddffe
# v5.5.4
with:
token: ${{ secrets.CODECOV_TOKEN }}
files: ./**/target/scala-2.13/jacoco/report/jacoco.xml
- flags: scala
+ flags: amber
+ fail_ci_if_error: false
+
+ platform:
+ # Per-service build, test, and license check for the non-amber Scala
+ # services. Each matrix entry runs its own dist + test in isolation
+ # against per-module LICENSE-binary (#4668). scalafmt / scalafix already
+ # cover every module in the amber job above, so this matrix skips them.
+ if: ${{ inputs.run_platform }}
+ name: ${{ format('platform{0} ({1})', inputs.job_name_suffix,
matrix.service) }}
+ runs-on: ubuntu-22.04
+ strategy:
+ fail-fast: false
+ matrix:
+ include:
+ - service: config-service
+ sbt_project: ConfigService
+ - service: access-control-service
+ sbt_project: AccessControlService
+ - service: file-service
+ sbt_project: FileService
+ - service: computing-unit-managing-service
+ sbt_project: ComputingUnitManagingService
+ - service: workflow-compiling-service
+ sbt_project: WorkflowCompilingService
+ env:
+ JAVA_OPTS: -Xms2048M -Xmx2048M -Xss6M -XX:ReservedCodeCacheSize=256M
-Dfile.encoding=UTF-8
+ JVM_OPTS: -Xms2048M -Xmx2048M -Xss6M -XX:ReservedCodeCacheSize=256M
-Dfile.encoding=UTF-8
+ services:
+ # Each platform service transitively depends on DAO, which runs JOOQ
+ # code generation at compile time and needs the live texera schema.
+ postgres:
+ image: postgres
+ env:
+ POSTGRES_PASSWORD: postgres
+ ports:
+ - 5432:5432
+ options: >-
+ --health-cmd="pg_isready -U postgres"
+ --health-interval=10s
+ --health-timeout=5s
+ --health-retries=5
+ steps:
+ - name: Checkout
+ uses: actions/checkout@v5
+ with:
+ ref: ${{ inputs.checkout_ref || github.sha }}
+ fetch-depth: 0
+ - name: Prepare backport workspace
+ if: ${{ inputs.backport_target_branch != '' }}
+ working-directory: ${{ github.workspace }}
+ run: bash ./.github/scripts/prepare-backport-checkout.sh "${{
inputs.backport_target_branch }}" "${{ inputs.backport_commit_range }}"
+ - name: Setup JDK
+ uses: actions/setup-java@v5
+ with:
+ distribution: "temurin"
+ java-version: 11
+ - name: Setup sbt launcher
+ uses: sbt/setup-sbt@508b753e53cb6095967669e0911487d2b9bc9f41 # v1.1.22
+ - uses: coursier/cache-action@90c37294538be80a558fd665531fcdc2b467b475 #
v8.1.0
+ with:
+ extraSbtFiles: '["*.sbt", "project/**.{scala,sbt}",
"project/build.properties" ]'
+ - name: Create Databases
+ run: |
+ psql -h localhost -U postgres -f sql/texera_ddl.sql
+ psql -h localhost -U postgres -f sql/iceberg_postgres_catalog.sql
+ psql -h localhost -U postgres -f sql/texera_lakefs.sql
+ env:
+ PGPASSWORD: postgres
+ - name: Build dist and run ${{ matrix.service }} tests with coverage
+ # Single sbt invocation so dist + test share compiled state. Use
+ # `jacoco` so the codecov upload step has a report to pick up.
+ run: sbt "${{ matrix.sbt_project }}/dist" "${{ matrix.sbt_project
}}/jacoco"
+ - name: Unzip ${{ matrix.service }} dist and check binary licenses
+ # Each platform service has its own LICENSE-binary at the repo root
+ # after #4668; check this service's dist against just its own file.
+ run: |
+ set -euo pipefail
+ mkdir -p /tmp/dists
+ unzip -q ${{ matrix.service }}/target/universal/${{ matrix.service
}}-*.zip -d /tmp/dists/
+
+ check_exit=0
+ ./bin/licensing/check_binary_deps.py jar \
+ --license-binary ${{ matrix.service }}/LICENSE-binary \
+ /tmp/dists/${{ matrix.service }}-*/lib || check_exit=$?
+ ./bin/licensing/audit_jar_licenses.py /tmp/dists/${{ matrix.service
}}-*/lib || true
+ exit "$check_exit"
+ - name: Upload ${{ matrix.service }} coverage to Codecov
+ # Per-service flag so each matrix entry has its own Codecov view
+ # rather than being merged into one umbrella `platform` flag.
+ if: always()
+ uses: codecov/codecov-action@75cd11691c0faa626561e295848008c8a7dddffe
# v5.5.4
+ with:
+ token: ${{ secrets.CODECOV_TOKEN }}
+ files: ./${{ matrix.service
}}/target/scala-2.13/jacoco/report/jacoco.xml
+ flags: ${{ matrix.service }}
fail_ci_if_error: false
python:
diff --git a/.github/workflows/required-checks.yml
b/.github/workflows/required-checks.yml
index 1429521985..9046682bd7 100644
--- a/.github/workflows/required-checks.yml
+++ b/.github/workflows/required-checks.yml
@@ -46,11 +46,12 @@ jobs:
# - On PR events, wait for the Pull Request Labeler workflow to finish so
# the labels it applies (frontend, docs, dev, ...) are available, then
# gate run_* outputs on those labels.
- # - run_frontend / run_scala / run_python / run_agent_service: gate the
- # main build stacks. Each labeler-applied label maps to the stacks it
- # requires (LABEL_STACKS below); the run set is the union across all
- # PR labels. Empty union (e.g. docs-only / dev-only PRs) skips every
- # stack. Push and workflow_dispatch events run every stack.
+ # - run_frontend / run_amber / run_platform / run_python /
+ # run_agent_service: gate the main build stacks. Each labeler-applied
+ # label maps to the stacks it requires (LABEL_STACKS below); the run
+ # set is the union across all PR labels. Empty union (e.g. docs-only
+ # / dev-only PRs) skips every stack. Push and workflow_dispatch
+ # events run every stack.
# - backport_targets: JSON array of release/* labels currently on the PR.
# Drives the backport matrix; empty array means no backport runs.
precheck:
@@ -58,7 +59,8 @@ jobs:
runs-on: ubuntu-latest
outputs:
run_frontend: ${{ steps.decide.outputs.run_frontend }}
- run_scala: ${{ steps.decide.outputs.run_scala }}
+ run_amber: ${{ steps.decide.outputs.run_amber }}
+ run_platform: ${{ steps.decide.outputs.run_platform }}
run_python: ${{ steps.decide.outputs.run_python }}
run_agent_service: ${{ steps.decide.outputs.run_agent_service }}
backport_targets: ${{ steps.decide.outputs.backport_targets }}
@@ -113,31 +115,36 @@ jobs:
// labeler matches lives under a component dir and is already
// covered by that component's label.
//
- // label | frontend | scala | python |
agent-service
- //
---------------------|----------|-------|--------|--------------
- // frontend | x | | |
- // python | | x | x |
- // engine | | x | x |
- // service | | x | |
- // agent-service | | | | x
- // common | | x | |
- // ddl-change | | x | |
- // ci | x | x | x | x
- // docs / dev / deps / | | | |
- // release/* / branch | | | |
+ // label | frontend | amber | platform | python |
agent-service
+ //
---------------|----------|-------|----------|--------|--------------
+ // frontend | x | | | |
+ // python | | x | | x |
+ // engine | | x | | x |
+ // platform | | | x | |
+ // agent-service | | | | | x
+ // common | | x | x | |
(also catches
+ // root
scala
+ //
build/lint
+ //
config)
+ // ddl-change | | x | x | |
+ // ci | x | x | x | x | x
+ // docs / dev / | | | | |
+ // deps / release/| | | | |
+ // * / branch | | | | |
const LABEL_STACKS = {
frontend: ["frontend"],
- python: ["scala", "python"], // pyamber drives scala
integration tests too
- engine: ["scala", "python"], // amber/** spans both
- service: ["scala"], // scala-side services;
agent-service is its own label
+ python: ["amber", "python"], // pyamber drives amber
integration tests too
+ engine: ["amber", "python"], // amber/** spans both
+ platform: ["platform"], // platform services
"agent-service": ["agent-service"],
- common: ["scala"],
- "ddl-change": ["scala"],
- ci: ["frontend", "scala", "python", "agent-service"],
+ common: ["amber", "platform"], // common/** + root scala
build/lint
+ "ddl-change": ["amber", "platform"],
+ ci: ["frontend", "amber", "platform", "python", "agent-service"],
};
let runFrontend = true;
- let runScala = true;
+ let runAmber = true;
+ let runPlatform = true;
let runPython = true;
let runAgentService = true;
@@ -149,7 +156,8 @@ jobs:
}
}
runFrontend = stacks.has("frontend");
- runScala = stacks.has("scala");
+ runAmber = stacks.has("amber");
+ runPlatform = stacks.has("platform");
runPython = stacks.has("python");
runAgentService = stacks.has("agent-service");
core.info(
@@ -158,7 +166,8 @@ jobs:
}
core.setOutput("run_frontend", runFrontend ? "true" : "false");
- core.setOutput("run_scala", runScala ? "true" : "false");
+ core.setOutput("run_amber", runAmber ? "true" : "false");
+ core.setOutput("run_platform", runPlatform ? "true" : "false");
core.setOutput("run_python", runPython ? "true" : "false");
core.setOutput("run_agent_service", runAgentService ? "true" :
"false");
@@ -211,7 +220,8 @@ jobs:
uses: ./.github/workflows/build.yml
with:
run_frontend: ${{ needs.precheck.outputs.run_frontend == 'true' }}
- run_scala: ${{ needs.precheck.outputs.run_scala == 'true' }}
+ run_amber: ${{ needs.precheck.outputs.run_amber == 'true' }}
+ run_platform: ${{ needs.precheck.outputs.run_platform == 'true' }}
run_python: ${{ needs.precheck.outputs.run_python == 'true' }}
run_agent_service: ${{ needs.precheck.outputs.run_agent_service ==
'true' }}
secrets: inherit
@@ -230,7 +240,8 @@ jobs:
backport_commit_range: ${{ format('{0}..{1}',
github.event.pull_request.base.sha, github.event.pull_request.head.sha) }}
job_name_suffix: ""
run_frontend: ${{ needs.precheck.outputs.run_frontend == 'true' }}
- run_scala: ${{ needs.precheck.outputs.run_scala == 'true' }}
+ run_amber: ${{ needs.precheck.outputs.run_amber == 'true' }}
+ run_platform: ${{ needs.precheck.outputs.run_platform == 'true' }}
run_python: ${{ needs.precheck.outputs.run_python == 'true' }}
run_agent_service: ${{ needs.precheck.outputs.run_agent_service ==
'true' }}
secrets: inherit