This is an automated email from the ASF dual-hosted git repository.
github-merge-queue[bot] pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/texera.git
The following commit(s) were added to refs/heads/main by this push:
new ed22158534 refactor(config): unify default-data-transfer-batch-size
into one config key (#5545)
ed22158534 is described below
commit ed22158534c1ae503fea9b4338a03862b5231c97
Author: Matthew B. <[email protected]>
AuthorDate: Sun Jun 21 16:48:37 2026 -0700
refactor(config): unify default-data-transfer-batch-size into one config
key (#5545)
### What changes were proposed in this PR?
The default data-transfer batch size was controlled by **two** separate
config keys, each with its own env var, both defaulting to `400`:
- `network-buffering.default-data-transfer-batch-size`
(`NETWORK_BUFFERING_DEFAULT_DATA_TRANSFER_BATCH_SIZE`), read by the
backend (`ApplicationConfig.defaultDataTransferBatchSize`).
- `gui.workflow-workspace.default-data-transfer-batch-size`
(`GUI_WORKFLOW_WORKSPACE_DEFAULT_DATA_TRANSFER_BATCH_SIZE`), read by
`GuiConfig` and sent to the frontend via `ConfigResource`.
Having two keys for one value meant an operator could set one env var
and forget the other, leaving the backend and GUI out of sync.
This PR makes the `network-buffering` key the single source of truth:
- `ConfigResource` now surfaces
`ApplicationConfig.defaultDataTransferBatchSize` directly.
- Removed the duplicate
`guiWorkflowWorkspaceDefaultDataTransferBatchSize` field from
`GuiConfig`.
- Removed the duplicate key (and its env override) from `gui.conf`.
The frontend is unchanged: the JSON field name
(`defaultDataTransferBatchSize`) it consumes stays the same.
### Any related issues, documentation, discussions?
Closes #5544
### How was this PR tested?
- `sbt Config/compile ConfigService/compile` both succeed.
- Repo-wide grep confirms no remaining references to the old key or
`GUI_WORKFLOW_WORKSPACE_DEFAULT_DATA_TRANSFER_BATCH_SIZE` (outside
generated `dist/` artifacts).
- The backend reader of the canonical key is untouched, so existing
tests (e.g. `NetworkOutputBufferSpec`) are unaffected.
- Also tested that the application performs as normal on all services
(e.g., running workflows, uploading data, creating workflows, etc..)
### Was this PR authored or co-authored using generative AI tooling?
Generated-by: Claude Code (Opus 4.8)
---
common/config/src/main/resources/gui.conf | 4 ---
.../apache/texera/common/config/GuiConfig.scala | 2 --
.../texera/service/resource/ConfigResource.scala | 11 ++++++-
.../service/resource/ConfigResourceAuthSpec.scala | 34 ++++++++++++++++++++++
.../app/common/service/gui-config.service.spec.ts | 34 ++++++++++++++++------
.../src/app/common/service/gui-config.service.ts | 11 ++++---
6 files changed, 76 insertions(+), 20 deletions(-)
diff --git a/common/config/src/main/resources/gui.conf
b/common/config/src/main/resources/gui.conf
index a0673c9a3f..aa7e4ff541 100644
--- a/common/config/src/main/resources/gui.conf
+++ b/common/config/src/main/resources/gui.conf
@@ -59,10 +59,6 @@ gui {
auto-attribute-correction-enabled = true
auto-attribute-correction-enabled =
${?GUI_WORKFLOW_WORKSPACE_AUTO_ATTRIBUTE_CORRECTION_ENABLED}
- # default data transfer batch size for workflows
- default-data-transfer-batch-size = 400
- default-data-transfer-batch-size =
${?GUI_WORKFLOW_WORKSPACE_DEFAULT_DATA_TRANSFER_BATCH_SIZE}
-
# default execution mode for workflows, can be either MATERIALIZED or
PIPELINED
default-execution-mode = PIPELINED
default-execution-mode = ${?GUI_WORKFLOW_WORKSPACE_DEFAULT_EXECUTION_MODE}
diff --git
a/common/config/src/main/scala/org/apache/texera/common/config/GuiConfig.scala
b/common/config/src/main/scala/org/apache/texera/common/config/GuiConfig.scala
index 3b378de127..65406b7e74 100644
---
a/common/config/src/main/scala/org/apache/texera/common/config/GuiConfig.scala
+++
b/common/config/src/main/scala/org/apache/texera/common/config/GuiConfig.scala
@@ -45,8 +45,6 @@ object GuiConfig {
conf.getBoolean("gui.workflow-workspace.export-execution-result-enabled")
val guiWorkflowWorkspaceAutoAttributeCorrectionEnabled: Boolean =
conf.getBoolean("gui.workflow-workspace.auto-attribute-correction-enabled")
- val guiWorkflowWorkspaceDefaultDataTransferBatchSize: Int =
- conf.getInt("gui.workflow-workspace.default-data-transfer-batch-size")
val guiWorkflowWorkspaceDefaultExecutionMode: String =
conf.getString("gui.workflow-workspace.default-execution-mode")
val guiWorkflowWorkspaceSelectingFilesFromDatasetsEnabled: Boolean =
diff --git
a/config-service/src/main/scala/org/apache/texera/service/resource/ConfigResource.scala
b/config-service/src/main/scala/org/apache/texera/service/resource/ConfigResource.scala
index 55dc386a3e..e80e2383e9 100644
---
a/config-service/src/main/scala/org/apache/texera/service/resource/ConfigResource.scala
+++
b/config-service/src/main/scala/org/apache/texera/service/resource/ConfigResource.scala
@@ -23,6 +23,7 @@ import jakarta.annotation.security.{PermitAll, RolesAllowed}
import jakarta.ws.rs.core.MediaType
import jakarta.ws.rs.{GET, Path, Produces}
import org.apache.texera.common.config.{
+ ApplicationConfig,
AuthConfig,
ComputingUnitConfig,
GuiConfig,
@@ -67,7 +68,6 @@ class ConfigResource {
"asyncRenderingEnabled" ->
GuiConfig.guiWorkflowWorkspaceAsyncRenderingEnabled,
"timetravelEnabled" -> GuiConfig.guiWorkflowWorkspaceTimetravelEnabled,
"productionSharedEditingServer" ->
GuiConfig.guiWorkflowWorkspaceProductionSharedEditingServer,
- "defaultDataTransferBatchSize" ->
GuiConfig.guiWorkflowWorkspaceDefaultDataTransferBatchSize,
"defaultExecutionMode" ->
GuiConfig.guiWorkflowWorkspaceDefaultExecutionMode,
"workflowEmailNotificationEnabled" ->
GuiConfig.guiWorkflowWorkspaceWorkflowEmailNotificationEnabled,
"sharingComputingUnitEnabled" ->
ComputingUnitConfig.sharingComputingUnitEnabled,
@@ -81,6 +81,15 @@ class ConfigResource {
"expirationTimeInMinutes" -> AuthConfig.jwtExpirationMinutes
)
+ // Engine configs.
+ @GET
+ @RolesAllowed(Array("REGULAR", "ADMIN"))
+ @Path("/amber")
+ def getAmberConfig: Map[String, Any] =
+ Map(
+ "defaultDataTransferBatchSize" ->
ApplicationConfig.defaultDataTransferBatchSize
+ )
+
@GET
@RolesAllowed(Array("REGULAR", "ADMIN"))
@Path("/user-system")
diff --git
a/config-service/src/test/scala/org/apache/texera/service/resource/ConfigResourceAuthSpec.scala
b/config-service/src/test/scala/org/apache/texera/service/resource/ConfigResourceAuthSpec.scala
index d5418ea0f7..2f9f5fb25c 100644
---
a/config-service/src/test/scala/org/apache/texera/service/resource/ConfigResourceAuthSpec.scala
+++
b/config-service/src/test/scala/org/apache/texera/service/resource/ConfigResourceAuthSpec.scala
@@ -153,6 +153,40 @@ class ConfigResourceAuthSpec extends AnyFlatSpec with
Matchers with BeforeAndAft
response.getStatus shouldBe 200
}
+ "GET /config/amber" should "return 401 with a Bearer challenge without an
Authorization header" in {
+ val response =
+
resources.target("/config/amber").request(MediaType.APPLICATION_JSON).get()
+ response.getStatus shouldBe 401
+ response.getHeaderString("WWW-Authenticate") shouldBe
JwtAuthFilter.BearerChallenge
+ }
+
+ it should "return 200 with a valid Bearer token whose role matches
@RolesAllowed" in {
+ val response = resources
+ .target("/config/amber")
+ .request(MediaType.APPLICATION_JSON)
+ .header("Authorization", s"Bearer ${regularToken()}")
+ .get()
+ response.getStatus shouldBe 200
+ }
+
+ it should "expose the engine config separated from the gui payload" in {
+ // The endpoint exists to keep engine configs out of /config/gui (see PR
#5545).
+ // Pin that defaultDataTransferBatchSize is served here and not folded
back into gui.
+ val amberPayload = resources
+ .target("/config/amber")
+ .request(MediaType.APPLICATION_JSON)
+ .header("Authorization", s"Bearer ${regularToken()}")
+ .get(classOf[Map[String, Any]])
+ amberPayload.keySet should contain("defaultDataTransferBatchSize")
+
+ val guiPayload = resources
+ .target("/config/gui")
+ .request(MediaType.APPLICATION_JSON)
+ .header("Authorization", s"Bearer ${regularToken()}")
+ .get(classOf[Map[String, Any]])
+ guiPayload.keySet should not contain "defaultDataTransferBatchSize"
+ }
+
"GET an @RolesAllowed probe endpoint" should "return 401 without an
Authorization header" in {
// Sanity: JwtAuthFilter is now eager — missing Authorization is rejected
// by the filter itself with a 401 + Bearer challenge, before
diff --git a/frontend/src/app/common/service/gui-config.service.spec.ts
b/frontend/src/app/common/service/gui-config.service.spec.ts
index 0294ea8307..92a0d5a615 100644
--- a/frontend/src/app/common/service/gui-config.service.spec.ts
+++ b/frontend/src/app/common/service/gui-config.service.spec.ts
@@ -40,6 +40,10 @@ const GUI_PAYLOAD = {
defaultExecutionMode: "PIPELINED",
};
+const AMBER_PAYLOAD = {
+ defaultDataTransferBatchSize: 400,
+};
+
const USER_SYSTEM_PAYLOAD = {
inviteOnly: true,
};
@@ -91,20 +95,24 @@ describe("GuiConfigService", () => {
// ─── loadPostLogin ────────────────────────────────────────────────────────
- it("loadPostLogin fetches /config/gui and /config/user-system in parallel
and merges", async () => {
+ it("loadPostLogin fetches /config/gui, /config/amber and /config/user-system
in parallel and merges", async () => {
const pending = firstValueFrom(service.loadPostLogin());
const gui = http.expectOne(`${API}/config/gui`);
+ const amber = http.expectOne(`${API}/config/amber`);
const userSystem = http.expectOne(`${API}/config/user-system`);
expect(gui.request.method).toBe("GET");
+ expect(amber.request.method).toBe("GET");
expect(userSystem.request.method).toBe("GET");
gui.flush(GUI_PAYLOAD);
+ amber.flush(AMBER_PAYLOAD);
userSystem.flush(USER_SYSTEM_PAYLOAD);
await pending;
expect(service.env.copilotEnabled).toBe(true);
expect(service.env.limitColumns).toBe(42);
+ expect(service.env.defaultDataTransferBatchSize).toBe(400);
expect(service.env.inviteOnly).toBe(true);
});
@@ -118,6 +126,7 @@ describe("GuiConfigService", () => {
const postLoginPending = firstValueFrom(service.loadPostLogin());
http.expectOne(`${API}/config/gui`).flush(GUI_PAYLOAD);
+ http.expectOne(`${API}/config/amber`).flush(AMBER_PAYLOAD);
http.expectOne(`${API}/config/user-system`).flush(USER_SYSTEM_PAYLOAD);
await postLoginPending;
@@ -132,21 +141,23 @@ describe("GuiConfigService", () => {
it("load() only hits /config/pre-login when no access token is in
localStorage", async () => {
const pending = firstValueFrom(service.load());
http.expectOne(`${API}/config/pre-login`).flush(PRE_LOGIN_PAYLOAD);
- // /config/gui and /config/user-system must not be requested when
anonymous;
+ // post-login endpoints must not be requested when anonymous;
// the no-Authorization-header request would 403 and pollute network logs.
http.expectNone(`${API}/config/gui`);
+ http.expectNone(`${API}/config/amber`);
http.expectNone(`${API}/config/user-system`);
await pending;
expect(service.env.localLogin).toBe(true);
});
- it("load() chains /config/gui + /config/user-system when a token is stored",
async () => {
+ it("load() chains /config/gui + /config/amber + /config/user-system when a
token is stored", async () => {
localStorage.setItem(TOKEN_KEY, "stored-token");
const pending = firstValueFrom(service.load());
http.expectOne(`${API}/config/pre-login`).flush(PRE_LOGIN_PAYLOAD);
- // forkJoin fires both requests in parallel after pre-login resolves.
+ // forkJoin fires all requests in parallel after pre-login resolves.
http.expectOne(`${API}/config/gui`).flush(GUI_PAYLOAD);
+ http.expectOne(`${API}/config/amber`).flush(AMBER_PAYLOAD);
http.expectOne(`${API}/config/user-system`).flush(USER_SYSTEM_PAYLOAD);
await pending;
@@ -164,11 +175,15 @@ describe("GuiConfigService", () => {
http.expectOne(`${API}/config/pre-login`).flush(PRE_LOGIN_PAYLOAD);
const guiReq = http.expectOne(`${API}/config/gui`);
+ const amberReq = http.expectOne(`${API}/config/amber`);
const userSystemReq = http.expectOne(`${API}/config/user-system`);
guiReq.flush({}, { status: 403, statusText: "Forbidden" });
- // forkJoin tears down the sibling observable on first error, so the
- // user-system request is already cancelled by the time we get here.
- // Flushing a cancelled TestRequest throws.
+ // forkJoin tears down the sibling observables on first error, so the
+ // amber and user-system requests are already cancelled by the time we get
+ // here. Flushing a cancelled TestRequest throws.
+ if (!amberReq.cancelled) {
+ amberReq.flush({}, { status: 403, statusText: "Forbidden" });
+ }
if (!userSystemReq.cancelled) {
userSystemReq.flush({}, { status: 403, statusText: "Forbidden" });
}
@@ -185,9 +200,10 @@ describe("GuiConfigService", () => {
localStorage.setItem(TOKEN_KEY, "stored-token");
const pending = firstValueFrom(service.load());
http.expectOne(`${API}/config/pre-login`).error(new
ProgressEvent("offline"));
- // /config/gui must NOT be attempted if pre-login fails — the catchError on
- // the inner pipe must not catch the outer pre-login rejection.
+ // post-login endpoints must NOT be attempted if pre-login fails: the
+ // catchError on the inner pipe must not catch the outer pre-login
rejection.
http.expectNone(`${API}/config/gui`);
+ http.expectNone(`${API}/config/amber`);
http.expectNone(`${API}/config/user-system`);
await expect(pending).rejects.toThrow(/pre-login configuration/);
});
diff --git a/frontend/src/app/common/service/gui-config.service.ts
b/frontend/src/app/common/service/gui-config.service.ts
index e96d98b191..28695f7d06 100644
--- a/frontend/src/app/common/service/gui-config.service.ts
+++ b/frontend/src/app/common/service/gui-config.service.ts
@@ -29,7 +29,9 @@ import { AppSettings } from "../app-setting";
const ACCESS_TOKEN_KEY = "access_token";
type PreLoginConfig = Pick<GuiConfig, "localLogin" | "googleLogin" |
"defaultLocalUser" | "attributionEnabled">;
-type GuiOnlyConfig = Omit<GuiConfig, keyof PreLoginConfig | "inviteOnly">;
+// Fields served by /config/amber.
+type AmberConfig = Pick<GuiConfig, "defaultDataTransferBatchSize">;
+type GuiOnlyConfig = Omit<GuiConfig, keyof PreLoginConfig | keyof AmberConfig
| "inviteOnly">;
type UserSystemConfig = Pick<GuiConfig, "inviteOnly">;
@Injectable({ providedIn: "root" })
@@ -83,10 +85,11 @@ export class GuiConfigService {
*/
loadPostLogin(): Observable<Partial<GuiConfig>> {
const guiConfig$ =
this.http.get<GuiOnlyConfig>(`${AppSettings.getApiEndpoint()}/config/gui`);
+ const amberConfig$ =
this.http.get<AmberConfig>(`${AppSettings.getApiEndpoint()}/config/amber`);
const userSystemConfig$ =
this.http.get<UserSystemConfig>(`${AppSettings.getApiEndpoint()}/config/user-system`);
- return forkJoin([guiConfig$, userSystemConfig$]).pipe(
- tap(([guiConfig, userSystemConfig]) => {
- this.config = { ...this.config, ...guiConfig, ...userSystemConfig };
+ return forkJoin([guiConfig$, amberConfig$, userSystemConfig$]).pipe(
+ tap(([guiConfig, amberConfig, userSystemConfig]) => {
+ this.config = { ...this.config, ...guiConfig, ...amberConfig,
...userSystemConfig };
}),
map(() => this.config)
);