This is an automated email from the ASF dual-hosted git repository.
wankai123 pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/skywalking-horizon-ui.git
The following commit(s) were added to refs/heads/main by this push:
new cfa8836 feat(dsl): track runtime-rule applies with a live phase
stepper (#55)
cfa8836 is described below
commit cfa8836c3f82aaebd405ae4bfb1033f8a5635c54
Author: 吴晟 Wu Sheng <[email protected]>
AuthorDate: Tue Jun 16 13:55:44 2026 +0800
feat(dsl): track runtime-rule applies with a live phase stepper (#55)
---
CHANGELOG.md | 7 +
apps/bff/src/http/admin/dsl/_shared.ts | 22 +-
apps/bff/src/http/admin/dsl/rule.ts | 57 +++-
apps/bff/src/rbac/route-policy.ts | 1 +
apps/ui/src/api/scopes/dsl.ts | 16 ++
apps/ui/src/features/operate/dsl/ApplyProgress.vue | 294 +++++++++++++++++++++
apps/ui/src/features/operate/dsl/DslEditorView.vue | 289 +++++++++++++++++---
apps/ui/src/features/operate/dsl/contentHash.ts | 41 +++
apps/ui/src/features/operate/dsl/useRuleEditor.ts | 110 +++++++-
apps/ui/src/i18n/locales/de.json | 24 +-
apps/ui/src/i18n/locales/en.json | 22 ++
apps/ui/src/i18n/locales/es.json | 24 +-
apps/ui/src/i18n/locales/fr.json | 24 +-
apps/ui/src/i18n/locales/ja.json | 24 +-
apps/ui/src/i18n/locales/ko.json | 24 +-
apps/ui/src/i18n/locales/pt.json | 24 +-
apps/ui/src/i18n/locales/zh-CN.json | 24 +-
docs/menu.yml | 2 +
docs/operate/runtime-rules.md | 53 ++++
packages/api-client/src/index.ts | 1 +
packages/api-client/src/runtime-rule.ts | 28 ++
packages/api-client/src/types.ts | 85 ++++++
22 files changed, 1112 insertions(+), 84 deletions(-)
diff --git a/CHANGELOG.md b/CHANGELOG.md
index 933ed60..1044860 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -126,6 +126,13 @@ The version line is shared by every package in the
monorepo (apps + shared packa
- **Multiple output entities collapse the same way.** When a record
materialises one metric for several entities (e.g. a per-endpoint write rate
over `sw_metricsMinute` / `sw_metricsHour` / `sw_metricsDay`), the repeated
meter cards fold into one block: a shared header (metric / function / time
bucket), a `N outputs · values=…` summary, and a diff that surfaces only the
entity fields that actually differ — whichever they are, not a fixed field —
with each output's value beside it.
- **Readable sample values.** Long fractional values from `rate()` / `avg()`
(e.g. `57.0333333333…`) are trimmed to a few significant digits for display so
they stop overflowing the value column; integer counters still render exact,
and the precise value stays available on hover.
+### DSL management — live apply progress & recovery
+
+- **A structural rule change now shows live apply progress.** Saving an edit
that moves a metric's storage shape (scope, downsampling, or the metric set) no
longer just flashes "submitted" — the editor tracks the apply across the
cluster through a phase stepper (Compiled → Confirming across the cluster →
Committing → Done) and reports success only once OAP confirms the change is
durable. **Revert to bundled** (also a schema change) goes through the same
stepper. Body- and filter-only edi [...]
+- **"Applied — cluster propagation unconfirmed" is a warning, not an error.**
When a structural change is committed and durable but one or more nodes hadn't
confirmed the new schema within OAP's fence budget, the editor names the
lagging nodes and explains they self-converge on their next scan — the rule is
applied, not rolled back. Reloading the editor reads it back as applied (from
the stored rule).
+- **A failed apply is called out as rolled back** — the cluster stays on the
previous rule, the failure reason is shown inline, and your edit is kept in the
editor so you can fix and save again. A compile error now surfaces as an inline
diagnostic under the editor instead of a transient toast.
+- **Force re-apply to recover.** A degraded or transiently-failed apply offers
a one-click **Force re-apply (recover)** that re-runs the apply across the
cluster to re-confirm the schema and un-stick any waiting node — gated behind a
confirm that spells out it briefly pauses collection for that rule's metrics,
even when the content is unchanged. This subsumes the old Advanced `force`
toggle for the recovery case.
+
## 0.6.0
This release is the production-readiness pass for Horizon UI: every page now
renders correctly across the eight supported languages on non-UTC OAP
deployments, with deliberate caps and validation on the load surfaces that
operators reach. The pillars below describe the operator-visible result.
diff --git a/apps/bff/src/http/admin/dsl/_shared.ts
b/apps/bff/src/http/admin/dsl/_shared.ts
index a7ba512..34108e4 100644
--- a/apps/bff/src/http/admin/dsl/_shared.ts
+++ b/apps/bff/src/http/admin/dsl/_shared.ts
@@ -119,16 +119,18 @@ export function passOapError(err: unknown, reply:
FastifyReply): FastifyReply {
}
/**
- * A structural rule apply (revertToBundled, a storage-change save) can take
- * longer than OAP's admin request timeout: OAP returns 503
- * `RequestTimeoutException` at ~10s, or the BFF's own fetch aborts first —
- * but the apply keeps running under OAP's per-file lock and completes
- * regardless (verified in the OAP runtime-rule engine logs). Treating that
- * as a hard failure makes operators retry, which piles new waiters on the
- * same lock. Instead we report `202 submitted` so the UI confirms by polling
- * the rule status. Genuine 4xx rejections (409 requires_inactivate_first /
- * requires_revert_to_bundled, 400 no_bundled_twin, …) are NOT timeouts and
- * fall through to the normal error path.
+ * Revert-to-bundled runs OAP's structural apply pipeline INLINE (it has no
+ * applyId / phase tracking, unlike `/addOrUpdate`) and can outlast OAP's
+ * admin request timeout: OAP returns 503 `RequestTimeoutException`, or the
+ * BFF's own fetch aborts first — but the apply keeps running under OAP's
+ * per-file lock and completes regardless. Treating that as a hard failure
+ * makes operators retry, piling new waiters on the same lock. So for revert
+ * we report `202 submitted` and the UI confirms by re-reading the rule.
+ * (`/addOrUpdate` no longer needs this: a structural apply returns 200
+ * `structural_applied` + `applyId` immediately at phase FENCING, so the UI
+ * polls /status instead.) Genuine 4xx rejections (409
+ * requires_inactivate_first / requires_revert_to_bundled, 400
+ * no_bundled_twin, …) are NOT timeouts and fall through to the error path.
*/
export function isOapApplyTimeout(err: unknown): boolean {
if (err instanceof RuntimeRuleApiError) {
diff --git a/apps/bff/src/http/admin/dsl/rule.ts
b/apps/bff/src/http/admin/dsl/rule.ts
index 9390930..fb74ca5 100644
--- a/apps/bff/src/http/admin/dsl/rule.ts
+++ b/apps/bff/src/http/admin/dsl/rule.ts
@@ -17,8 +17,13 @@
/**
* GET /api/rule — single rule fetch (`If-None-Match` aware).
+ * GET /api/rule/status — structural-apply progress (`rule:read`,
+ * poll by `applyId` or `contentHash`).
* POST /api/rule — add or update (audited; structural
* writes need `rule:write:structural`).
+ * A structural apply returns 200
+ * `structural_applied` + `applyId` at
+ * phase FENCING; the UI polls /status.
* POST /api/rule/inactivate — `rule:write`, audited.
* POST /api/rule/delete — `rule:delete`; `mode=revertToBundled`
* is treated as a structural write.
@@ -35,6 +40,7 @@ import {
parseDeleteMode,
parseRequiredCatalog,
passOapError,
+ passOapErrorAudit,
passSubmittedOrError,
} from './_shared.js';
@@ -80,6 +86,30 @@ export function registerDslRuleRoutes(app: FastifyInstance,
deps: DslRouteDeps):
}
});
+ // Poll target for a structural apply. The `structural_applied` response
+ // carries an `applyId`; the UI polls this until the phase is terminal
+ // (APPLIED / DEGRADED / FAILED), or by `contentHash` after a reload.
+ app.get('/api/rule/status', { preHandler: auth }, async (req:
FastifyRequest, reply: FastifyReply) => {
+ if (!ensureVerb(req, reply, deps, 'rule:read')) return;
+ const q = req.query as Record<string, string | undefined>;
+ const catalog = parseRequiredCatalog(q, reply);
+ if (!catalog) return;
+ if (!q.name) return reply.code(400).send({ error: 'missing_name' });
+ try {
+ const status = await clients()
+ .primary()
+ .status({
+ catalog,
+ name: q.name,
+ ...(q.applyId ? { applyId: q.applyId } : {}),
+ ...(q.contentHash ? { contentHash: q.contentHash } : {}),
+ });
+ return reply.send(status);
+ } catch (err) {
+ return passOapError(err, reply);
+ }
+ });
+
app.post('/api/rule', { preHandler: auth }, async (req: FastifyRequest,
reply: FastifyReply) => {
const q = req.query as Record<string, string | undefined>;
const catalog = parseRequiredCatalog(q, reply);
@@ -94,6 +124,9 @@ export function registerDslRuleRoutes(app: FastifyInstance,
deps: DslRouteDeps):
if (!ensureVerb(req, reply, deps, verb)) return;
try {
+ // A structural apply now returns 200 `structural_applied` + `applyId`
+ // at phase FENCING (no longer runs past the request timeout) — relay
+ // the result verbatim; the UI polls /api/rule/status by applyId.
const result = await clients().primary().addOrUpdate({
catalog,
name: q.name,
@@ -107,7 +140,7 @@ export function registerDslRuleRoutes(app: FastifyInstance,
deps: DslRouteDeps):
});
return reply.send(result);
} catch (err) {
- return passSubmittedOrError(err, reply, deps, req, 'addOrUpdate', verb,
catalog, q.name);
+ return passOapErrorAudit(err, reply, deps, req, 'addOrUpdate', verb,
catalog, q.name);
}
});
@@ -121,20 +154,13 @@ export function registerDslRuleRoutes(app:
FastifyInstance, deps: DslRouteDeps):
if (!q.name) return reply.code(400).send({ error: 'missing_name' });
if (!ensureVerb(req, reply, deps, 'rule:write')) return;
try {
+ // inactivate returns synchronously (no schema change) — no async
+ // apply to poll, so a failure is just an error, never `submitted`.
const result = await clients().primary().inactivate(catalog, q.name);
auditMutation(deps, req, 'inactivate', 'rule:write', catalog, q.name,
result.applyStatus);
return reply.send(result);
} catch (err) {
- return passSubmittedOrError(
- err,
- reply,
- deps,
- req,
- 'inactivate',
- 'rule:write',
- catalog,
- q.name,
- );
+ return passOapErrorAudit(err, reply, deps, req, 'inactivate',
'rule:write', catalog, q.name);
}
},
);
@@ -159,7 +185,14 @@ export function registerDslRuleRoutes(app:
FastifyInstance, deps: DslRouteDeps):
auditMutation(deps, req, 'delete', verb, catalog, q.name,
result.applyStatus, { mode });
return reply.send(result);
} catch (err) {
- return passSubmittedOrError(err, reply, deps, req, 'delete', verb,
catalog, q.name, { mode });
+ // Plain delete and inactivate are synchronous. Only revert-to-bundled
+ // runs the structural apply pipeline inline and can outlast the
+ // request timeout (it has no applyId), so it alone still maps a
+ // timeout to `202 submitted` for the UI's poll-by-reread.
+ if (mode === 'revertToBundled') {
+ return passSubmittedOrError(err, reply, deps, req, 'delete', verb,
catalog, q.name, { mode });
+ }
+ return passOapErrorAudit(err, reply, deps, req, 'delete', verb,
catalog, q.name, { mode });
}
},
);
diff --git a/apps/bff/src/rbac/route-policy.ts
b/apps/bff/src/rbac/route-policy.ts
index 4b27c31..40c4025 100644
--- a/apps/bff/src/rbac/route-policy.ts
+++ b/apps/bff/src/rbac/route-policy.ts
@@ -167,6 +167,7 @@ export const ROUTE_POLICY: Record<string, RoutePolicy> = {
// ── DSL / OAL / MQE rules (admin operate) ────────────────────────
'GET /api/rule': 'rule:read',
+ 'GET /api/rule/status': 'rule:read',
'POST /api/rule': 'rule:write',
'POST /api/rule/inactivate': 'rule:write',
'POST /api/rule/delete': 'rule:delete',
diff --git a/apps/ui/src/api/scopes/dsl.ts b/apps/ui/src/api/scopes/dsl.ts
index 021eeb7..504c09a 100644
--- a/apps/ui/src/api/scopes/dsl.ts
+++ b/apps/ui/src/api/scopes/dsl.ts
@@ -26,6 +26,7 @@ import type {
OalSourceDetail,
RuleResponse,
RuleSource,
+ RuleStatusResponse,
} from '@skywalking-horizon-ui/api-client';
import type { BffClient, ClusterStateResponse } from '../client';
import { BffApiError, withBase } from '../client';
@@ -124,6 +125,21 @@ export class DslApi {
return (await res.json()) as ApplyResult;
}
+ /** Poll a structural apply's progress. Pass `applyId` from the
+ * `structural_applied` response while the apply is live; pass
+ * `contentHash` to resolve from the durable rule row after a reload. */
+ ruleStatus(args: {
+ catalog: Catalog;
+ name: string;
+ applyId?: string;
+ contentHash?: string;
+ }): Promise<RuleStatusResponse> {
+ const params = new URLSearchParams({ catalog: args.catalog, name:
args.name });
+ if (args.applyId) params.set('applyId', args.applyId);
+ if (args.contentHash) params.set('contentHash', args.contentHash);
+ return this.bff.request<RuleStatusResponse>('GET',
`/api/rule/status?${params.toString()}`);
+ }
+
inactivateRule(catalog: Catalog, name: string): Promise<ApplyResult> {
const params = new URLSearchParams({ catalog, name });
return this.bff.request<ApplyResult>('POST',
`/api/rule/inactivate?${params.toString()}`);
diff --git a/apps/ui/src/features/operate/dsl/ApplyProgress.vue
b/apps/ui/src/features/operate/dsl/ApplyProgress.vue
new file mode 100644
index 0000000..a7a6890
--- /dev/null
+++ b/apps/ui/src/features/operate/dsl/ApplyProgress.vue
@@ -0,0 +1,294 @@
+<!--
+ Licensed to the Apache Software Foundation (ASF) under one or more
+ contributor license agreements. See the NOTICE file distributed with
+ this work for additional information regarding copyright ownership.
+ The ASF licenses this file to You under the Apache License, Version 2.0
+ (the "License"); you may not use this file except in compliance with
+ the License. You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+-->
+<!--
+ Phase stepper for a structural apply (an `/addOrUpdate` save, or a revert
+ that returns an `applyId`). The POST returns at FENCING (DDL already fired)
+ and the rest runs in OAP's background, so this renders the live phase the
+ editor polls through the BFF's `/api/rule/status`:
+ Compiled → Confirming (FENCING) → Committing (ROLLING_OUT) → Done.
+ DEGRADED is a warning (durable + converging), FAILED is rolled back. The
+ parent owns the poll and the recover/re-check/dismiss actions; this only
+ renders + emits.
+<script setup lang="ts">
+import { computed } from 'vue';
+import { useI18n } from 'vue-i18n';
+import type { ApplyPhase } from '@skywalking-horizon-ui/api-client';
+import Section from '@/components/primitives/Section.vue';
+import Btn from '@/components/primitives/Btn.vue';
+
+const props = defineProps<{
+ phase: ApplyPhase;
+ failureReason?: string;
+ fenceLaggards?: string[];
+ /** `'durable-dao'` → the status was reconstructed from the durable row
+ * (e.g. after a reload) rather than live progress. */
+ derivedFrom?: string;
+ /** Gate the recover button on `rule:write:structural`. */
+ canRecover?: boolean;
+ /** A recover / re-check apply is in flight — disable the actions. */
+ busy?: boolean;
+}>();
+
+const emit = defineEmits<{ recover: []; recheck: []; dismiss: [] }>();
+
+const { t } = useI18n();
+
+type Marker = 'done' | 'active' | 'pending' | 'ok' | 'warn' | 'err';
+
+const steps = computed<{ label: string; marker: Marker }[]>(() => {
+ const labels = [
+ t('Compiled & schema applied'),
+ t('Confirming across the cluster…'),
+ t('Committing'),
+ t('Done'),
+ ];
+ const markers = markersFor(props.phase);
+ return labels.map((label, i) => ({ label, marker: markers[i] }));
+});
+
+function markersFor(phase: ApplyPhase): Marker[] {
+ switch (phase) {
+ case 'PENDING':
+ case 'DDL':
+ return ['active', 'pending', 'pending', 'pending'];
+ case 'FENCING':
+ return ['done', 'active', 'pending', 'pending'];
+ case 'ROLLING_OUT':
+ return ['done', 'done', 'active', 'pending'];
+ case 'APPLIED':
+ return ['done', 'done', 'done', 'ok'];
+ case 'DEGRADED':
+ return ['done', 'done', 'done', 'warn'];
+ case 'FAILED':
+ // Rolled back — nothing durable landed (the cluster stays on the prior
+ // rule), so don't mark any step done; a ✓ here would contradict the
+ // "rolled back" banner. Only the terminal node shows the failure.
+ return ['pending', 'pending', 'pending', 'err'];
+ default:
+ return ['pending', 'pending', 'pending', 'pending'];
+ }
+}
+
+const GLYPH: Record<Marker, string> = {
+ done: '✓',
+ ok: '✓',
+ active: '●',
+ pending: '○',
+ warn: '!',
+ err: '✕',
+};
+
+const isTerminal = computed(
+ () => props.phase === 'APPLIED' || props.phase === 'DEGRADED' || props.phase
=== 'FAILED',
+);
+const fromStored = computed(() => props.derivedFrom === 'durable-dao');
+
+const title = computed(() => {
+ switch (props.phase) {
+ case 'APPLIED':
+ return fromStored.value ? t('applied (from stored state)') : t('Schema
change applied');
+ case 'DEGRADED':
+ return t('Applied — cluster propagation unconfirmed');
+ case 'FAILED':
+ return t('Apply failed — rolled back');
+ default:
+ return t('Applying schema change');
+ }
+});
+</script>
+
+<template>
+ <Section :title="title" :data-testid="'apply-progress'" :data-phase="phase">
+ <div class="ap">
+ <ol class="ap__steps">
+ <li v-for="(s, i) in steps" :key="i" class="ap__step">
+ <span class="ap__glyph" :class="[`ap__glyph--${s.marker}`, {
'ap__glyph--pulse': s.marker === 'active' }]">
+ {{ GLYPH[s.marker] }}
+ </span>
+ <span class="ap__label" :class="{ 'ap__label--muted': s.marker ===
'pending' }">{{ s.label }}</span>
+ <span v-if="i < steps.length - 1" class="ap__bar" :class="{
'ap__bar--done': s.marker === 'done' || s.marker === 'ok' }" />
+ </li>
+ </ol>
+
+ <p v-if="!isTerminal" class="ap__caption">
+ {{ t('Schema changes apply across the cluster in the background —
usually seconds, up to a few minutes if a node is slow. You can leave this
page.') }}
+ </p>
+
+ <div v-else-if="phase === 'DEGRADED'" class="ap__banner
ap__banner--warn">
+ <p class="ap__msg">
+ {{ t('Durable and applied, but cluster-wide schema propagation
wasn’t confirmed in time. The listed nodes catch up automatically on their next
scan.') }}
+ </p>
+ <p v-if="fenceLaggards && fenceLaggards.length" class="ap__nodes">
+ {{ t('Waiting on: {nodes}', { nodes: fenceLaggards.join(', ') }) }}
+ </p>
+ <p v-if="failureReason" class="ap__reason">{{ failureReason }}</p>
+ </div>
+
+ <div v-else-if="phase === 'FAILED'" class="ap__banner ap__banner--err">
+ <p class="ap__msg">
+ {{ t('The change was rolled back; the cluster is still on the
previous rule. Fix the issue and save again.') }}
+ </p>
+ <p v-if="failureReason" class="ap__reason">{{ failureReason }}</p>
+ </div>
+
+ <div v-if="phase === 'DEGRADED' || phase === 'FAILED'"
class="ap__actions">
+ <Btn
+ v-if="canRecover"
+ size="sm"
+ :disabled="busy"
+ :data-testid="'apply-recover'"
+ @click="emit('recover')"
+ >
+ {{ t('Force re-apply (recover)') }}
+ </Btn>
+ <Btn v-if="phase === 'DEGRADED'" size="sm" :disabled="busy"
@click="emit('recheck')">
+ {{ t('Re-check') }}
+ </Btn>
+ <Btn size="sm" :disabled="busy" @click="emit('dismiss')">{{
t('Dismiss') }}</Btn>
+ </div>
+ </div>
+ </Section>
+</template>
+
+<style scoped>
+.ap {
+ display: flex;
+ flex-direction: column;
+ gap: 10px;
+}
+
+.ap__steps {
+ display: flex;
+ flex-wrap: wrap;
+ align-items: center;
+ gap: 6px 8px;
+ margin: 0;
+ padding: 0;
+ list-style: none;
+}
+
+.ap__step {
+ display: flex;
+ align-items: center;
+ gap: 6px;
+}
+
+.ap__glyph {
+ display: inline-flex;
+ align-items: center;
+ justify-content: center;
+ width: 16px;
+ font-family: var(--rr-font-mono);
+ font-size: 12px;
+ line-height: 1;
+ color: var(--rr-dim);
+}
+.ap__glyph--done,
+.ap__glyph--ok {
+ color: var(--rr-ok);
+}
+.ap__glyph--active {
+ color: var(--rr-info);
+}
+.ap__glyph--warn {
+ color: var(--rr-warn);
+}
+.ap__glyph--err {
+ color: var(--rr-err);
+}
+.ap__glyph--pulse {
+ animation: ap-pulse 1.2s ease-in-out infinite;
+}
+@keyframes ap-pulse {
+ 0%,
+ 100% {
+ opacity: 1;
+ }
+ 50% {
+ opacity: 0.3;
+ }
+}
+
+.ap__label {
+ font-family: var(--rr-font-mono);
+ font-size: var(--sw-fs-sm);
+ color: var(--rr-ink2);
+}
+.ap__label--muted {
+ color: var(--rr-dim);
+}
+
+.ap__bar {
+ width: 22px;
+ height: 1px;
+ margin-left: 2px;
+ background: var(--rr-border2);
+}
+.ap__bar--done {
+ background: color-mix(in oklch, var(--rr-ok) 60%, var(--rr-border2));
+}
+
+.ap__caption {
+ margin: 0;
+ font-size: var(--sw-fs-base);
+ color: var(--rr-dim);
+ line-height: var(--sw-lh-tight);
+}
+
+.ap__banner {
+ display: flex;
+ flex-direction: column;
+ gap: 4px;
+ padding: 8px 10px;
+ border-radius: var(--rr-radius-md);
+ border-left: 2px solid var(--rr-border2);
+ background: var(--rr-bg2);
+}
+.ap__banner--warn {
+ border-left-color: var(--rr-warn);
+}
+.ap__banner--err {
+ border-left-color: var(--rr-err);
+}
+
+.ap__msg {
+ margin: 0;
+ font-size: var(--sw-fs-base);
+ color: var(--rr-ink2);
+ line-height: var(--sw-lh-tight);
+}
+
+.ap__nodes {
+ margin: 0;
+ font-family: var(--rr-font-mono);
+ font-size: var(--sw-fs-sm);
+ color: var(--rr-warn);
+}
+
+.ap__reason {
+ margin: 0;
+ font-family: var(--rr-font-mono);
+ font-size: var(--sw-fs-sm);
+ color: var(--rr-dim);
+ word-break: break-word;
+}
+
+.ap__actions {
+ display: flex;
+ gap: 8px;
+}
+</style>
diff --git a/apps/ui/src/features/operate/dsl/DslEditorView.vue
b/apps/ui/src/features/operate/dsl/DslEditorView.vue
index 9e196a1..2918eff 100644
--- a/apps/ui/src/features/operate/dsl/DslEditorView.vue
+++ b/apps/ui/src/features/operate/dsl/DslEditorView.vue
@@ -18,14 +18,21 @@
import { computed, ref, shallowRef, watch } from 'vue';
import { useI18n } from 'vue-i18n';
import { useRoute, useRouter } from 'vue-router';
-import { isCatalog, type Catalog, type RuleResponse } from
'@skywalking-horizon-ui/api-client';
+import {
+ isCatalog,
+ type ApplyPhase,
+ type Catalog,
+ type RuleResponse,
+} from '@skywalking-horizon-ui/api-client';
import { useAuthStore } from '@/state/auth';
-import { useRuleEditor } from '@/features/operate/dsl/useRuleEditor';
+import { useRuleEditor, type SaveOutcome } from
'@/features/operate/dsl/useRuleEditor';
+import { sha256Hex } from '@/features/operate/dsl/contentHash';
import Btn from '@/components/primitives/Btn.vue';
import Pill from '@/components/primitives/Pill.vue';
import MonacoYaml from '@/features/operate/_shared/MonacoYaml.vue';
import MonacoDiff from '@/features/operate/_shared/MonacoDiff.vue';
import DestructiveConfirm from
'@/features/operate/_shared/DestructiveConfirm.vue';
+import ApplyProgress from '@/features/operate/dsl/ApplyProgress.vue';
import AdminFeatureWarning from '@/shell/AdminFeatureWarning.vue';
const { t } = useI18n();
@@ -47,6 +54,23 @@ const editor = useRuleEditor({ catalog, name });
const showAdvanced = ref(false);
const force = ref(false);
+// Live structural-apply state, driving the phase stepper. Declared ABOVE the
+// (immediate) resume watch below — that watch fires during setup and reads
+// these, so hoisting them avoids a TDZ ReferenceError that would blank the
+// page. `null` = no apply in flight / panel dismissed.
+interface ApplyState {
+ phase: ApplyPhase;
+ applyId: string;
+ hash: string;
+ failureReason?: string;
+ fenceLaggards?: string[];
+ derivedFrom?: string;
+}
+const apply = ref<ApplyState | null>(null);
+const polling = ref(false);
+/** Inline YAML diagnostic for a 400 compile_failed — the operator's to fix. */
+const compileError = ref<string | null>(null);
+
type DiffMode = 'none' | 'current' | 'bundled';
const diffMode = ref<DiffMode>('none');
const bundled = shallowRef<RuleResponse | null>(null);
@@ -95,42 +119,119 @@ function setFlash(msg: string): void {
}, 4000);
}
-// A structural apply (revert-to-bundled, storage-change save) runs past
-// OAP's admin request timeout, so the mutation returns `pending` before it
-// lands. Poll the rule to its expected end state instead of re-firing
-// (a retry just queues another waiter on OAP's per-file lock). The action
-// row stays disabled via `applying` for the duration so the operator can't
-// stack retries.
-const applying = ref(false);
-async function trackApply(op: 'revert' | 'inactivate' | 'delete'):
Promise<void> {
- applying.value = true;
- setFlash(t('submitted — OAP is applying it; a structural apply can take a
minute…'));
- const done =
- op === 'revert'
- ? (r: RuleResponse | null): boolean => r != null && (r.status ===
'BUNDLED' || r.status === 'STATIC')
- : op === 'inactivate'
- ? (r: RuleResponse | null): boolean => r?.status === 'INACTIVE'
- : (r: RuleResponse | null): boolean => r === null;
- const res = await editor.awaitApplied(done);
- applying.value = false;
- if (res === 'applied') {
+// Every action button is gated on `busy` so retries can't stack on OAP's
+// per-file lock while a save / structural poll / revert poll / confirm is in
+// flight.
+const busy = computed(
+ () => editor.saving.value || polling.value || applying.value ||
confirmBusy.value,
+);
+
+// ── Structural apply: poll /status by applyId, show the phase stepper ──
+
+/** Poll a structural apply to a terminal phase. APPLIED auto-collapses the
+ * panel; DEGRADED / FAILED keep it so the operator can recover or dismiss. */
+async function runStructuralPoll(applyId: string, hash: string): Promise<void>
{
+ if (polling.value) return;
+ polling.value = true;
+ flash.value = null;
+ compileError.value = null;
+ apply.value = { phase: 'FENCING', applyId, hash };
+ const final = await editor.awaitPhase(applyId, hash, (s) => {
+ apply.value = {
+ phase: (s.phase as ApplyPhase) || 'UNKNOWN',
+ applyId,
+ hash,
+ failureReason: s.failureReason,
+ fenceLaggards: s.fenceLaggards,
+ derivedFrom: s.derivedFrom,
+ };
+ });
+ polling.value = false;
+ await stripApplyQuery();
+ const phase = final?.phase;
+ if (phase === 'APPLIED') {
setFlash(t('applied ✓'));
- if (op === 'delete') {
- await router.push({ name: 'catalog', params: { catalog: catalog.value ??
'' } });
- }
+ window.setTimeout(() => {
+ if (apply.value?.applyId === applyId) apply.value = null;
+ }, 2500);
+ return;
+ }
+ if (phase === 'DEGRADED' || phase === 'FAILED') {
+ return; // keep the banner so the operator can recover / dismiss
+ }
+ // No terminal phase reached: the applyId is no longer tracked (UNKNOWN /
+ // not found), the budget elapsed while still applying, or every poll
+ // errored. Drop the stale stepper — the apply (if any) finishes on OAP and
+ // a reload shows the durable state.
+ apply.value = null;
+ if (final && (final.found === false || final.phase === 'UNKNOWN')) {
+ await editor.load();
+ setFlash(t('Apply status is no longer tracked — reload to see the stored
rule.'));
} else {
setFlash(t('still applying on OAP — refresh in a moment to confirm'));
}
}
-async function onSave(): Promise<void> {
- const r = await editor.save({ force: force.value });
+/** Start tracking a fresh structural apply. The applyId + content hash live
+ * in the URL ONLY while in-flight (stripped on terminal) so a reload resumes
+ * the stepper; after terminal, a reload resolves from the durable row, which
+ * reads a DEGRADED apply back as applied-from-stored-state — by design. */
+async function beginStructural(applyId: string, content?: string):
Promise<void> {
+ // Hash the content that was actually applied — the editor buffer for a save,
+ // the bundled content for a revert — so the durable-row resume can match it.
+ const hash = await sha256Hex(content ?? editor.buffer.value);
+ await router.replace({ path: route.path, query: { ...route.query, applyId,
hash } });
+ await runStructuralPoll(applyId, hash);
+}
+
+function resumeStructural(applyId: string, hash: string): void {
+ if (!polling.value) void runStructuralPoll(applyId, hash);
+}
+
+async function stripApplyQuery(): Promise<void> {
+ if (!('applyId' in route.query) && !('hash' in route.query)) return;
+ const q = { ...route.query };
+ delete q.applyId;
+ delete q.hash;
+ await router.replace({ path: route.path, query: q });
+}
+
+function onRecheck(): void {
+ if (apply.value) resumeStructural(apply.value.applyId, apply.value.hash);
+}
+
+function onDismissApply(): void {
+ apply.value = null;
+}
+
+/** Force re-apply the current content to recover a DEGRADED / FAILED apply:
+ * re-runs the schema fence (re-checking laggards) and re-resumes any stuck
+ * peers. Byte-identical content is a no-op against a healthy node, but the
+ * re-apply still pauses collection for the rule's metrics — gate it. */
+function onRecover(): void {
+ if (!name.value) return;
+ confirm.value = {
+ title: t('Force re-apply to recover'),
+ intent: t('force re-apply to recover'),
+ warning: [
+ t('Re-applies the rule across the cluster to recover — this briefly
pauses collection for this rule’s metrics, even though the content is
unchanged.'),
+ t('Use this to clear a stuck apply or coax laggard nodes to re-confirm
the schema.'),
+ ],
+ perform: async () => {
+ const r = await editor.save({ force: true });
+ handleSaveOutcome(r);
+ },
+ };
+}
+
+function handleSaveOutcome(r: SaveOutcome): void {
if (r.kind === 'ok') {
+ apply.value = null;
setFlash(t('saved · {status}', { status: r.result.applyStatus }));
return;
}
- if (r.kind === 'error') {
- setFlash(extractErrorMessage(r.error));
+ if (r.kind === 'structural') {
+ void beginStructural(r.applyId);
return;
}
if (r.kind === 'needs-storage-change') {
@@ -144,11 +245,38 @@ async function onSave(): Promise<void> {
],
perform: async () => {
const ok = await editor.save({ allowStorageChange: true, force:
force.value });
- if (ok.kind === 'ok') setFlash(t('saved · {status}', { status:
ok.result.applyStatus }));
- else if (ok.kind === 'error') setFlash(extractErrorMessage(ok.error));
+ handleSaveOutcome(ok);
},
};
+ return;
+ }
+ if (r.kind === 'compile-failed') {
+ apply.value = null;
+ compileError.value = r.message;
+ return;
}
+ setFlash(extractErrorMessage(r.error));
+}
+
+async function onSave(): Promise<void> {
+ compileError.value = null;
+ const r = await editor.save({ force: force.value });
+ handleSaveOutcome(r);
+}
+
+// When revert-to-bundled comes back without an applyId (OAP ran its pipeline
+// inline), it still confirms by re-reading the rule after a `pending` (202
+// submitted). The action row stays disabled via `busy` so the operator can't
+// stack retries on OAP's per-file lock.
+const applying = ref(false);
+async function trackApply(): Promise<void> {
+ applying.value = true;
+ setFlash(t('submitted — OAP is applying it; a structural apply can take a
minute…'));
+ const done = (r: RuleResponse | null): boolean =>
+ r != null && (r.status === 'BUNDLED' || r.status === 'STATIC');
+ const res = await editor.awaitApplied(done);
+ applying.value = false;
+ setFlash(res === 'applied' ? t('applied ✓') : t('still applying on OAP —
refresh in a moment to confirm'));
}
async function onInactivate(): Promise<void> {
@@ -157,10 +285,6 @@ async function onInactivate(): Promise<void> {
setFlash(t('inactivated · {status}', { status: r.result.applyStatus }));
return;
}
- if (r.kind === 'pending') {
- void trackApply('inactivate');
- return;
- }
if (r.kind === 'error') {
setFlash(extractErrorMessage(r.error));
}
@@ -174,7 +298,9 @@ async function onDeleteDefault(): Promise<void> {
return;
}
if (r.kind === 'pending') {
- void trackApply('delete');
+ // Plain delete is synchronous; a 202 is not expected here, but stay
+ // honest if OAP ever defers it.
+ setFlash(t('still applying on OAP — refresh in a moment to confirm'));
return;
}
if (r.kind === 'needs-inactivate-first') {
@@ -198,17 +324,41 @@ function onDeleteRevertToBundled(): void {
t('OAP runs the standard apply pipeline against the bundled YAML — this
is a schema change.'),
t('Runtime-only metrics that the bundled rule does not define will be
dropped from BanyanDB.'),
t('Bundled-only metrics will be installed.'),
+ ...(editor.original.value?.status === 'ACTIVE'
+ ? [t('An ACTIVE rule is inactivated first, then reverted.')]
+ : []),
t('Returns 400 no_bundled_twin if the rule has no bundled version on
disk.'),
],
perform: async () => {
- const r = await editor.deleteRule('revertToBundled');
+ let r = await editor.deleteRule('revertToBundled');
+ // OAP gates revert behind the row being INACTIVE (same two-step gate as
+ // delete). The operator already confirmed the revert, so inactivate and
+ // retry as one action rather than dead-ending with a 409.
+ if (r.kind === 'needs-inactivate-first') {
+ const ina = await editor.inactivate();
+ if (ina.kind === 'error') {
+ setFlash(extractErrorMessage(ina.error));
+ return;
+ }
+ r = await editor.deleteRule('revertToBundled');
+ }
+ // New OAP: revert is async with an applyId — drive the same phase
stepper
+ // as a structural save, hashing the bundled content that's being
applied.
+ if (r.kind === 'structural') {
+ void beginStructural(r.applyId, bundled.value?.content);
+ return;
+ }
if (r.kind === 'ok') {
setFlash(t('reverted · {status}', { status: r.result.applyStatus }));
await router.push({ name: 'catalog', params: { catalog: catalog.value
?? '' } });
return;
}
if (r.kind === 'pending') {
- void trackApply('revert');
+ void trackApply();
+ return;
+ }
+ if (r.kind === 'needs-inactivate-first') {
+ setFlash(t('rule is ACTIVE — inactivate first, then revert to
bundled'));
return;
}
if (r.kind === 'no-bundled-twin') {
@@ -282,7 +432,9 @@ const statusTone = computed(() => {
// the "diff vs. bundled" tab (the old `source === 'static'` clause was dead:
// RuleSource is only ever 'runtime' | 'bundled'). The fetched content is
// cached in `bundled`, so the diff tab then opens instantly. Guarded against
-// a stale fetch resolving after a newer rule was selected.
+// a stale fetch resolving after a newer rule was selected. Also resumes an
+// in-flight structural apply across a reload, or clears stale apply state
+// when navigating to a different rule.
watch(
[catalog, name],
() => {
@@ -295,6 +447,18 @@ watch(
if (catalog.value === c && name.value === n) bundled.value = b;
})
.catch(() => {});
+
+ // applyId lives in the URL only while a structural apply is non-terminal,
+ // so its presence on (re)mount means "was applying when the page was
+ // left" — resume the stepper. Otherwise this is a fresh rule view.
+ const aid = route.query.applyId;
+ const hash = route.query.hash;
+ if (typeof aid === 'string' && aid && typeof hash === 'string' && hash) {
+ resumeStructural(aid, hash);
+ } else {
+ apply.value = null;
+ compileError.value = null;
+ }
},
{ immediate: true },
);
@@ -321,7 +485,9 @@ const isOperatorRow = computed<boolean>(() => {
</Pill>
<Pill v-if="editor.dirty.value" tone="active">{{ t('unsaved') }}</Pill>
<Pill v-if="applying" tone="info">{{ t('applying…') }}</Pill>
- <Pill v-if="!applying && editor.lastApplyStatus.value" tone="info">
+ <!-- The structural-apply phase stepper owns the live status while a
+ structural apply is tracked; suppress the raw applyStatus pill
then. -->
+ <Pill v-if="!applying && !apply && editor.lastApplyStatus.value"
tone="info">
{{ editor.lastApplyStatus.value }}
</Pill>
<div class="ed__spacer" />
@@ -361,7 +527,7 @@ const isOperatorRow = computed<boolean>(() => {
<Btn
kind="primary"
- :disabled="!canWrite || !editor.dirty.value || editor.saving.value ||
applying"
+ :disabled="!canWrite || !editor.dirty.value || busy"
:data-testid="'editor-save'"
@click="onSave"
>
@@ -371,7 +537,7 @@ const isOperatorRow = computed<boolean>(() => {
the rule is already INACTIVE (or a pure bundled rule). -->
<Btn
v-if="editor.original.value?.status === 'ACTIVE'"
- :disabled="!canWrite || editor.saving.value || applying"
+ :disabled="!canWrite || busy"
@click="onInactivate"
>
{{ t('inactivate') }}
@@ -382,7 +548,7 @@ const isOperatorRow = computed<boolean>(() => {
<Btn
v-if="isOperatorRow && !hasBundledTwin"
kind="danger"
- :disabled="!canDelete || editor.saving.value || applying"
+ :disabled="!canDelete || busy"
@click="onDeleteDefault"
>
{{ t('delete') }}
@@ -390,7 +556,7 @@ const isOperatorRow = computed<boolean>(() => {
<Btn
v-if="isOperatorRow && hasBundledTwin"
kind="danger"
- :disabled="!canDelete || !canWriteStructural || editor.saving.value ||
applying"
+ :disabled="!canDelete || !canWriteStructural || busy"
:data-testid="'editor-revert'"
@click="onDeleteRevertToBundled"
>
@@ -398,7 +564,25 @@ const isOperatorRow = computed<boolean>(() => {
</Btn>
</div>
- <p v-if="flash" class="ed__flash" :data-testid="'editor-flash'">{{ flash
}}</p>
+ <!-- Structural-apply progress (phase stepper) takes the flash slot while
+ an apply is tracked; DEGRADED / FAILED keep it for recover / dismiss.
-->
+ <ApplyProgress
+ v-if="apply"
+ :phase="apply.phase"
+ :failure-reason="apply.failureReason"
+ :fence-laggards="apply.fenceLaggards"
+ :derived-from="apply.derivedFrom"
+ :can-recover="canWriteStructural"
+ :busy="busy"
+ @recover="onRecover"
+ @recheck="onRecheck"
+ @dismiss="onDismissApply"
+ />
+ <p v-else-if="flash" class="ed__flash" :data-testid="'editor-flash'">{{
flash }}</p>
+
+ <p v-if="compileError" class="ed__compile"
:data-testid="'editor-compile-error'">
+ {{ compileError }}
+ </p>
<div class="ed__editorWrap">
<div v-if="editor.loading.value" class="ed__placeholder">{{
t('loading…') }}</div>
@@ -406,10 +590,14 @@ const isOperatorRow = computed<boolean>(() => {
{{ t('Could not load: {err}', { err: editor.loadError.value }) }}
</div>
+ <!-- Read-only while a save / apply poll is in flight: a terminal
+ APPLIED refreshes the buffer from the server, which would silently
+ discard edits typed during the poll. -->
<MonacoYaml
v-else-if="diffMode === 'none'"
:model-value="editor.buffer.value"
:catalog="catalog"
+ :read-only="busy"
@update:model-value="(v: string) => (editor.buffer.value = v)"
@debug-click="onDebugClick"
/>
@@ -559,6 +747,19 @@ const isOperatorRow = computed<boolean>(() => {
color: var(--rr-info);
}
+.ed__compile {
+ margin: 0;
+ padding: 8px 12px;
+ background: var(--rr-bg2);
+ border-left: 2px solid var(--rr-err);
+ border-radius: var(--rr-radius-md);
+ font-family: var(--rr-font-mono);
+ font-size: var(--sw-fs-sm);
+ color: var(--rr-err);
+ white-space: pre-wrap;
+ word-break: break-word;
+}
+
.ed__editorWrap {
flex: 1 1 auto;
min-height: 320px;
diff --git a/apps/ui/src/features/operate/dsl/contentHash.ts
b/apps/ui/src/features/operate/dsl/contentHash.ts
new file mode 100644
index 0000000..425b8f8
--- /dev/null
+++ b/apps/ui/src/features/operate/dsl/contentHash.ts
@@ -0,0 +1,41 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/**
+ * SHA-256 (lowercase hex) of a rule's content — matches OAP's
+ * `ContentHash.sha256Hex` (UTF-8 bytes) so it can stand in as the durable
+ * identity when polling `/runtime/rule/status` after the applyId is gone
+ * (the page-reload resume path).
+ *
+ * Best-effort: `crypto.subtle` only exists in a secure context (https /
+ * localhost). When the UI is reached over plain http by IP/hostname it is
+ * undefined, so we return `''` rather than throw — the apply is then tracked
+ * by `applyId` alone (which works for the live session; only reload-resume
+ * after a long gap degrades). Never let a hash failure abort apply tracking.
+ */
+export async function sha256Hex(content: string): Promise<string> {
+  try {
+    // No WebCrypto (insecure context): report "no hash" instead of throwing.
+    const subtle = globalThis.crypto?.subtle;
+    if (!subtle) return '';
+    // Hash the UTF-8 encoding of the content.
+    const digest = await subtle.digest('SHA-256', new TextEncoder().encode(content));
+    // Render each digest byte as two lowercase hex characters.
+    let hex = '';
+    for (const byte of new Uint8Array(digest)) {
+      hex += byte.toString(16).padStart(2, '0');
+    }
+    return hex;
+  } catch {
+    // Best-effort by contract: any failure degrades to the empty hash.
+    return '';
+  }
+}
diff --git a/apps/ui/src/features/operate/dsl/useRuleEditor.ts
b/apps/ui/src/features/operate/dsl/useRuleEditor.ts
index 4ccfad1..4f2cf1a 100644
--- a/apps/ui/src/features/operate/dsl/useRuleEditor.ts
+++ b/apps/ui/src/features/operate/dsl/useRuleEditor.ts
@@ -26,21 +26,38 @@
*/
import { computed, ref, watch, type Ref } from 'vue';
-import type { ApplyResult, Catalog, DeleteMode, RuleResponse } from
'@skywalking-horizon-ui/api-client';
+import type {
+ ApplyResult,
+ Catalog,
+ DeleteMode,
+ RuleResponse,
+ RuleStatusResponse,
+} from '@skywalking-horizon-ui/api-client';
+import { isTerminalPhase } from '@skywalking-horizon-ui/api-client';
import { bff, type BffApiError } from '@/api/client';
-// `pending` = the BFF returned 202 `submitted`: OAP accepted a structural
-// apply that runs past its admin request timeout, so the HTTP call can't
-// confirm completion. The caller polls (see `awaitApplied`) instead of
-// retrying — a retry would pile another waiter on OAP's per-file lock.
+// A STRUCTURAL `/addOrUpdate` returns 200 `structural_applied` + an `applyId`
+// at phase FENCING ("accepted, not yet durable") — the fence → persist →
+// commit → resume tail runs in OAP's background. `structural` hands the
+// applyId back so the caller polls `awaitPhase` until the phase is terminal.
+// `compile-failed` is the inline-diagnostic case (400 compile_failed).
export type SaveOutcome =
| { kind: 'ok'; result: ApplyResult }
- | { kind: 'pending'; result: ApplyResult }
+ | { kind: 'structural'; result: ApplyResult; applyId: string }
| { kind: 'needs-storage-change'; result: ApplyResult }
+ | { kind: 'compile-failed'; message: string }
| { kind: 'error'; error: BffApiError | Error };
+// `structural` = revert-to-bundled adopted the async apply API: OAP returns
+// `reverted_to_bundled` + an applyId and runs the schema change in the
+// background — the caller drives the phase stepper via `awaitPhase`, same as a
+// structural save. `pending` is the legacy fallback for an OAP that runs
revert
+// inline (no applyId) and times out into a 202 `submitted`: the caller
confirms
+// via `awaitApplied` (poll-by-reread) rather than retrying onto OAP's per-file
+// lock.
export type DeleteOutcome =
| { kind: 'ok'; result: ApplyResult }
+ | { kind: 'structural'; result: ApplyResult; applyId: string }
| { kind: 'pending'; result: ApplyResult }
| { kind: 'needs-inactivate-first'; result: ApplyResult }
| { kind: 'no-bundled-twin'; result: ApplyResult }
@@ -126,16 +143,27 @@ export function useRuleEditor(opts: UseRuleEditorOptions)
{
...args,
});
lastApplyStatus.value = result.applyStatus;
- if (result.applyStatus === 'submitted') return { kind: 'pending', result
};
- // Refresh the original to the just-pushed body so dirty resets.
+ // Structural apply is async: it's accepted at FENCING but not durable
+ // yet. Hand back the applyId; the caller polls /status and only then
+ // refreshes — re-reading now would still show the pre-apply row.
+ if (result.applyStatus === 'structural_applied' && result.applyId) {
+ return { kind: 'structural', result, applyId: result.applyId };
+ }
+ // no_change / filter_only_applied / filter_only_persisted — synchronous;
+ // refresh the original to the just-pushed body so dirty resets.
await load();
return { kind: 'ok', result };
} catch (err) {
- if (isApiError(err) && err.status === 409) {
+ if (isApiError(err)) {
const body = err.body as ApplyResult | undefined;
- if (body && body.applyStatus ===
'storage_change_requires_explicit_approval') {
+ if (err.status === 409 && body?.applyStatus ===
'storage_change_requires_explicit_approval') {
return { kind: 'needs-storage-change', result: body };
}
+ // A compile/parse failure is the operator's to fix in the editor —
+ // surface it inline rather than as a transient error toast.
+ if (err.status === 400 && body?.applyStatus === 'compile_failed') {
+ return { kind: 'compile-failed', message: body.message };
+ }
}
return { kind: 'error', error: err as Error };
} finally {
@@ -149,9 +177,10 @@ export function useRuleEditor(opts: UseRuleEditorOptions) {
}
saving.value = true;
try {
+ // Inactivate carries no schema change — it returns synchronously, so
+ // there's nothing async to poll.
const result = await client.dsl.inactivateRule(opts.catalog.value,
opts.name.value);
lastApplyStatus.value = result.applyStatus;
- if (result.applyStatus === 'submitted') return { kind: 'pending', result
};
await load();
return { kind: 'ok', result };
} catch (err) {
@@ -169,6 +198,15 @@ export function useRuleEditor(opts: UseRuleEditorOptions) {
try {
const result = await client.dsl.deleteRule(opts.catalog.value,
opts.name.value, mode);
lastApplyStatus.value = result.applyStatus;
+ // Revert-to-bundled is structural; a new OAP returns an applyId to poll
+ // (same phase machine as a structural save). Old OAP runs it inline and
+ // may time out into the legacy 202 `submitted` (poll-by-reread).
+ if (
+ result.applyId &&
+ (result.applyStatus === 'reverted_to_bundled' || result.applyStatus
=== 'structural_applied')
+ ) {
+ return { kind: 'structural', result, applyId: result.applyId };
+ }
if (result.applyStatus === 'submitted') return { kind: 'pending', result
};
return { kind: 'ok', result };
} catch (err) {
@@ -227,6 +265,55 @@ export function useRuleEditor(opts: UseRuleEditorOptions) {
return 'timeout';
}
+ /**
+ * Drive a STRUCTURAL apply to completion by polling `/runtime/rule/status`.
+ * `onPhase` fires on every poll so the caller can render a phase stepper.
+ * Polls by `applyId` (the live tracker) and carries `contentHash` so the
+ * server can degrade to the durable rule row if the applyId was evicted.
+ * Resolves with the terminal {@link RuleStatusResponse} (APPLIED / DEGRADED
+ * / FAILED) — or the last seen status if the budget elapses (OAP itself
+ * moves to a terminal phase by `deferredFenceTimeoutSeconds`, so a real
+ * apply lands well inside the default budget). On APPLIED / DEGRADED the
+ * durable row advanced, so it refreshes `original` (dirty resets); FAILED
+ * rolled back, so the operator's buffer is left intact to fix and retry.
+ * Stops early if the operator navigated to a different (catalog, name).
+ */
+ async function awaitPhase(
+   applyId: string,
+   contentHash: string,
+   onPhase: (status: RuleStatusResponse) => void,
+   o: { timeoutMs?: number; intervalMs?: number } = {},
+ ): Promise<RuleStatusResponse | null> {
+   if (!opts.catalog.value || !opts.name.value) return null;
+   // Pin the (catalog, name) this apply belongs to; the refs may change under
+   // us while a request or the inter-poll sleep is pending.
+   const cat = opts.catalog.value;
+   const nm = opts.name.value;
+   const stillOnRule = (): boolean => opts.catalog.value === cat && opts.name.value === nm;
+   const timeoutMs = o.timeoutMs ?? 200_000;
+   const intervalMs = o.intervalMs ?? 1_800;
+   const start = Date.now();
+   let last: RuleStatusResponse | null = null;
+   while (Date.now() - start < timeoutMs) {
+     if (!stillOnRule()) return last;
+     try {
+       const status = await client.dsl.ruleStatus({ catalog: cat, name: nm, applyId, contentHash });
+       // The operator may have switched rules while the request was in
+       // flight: don't report this apply's phase (or reload) against the
+       // rule that is now on screen.
+       if (!stillOnRule()) return last;
+       last = status;
+       onPhase(status);
+       // Stop on a terminal phase, OR when the apply is no longer tracked
+       // (UNKNOWN / found:false) — e.g. the applyId was evicted (~1h TTL,
+       // main restart) and no durable row matched the contentHash. Polling
+       // on past that just burns the budget. APPLIED/DEGRADED advanced the
+       // durable row, so refresh; FAILED/UNKNOWN leave the buffer alone.
+       if (isTerminalPhase(status.phase) || !status.found || status.phase === 'UNKNOWN') {
+         if (status.phase === 'APPLIED' || status.phase === 'DEGRADED') await load();
+         return status;
+       }
+     } catch {
+       // transient read error mid-apply — keep polling.
+     }
+     await new Promise((r) => setTimeout(r, intervalMs));
+   }
+   // Budget elapsed without a terminal phase: hand back the last status seen.
+   return last;
+ }
+
// Auto-load whenever (catalog, name) settles.
watch(
[opts.catalog, opts.name],
@@ -251,6 +338,7 @@ export function useRuleEditor(opts: UseRuleEditorOptions) {
inactivate,
deleteRule,
awaitApplied,
+ awaitPhase,
};
}
diff --git a/apps/ui/src/i18n/locales/de.json b/apps/ui/src/i18n/locales/de.json
index a3e934b..ef3c8aa 100644
--- a/apps/ui/src/i18n/locales/de.json
+++ b/apps/ui/src/i18n/locales/de.json
@@ -1426,5 +1426,27 @@
"Comparing {n} instances across services": "Vergleiche {n} Instanzen über
Services hinweg",
"Comparing {n} endpoints across services": "Vergleiche {n} Endpunkte über
Services hinweg",
"Failed to load": "Laden fehlgeschlagen",
- "{n} locked · lock 1 more to compare": "{n} gesperrt · 1 weitere(n) sperren
zum Vergleichen"
+ "{n} locked · lock 1 more to compare": "{n} gesperrt · 1 weitere(n) sperren
zum Vergleichen",
+ "Applying schema change": "Schemaänderung wird angewendet",
+ "Schema change applied": "Schemaänderung angewendet",
+ "Compiled & schema applied": "Kompiliert & Schema angewendet",
+ "Confirming across the cluster…": "Wird clusterweit bestätigt…",
+ "Committing": "Wird übernommen",
+ "Done": "Fertig",
+ "Schema changes apply across the cluster in the background — usually
seconds, up to a few minutes if a node is slow. You can leave this page.":
"Schemaänderungen verteilen sich im Hintergrund über den Cluster — meist
Sekunden, bei einem langsamen Node bis zu wenige Minuten. Sie können diese
Seite verlassen.",
+ "Applied — cluster propagation unconfirmed": "Angewendet —
Cluster-Verteilung unbestätigt",
+ "Durable and applied, but cluster-wide schema propagation wasn’t confirmed
in time. The listed nodes catch up automatically on their next scan.":
"Dauerhaft gespeichert und angewendet, doch die clusterweite Schema-Verteilung
wurde nicht rechtzeitig bestätigt. Die aufgeführten Nodes ziehen beim nächsten
Scan automatisch nach.",
+ "Waiting on: {nodes}": "Ausstehend: {nodes}",
+ "Apply failed — rolled back": "Anwenden fehlgeschlagen — zurückgerollt",
+ "The change was rolled back; the cluster is still on the previous rule. Fix
the issue and save again.": "Die Änderung wurde zurückgerollt; der Cluster
läuft weiterhin mit der vorherigen Regel. Beheben Sie das Problem und speichern
Sie erneut.",
+ "applied (from stored state)": "angewendet (aus gespeichertem Zustand)",
+ "Force re-apply (recover)": "Erneut anwenden erzwingen (wiederherstellen)",
+ "Re-check": "Erneut prüfen",
+ "Force re-apply to recover": "Erneut anwenden erzwingen, um
wiederherzustellen",
+ "force re-apply to recover": "erneut anwenden erzwingen, um
wiederherzustellen",
+ "Re-applies the rule across the cluster to recover — this briefly pauses
collection for this rule’s metrics, even though the content is unchanged.":
"Wendet die Regel clusterweit erneut an, um sie wiederherzustellen — dies
pausiert kurz die Erfassung der Metriken dieser Regel, auch wenn der Inhalt
unverändert ist.",
+ "Use this to clear a stuck apply or coax laggard nodes to re-confirm the
schema.": "Damit lässt sich ein hängendes Anwenden lösen oder säumige Nodes zur
erneuten Bestätigung des Schemas bewegen.",
+ "Apply status is no longer tracked — reload to see the stored rule.":
"Apply-Status wird nicht mehr verfolgt — neu laden, um die gespeicherte Regel
anzuzeigen.",
+ "An ACTIVE rule is inactivated first, then reverted.": "Eine ACTIVE-Regel
wird zuerst inaktiviert und dann zurückgesetzt.",
+ "rule is ACTIVE — inactivate first, then revert to bundled": "Regel ist
ACTIVE — zuerst inaktivieren, dann auf Bundled zurücksetzen"
}
diff --git a/apps/ui/src/i18n/locales/en.json b/apps/ui/src/i18n/locales/en.json
index e7da311..8e3cc70 100644
--- a/apps/ui/src/i18n/locales/en.json
+++ b/apps/ui/src/i18n/locales/en.json
@@ -1411,6 +1411,28 @@
"submitted — OAP is applying it; a structural apply can take a minute…":
"submitted — OAP is applying it; a structural apply can take a minute…",
"applied ✓": "applied ✓",
"still applying on OAP — refresh in a moment to confirm": "still applying on
OAP — refresh in a moment to confirm",
+ "Applying schema change": "Applying schema change",
+ "Schema change applied": "Schema change applied",
+ "Compiled & schema applied": "Compiled & schema applied",
+ "Confirming across the cluster…": "Confirming across the cluster…",
+ "Committing": "Committing",
+ "Done": "Done",
+ "Schema changes apply across the cluster in the background — usually
seconds, up to a few minutes if a node is slow. You can leave this page.":
"Schema changes apply across the cluster in the background — usually seconds,
up to a few minutes if a node is slow. You can leave this page.",
+ "Applied — cluster propagation unconfirmed": "Applied — cluster propagation
unconfirmed",
+ "Durable and applied, but cluster-wide schema propagation wasn’t confirmed
in time. The listed nodes catch up automatically on their next scan.": "Durable
and applied, but cluster-wide schema propagation wasn’t confirmed in time. The
listed nodes catch up automatically on their next scan.",
+ "Waiting on: {nodes}": "Waiting on: {nodes}",
+ "Apply failed — rolled back": "Apply failed — rolled back",
+ "The change was rolled back; the cluster is still on the previous rule. Fix
the issue and save again.": "The change was rolled back; the cluster is still
on the previous rule. Fix the issue and save again.",
+ "applied (from stored state)": "applied (from stored state)",
+ "Force re-apply (recover)": "Force re-apply (recover)",
+ "Re-check": "Re-check",
+ "Force re-apply to recover": "Force re-apply to recover",
+ "force re-apply to recover": "force re-apply to recover",
+ "Re-applies the rule across the cluster to recover — this briefly pauses
collection for this rule’s metrics, even though the content is unchanged.":
"Re-applies the rule across the cluster to recover — this briefly pauses
collection for this rule’s metrics, even though the content is unchanged.",
+ "Use this to clear a stuck apply or coax laggard nodes to re-confirm the
schema.": "Use this to clear a stuck apply or coax laggard nodes to re-confirm
the schema.",
+ "Apply status is no longer tracked — reload to see the stored rule.": "Apply
status is no longer tracked — reload to see the stored rule.",
+ "An ACTIVE rule is inactivated first, then reverted.": "An ACTIVE rule is
inactivated first, then reverted.",
+ "rule is ACTIVE — inactivate first, then revert to bundled": "rule is ACTIVE
— inactivate first, then revert to bundled",
"Add to comparison": "Add to comparison",
"Remove from comparison": "Remove from comparison",
"Comparison is full ({max} max)": "Comparison is full ({max} max)",
diff --git a/apps/ui/src/i18n/locales/es.json b/apps/ui/src/i18n/locales/es.json
index 9a87719..547f466 100644
--- a/apps/ui/src/i18n/locales/es.json
+++ b/apps/ui/src/i18n/locales/es.json
@@ -1426,5 +1426,27 @@
"Comparing {n} instances across services": "Comparando {n} instancias entre
servicios",
"Comparing {n} endpoints across services": "Comparando {n} endpoints entre
servicios",
"Failed to load": "Error al cargar",
- "{n} locked · lock 1 more to compare": "{n} bloqueado(s) · bloquea 1 más
para comparar"
+ "{n} locked · lock 1 more to compare": "{n} bloqueado(s) · bloquea 1 más
para comparar",
+ "Applying schema change": "Aplicando cambio de esquema",
+ "Schema change applied": "Cambio de esquema aplicado",
+ "Compiled & schema applied": "Compilado y esquema aplicado",
+ "Confirming across the cluster…": "Confirmando en todo el clúster…",
+ "Committing": "Confirmando",
+ "Done": "Listo",
+ "Schema changes apply across the cluster in the background — usually
seconds, up to a few minutes if a node is slow. You can leave this page.": "Los
cambios de esquema se aplican en todo el clúster en segundo plano: normalmente
segundos, hasta unos minutos si un nodo va lento. Puedes salir de esta página.",
+ "Applied — cluster propagation unconfirmed": "Aplicado: propagación en el
clúster sin confirmar",
+ "Durable and applied, but cluster-wide schema propagation wasn’t confirmed
in time. The listed nodes catch up automatically on their next scan.": "Cambio
persistente y aplicado, pero no se confirmó a tiempo la propagación del esquema
en todo el clúster. Los nodos indicados se ponen al día solos en su próximo
escaneo.",
+ "Waiting on: {nodes}": "A la espera de: {nodes}",
+ "Apply failed — rolled back": "Error al aplicar: revertido",
+ "The change was rolled back; the cluster is still on the previous rule. Fix
the issue and save again.": "El cambio se revirtió; el clúster sigue con la
regla anterior. Corrige el problema y vuelve a guardar.",
+ "applied (from stored state)": "aplicado (desde el estado almacenado)",
+ "Force re-apply (recover)": "Forzar reaplicación (recuperar)",
+ "Re-check": "Volver a comprobar",
+ "Force re-apply to recover": "Forzar reaplicación para recuperar",
+ "force re-apply to recover": "forzar reaplicación para recuperar",
+ "Re-applies the rule across the cluster to recover — this briefly pauses
collection for this rule’s metrics, even though the content is unchanged.":
"Vuelve a aplicar la regla en todo el clúster para recuperarse: esto pausa
brevemente la recolección de las métricas de esta regla, aunque el contenido no
cambie.",
+ "Use this to clear a stuck apply or coax laggard nodes to re-confirm the
schema.": "Úsalo para desatascar una aplicación bloqueada o para que los nodos
rezagados vuelvan a confirmar el esquema.",
+ "Apply status is no longer tracked — reload to see the stored rule.": "El
estado de la aplicación ya no se rastrea — recarga para ver la regla
almacenada.",
+ "An ACTIVE rule is inactivated first, then reverted.": "Una regla ACTIVE se
inactiva primero y luego se revierte.",
+ "rule is ACTIVE — inactivate first, then revert to bundled": "la regla está
ACTIVE — inactívala primero y luego revierte a la bundled"
}
diff --git a/apps/ui/src/i18n/locales/fr.json b/apps/ui/src/i18n/locales/fr.json
index 4fc2ffb..99ed545 100644
--- a/apps/ui/src/i18n/locales/fr.json
+++ b/apps/ui/src/i18n/locales/fr.json
@@ -1426,5 +1426,27 @@
"Comparing {n} instances across services": "Comparaison de {n} instances
entre services",
"Comparing {n} endpoints across services": "Comparaison de {n} endpoints
entre services",
"Failed to load": "Échec du chargement",
- "{n} locked · lock 1 more to compare": "{n} verrouillé(s) · verrouillez-en 1
de plus pour comparer"
+ "{n} locked · lock 1 more to compare": "{n} verrouillé(s) · verrouillez-en 1
de plus pour comparer",
+ "Applying schema change": "Application du changement de schéma",
+ "Schema change applied": "Changement de schéma appliqué",
+ "Compiled & schema applied": "Compilé et schéma appliqué",
+ "Confirming across the cluster…": "Confirmation sur l’ensemble du cluster…",
+ "Committing": "Validation",
+ "Done": "Terminé",
+ "Schema changes apply across the cluster in the background — usually
seconds, up to a few minutes if a node is slow. You can leave this page.": "Les
changements de schéma se propagent en arrière-plan sur le cluster —
généralement en quelques secondes, jusqu’à quelques minutes si un nœud est
lent. Vous pouvez quitter cette page.",
+ "Applied — cluster propagation unconfirmed": "Appliqué — propagation au
cluster non confirmée",
+ "Durable and applied, but cluster-wide schema propagation wasn’t confirmed
in time. The listed nodes catch up automatically on their next scan.":
"Appliqué et durable, mais la propagation du schéma à l’ensemble du cluster n’a
pas pu être confirmée à temps. Les nœuds listés se mettent à jour
automatiquement lors de leur prochaine analyse.",
+ "Waiting on: {nodes}": "En attente de : {nodes}",
+ "Apply failed — rolled back": "Échec de l’application — annulé",
+ "The change was rolled back; the cluster is still on the previous rule. Fix
the issue and save again.": "Le changement a été annulé ; le cluster utilise
toujours la règle précédente. Corrigez le problème et enregistrez à nouveau.",
+ "applied (from stored state)": "appliqué (depuis l’état stocké)",
+ "Force re-apply (recover)": "Forcer la réapplication (rétablir)",
+ "Re-check": "Revérifier",
+ "Force re-apply to recover": "Forcer la réapplication pour rétablir",
+ "force re-apply to recover": "forcer la réapplication pour rétablir",
+ "Re-applies the rule across the cluster to recover — this briefly pauses
collection for this rule’s metrics, even though the content is unchanged.":
"Réapplique la règle sur l’ensemble du cluster pour rétablir — cela interrompt
brièvement la collecte des métriques de cette règle, même si le contenu est
inchangé.",
+ "Use this to clear a stuck apply or coax laggard nodes to re-confirm the
schema.": "À utiliser pour débloquer une application figée ou inciter les nœuds
en retard à reconfirmer le schéma.",
+ "Apply status is no longer tracked — reload to see the stored rule.":
"L’état de l’application n’est plus suivi — rechargez pour voir la règle
enregistrée.",
+ "An ACTIVE rule is inactivated first, then reverted.": "Une règle ACTIVE est
d’abord désactivée, puis rétablie.",
+ "rule is ACTIVE — inactivate first, then revert to bundled": "la règle est
ACTIVE — désactivez-la d’abord, puis rétablissez la version bundled"
}
diff --git a/apps/ui/src/i18n/locales/ja.json b/apps/ui/src/i18n/locales/ja.json
index e2ff61d..067f5cf 100644
--- a/apps/ui/src/i18n/locales/ja.json
+++ b/apps/ui/src/i18n/locales/ja.json
@@ -1426,5 +1426,27 @@
"Comparing {n} instances across services": "サービスをまたいで {n} 件のインスタンスを比較中",
"Comparing {n} endpoints across services": "サービスをまたいで {n} 件のエンドポイントを比較中",
"Failed to load": "読み込みに失敗しました",
- "{n} locked · lock 1 more to compare": "{n} 件ロック中 · 比較するにはもう 1 件ロック"
+ "{n} locked · lock 1 more to compare": "{n} 件ロック中 · 比較するにはもう 1 件ロック",
+ "Applying schema change": "スキーマ変更を適用中",
+ "Schema change applied": "スキーマ変更を適用しました",
+ "Compiled & schema applied": "コンパイルおよびスキーマ適用が完了",
+ "Confirming across the cluster…": "クラスタ全体で確認中…",
+ "Committing": "コミット中",
+ "Done": "完了",
+ "Schema changes apply across the cluster in the background — usually
seconds, up to a few minutes if a node is slow. You can leave this page.":
"スキーマ変更はバックグラウンドでクラスタ全体に適用されます —
通常は数秒、ノードの処理が遅い場合は数分かかることがあります。このページを離れても問題ありません。",
+ "Applied — cluster propagation unconfirmed": "適用済み — クラスタへの伝播は未確認",
+ "Durable and applied, but cluster-wide schema propagation wasn’t confirmed
in time. The listed nodes catch up automatically on their next scan.":
"永続化され適用も完了していますが、クラスタ全体へのスキーマ伝播は時間内に確認できませんでした。記載のノードは次回スキャン時に自動的に追従します。",
+ "Waiting on: {nodes}": "待機中: {nodes}",
+ "Apply failed — rolled back": "適用に失敗 — ロールバック済み",
+ "The change was rolled back; the cluster is still on the previous rule. Fix
the issue and save again.": "変更はロールバックされ、クラスタは以前のルールのままです。問題を修正してから再度保存してください。",
+ "applied (from stored state)": "適用済み(保存された状態より)",
+ "Force re-apply (recover)": "強制再適用(リカバリ)",
+ "Re-check": "再確認",
+ "Force re-apply to recover": "リカバリのため強制再適用",
+ "force re-apply to recover": "リカバリのため強制再適用",
+ "Re-applies the rule across the cluster to recover — this briefly pauses
collection for this rule’s metrics, even though the content is unchanged.":
"ルールをクラスタ全体に再適用してリカバリします — 内容は変わりませんが、このルールのメトリクス収集が一時的に停止します。",
+ "Use this to clear a stuck apply or coax laggard nodes to re-confirm the
schema.": "停止した適用の解消や、追従が遅れているノードにスキーマの再確認を促すために使用します。",
+ "Apply status is no longer tracked — reload to see the stored rule.":
"適用ステータスは追跡されていません — 再読み込みして保存済みのルールを確認してください。",
+ "An ACTIVE rule is inactivated first, then reverted.": "ACTIVE
のルールは、まず無効化してから元に戻します。",
+ "rule is ACTIVE — inactivate first, then revert to bundled": "ルールは ACTIVE です
— 先に無効化してから bundled に戻してください"
}
diff --git a/apps/ui/src/i18n/locales/ko.json b/apps/ui/src/i18n/locales/ko.json
index fdc38f7..fb6a072 100644
--- a/apps/ui/src/i18n/locales/ko.json
+++ b/apps/ui/src/i18n/locales/ko.json
@@ -1426,5 +1426,27 @@
"Comparing {n} instances across services": "여러 서비스에 걸쳐 인스턴스 {n}개 비교 중",
"Comparing {n} endpoints across services": "여러 서비스에 걸쳐 엔드포인트 {n}개 비교 중",
"Failed to load": "불러오지 못했습니다",
- "{n} locked · lock 1 more to compare": "{n}개 잠금 · 비교하려면 1개 더 잠그세요"
+ "{n} locked · lock 1 more to compare": "{n}개 잠금 · 비교하려면 1개 더 잠그세요",
+ "Applying schema change": "스키마 변경 적용 중",
+ "Schema change applied": "스키마 변경 적용 완료",
+ "Compiled & schema applied": "컴파일 및 스키마 적용 완료",
+ "Confirming across the cluster…": "클러스터 전체에서 확인 중…",
+ "Committing": "커밋 중",
+ "Done": "완료",
+ "Schema changes apply across the cluster in the background — usually
seconds, up to a few minutes if a node is slow. You can leave this page.": "스키마
변경은 백그라운드에서 클러스터 전체로 적용됩니다 — 보통 수 초, 느린 노드가 있으면 최대 수 분이 걸립니다. 이 페이지를 떠나도 됩니다.",
+ "Applied — cluster propagation unconfirmed": "적용됨 — 클러스터 전파 미확인",
+ "Durable and applied, but cluster-wide schema propagation wasn’t confirmed
in time. The listed nodes catch up automatically on their next scan.": "변경 사항은
durable하게 저장되어 적용되었지만, 클러스터 전체로의 스키마 전파가 제때 확인되지 않았습니다. 표시된 노드는 다음 스캔 시 자동으로
동기화됩니다.",
+ "Waiting on: {nodes}": "대기 중: {nodes}",
+ "Apply failed — rolled back": "적용 실패 — 롤백됨",
+ "The change was rolled back; the cluster is still on the previous rule. Fix
the issue and save again.": "변경 사항이 롤백되어 클러스터는 여전히 이전 규칙을 사용 중입니다. 문제를 해결한 뒤 다시
저장하세요.",
+ "applied (from stored state)": "적용됨 (저장된 상태 기준)",
+ "Force re-apply (recover)": "강제 재적용 (복구)",
+ "Re-check": "재확인",
+ "Force re-apply to recover": "강제 재적용으로 복구",
+ "force re-apply to recover": "강제 재적용으로 복구",
+ "Re-applies the rule across the cluster to recover — this briefly pauses
collection for this rule’s metrics, even though the content is unchanged.":
"규칙을 클러스터 전체에 다시 적용하여 복구합니다 — 내용은 그대로지만 이 규칙의 메트릭 수집이 잠시 중단됩니다.",
+ "Use this to clear a stuck apply or coax laggard nodes to re-confirm the
schema.": "멈춘 적용을 정리하거나 뒤처진 노드가 스키마를 다시 확인하도록 유도할 때 사용하세요.",
+ "Apply status is no longer tracked — reload to see the stored rule.": "적용
상태가 더 이상 추적되지 않습니다 — 다시 로드하여 저장된 규칙을 확인하세요.",
+ "An ACTIVE rule is inactivated first, then reverted.": "ACTIVE 규칙은 먼저 비활성화한
뒤 되돌립니다.",
+ "rule is ACTIVE — inactivate first, then revert to bundled": "규칙이 ACTIVE
상태입니다 — 먼저 비활성화한 뒤 bundled로 되돌리세요"
}
diff --git a/apps/ui/src/i18n/locales/pt.json b/apps/ui/src/i18n/locales/pt.json
index 6a76e35..a1bd3d6 100644
--- a/apps/ui/src/i18n/locales/pt.json
+++ b/apps/ui/src/i18n/locales/pt.json
@@ -1426,5 +1426,27 @@
"Comparing {n} instances across services": "Comparando {n} instâncias entre
serviços",
"Comparing {n} endpoints across services": "Comparando {n} endpoints entre
serviços",
"Failed to load": "Falha ao carregar",
- "{n} locked · lock 1 more to compare": "{n} bloqueado(s) · bloqueie mais 1
para comparar"
+ "{n} locked · lock 1 more to compare": "{n} bloqueado(s) · bloqueie mais 1
para comparar",
+ "Applying schema change": "Aplicando alteração de schema",
+ "Schema change applied": "Alteração de schema aplicada",
+ "Compiled & schema applied": "Compilado e schema aplicado",
+ "Confirming across the cluster…": "Confirmando em todo o cluster…",
+ "Committing": "Salvando",
+ "Done": "Concluído",
+ "Schema changes apply across the cluster in the background — usually
seconds, up to a few minutes if a node is slow. You can leave this page.": "As
alterações de schema se propagam pelo cluster em segundo plano — normalmente em
segundos, podendo levar alguns minutos se algum node estiver lento. Você pode
sair desta página.",
+ "Applied — cluster propagation unconfirmed": "Aplicado — propagação no
cluster não confirmada",
+ "Durable and applied, but cluster-wide schema propagation wasn’t confirmed
in time. The listed nodes catch up automatically on their next scan.":
"Persistido e aplicado, mas a propagação do schema por todo o cluster não foi
confirmada a tempo. Os nodes listados se atualizam automaticamente na próxima
varredura.",
+ "Waiting on: {nodes}": "Aguardando: {nodes}",
+ "Apply failed — rolled back": "Falha ao aplicar — revertido",
+ "The change was rolled back; the cluster is still on the previous rule. Fix
the issue and save again.": "A alteração foi revertida; o cluster continua na
regra anterior. Corrija o problema e salve novamente.",
+ "applied (from stored state)": "aplicado (a partir do estado armazenado)",
+ "Force re-apply (recover)": "Forçar reaplicação (recuperar)",
+ "Re-check": "Reverificar",
+ "Force re-apply to recover": "Forçar reaplicação para recuperar",
+ "force re-apply to recover": "forçar reaplicação para recuperar",
+ "Re-applies the rule across the cluster to recover — this briefly pauses
collection for this rule’s metrics, even though the content is unchanged.":
"Reaplica a regra em todo o cluster para recuperar — isso pausa brevemente a
coleta das métricas desta regra, mesmo que o conteúdo não tenha mudado.",
+ "Use this to clear a stuck apply or coax laggard nodes to re-confirm the
schema.": "Use isto para destravar uma aplicação presa ou induzir nodes
atrasados a reconfirmar o schema.",
+ "Apply status is no longer tracked — reload to see the stored rule.": "O
status da aplicação não é mais rastreado — recarregue para ver a regra
armazenada.",
+ "An ACTIVE rule is inactivated first, then reverted.": "Uma regra ACTIVE é
inativada primeiro e depois revertida.",
+ "rule is ACTIVE — inactivate first, then revert to bundled": "a regra está
ACTIVE — inative-a primeiro e depois reverta para a bundled"
}
diff --git a/apps/ui/src/i18n/locales/zh-CN.json
b/apps/ui/src/i18n/locales/zh-CN.json
index 6ab4fb6..2412473 100644
--- a/apps/ui/src/i18n/locales/zh-CN.json
+++ b/apps/ui/src/i18n/locales/zh-CN.json
@@ -1426,5 +1426,27 @@
"Comparing {n} instances across services": "正在跨服务对比 {n} 个实例",
"Comparing {n} endpoints across services": "正在跨服务对比 {n} 个端点",
"Failed to load": "加载失败",
- "{n} locked · lock 1 more to compare": "已锁定 {n} 个 · 再锁定 1 个即可对比"
+ "{n} locked · lock 1 more to compare": "已锁定 {n} 个 · 再锁定 1 个即可对比",
+ "Applying schema change": "正在应用 schema 变更",
+ "Schema change applied": "schema 变更已应用",
+ "Compiled & schema applied": "已编译并应用 schema",
+ "Confirming across the cluster…": "正在集群内确认…",
+ "Committing": "提交中",
+ "Done": "完成",
+ "Schema changes apply across the cluster in the background — usually
seconds, up to a few minutes if a node is slow. You can leave this page.":
"schema 变更会在后台应用到整个集群 —— 通常几秒即可完成,节点较慢时最多需要几分钟。可以离开此页面。",
+ "Applied — cluster propagation unconfirmed": "已应用 —— 集群同步未确认",
+ "Durable and applied, but cluster-wide schema propagation wasn’t confirmed
in time. The listed nodes catch up automatically on their next scan.":
"变更已持久化并应用,但未能及时确认集群范围内的 schema 同步。下列节点会在下次扫描时自动跟上。",
+ "Waiting on: {nodes}": "等待节点:{nodes}",
+ "Apply failed — rolled back": "应用失败 —— 已回滚",
+ "The change was rolled back; the cluster is still on the previous rule. Fix
the issue and save again.": "变更已回滚;集群仍在使用此前的规则。请修复问题后重新保存。",
+ "applied (from stored state)": "已应用(来自已存储状态)",
+ "Force re-apply (recover)": "强制重新应用(恢复)",
+ "Re-check": "重新检查",
+ "Force re-apply to recover": "强制重新应用以恢复",
+ "force re-apply to recover": "强制重新应用以恢复",
+ "Re-applies the rule across the cluster to recover — this briefly pauses
collection for this rule’s metrics, even though the content is unchanged.":
"重新将规则应用到整个集群以恢复 —— 即使内容未变,这也会短暂暂停采集该规则的指标。",
+ "Use this to clear a stuck apply or coax laggard nodes to re-confirm the
schema.": "用于清除卡住的应用,或促使滞后的节点重新确认 schema。",
+ "Apply status is no longer tracked — reload to see the stored rule.":
"应用状态已不再被跟踪 —— 重新加载以查看已存储的规则。",
+ "An ACTIVE rule is inactivated first, then reverted.": "ACTIVE
规则会先被停用,然后再回退。",
+ "rule is ACTIVE — inactivate first, then revert to bundled": "规则处于 ACTIVE 状态
—— 请先停用,然后再回退到 bundled"
}
diff --git a/docs/menu.yml b/docs/menu.yml
index c9562ba..0b33bca 100644
--- a/docs/menu.yml
+++ b/docs/menu.yml
@@ -61,6 +61,8 @@ catalog:
path: "/operate/data-retention"
- name: "OAP Configuration"
path: "/operate/oap-configuration"
+ - name: "Runtime Rules (DSL)"
+ path: "/operate/runtime-rules"
- name: "Metrics Inspect"
path: "/operate/inspect"
- name: "3D Infrastructure Map"
diff --git a/docs/operate/runtime-rules.md b/docs/operate/runtime-rules.md
new file mode 100644
index 0000000..6e79c81
--- /dev/null
+++ b/docs/operate/runtime-rules.md
@@ -0,0 +1,53 @@
+# Runtime Rules (DSL Management)
+
+Path: `/operate/dsl`. Verbs: `rule:read` to browse, `rule:write` /
`rule:write:structural` / `rule:delete` to change (granted by maintainer,
operator, admin).
+
+DSL Management lets you edit a connected OAP's analysis rules — MAL
(`otel-rules`, `telegraf-rules`, `log-mal-rules`) and LAL (`lal`) — at runtime,
without restarting the backend. You browse a catalog, open a rule in the
editor, and save; OAP applies the change live across the cluster. Bundled rules
shipped with OAP can be overridden, inactivated, or reverted to their bundled
version.
+
+## The two kinds of edit
+
+When you save, OAP classifies the change and one of two things happens:
+
+| Edit | What happens | What you see |
+|---|---|---|
+| Body / filter / tag only | Applied instantly on this node and picked up by
every other node on its next scan. No schema change, no collection gap. | A
brief "saved" confirmation. |
+| Structural — moves a metric's storage shape (scope, downsampling, or the
metric set added/removed) | OAP changes the backend schema and rolls it out
across the cluster. This runs in the background and can take from seconds to a
few minutes. | A live progress stepper (below). |
+
+A structural save is accepted immediately but is **not durable yet** — the
editor tracks it to completion so you know when the change is truly live. You
can leave the page while it runs; reopening the editor resumes the progress.
+
+## Structural apply progress
+
+The stepper walks through:
+
+`Compiled & schema applied → Confirming across the cluster → Committing → Done`
+
+`Confirming across the cluster` is the step that can take a while — OAP waits
for every storage node to pick up the new schema before committing. This is
expected; it is not stuck.
+
+The apply ends in one of three states:
+
+| State | Meaning | What to do |
+|---|---|---|
+| **Done (applied)** | Committed, durable, and confirmed across the cluster. |
Nothing — the change is live. |
+| **Applied — cluster propagation unconfirmed** (warning) | The change is
committed and durable, but one or more nodes hadn't confirmed the new schema
within OAP's fence budget. The editor lists those nodes. | Nothing required —
the listed nodes catch up automatically on their next scan. If a node stays
behind, check its health, then optionally **Force re-apply** (below). Reopening
the editor later shows the rule as applied. |
+| **Apply failed — rolled back** (error) | A pre-commit error stopped the
apply. The cluster stays on the previous rule and your edit is kept in the
editor. The failure reason is shown inline. | Read the reason. A compile error
appears as an inline diagnostic — fix the YAML and save again. For a transient
failure (e.g. storage briefly unavailable), retry once it recovers, or use **Force
re-apply**. |
+
+A reload of the editor while an apply is still running resumes the live
progress; a reload after it finished shows the rule's stored state (a
propagation-unconfirmed apply then reads simply as applied, because the change
is durable).
+
+## Force re-apply (recover)
+
+When an apply ends up degraded or fails transiently, the editor offers **Force
re-apply (recover)**. It re-runs the apply across the cluster to re-confirm the
schema and un-stick any node still waiting — even when the rule content is
unchanged.
+
+Because it runs the full apply pipeline, a force re-apply **briefly pauses
collection for that rule's metrics**, so the editor asks you to confirm first.
Use it to clear a stuck apply or to coax a lagging node into re-confirming. It
does **not** help with a compile error — fix the rule content instead.
+
+Force re-apply requires the `rule:write:structural` permission.
+
+## Inactivate, delete, revert
+
+- **Inactivate** stops a rule without removing it. Applies instantly.
+- **Delete** removes an operator-pushed rule that has no bundled version. A
rule must be inactivated before it can be deleted.
+- **Revert to bundled** discards an override and reinstalls the bundled
version. This is a schema change, so it shows the same structural-apply
progress stepper as a save (and the same Done / propagation-unconfirmed /
failed outcomes); metrics defined only by the override are dropped. Reverting
an ACTIVE rule inactivates it first automatically, so it takes one action.
+
+## Requirements
+
+- OAP admin port reachable from Horizon.
+- The `receiver-runtime-rule` module enabled on the connected OAP. The editor
shows a warning when it is not.
diff --git a/packages/api-client/src/index.ts b/packages/api-client/src/index.ts
index 0848749..100e338 100644
--- a/packages/api-client/src/index.ts
+++ b/packages/api-client/src/index.ts
@@ -229,6 +229,7 @@ export {
type RuntimeRuleClientOptions,
type AddOrUpdateArgs,
type GetRuleArgs,
+ type RuleStatusArgs,
type FetchLike,
} from './runtime-rule.js';
export { StatusClient, type StatusClientOptions, type NormalisedClusterNode }
from './status.js';
diff --git a/packages/api-client/src/runtime-rule.ts
b/packages/api-client/src/runtime-rule.ts
index e8e3914..0b51850 100644
--- a/packages/api-client/src/runtime-rule.ts
+++ b/packages/api-client/src/runtime-rule.ts
@@ -25,6 +25,7 @@ import type {
RuleResponse,
RuleSource,
RuleStatus,
+ RuleStatusResponse,
} from './types.js';
import { RuntimeRuleApiError } from './types.js';
@@ -64,6 +65,19 @@ export interface GetRuleArgs {
ifNoneMatch?: string;
}
+export interface RuleStatusArgs {
+ catalog: Catalog;
+ name: string;
+ /** The poll handle from a `structural_applied` response. Resolves the
+ * live tracker on the cluster main. Omit it (e.g. after a page reload)
+ * to resolve by catalog+name(+contentHash), which degrades to the
+ * durable rule row. */
+ applyId?: string;
+ /** SHA-256 hex of the content whose apply you're confirming —
+ * disambiguates the durable-DAO fallback once the applyId is gone. */
+ contentHash?: string;
+}
+
/**
* Typed wrapper for the eight runtime-rule REST endpoints v1 binds to.
*
@@ -177,6 +191,20 @@ export class RuntimeRuleClient {
return this.expectApplyResult(res, url);
}
+ /** `GET /runtime/rule/status?catalog=&name=[&applyId=][&contentHash=]`.
+ * Reports the progress of a structural `/addOrUpdate` apply. ALWAYS
+ * 200 — `found: false` (phase `UNKNOWN`) when nothing matches, never
+ * 404; any non-OK response is thrown via `this.toError`. */
+ async status(args: RuleStatusArgs): Promise<RuleStatusResponse> {
+ const params: Record<string, string> = { catalog: args.catalog, name:
args.name };
+ if (args.applyId) params.applyId = args.applyId;
+ if (args.contentHash) params.contentHash = args.contentHash;
+ const url = this.url('/runtime/rule/status', params);
+ const res = await this.send(url, { method: 'GET' });
+ if (!res.ok) throw await this.toError(res, url);
+ return (await res.json()) as RuleStatusResponse;
+ }
+
/** `GET /runtime/rule/dump` or `/dump/{catalog}` — streams `tar.gz`. */
async dump(catalog?: Catalog): Promise<Response> {
const path = catalog
diff --git a/packages/api-client/src/types.ts b/packages/api-client/src/types.ts
index b0dbb6e..af7fc69 100644
--- a/packages/api-client/src/types.ts
+++ b/packages/api-client/src/types.ts
@@ -162,7 +162,14 @@ export interface NotModified {
export type ApplyStatus =
| 'no_change'
| 'filter_only_applied'
+ | 'filter_only_persisted'
| 'structural_applied'
+ | 'inactivated'
+ | 'static_tombstoned'
+ | 'already_inactive'
+ | 'not_found'
+ | 'deleted'
+ | 'reverted_to_bundled'
| 'persisted_apply_pending'
| 'compile_failed'
| 'empty_body'
@@ -172,6 +179,7 @@ export type ApplyStatus =
| 'no_bundled_twin'
| 'storage_change_requires_explicit_approval'
| 'requires_inactivate_first'
+ | 'requires_revert_to_bundled'
| 'ddl_verify_failed'
| 'apply_failed'
| 'persist_failed';
@@ -183,6 +191,83 @@ export interface ApplyResult {
catalog: string;
name: string;
message: string;
+ /** Present ONLY on `structural_applied`: the poll handle for
+ * {@link RuleStatusResponse}. The structural apply is accepted at
+ * phase `FENCING` (DDL fired, not yet durable) and the
+ * fence → persist → commit → resume tail runs in the background —
+ * poll `GET /runtime/rule/status` until a terminal phase. No other
+ * applyStatus carries an applyId (the sync paths have nothing to
+ * poll). */
+ applyId?: string;
+}
+
+// ── /runtime/rule/status (structural-apply progress) ───────────────
+
+/**
+ * Lifecycle phase of a structural `/addOrUpdate`, tracked by the cluster
+ * main. The HTTP apply call returns at `FENCING`; the rest runs
+ * in the background:
+ *
+ * PENDING → DDL → FENCING → ROLLING_OUT → APPLIED
+ *
+ * with two terminal off-ramps and one unknown:
+ * - `DEGRADED` — committed + durable, but the cluster-wide schema fence
+ * didn't confirm in time (laggards listed in
+ * {@link RuleStatusResponse.fenceLaggards}) OR the local commit-tail
+ * threw. Forward-progress, NOT a revert; laggards self-converge on
+ * their next scan.
+ * - `FAILED` — a pre-commit error rolled the apply back; the cluster
+ * stays on the prior rule.
+ * - `UNKNOWN` — the applyId is no longer tracked (evicted after ~1h, or
+ * the main restarted). Re-query by `contentHash`, which degrades to
+ * the durable rule row.
+ */
+export type ApplyPhase =
+ | 'PENDING'
+ | 'DDL'
+ | 'FENCING'
+ | 'ROLLING_OUT'
+ | 'APPLIED'
+ | 'DEGRADED'
+ | 'FAILED'
+ | 'UNKNOWN';
+
+/** True for APPLIED / DEGRADED / FAILED (a poller stops here), else false. */
+export function isTerminalPhase(phase: string): boolean {
+ return phase === 'APPLIED' || phase === 'DEGRADED' || phase === 'FAILED';
+}
+
+/**
+ * `GET /runtime/rule/status` response — ALWAYS HTTP 200 (`found: false`
+ * when nothing matches, never 404). Three server builders emit different
+ * field subsets, so everything past `found` + `phase` is conditionally
+ * present:
+ * - live tracker: `applyId`, `startedAtMs`, `updatedAtMs`, `servedBy`.
+ * - durable-DAO fallback (live status gone / main unreachable): a
+ * matching ACTIVE row reports `phase: 'APPLIED'` + `derivedFrom:
+ * 'durable-dao'` + `note`; NO applyId / timestamps / servedBy.
+ * - not found: `found: false`, `phase: 'UNKNOWN'`.
+ * `failureReason` appears on FAILED/DEGRADED; `fenceLaggards` only on a
+ * fence-timeout DEGRADED.
+ */
+export interface RuleStatusResponse {
+ found: boolean;
+ phase: ApplyPhase | (string & {}); // open union: any string is accepted
+ applyId?: string;
+ catalog?: string;
+ name?: string;
+ contentHash?: string;
+ failureReason?: string;
+ /** Data-node ids that hadn't confirmed the new schema within the fence
+ * budget — present only on a fence-timeout DEGRADED. */
+ fenceLaggards?: string[];
+ startedAtMs?: number;
+ updatedAtMs?: number;
+ servedBy?: string;
+ /** `'durable-dao'` when the status was reconstructed from the durable
+ * rule row rather than live progress (e.g. after a page reload). */
+ derivedFrom?: string;
+ note?: string;
}
// ── /runtime/rule/delete ───────────────────────────────────────────