Copilot commented on code in PR #5834:
URL: https://github.com/apache/texera/pull/5834#discussion_r3448863663


##########
frontend/src/app/workspace/service/workflow-status/performance-metrics.ts:
##########
@@ -0,0 +1,160 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+import { OperatorStatistics } from "../../types/execute-workflow.interface";
+
+/**
+ * Derived, normalized-ready per-operator performance metrics.
+ *
+ * This is the ground-truth model consumed by the workflow heat-map overlay. It
+ * is a flat, defensively-defaulted projection of the raw {@link OperatorStatistics}
+ * the backend streams over the websocket — every field is a finite, 
non-negative
+ * number so downstream scoring never has to re-validate.
+ */
+export interface OperatorPerformanceMetrics
+  extends Readonly<{
+    operatorId: string;
+    dataProcessingTimeNs: number;
+    controlProcessingTimeNs: number;
+    idleTimeNs: number;
+    inputRows: number;
+    outputRows: number;
+    inputSize: number;
+    outputSize: number;
+    numWorkers: number;
+  }> {}
+
+/**
+ * The three heat-map views. Each answers a different "where is the problem?"
+ * question; see {@link rawMetricForView} for the per-operator cost each one 
uses.
+ * String-valued so the selection serializes readably (e.g. into localStorage).
+ */
+export enum HeatmapView {
+  Runtime = "runtime",
+  Throughput = "throughput",
+  IoImbalance = "ioImbalance",
+}
+
+/**
+ * Coerce an untrusted numeric field (it arrives over the websocket) into a
+ * finite, non-negative number. Anything missing, non-numeric, NaN, infinite, 
or
+ * negative collapses to 0 so no NaN/Infinity can leak into the scoring math.
+ */
+function toFiniteNonNegative(value: number | undefined): number {
+  return typeof value === "number" && Number.isFinite(value) && value > 0 ? 
value : 0;
+}
+
+function clamp(value: number, min: number, max: number): number {
+  return Math.min(max, Math.max(min, value));
+}
+
+/**
+ * Project a single raw {@link OperatorStatistics} into the flat performance 
model,
+ * defaulting every optional/missing field to 0. Data and control processing 
time
+ * are kept separate (the Runtime view uses data time only).
+ */
+export function toPerformanceMetrics(operatorId: string, stats: 
OperatorStatistics): OperatorPerformanceMetrics {
+  return {
+    operatorId,
+    dataProcessingTimeNs: 
toFiniteNonNegative(stats.aggregatedDataProcessingTime),
+    controlProcessingTimeNs: 
toFiniteNonNegative(stats.aggregatedControlProcessingTime),
+    idleTimeNs: toFiniteNonNegative(stats.aggregatedIdleTime),
+    inputRows: toFiniteNonNegative(stats.aggregatedInputRowCount),
+    outputRows: toFiniteNonNegative(stats.aggregatedOutputRowCount),
+    inputSize: toFiniteNonNegative(stats.aggregatedInputSize),
+    outputSize: toFiniteNonNegative(stats.aggregatedOutputSize),
+    numWorkers: toFiniteNonNegative(stats.numWorkers),
+  };
+}
+
+/**
+ * Per-operator raw cost for a view, BEFORE normalization. Bottleneck-oriented:
+ * a higher cost means "hotter" (more of a problem).
+ *
+ * - Runtime:     data processing time — slower operators are hotter.
+ * - Throughput:  1 / outputRows — slow producers are hotter; no output -> 0 
(cold).
+ * - IoImbalance: clamp(1 - out/in) — row-dropping operators are hotter; an
+ *                amplifier (out > in) or a missing input clamps to 0 (cold).
+ *
+ * The metrics are already finite and non-negative (see {@link toPerformanceMetrics}),
+ * so this never produces NaN/Infinity.
+ */
+export function rawMetricForView(metrics: OperatorPerformanceMetrics, view: 
HeatmapView): number {
+  switch (view) {
+    case HeatmapView.Runtime:
+      return metrics.dataProcessingTimeNs;
+    case HeatmapView.Throughput:
+      return metrics.outputRows > 0 ? 1 / metrics.outputRows : 0;
+    case HeatmapView.IoImbalance:
+      return metrics.inputRows <= 0 ? 0 : clamp(1 - metrics.outputRows / 
metrics.inputRows, 0, 1);
+    default:
+      return 0;

Review Comment:
   IoImbalance currently clamps amplifiers (outputRows > inputRows) to 0 (cold)
via clamp(1 - out/in, 0, 1), so only row-dropping operators can ever score as
hot. The linked design description says this view should capture operators that
over- or under-feed (imbalance in either direction). If amplifiers should also
be highlighted, consider scoring on the deviation from 1 (e.g., |1 - out/in|
with an upper clamp, since out/in is unbounded above) instead of scoring only
row-dropping operators.



##########
frontend/src/app/workspace/service/workflow-status/performance-metrics.ts:
##########
@@ -0,0 +1,160 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+import { OperatorStatistics } from "../../types/execute-workflow.interface";
+
+/**
+ * Derived, normalized-ready per-operator performance metrics.
+ *
+ * This is the ground-truth model consumed by the workflow heat-map overlay. It
+ * is a flat, defensively-defaulted projection of the raw {@link OperatorStatistics}
+ * the backend streams over the websocket — every field is a finite, 
non-negative
+ * number so downstream scoring never has to re-validate.
+ */
+export interface OperatorPerformanceMetrics
+  extends Readonly<{
+    operatorId: string;
+    dataProcessingTimeNs: number;
+    controlProcessingTimeNs: number;
+    idleTimeNs: number;
+    inputRows: number;
+    outputRows: number;
+    inputSize: number;
+    outputSize: number;
+    numWorkers: number;
+  }> {}
+
+/**
+ * The three heat-map views. Each answers a different "where is the problem?"
+ * question; see {@link rawMetricForView} for the per-operator cost each one 
uses.
+ * String-valued so the selection serializes readably (e.g. into localStorage).
+ */
+export enum HeatmapView {
+  Runtime = "runtime",
+  Throughput = "throughput",
+  IoImbalance = "ioImbalance",
+}
+
+/**
+ * Coerce an untrusted numeric field (it arrives over the websocket) into a
+ * finite, non-negative number. Anything missing, non-numeric, NaN, infinite, 
or
+ * negative collapses to 0 so no NaN/Infinity can leak into the scoring math.
+ */
+function toFiniteNonNegative(value: number | undefined): number {
+  return typeof value === "number" && Number.isFinite(value) && value > 0 ? 
value : 0;
+}
+
+function clamp(value: number, min: number, max: number): number {
+  return Math.min(max, Math.max(min, value));
+}
+
+/**
+ * Project a single raw {@link OperatorStatistics} into the flat performance 
model,
+ * defaulting every optional/missing field to 0. Data and control processing 
time
+ * are kept separate (the Runtime view uses data time only).
+ */

Review Comment:
   The Runtime view (and the surrounding docs) currently treat runtime cost as 
**dataProcessingTimeNs only**, but the linked design/issue text describes 
Runtime as data + control processing time (and backend code like CostEstimator 
also uses data+control). If Runtime is meant to reflect total CPU time, this 
should include controlProcessingTimeNs as well; otherwise the docs/issue 
references should be updated to match the intended semantics.



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]

Reply via email to