Copilot commented on code in PR #5714:
URL: https://github.com/apache/texera/pull/5714#discussion_r3447887586


##########
amber/src/main/scala/org/apache/texera/web/resource/SyncExecutionResource.scala:
##########
@@ -375,6 +375,104 @@ class SyncExecutionResource extends LazyLogging {
     }
   }
 
+  /**
+    * Blocks until every target operator's default external result port holds 
at least as many rows
+    * as its stats report, or until `timeoutMillis` elapses. Operators with no 
result storage are
+    * treated as ready.
+    */
+  private def awaitResultsPersisted(
+      executionId: ExecutionIdentity,
+      executionService: org.apache.texera.web.service.WorkflowExecutionService,
+      targetOperatorIds: List[String],
+      timeoutMillis: Long = 2000L,
+      pollIntervalMillis: Long = 25L
+  ): Unit = {
+    def expectedOutputCount(opId: String): Long =
+      expectedDefaultPortOutputCount(
+        executionService.executionStateStore.statsStore.getState,
+        opId
+      )
+
+    def committedCount(opId: String): Option[Long] =
+      committedDefaultPortCount(
+        op =>
+          WorkflowExecutionsResource
+            .getResultUriByLogicalPortId(executionId, OperatorIdentity(op), 
PortIdentity()),
+        uri =>
+          DocumentFactory
+            .openDocument(uri)
+            ._1
+            .asInstanceOf[VirtualDocument[Tuple]]
+            .getCount
+      )(opId)
+
+    awaitUntil(
+      targetOperatorIds,
+      expectedOutputCount,
+      committedCount,
+      timeoutMillis,
+      pollIntervalMillis,
+      () => System.currentTimeMillis(),
+      Thread.sleep
+    )
+  }
+
+  // Default external output port (PortIdentity()) row count from stats; 0 if 
absent.
+  private[resource] def expectedDefaultPortOutputCount(
+      stats: ExecutionStatsStore,
+      opId: String
+  ): Long =
+    stats.operatorInfo
+      .get(opId)
+      .flatMap { metrics =>
+        metrics.operatorStatistics.outputMetrics
+          .find(_.portId == PortIdentity())
+          .map(_.tupleMetrics.count)
+      }
+      .getOrElse(0L)
+
+  // Committed rows for the default result port; None when no storage, 0 when 
countOf throws.
+  private[resource] def committedDefaultPortCount(
+      resultUriOf: String => Option[URI],
+      countOf: URI => Long
+  )(opId: String): Option[Long] =
+    resultUriOf(opId).map { uri =>
+      try {
+        countOf(uri)
+      } catch {
+        case _: Exception => 0L
+      }
+    }
+
+  /**
+    * Blocks until every target operator is ready or `timeoutMillis` elapses, 
sleeping
+    * `pollIntervalMillis` between checks. An operator is ready when its 
expected count is
+    * non-positive, it has no committed count, or its committed count reaches 
the expected count.
+    * The clock and sleep are injected so tests can drive timing.
+    */
+  private[resource] def awaitUntil(
+      targetOperatorIds: List[String],
+      expectedCountOf: String => Long,
+      committedCountOf: String => Option[Long],
+      timeoutMillis: Long,
+      pollIntervalMillis: Long,
+      now: () => Long,
+      sleep: Long => Unit
+  ): Unit = {
+    if (targetOperatorIds.isEmpty) return
+
+    def ready: Boolean =
+      targetOperatorIds.forall { opId =>
+        val expected = expectedCountOf(opId)
+        expected <= 0 || committedCountOf(opId).forall(_ >= expected)
+      }
+
+    val deadline = now() + timeoutMillis
+    while (!ready && now() < deadline) {
+      sleep(pollIntervalMillis)
+    }

Review Comment:
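   The poll loop can overshoot the deadline by up to `pollIntervalMillis`, 
because it always sleeps the full interval even when less time remains. The 
loop condition also evaluates `ready` before checking the deadline, so after 
the deadline has passed it still runs one more round of `committedCountOf` 
calls; if those are expensive (they open documents), that is wasted work and 
makes the timeout less strict. Consider checking the deadline first and 
sleeping only the remaining time: on each iteration compute 
`val remaining = deadline - now()`, stop if it is non-positive, and otherwise 
call `sleep(math.min(pollIntervalMillis, remaining))`.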



##########
amber/src/main/scala/org/apache/texera/web/resource/SyncExecutionResource.scala:
##########
@@ -375,6 +375,104 @@ class SyncExecutionResource extends LazyLogging {
     }
   }
 
+  /**
+    * Blocks until every target operator's default external result port holds 
at least as many rows
+    * as its stats report, or until `timeoutMillis` elapses. Operators with no 
result storage are
+    * treated as ready.
+    */
+  private def awaitResultsPersisted(
+      executionId: ExecutionIdentity,
+      executionService: org.apache.texera.web.service.WorkflowExecutionService,
+      targetOperatorIds: List[String],
+      timeoutMillis: Long = 2000L,
+      pollIntervalMillis: Long = 25L
+  ): Unit = {
+    def expectedOutputCount(opId: String): Long =
+      expectedDefaultPortOutputCount(
+        executionService.executionStateStore.statsStore.getState,
+        opId
+      )
+
+    def committedCount(opId: String): Option[Long] =
+      committedDefaultPortCount(
+        op =>
+          WorkflowExecutionsResource
+            .getResultUriByLogicalPortId(executionId, OperatorIdentity(op), 
PortIdentity()),
+        uri =>
+          DocumentFactory
+            .openDocument(uri)
+            ._1
+            .asInstanceOf[VirtualDocument[Tuple]]
+            .getCount
+      )(opId)
+
+    awaitUntil(
+      targetOperatorIds,
+      expectedOutputCount,
+      committedCount,
+      timeoutMillis,
+      pollIntervalMillis,
+      () => System.currentTimeMillis(),

Review Comment:
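   `awaitResultsPersisted` uses `System.currentTimeMillis()` to drive the 
timeout. Wall-clock time is not monotonic (e.g., it can jump on NTP 
adjustments), which can make the poll exit too early or wait longer than 
intended. Use a monotonic clock such as `System.nanoTime` for deadline 
calculations. Note that `System.nanoTime` returns nanoseconds, while 
`awaitUntil` adds `timeoutMillis` directly to `now()`, so the injected clock 
must be converted to milliseconds, e.g. `() => System.nanoTime() / 1000000L`.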



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]

Reply via email to