aglinxinyuan commented on code in PR #4912:
URL: https://github.com/apache/texera/pull/4912#discussion_r3180064618


##########
common/workflow-operator/src/test/scala/org/apache/texera/amber/operator/sink/ProgressiveUtilsSpec.scala:
##########
@@ -0,0 +1,130 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.texera.amber.operator.sink
+
+import org.apache.texera.amber.core.tuple.{Attribute, AttributeType, Schema, 
Tuple}
+import org.scalatest.flatspec.AnyFlatSpec
+
+class ProgressiveUtilsSpec extends AnyFlatSpec {
+
+  // --- helpers 
---------------------------------------------------------------
+
+  private val baseSchema: Schema = new Schema(
+    new Attribute("id", AttributeType.INTEGER),
+    new Attribute("name", AttributeType.STRING)
+  )
+
+  // outputSchema = flag column prepended to baseSchema
+  private val outputSchema: Schema = new Schema(
+    ProgressiveUtils.insertRetractFlagAttr,
+    new Attribute("id", AttributeType.INTEGER),
+    new Attribute("name", AttributeType.STRING)
+  )
+
+  private def baseTuple(id: Int, name: String): Tuple =
+    Tuple
+      .builder(baseSchema)
+      .add(new Attribute("id", AttributeType.INTEGER), Int.box(id))
+      .add(new Attribute("name", AttributeType.STRING), name)
+      .build()
+
+  // --- insertRetractFlagAttr 
-------------------------------------------------
+
+  "ProgressiveUtils.insertRetractFlagAttr" should "be a BOOLEAN attribute 
named __internal_is_insertion" in {
+    val attr = ProgressiveUtils.insertRetractFlagAttr
+    assert(attr.getName == "__internal_is_insertion")
+    assert(attr.getType == AttributeType.BOOLEAN)
+  }
+
+  // --- addInsertionFlag / addRetractionFlag 
----------------------------------
+
+  "ProgressiveUtils.addInsertionFlag" should "prepend the flag column with 
value true" in {
+    val flagged = ProgressiveUtils.addInsertionFlag(baseTuple(1, "alice"), 
outputSchema)
+    assert(flagged.getSchema == outputSchema)
+    
assert(flagged.getField[Boolean](ProgressiveUtils.insertRetractFlagAttr.getName)
 == true)
+    assert(flagged.getField[Integer]("id") == 1)
+    assert(flagged.getField[String]("name") == "alice")
+  }
+
+  "ProgressiveUtils.addRetractionFlag" should "prepend the flag column with 
value false" in {
+    val flagged = ProgressiveUtils.addRetractionFlag(baseTuple(2, "bob"), 
outputSchema)
+    
assert(flagged.getField[Boolean](ProgressiveUtils.insertRetractFlagAttr.getName)
 == false)
+    assert(flagged.getField[Integer]("id") == 2)
+    assert(flagged.getField[String]("name") == "bob")
+  }
+
+  it should "fail an assertion if the input tuple already has the flag column" 
in {
+    val alreadyFlagged = ProgressiveUtils.addInsertionFlag(baseTuple(3, "x"), 
outputSchema)
+    val ex = intercept[AssertionError] {
+      ProgressiveUtils.addRetractionFlag(alreadyFlagged, outputSchema)
+    }
+    // Don't pin the exact message; locking in the AssertionError is enough.
+    assert(ex != null)
+  }
+
+  // --- isInsertion 
-----------------------------------------------------------
+
+  "ProgressiveUtils.isInsertion" should "return true for an unflagged tuple" 
in {
+    // Tuples without the flag column default to insertion (the unflagged
+    // default in the engine is "+").
+    assert(ProgressiveUtils.isInsertion(baseTuple(1, "x")))
+  }
+
+  it should "return true when the flag column is present and true" in {
+    val flagged = ProgressiveUtils.addInsertionFlag(baseTuple(1, "x"), 
outputSchema)
+    assert(ProgressiveUtils.isInsertion(flagged))
+  }
+
+  it should "return false when the flag column is present and false" in {
+    val flagged = ProgressiveUtils.addRetractionFlag(baseTuple(1, "x"), 
outputSchema)
+    assert(!ProgressiveUtils.isInsertion(flagged))
+  }
+
+  // --- getTupleFlagAndValue 
--------------------------------------------------
+
+  "ProgressiveUtils.getTupleFlagAndValue" should "split an insertion-flagged 
tuple into (true, base tuple)" in {

Review Comment:
   Made the call: I opened #4920 to track adding a
`materialize(stream): Set[Tuple]` (or similar) API as a follow-up. I'm keeping
this PR scoped to testing the existing `ProgressiveUtils` surface, because
adding a new public API plus its tests would expand the change beyond what the
PR title and existing reviewers signed up for. The follow-up issue captures the
suggested shape and the open questions (is ordering preserved? how are
out-of-order retractions handled?) so an implementer can pick it up cleanly.
   
   Resolving this thread; flag back if you'd rather have it folded into this PR 
after all.



##########
common/workflow-operator/src/test/scala/org/apache/texera/amber/operator/sink/ProgressiveUtilsSpec.scala:
##########
@@ -0,0 +1,216 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.texera.amber.operator.sink
+
+import org.apache.texera.amber.core.tuple.{Attribute, AttributeType, Schema, 
Tuple}
+import org.scalatest.flatspec.AnyFlatSpec
+
+class ProgressiveUtilsSpec extends AnyFlatSpec {
+
+  // --- helpers 
---------------------------------------------------------------
+
+  private val baseSchema: Schema = new Schema(
+    new Attribute("id", AttributeType.INTEGER),
+    new Attribute("name", AttributeType.STRING)
+  )
+
+  // outputSchema = flag column prepended to baseSchema
+  private val outputSchema: Schema = new Schema(
+    ProgressiveUtils.insertRetractFlagAttr,
+    new Attribute("id", AttributeType.INTEGER),
+    new Attribute("name", AttributeType.STRING)
+  )
+
+  private def baseTuple(id: Int, name: String): Tuple =
+    Tuple
+      .builder(baseSchema)
+      .add(new Attribute("id", AttributeType.INTEGER), Int.box(id))
+      .add(new Attribute("name", AttributeType.STRING), name)
+      .build()
+
+  // --- insertRetractFlagAttr 
-------------------------------------------------
+
+  "ProgressiveUtils.insertRetractFlagAttr" should "be a BOOLEAN attribute 
named __internal_is_insertion" in {
+    val attr = ProgressiveUtils.insertRetractFlagAttr
+    assert(attr.getName == "__internal_is_insertion")
+    assert(attr.getType == AttributeType.BOOLEAN)
+  }
+
+  // --- addInsertionFlag / addRetractionFlag 
----------------------------------
+
+  "ProgressiveUtils.addInsertionFlag" should "prepend the flag column with 
value true" in {
+    val flagged = ProgressiveUtils.addInsertionFlag(baseTuple(1, "alice"), 
outputSchema)
+    assert(flagged.getSchema == outputSchema)
+    
assert(flagged.getField[Boolean](ProgressiveUtils.insertRetractFlagAttr.getName)
 == true)
+    assert(flagged.getField[Integer]("id") == 1)
+    assert(flagged.getField[String]("name") == "alice")
+  }
+
+  "ProgressiveUtils.addRetractionFlag" should "prepend the flag column with 
value false" in {
+    val flagged = ProgressiveUtils.addRetractionFlag(baseTuple(2, "bob"), 
outputSchema)
+    
assert(flagged.getField[Boolean](ProgressiveUtils.insertRetractFlagAttr.getName)
 == false)
+    assert(flagged.getField[Integer]("id") == 2)
+    assert(flagged.getField[String]("name") == "bob")
+  }
+
+  it should "fail an assertion if addRetractionFlag is called on an 
already-flagged tuple" in {
+    val alreadyFlagged = ProgressiveUtils.addInsertionFlag(baseTuple(3, "x"), 
outputSchema)
+    intercept[AssertionError] {
+      ProgressiveUtils.addRetractionFlag(alreadyFlagged, outputSchema)
+    }
+  }
+
+  it should "fail an assertion if addInsertionFlag is called on an 
already-flagged tuple" in {
+    // Symmetric guard: both addInsertionFlag and addRetractionFlag carry the
+    // same `assert(!containsAttribute(flagAttr))` precondition, and either
+    // one may be called on already-flagged data, so each path should fail.
+    val alreadyFlagged = ProgressiveUtils.addRetractionFlag(baseTuple(4, "y"), 
outputSchema)
+    intercept[AssertionError] {
+      ProgressiveUtils.addInsertionFlag(alreadyFlagged, outputSchema)
+    }
+  }
+
+  // --- isInsertion 
-----------------------------------------------------------
+
+  "ProgressiveUtils.isInsertion" should "return true for an unflagged tuple" 
in {
+    // Tuples without the flag column default to insertion (the unflagged
+    // default in the engine is "+").
+    assert(ProgressiveUtils.isInsertion(baseTuple(1, "x")))
+  }
+
+  it should "return true when the flag column is present and true" in {
+    val flagged = ProgressiveUtils.addInsertionFlag(baseTuple(1, "x"), 
outputSchema)
+    assert(ProgressiveUtils.isInsertion(flagged))
+  }
+
+  it should "return false when the flag column is present and false" in {
+    val flagged = ProgressiveUtils.addRetractionFlag(baseTuple(1, "x"), 
outputSchema)
+    assert(!ProgressiveUtils.isInsertion(flagged))
+  }
+
+  // --- getTupleFlagAndValue 
--------------------------------------------------
+
+  "ProgressiveUtils.getTupleFlagAndValue" should "split an insertion-flagged 
tuple into (true, base tuple)" in {
+    val flagged = ProgressiveUtils.addInsertionFlag(baseTuple(1, "alice"), 
outputSchema)
+    val (flag, stripped) = ProgressiveUtils.getTupleFlagAndValue(flagged)
+    assert(flag)
+    assert(stripped.getSchema.getAttributeNames == List("id", "name"))
+    assert(stripped.getField[Integer]("id") == 1)
+    assert(stripped.getField[String]("name") == "alice")
+  }
+
+  it should "split a retraction-flagged tuple into (false, base tuple)" in {
+    val flagged = ProgressiveUtils.addRetractionFlag(baseTuple(2, "bob"), 
outputSchema)
+    val (flag, stripped) = ProgressiveUtils.getTupleFlagAndValue(flagged)
+    assert(!flag)
+    assert(stripped.getField[Integer]("id") == 2)
+    assert(stripped.getField[String]("name") == "bob")
+  }
+
+  it should "treat an unflagged tuple as insertion and pass the original 
schema through unchanged" in {
+    // For a tuple that doesn't carry the flag column, isInsertion returns
+    // true and getPartialSchema returns the same schema (filterNot removes
+    // nothing). The values must round-trip intact.
+    val raw = baseTuple(3, "carol")
+    val (flag, stripped) = ProgressiveUtils.getTupleFlagAndValue(raw)
+    assert(flag)
+    assert(stripped.getSchema.getAttributeNames == List("id", "name"))
+    assert(stripped.getField[Integer]("id") == 3)
+    assert(stripped.getField[String]("name") == "carol")
+  }
+
+  // --- typed payload round-trips --------------------------------------------
+  // Nothing in `addInsertionFlag` / `getTupleFlagAndValue` is type-specific —
+  // they only care about the BOOLEAN flag column they prepend / strip — but
+  // it is worth pinning that arbitrary AttributeType payload columns survive
+  // the flag → strip → unflag round-trip across the engine's value types.
+
+  private def flagRoundTrip(payloadAttr: Attribute, payloadValue: AnyRef): 
(Boolean, AnyRef) = {
+    val payloadSchema = new Schema(payloadAttr)
+    val flaggedSchema = new Schema(ProgressiveUtils.insertRetractFlagAttr, 
payloadAttr)
+    val raw = Tuple.builder(payloadSchema).add(payloadAttr, 
payloadValue).build()
+    val flagged = ProgressiveUtils.addInsertionFlag(raw, flaggedSchema)
+    val (flag, stripped) = ProgressiveUtils.getTupleFlagAndValue(flagged)
+    (flag, stripped.getField[AnyRef](payloadAttr.getName))
+  }
+
+  "Flag round-trip" should "preserve INTEGER payload values" in {
+    val (flag, value) =
+      flagRoundTrip(new Attribute("v", AttributeType.INTEGER), Int.box(42))
+    assert(flag)
+    assert(value == Int.box(42))
+  }
+
+  it should "preserve LONG payload values" in {
+    val (flag, value) =
+      flagRoundTrip(new Attribute("v", AttributeType.LONG), 
Long.box(9876543210L))
+    assert(flag)
+    assert(value == Long.box(9876543210L))
+  }
+
+  it should "preserve DOUBLE payload values" in {
+    val (flag, value) =
+      flagRoundTrip(new Attribute("v", AttributeType.DOUBLE), 
Double.box(3.14159))
+    assert(flag)
+    assert(value == Double.box(3.14159))
+  }
+
+  it should "preserve BOOLEAN payload values (distinct from the flag column)" 
in {
+    // The flag column is also BOOLEAN; this verifies the implementation
+    // selects the correct attribute by name, not by type.
+    val (flag, value) =
+      flagRoundTrip(new Attribute("active", AttributeType.BOOLEAN), 
Boolean.box(false))
+    assert(flag, "outer flag must still be insertion")
+    assert(value == Boolean.box(false), "inner BOOLEAN payload must be 
preserved")
+  }
+
+  it should "preserve TIMESTAMP payload values" in {
+    val ts = new java.sql.Timestamp(1_700_000_000_000L)
+    val (flag, value) =
+      flagRoundTrip(new Attribute("ts", AttributeType.TIMESTAMP), ts)
+    assert(flag)
+    assert(value == ts)
+  }
+
+  it should "preserve BINARY payload values" in {
+    val bytes = Array[Byte](0, 1, 2, 3, -1)
+    val (flag, value) =
+      flagRoundTrip(new Attribute("blob", AttributeType.BINARY), bytes)
+    assert(flag)
+    // Tuple stores the same array reference through the round-trip (no copy
+    // semantics in the flag/strip path), so reference equality is the
+    // observable contract here.
+    assert(value.asInstanceOf[Array[Byte]] eq bytes)

Review Comment:
   Done in c1783d1751 — switched the BINARY assertion from reference equality
(`eq`) to `sameElements`. The `Tuple` contract already uses `sameElements` for
`Array[Byte]`, and reference equality would have over-constrained the
flag/strip path, failing the test on any future copy-on-write change.



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]

Reply via email to