This is an automated email from the ASF dual-hosted git repository.

aglinxinyuan pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/texera.git


The following commit(s) were added to refs/heads/main by this push:
     new 1c6021c543 test(workflow-core): add unit test coverage for TupleUtils (#4910)
1c6021c543 is described below

commit 1c6021c543669271f8d441354db6a830ed0446bc
Author: Xinyuan Lin <[email protected]>
AuthorDate: Mon May 4 01:09:58 2026 -0700

    test(workflow-core): add unit test coverage for TupleUtils (#4910)
    
    ### What changes were proposed in this PR?
    
    Adds `TupleUtilsSpec` covering `TupleUtils`
    (common/workflow-core/src/main/scala/org/apache/texera/amber/core/tuple/TupleUtils.scala).
    Today the only exercise of `tuple2json` / `json2tuple` is a single
    round-trip inside `TupleSpec`; this PR adds dedicated edge-case
    coverage.
    
    The new spec pins:
    - `tuple2json`: emits one JSON field per schema attribute in the
    schema's declared order; `fieldVals` are indexed via
    `schema.getIndex(name)` so a reordered schema selects different slots;
    null field values become JSON null; an empty schema yields an empty JSON
    object.
    - `json2tuple`: schema is inferred from a flat JSON object's keys +
    types; values round-trip with `tuple2json`; non-object roots (e.g. JSON
    arrays) yield an empty tuple rather than silently succeeding with a
    populated one; malformed JSON throws.
    
    No production code changed; this is test-only.
    
    ### Any related issues, documentation, discussions?
    
    Closes #4909
    
    ### How was this PR tested?
    
    Added 8 new unit tests in `TupleUtilsSpec`. Verified locally:
    
    ```
    sbt 'WorkflowCore/Test/testOnly org.apache.texera.amber.core.tuple.TupleUtilsSpec'
    # → Tests: succeeded 8, failed 0
    
    sbt 'WorkflowCore/Test/scalafmtCheck'
    # → clean
    ```
    
    ### Was this PR authored or co-authored using generative AI tooling?
    
    Generated-by: Claude Code
    
    ---------
    
    Co-authored-by: Claude Opus 4.7 (1M context) <[email protected]>
---
 .../texera/amber/core/tuple/TupleUtilsSpec.scala   | 110 +++++++++++++++++++++
 1 file changed, 110 insertions(+)

diff --git a/common/workflow-core/src/test/scala/org/apache/texera/amber/core/tuple/TupleUtilsSpec.scala b/common/workflow-core/src/test/scala/org/apache/texera/amber/core/tuple/TupleUtilsSpec.scala
new file mode 100644
index 0000000000..3ac9a50ac1
--- /dev/null
+++ b/common/workflow-core/src/test/scala/org/apache/texera/amber/core/tuple/TupleUtilsSpec.scala
@@ -0,0 +1,110 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.texera.amber.core.tuple
+
+import org.scalatest.flatspec.AnyFlatSpec
+
+import scala.jdk.CollectionConverters._
+
+class TupleUtilsSpec extends AnyFlatSpec {
+
+  // --- tuple2json ------------------------------------------------------------
+
+  "TupleUtils.tuple2json" should "emit one JSON field per schema attribute, in the schema's declared order" in {
+    val schema = new Schema(
+      new Attribute("id", AttributeType.INTEGER),
+      new Attribute("name", AttributeType.STRING)
+    )
+    val node = TupleUtils.tuple2json(schema, Array[Any](Int.box(7), "alice"))
+    // Field iteration order on Jackson ObjectNode follows insertion order,
+    // which mirrors the schema's getAttributeNames order.
+    assert(node.fieldNames().asScala.toList == List("id", "name"))
+    assert(node.get("id").asInt() == 7)
+    assert(node.get("name").asText() == "alice")
+  }
+
+  it should "emit JSON null for null field values" in {
+    val schema = new Schema(new Attribute("v", AttributeType.STRING))
+    val node = TupleUtils.tuple2json(schema, Array[Any](null))
+    assert(node.get("v").isNull)
+  }
+
+  it should "respect schema.getIndex when fieldVals is laid out positionally" in {
+    // Re-ordering the schema must change which slot of fieldVals each
+    // attribute pulls from, because tuple2json indexes fieldVals via
+    // schema.getIndex(attrName).
+    val schema = new Schema(
+      new Attribute("b", AttributeType.STRING),
+      new Attribute("a", AttributeType.STRING)
+    )
+    val node = TupleUtils.tuple2json(schema, Array[Any]("first", "second"))
+    assert(node.get("b").asText() == "first")
+    assert(node.get("a").asText() == "second")
+  }
+
+  it should "produce an empty object for an empty schema" in {
+    val node = TupleUtils.tuple2json(new Schema(), Array.empty[Any])
+    assert(node.size() == 0)
+  }
+
+  // --- json2tuple ------------------------------------------------------------
+
+  "TupleUtils.json2tuple" should "infer a schema from a flat JSON object's keys and types" in {
+    val tuple = TupleUtils.json2tuple("""{"name": "bob", "age": 30}""")
+    val names = tuple.getSchema.getAttributeNames.toSet
+    assert(names == Set("name", "age"))
+    assert(tuple.getField[Any]("name") == "bob")
+    // age is parsed via inferSchemaFromRows; the inferred type for "30" is
+    // a numeric type — assert we can read the field rather than locking in
+    // the precise inferred AttributeType.
+    assert(tuple.getField[Any]("age").toString == "30")
+  }
+
+  it should "round-trip a schema-and-values through tuple2json → json2tuple" in {
+    val schema = new Schema(
+      new Attribute("city", AttributeType.STRING),
+      new Attribute("score", AttributeType.INTEGER)
+    )
+    val original = TupleUtils.tuple2json(schema, Array[Any]("Irvine", Int.box(42))).toString
+    val parsed = TupleUtils.json2tuple(original)
+    val reSerialized =
+      TupleUtils.tuple2json(parsed.getSchema, parsed.getFields.toArray.asInstanceOf[Array[Any]])
+    // The exact column order isn't part of the json2tuple contract (it builds
+    // schemaFieldNames from a Set), so compare by JSON-tree equality.
+    val mapper = org.apache.texera.amber.util.JSONUtils.objectMapper
+    assert(mapper.readTree(reSerialized.toString) == mapper.readTree(original))
+  }
+
+  it should "drop non-object roots (e.g. a JSON array) into an empty tuple" in {
+    // The implementation only collects fields when the root `isObject`. A
+    // non-object root leaves `fieldNames` empty, so the result is a tuple
+    // over an empty schema with no fields — observed contract is no-throw,
+    // empty result.
+    val tuple = TupleUtils.json2tuple("""[1, 2, 3]""")
+    assert(tuple.getSchema.getAttributes.isEmpty)
+    assert(tuple.getFields.isEmpty)
+  }
+
+  it should "throw when given malformed JSON" in {
+    intercept[Exception] {
+      TupleUtils.json2tuple("{ this is not json }")
+    }
+  }
+}

Reply via email to