This is an automated email from the ASF dual-hosted git repository.
aglinxinyuan pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/texera.git
The following commit(s) were added to refs/heads/main by this push:
new 1c6021c543 test(workflow-core): add unit test coverage for TupleUtils
(#4910)
1c6021c543 is described below
commit 1c6021c543669271f8d441354db6a830ed0446bc
Author: Xinyuan Lin <[email protected]>
AuthorDate: Mon May 4 01:09:58 2026 -0700
test(workflow-core): add unit test coverage for TupleUtils (#4910)
### What changes were proposed in this PR?
Adds `TupleUtilsSpec` covering `TupleUtils`
(common/workflow-core/src/main/scala/org/apache/texera/amber/core/tuple/TupleUtils.scala).
Today `tuple2json` / `json2tuple` are exercised only by a single
round-trip inside `TupleSpec`; this PR adds dedicated edge-case
coverage.
The new spec pins:
- `tuple2json`: emits one JSON field per schema attribute in the
schema's declared order; `fieldVals` are indexed via
`schema.getIndex(name)` so a reordered schema selects different slots;
null field values become JSON null; an empty schema yields an empty JSON
object.
- `json2tuple`: schema is inferred from a flat JSON object's keys +
types; values round-trip with `tuple2json`; non-object roots (e.g. JSON
arrays) yield an empty tuple rather than silently succeeding with a
populated one; malformed JSON throws.
No production code changed; this is test-only.
### Any related issues, documentation, discussions?
Closes #4909
### How was this PR tested?
Added 8 new unit tests in `TupleUtilsSpec`. Verified locally:
```
sbt 'WorkflowCore/Test/testOnly org.apache.texera.amber.core.tuple.TupleUtilsSpec'
# → Tests: succeeded 8, failed 0
sbt 'WorkflowCore/Test/scalafmtCheck'
# → clean
```
### Was this PR authored or co-authored using generative AI tooling?
Generated-by: Claude Code
---------
Co-authored-by: Claude Opus 4.7 (1M context) <[email protected]>
---
.../texera/amber/core/tuple/TupleUtilsSpec.scala | 110 +++++++++++++++++++++
1 file changed, 110 insertions(+)
diff --git a/common/workflow-core/src/test/scala/org/apache/texera/amber/core/tuple/TupleUtilsSpec.scala b/common/workflow-core/src/test/scala/org/apache/texera/amber/core/tuple/TupleUtilsSpec.scala
new file mode 100644
index 0000000000..3ac9a50ac1
--- /dev/null
+++ b/common/workflow-core/src/test/scala/org/apache/texera/amber/core/tuple/TupleUtilsSpec.scala
@@ -0,0 +1,110 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.texera.amber.core.tuple
+
+import org.scalatest.flatspec.AnyFlatSpec
+
+import scala.jdk.CollectionConverters._
+
+class TupleUtilsSpec extends AnyFlatSpec {
+
+  // --- tuple2json ------------------------------------------------------------
+
+  "TupleUtils.tuple2json" should "emit one JSON field per schema attribute, in the schema's declared order" in {
+    val schema = new Schema(
+      new Attribute("id", AttributeType.INTEGER),
+      new Attribute("name", AttributeType.STRING)
+    )
+    val node = TupleUtils.tuple2json(schema, Array[Any](Int.box(7), "alice"))
+    // Field iteration order on Jackson ObjectNode follows insertion order,
+    // which mirrors the schema's getAttributeNames order.
+    assert(node.fieldNames().asScala.toList == List("id", "name"))
+    assert(node.get("id").asInt() == 7)
+    assert(node.get("name").asText() == "alice")
+  }
+
+  it should "emit JSON null for null field values" in {
+    val schema = new Schema(new Attribute("v", AttributeType.STRING))
+    val node = TupleUtils.tuple2json(schema, Array[Any](null))
+    assert(node.get("v").isNull)
+  }
+
+  it should "respect schema.getIndex when fieldVals is laid out positionally" in {
+    // Re-ordering the schema must change which slot of fieldVals each
+    // attribute pulls from, because tuple2json indexes fieldVals via
+    // schema.getIndex(attrName).
+    val schema = new Schema(
+      new Attribute("b", AttributeType.STRING),
+      new Attribute("a", AttributeType.STRING)
+    )
+    val node = TupleUtils.tuple2json(schema, Array[Any]("first", "second"))
+    assert(node.get("b").asText() == "first")
+    assert(node.get("a").asText() == "second")
+  }
+
+  it should "produce an empty object for an empty schema" in {
+    val node = TupleUtils.tuple2json(new Schema(), Array.empty[Any])
+    assert(node.size() == 0)
+  }
+
+  // --- json2tuple ------------------------------------------------------------
+
+  "TupleUtils.json2tuple" should "infer a schema from a flat JSON object's keys and types" in {
+    val tuple = TupleUtils.json2tuple("""{"name": "bob", "age": 30}""")
+    val names = tuple.getSchema.getAttributeNames.toSet
+    assert(names == Set("name", "age"))
+    assert(tuple.getField[Any]("name") == "bob")
+    // age is parsed via inferSchemaFromRows; the inferred type for "30" is
+    // a numeric type — assert we can read the field rather than locking in
+    // the precise inferred AttributeType.
+    assert(tuple.getField[Any]("age").toString == "30")
+  }
+
+  it should "round-trip a schema-and-values through tuple2json → json2tuple" in {
+    val schema = new Schema(
+      new Attribute("city", AttributeType.STRING),
+      new Attribute("score", AttributeType.INTEGER)
+    )
+    val original = TupleUtils.tuple2json(schema, Array[Any]("Irvine", Int.box(42))).toString
+    val parsed = TupleUtils.json2tuple(original)
+    val reSerialized =
+      TupleUtils.tuple2json(parsed.getSchema, parsed.getFields.toArray.asInstanceOf[Array[Any]])
+    // The exact column order isn't part of the json2tuple contract (it builds
+    // schemaFieldNames from a Set), so compare by JSON-tree equality.
+    val mapper = org.apache.texera.amber.util.JSONUtils.objectMapper
+    assert(mapper.readTree(reSerialized.toString) == mapper.readTree(original))
+  }
+
+  it should "drop non-object roots (e.g. a JSON array) into an empty tuple" in {
+    // The implementation only collects fields when the root `isObject`. A
+    // non-object root leaves `fieldNames` empty, so the result is a tuple
+    // over an empty schema with no fields — observed contract is no-throw,
+    // empty result.
+    val tuple = TupleUtils.json2tuple("""[1, 2, 3]""")
+    assert(tuple.getSchema.getAttributes.isEmpty)
+    assert(tuple.getFields.isEmpty)
+  }
+
+  it should "throw when given malformed JSON" in {
+    intercept[Exception] {
+      TupleUtils.json2tuple("{ this is not json }")
+    }
+  }
+}