(texera) branch xinyuan-input-source-operator updated: add test cases

linxinyuan Sun, 12 Apr 2026 18:08:20 -0700

This is an automated email from the ASF dual-hosted git repository.

linxinyuan pushed a commit to branch xinyuan-input-source-operator
in repository https://gitbox.apache.org/repos/asf/texera.git



The following commit(s) were added to refs/heads/xinyuan-input-source-operator 
by this push:
     new 2931a6ade0 add test cases
2931a6ade0 is described below

commit 2931a6ade0ff24db860385353a171171aa1c2451
Author: Xinyuan Lin <[email protected]>
AuthorDate: Sun Apr 12 18:07:18 2026 -0700

    add test cases
---
 .../scan/file/InputFileScanSourceOpDescSpec.scala  | 72 ++++++++++++++++++++++
 1 file changed, 72 insertions(+)

diff --git a/common/workflow-operator/src/test/scala/org/apache/texera/amber/operator/source/scan/file/InputFileScanSourceOpDescSpec.scala b/common/workflow-operator/src/test/scala/org/apache/texera/amber/operator/source/scan/file/InputFileScanSourceOpDescSpec.scala
new file mode 100644
index 0000000000..596af85bb4
--- /dev/null
+++ b/common/workflow-operator/src/test/scala/org/apache/texera/amber/operator/source/scan/file/InputFileScanSourceOpDescSpec.scala
@@ -0,0 +1,72 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.texera.amber.operator.source.scan.file
+
+import org.apache.texera.amber.core.tuple.{Attribute, AttributeType, Schema, SchemaEnforceable, Tuple}
+import org.apache.texera.amber.operator.TestOperators
+import org.apache.texera.amber.operator.source.scan.{FileAttributeType, FileDecodingMethod}
+import org.apache.texera.amber.util.JSONUtils.objectMapper
+import org.scalatest.BeforeAndAfter
+import org.scalatest.flatspec.AnyFlatSpec
+
+/**
+ * Unit tests for [[InputFileScanSourceOpDesc]] and [[InputFileScanSourceOpExec]].
+ *
+ * A fresh descriptor with UTF-8 file encoding is created before every test, so
+ * settings applied inside one test never leak into another.
+ */
+class InputFileScanSourceOpDescSpec extends AnyFlatSpec with BeforeAndAfter {
+
+  // Schema of the input tuple whose single STRING field carries the path of the file to scan.
+  private val inputSchema = new Schema(new Attribute("filename", AttributeType.STRING))
+
+  // Reassigned in `before`; a var is required because the fixture is rebuilt for each test.
+  var inputFileScanSourceOpDesc: InputFileScanSourceOpDesc = _
+
+  before {
+    inputFileScanSourceOpDesc = new InputFileScanSourceOpDesc()
+    inputFileScanSourceOpDesc.fileEncoding = FileDecodingMethod.UTF_8
+  }
+
+  it should "infer schema with single column representing each line of text" in {
+    val inferredSchema: Schema = inputFileScanSourceOpDesc.sourceSchema()
+
+    assert(inferredSchema.getAttributes.length == 1)
+    assert(inferredSchema.getAttribute("line").getType == AttributeType.STRING)
+  }
+
+  it should "read first 5 lines from the input file path tuple into output tuples" in {
+    inputFileScanSourceOpDesc.attributeType = FileAttributeType.STRING
+    inputFileScanSourceOpDesc.fileScanLimit = Option(5)
+
+    val inputTuple = Tuple(inputSchema, Array[Any](TestOperators.TestTextFilePath))
+    // The executor is constructed from the JSON-serialized form of the descriptor.
+    val inputFileScanSourceOpExec =
+      new InputFileScanSourceOpExec(objectMapper.writeValueAsString(inputFileScanSourceOpDesc))
+
+    inputFileScanSourceOpExec.open()
+    // try/finally guarantees the executor is closed even when an assertion below fails;
+    // otherwise a failing assertion would skip close() and leave the executor open.
+    try {
+      val processedTuple: Iterator[Tuple] = inputFileScanSourceOpExec
+        .processTuple(inputTuple, 0)
+        .map(tupleLike =>
+          tupleLike
+            .asInstanceOf[SchemaEnforceable]
+            .enforceSchema(inputFileScanSourceOpDesc.sourceSchema())
+        )
+
+      assert(processedTuple.next().getField("line").equals("line1"))
+      assert(processedTuple.next().getField("line").equals("line2"))
+      assert(processedTuple.next().getField("line").equals("line3"))
+      assert(processedTuple.next().getField("line").equals("line4"))
+      assert(processedTuple.next().getField("line").equals("line5"))
+      // With the scan limit set to 5, a sixth element must not exist.
+      assertThrows[java.util.NoSuchElementException](processedTuple.next().getField("line"))
+    } finally {
+      inputFileScanSourceOpExec.close()
+    }
+  }
+}

(texera) branch xinyuan-input-source-operator updated: add test cases

Reply via email to