This is an automated email from the ASF dual-hosted git repository.
linxinyuan pushed a commit to branch xinyuan-input-source-operator
in repository https://gitbox.apache.org/repos/asf/texera.git
The following commit(s) were added to refs/heads/xinyuan-input-source-operator
by this push:
new 2931a6ade0 add test cases
2931a6ade0 is described below
commit 2931a6ade0ff24db860385353a171171aa1c2451
Author: Xinyuan Lin <[email protected]>
AuthorDate: Sun Apr 12 18:07:18 2026 -0700
add test cases
---
.../scan/file/InputFileScanSourceOpDescSpec.scala | 72 ++++++++++++++++++++++
1 file changed, 72 insertions(+)
diff --git a/common/workflow-operator/src/test/scala/org/apache/texera/amber/operator/source/scan/file/InputFileScanSourceOpDescSpec.scala b/common/workflow-operator/src/test/scala/org/apache/texera/amber/operator/source/scan/file/InputFileScanSourceOpDescSpec.scala
new file mode 100644
index 0000000000..596af85bb4
--- /dev/null
+++ b/common/workflow-operator/src/test/scala/org/apache/texera/amber/operator/source/scan/file/InputFileScanSourceOpDescSpec.scala
@@ -0,0 +1,72 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.texera.amber.operator.source.scan.file
+
+import org.apache.texera.amber.core.tuple.{Attribute, AttributeType, Schema,
SchemaEnforceable, Tuple}
+import org.apache.texera.amber.operator.TestOperators
+import org.apache.texera.amber.operator.source.scan.{FileAttributeType,
FileDecodingMethod}
+import org.apache.texera.amber.util.JSONUtils.objectMapper
+import org.scalatest.BeforeAndAfter
+import org.scalatest.flatspec.AnyFlatSpec
+
+/**
+ * Unit tests for [[InputFileScanSourceOpDesc]] and for the [[InputFileScanSourceOpExec]]
+ * that is built from the descriptor's JSON-serialized form.
+ *
+ * Two behaviors are covered: the schema reported by `sourceSchema()`, and a scan limited
+ * to five lines of the file whose path is supplied through an input tuple.
+ */
+class InputFileScanSourceOpDescSpec extends AnyFlatSpec with BeforeAndAfter {
+
+  // Schema of the incoming tuple: a single STRING attribute named "filename".
+  private val inputSchema = new Schema(new Attribute("filename", AttributeType.STRING))
+
+  // Reassigned in `before`, so each test starts from a fresh descriptor and per-test
+  // settings (attribute type, scan limit) cannot leak into another test.
+  var inputFileScanSourceOpDesc: InputFileScanSourceOpDesc = _
+
+  before {
+    inputFileScanSourceOpDesc = new InputFileScanSourceOpDesc()
+    inputFileScanSourceOpDesc.fileEncoding = FileDecodingMethod.UTF_8
+  }
+
+  it should "infer schema with single column representing each line of text" in {
+    val inferredSchema: Schema = inputFileScanSourceOpDesc.sourceSchema()
+
+    assert(inferredSchema.getAttributes.length == 1)
+    assert(inferredSchema.getAttribute("line").getType == AttributeType.STRING)
+  }
+
+  it should "read first 5 lines from the input file path tuple into output tuples" in {
+    inputFileScanSourceOpDesc.attributeType = FileAttributeType.STRING
+    inputFileScanSourceOpDesc.fileScanLimit = Option(5)
+
+    val inputTuple = Tuple(inputSchema, Array[Any](TestOperators.TestTextFilePath))
+    // The executor is constructed from the descriptor's JSON form, not the object itself.
+    val inputFileScanSourceOpExec =
+      new InputFileScanSourceOpExec(objectMapper.writeValueAsString(inputFileScanSourceOpDesc))
+
+    inputFileScanSourceOpExec.open()
+    // Guarantee close() runs even when an assertion below fails; otherwise a failing
+    // test would leave the opened executor unclosed.
+    try {
+      val processedTuple: Iterator[Tuple] = inputFileScanSourceOpExec
+        .processTuple(inputTuple, 0)
+        .map(tupleLike =>
+          tupleLike
+            .asInstanceOf[SchemaEnforceable]
+            .enforceSchema(inputFileScanSourceOpDesc.sourceSchema())
+        )
+
+      // The first five output tuples must carry the expected lines, in order.
+      Seq("line1", "line2", "line3", "line4", "line5").foreach { expectedLine =>
+        assert(processedTuple.next().getField("line").equals(expectedLine))
+      }
+
+      // With the scan limit set to 5, asking for a sixth tuple must fail.
+      assertThrows[java.util.NoSuchElementException](processedTuple.next().getField("line"))
+    } finally {
+      inputFileScanSourceOpExec.close()
+    }
+  }
+}