This is an automated email from the ASF dual-hosted git repository.
linxinyuan pushed a commit to branch xinyuan-dataset-selector
in repository https://gitbox.apache.org/repos/asf/texera.git
The following commit(s) were added to refs/heads/xinyuan-dataset-selector by
this push:
new 79f48fa301 update
79f48fa301 is described below
commit 79f48fa301604199d6664209bb03e3016816f08f
Author: Xinyuan Lin <[email protected]>
AuthorDate: Mon Apr 13 16:25:29 2026 -0700
update
---
.../scala/org/apache/texera/amber/operator/LogicalOp.scala | 4 ++--
...ectorSourceOpDesc.scala => FileListerSourceOpDesc.scala} | 6 +++---
...ectorSourceOpExec.scala => FileListerSourceOpExec.scala} | 6 +++---
...rceOpDescSpec.scala => FileListerSourceOpDescSpec.scala} | 10 +++++-----
.../operator_images/{DatasetSelector.png => FileLister.png} | Bin
5 files changed, 13 insertions(+), 13 deletions(-)
diff --git
a/common/workflow-operator/src/main/scala/org/apache/texera/amber/operator/LogicalOp.scala
b/common/workflow-operator/src/main/scala/org/apache/texera/amber/operator/LogicalOp.scala
index 7ccbb073d6..3a0f0a5c4b 100644
---
a/common/workflow-operator/src/main/scala/org/apache/texera/amber/operator/LogicalOp.scala
+++
b/common/workflow-operator/src/main/scala/org/apache/texera/amber/operator/LogicalOp.scala
@@ -75,7 +75,7 @@ import
org.apache.texera.amber.operator.source.apis.twitter.v2.{
TwitterFullArchiveSearchSourceOpDesc,
TwitterSearchSourceOpDesc
}
-import
org.apache.texera.amber.operator.source.dataset.DatasetSelectorSourceOpDesc
+import org.apache.texera.amber.operator.source.dataset.FileListerSourceOpDesc
import org.apache.texera.amber.operator.source.fetcher.URLFetcherOpDesc
import org.apache.texera.amber.operator.source.scan.FileScanSourceOpDesc
import org.apache.texera.amber.operator.source.scan.arrow.ArrowSourceOpDesc
@@ -159,7 +159,7 @@ trait StateTransferFunc
new Type(value = classOf[IfOpDesc], name = "If"),
new Type(value = classOf[SankeyDiagramOpDesc], name = "SankeyDiagram"),
new Type(value = classOf[IcicleChartOpDesc], name = "IcicleChart"),
- new Type(value = classOf[DatasetSelectorSourceOpDesc], name =
"DatasetSelector"),
+ new Type(value = classOf[FileListerSourceOpDesc], name = "FileLister"),
new Type(value = classOf[CSVScanSourceOpDesc], name = "CSVFileScan"),
// disabled the ParallelCSVScanSourceOpDesc so that it does not confuse
user. it can be re-enabled when doing experiments.
// new Type(value = classOf[ParallelCSVScanSourceOpDesc], name =
"ParallelCSVFileScan"),
diff --git
a/common/workflow-operator/src/main/scala/org/apache/texera/amber/operator/source/dataset/DatasetSelectorSourceOpDesc.scala
b/common/workflow-operator/src/main/scala/org/apache/texera/amber/operator/source/dataset/FileListerSourceOpDesc.scala
similarity index 93%
rename from
common/workflow-operator/src/main/scala/org/apache/texera/amber/operator/source/dataset/DatasetSelectorSourceOpDesc.scala
rename to
common/workflow-operator/src/main/scala/org/apache/texera/amber/operator/source/dataset/FileListerSourceOpDesc.scala
index 386d26e1e0..1101dce7e0 100644
---
a/common/workflow-operator/src/main/scala/org/apache/texera/amber/operator/source/dataset/DatasetSelectorSourceOpDesc.scala
+++
b/common/workflow-operator/src/main/scala/org/apache/texera/amber/operator/source/dataset/FileListerSourceOpDesc.scala
@@ -29,7 +29,7 @@ import org.apache.texera.amber.operator.LogicalOp
import org.apache.texera.amber.operator.metadata.{OperatorGroupConstants,
OperatorInfo}
import org.apache.texera.amber.util.JSONUtils.objectMapper
-class DatasetSelectorSourceOpDesc extends LogicalOp {
+class FileListerSourceOpDesc extends LogicalOp {
@JsonProperty(required = true)
@JsonSchemaTitle("Dataset")
@@ -45,7 +45,7 @@ class DatasetSelectorSourceOpDesc extends LogicalOp {
executionId,
operatorIdentifier,
OpExecWithClassName(
-
"org.apache.texera.amber.operator.source.dataset.DatasetSelectorSourceOpExec",
+
"org.apache.texera.amber.operator.source.dataset.FileListerSourceOpExec",
objectMapper.writeValueAsString(this)
)
)
@@ -59,7 +59,7 @@ class DatasetSelectorSourceOpDesc extends LogicalOp {
override def operatorInfo: OperatorInfo =
OperatorInfo(
- userFriendlyName = "Dataset Selector",
+ userFriendlyName = "File Lister",
operatorDescription = "Select a dataset version and output one filename
tuple per file",
operatorGroupName = OperatorGroupConstants.INPUT_GROUP,
inputPorts = List.empty,
diff --git
a/common/workflow-operator/src/main/scala/org/apache/texera/amber/operator/source/dataset/DatasetSelectorSourceOpExec.scala
b/common/workflow-operator/src/main/scala/org/apache/texera/amber/operator/source/dataset/FileListerSourceOpExec.scala
similarity index 91%
rename from
common/workflow-operator/src/main/scala/org/apache/texera/amber/operator/source/dataset/DatasetSelectorSourceOpExec.scala
rename to
common/workflow-operator/src/main/scala/org/apache/texera/amber/operator/source/dataset/FileListerSourceOpExec.scala
index 6a53da4767..f715a0943f 100644
---
a/common/workflow-operator/src/main/scala/org/apache/texera/amber/operator/source/dataset/DatasetSelectorSourceOpExec.scala
+++
b/common/workflow-operator/src/main/scala/org/apache/texera/amber/operator/source/dataset/FileListerSourceOpExec.scala
@@ -28,10 +28,10 @@ import
org.apache.texera.dao.jooq.generated.tables.Dataset.DATASET
import
org.apache.texera.dao.jooq.generated.tables.DatasetVersion.DATASET_VERSION
import org.apache.texera.dao.jooq.generated.tables.User.USER
-class DatasetSelectorSourceOpExec private[dataset] (descString: String)
+class FileListerSourceOpExec private[dataset] (descString: String)
extends SourceOperatorExecutor {
- private val desc: DatasetSelectorSourceOpDesc =
- objectMapper.readValue(descString, classOf[DatasetSelectorSourceOpDesc])
+ private val desc: FileListerSourceOpDesc =
+ objectMapper.readValue(descString, classOf[FileListerSourceOpDesc])
override def produceTuple(): Iterator[TupleLike] = {
val Seq(_, ownerEmail, datasetName, versionName) =
diff --git
a/common/workflow-operator/src/test/scala/org/apache/texera/amber/operator/source/dataset/DatasetSelectorSourceOpDescSpec.scala
b/common/workflow-operator/src/test/scala/org/apache/texera/amber/operator/source/dataset/FileListerSourceOpDescSpec.scala
similarity index 81%
rename from
common/workflow-operator/src/test/scala/org/apache/texera/amber/operator/source/dataset/DatasetSelectorSourceOpDescSpec.scala
rename to
common/workflow-operator/src/test/scala/org/apache/texera/amber/operator/source/dataset/FileListerSourceOpDescSpec.scala
index 5ef53fb8d6..a5aa744ff5 100644
---
a/common/workflow-operator/src/test/scala/org/apache/texera/amber/operator/source/dataset/DatasetSelectorSourceOpDescSpec.scala
+++
b/common/workflow-operator/src/test/scala/org/apache/texera/amber/operator/source/dataset/FileListerSourceOpDescSpec.scala
@@ -22,10 +22,10 @@ package org.apache.texera.amber.operator.source.dataset
import org.apache.texera.amber.core.tuple.AttributeType
import org.scalatest.flatspec.AnyFlatSpec
-class DatasetSelectorSourceOpDescSpec extends AnyFlatSpec {
+class FileListerSourceOpDescSpec extends AnyFlatSpec {
- "DatasetSelectorSourceOpDesc" should "expose a filename output column" in {
- val opDesc = new DatasetSelectorSourceOpDesc()
+ "FileListerSourceOpDesc" should "expose a filename output column" in {
+ val opDesc = new FileListerSourceOpDesc()
val outputSchema = opDesc.getExternalOutputSchemas(Map.empty).values.head
@@ -34,9 +34,9 @@ class DatasetSelectorSourceOpDescSpec extends AnyFlatSpec {
}
it should "use the expected operator metadata" in {
- val opDesc = new DatasetSelectorSourceOpDesc()
+ val opDesc = new FileListerSourceOpDesc()
- assert(opDesc.operatorInfo.userFriendlyName == "Dataset Selector")
+ assert(opDesc.operatorInfo.userFriendlyName == "File Lister")
assert(opDesc.operatorInfo.inputPorts.isEmpty)
assert(opDesc.operatorInfo.outputPorts.length == 1)
}
diff --git a/frontend/src/assets/operator_images/DatasetSelector.png
b/frontend/src/assets/operator_images/FileLister.png
similarity index 100%
rename from frontend/src/assets/operator_images/DatasetSelector.png
rename to frontend/src/assets/operator_images/FileLister.png