mengw15 commented on code in PR #4375:
URL: https://github.com/apache/texera/pull/4375#discussion_r3077121609
##########
common/workflow-operator/src/test/scala/org/apache/texera/amber/operator/source/scan/csv/CSVScanSourceOpDescSpec.scala:
##########
@@ -128,4 +128,27 @@ class CSVScanSourceOpDescSpec extends AnyFlatSpec with
BeforeAndAfter {
)
}
+ it should "use comma as the default delimiter when customDelimiter is not
set for parallel CSV" in {
Review Comment:
Please add symmetric tests for CSVScanSourceOpDesc as well
##########
common/workflow-operator/src/test/scala/org/apache/texera/amber/operator/source/scan/csv/CSVScanSourceOpDescSpec.scala:
##########
@@ -128,4 +128,27 @@ class CSVScanSourceOpDescSpec extends AnyFlatSpec with
BeforeAndAfter {
)
}
+ it should "use comma as the default delimiter when customDelimiter is not
set for parallel CSV" in {
+
+ // When no customDelimiter is provided (None), getPhysicalOp must not
throw a
+ // NoSuchElementException. It should fall back to the default comma
delimiter.
+ parallelCsvScanSourceOpDesc.customDelimiter = None
+
+ // Should not throw
+ parallelCsvScanSourceOpDesc.getPhysicalOp(DEFAULT_WORKFLOW_ID,
DEFAULT_EXECUTION_ID)
+
+ // Default delimiter should have been applied
+ assert(parallelCsvScanSourceOpDesc.customDelimiter.contains(","))
+ }
+
+ it should "use comma as the default delimiter when customDelimiter is empty
string for parallel CSV" in {
+
+ // An explicitly empty delimiter should also fall back to the default
comma.
Review Comment:
same here, please remove the redundant comment here
##########
common/workflow-operator/src/test/scala/org/apache/texera/amber/operator/source/scan/csv/CSVScanSourceOpDescSpec.scala:
##########
@@ -128,4 +128,27 @@ class CSVScanSourceOpDescSpec extends AnyFlatSpec with
BeforeAndAfter {
)
}
+ it should "use comma as the default delimiter when customDelimiter is not
set for parallel CSV" in {
+
+ // When no customDelimiter is provided (None), getPhysicalOp must not
throw a
+ // NoSuchElementException. It should fall back to the default comma
delimiter.
+ parallelCsvScanSourceOpDesc.customDelimiter = None
+
+ // Should not throw
Review Comment:
Please either remove this comment or replace it with an assertion.
##########
common/workflow-operator/src/test/scala/org/apache/texera/amber/operator/source/scan/csv/CSVScanSourceOpDescSpec.scala:
##########
@@ -128,4 +128,27 @@ class CSVScanSourceOpDescSpec extends AnyFlatSpec with
BeforeAndAfter {
)
}
+ it should "use comma as the default delimiter when customDelimiter is not
set for parallel CSV" in {
+
+ // When no customDelimiter is provided (None), getPhysicalOp must not
throw a
Review Comment:
The test is already called "use comma as the default delimiter when
customDelimiter is not set for parallel CSV", which states the behavior under
test. I would suggest remove the redundant comments here
##########
common/workflow-operator/src/test/scala/org/apache/texera/amber/operator/source/scan/csv/CSVScanSourceOpDescSpec.scala:
##########
@@ -128,4 +128,27 @@ class CSVScanSourceOpDescSpec extends AnyFlatSpec with
BeforeAndAfter {
)
}
+ it should "use comma as the default delimiter when customDelimiter is not
set for parallel CSV" in {
+
+ // When no customDelimiter is provided (None), getPhysicalOp must not
throw a
+ // NoSuchElementException. It should fall back to the default comma
delimiter.
+ parallelCsvScanSourceOpDesc.customDelimiter = None
+
+ // Should not throw
+ parallelCsvScanSourceOpDesc.getPhysicalOp(DEFAULT_WORKFLOW_ID,
DEFAULT_EXECUTION_ID)
+
+ // Default delimiter should have been applied
Review Comment:
ditto
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]