stevedlawrence commented on code in PR #1511:
URL: https://github.com/apache/daffodil/pull/1511#discussion_r2230885396
##########
daffodil-cli/src/test/scala/org/apache/daffodil/cli/cliTest/Util.scala:
##########
@@ -65,7 +66,9 @@ object Util {
val scala2Regex = "org\\.scala-lang\\.scala-library-([0-9.]+)\\.jar".r
val versionRegex = ".*-([0-9.]+)\\.jar".r
- val jarFiles =
Paths.get("daffodil-cli/target/universal/stage/lib").toFile.listFiles().toSeq
+ val jarFiles = ArraySeq.unsafeWrapArray(
+ Paths.get("daffodil-cli/target/universal/stage/lib").toFile.listFiles()
+ )
Review Comment:
I would say it's fine to use .toSeq in test code where performance isn't
critical. Readability is probably more important in cases like this, especially
if it avoids an unsafe wrap.
##########
daffodil-core/src/main/scala/org/apache/daffodil/core/dsom/SchemaComponent.scala:
##########
@@ -74,7 +75,9 @@ trait SchemaComponent
val optAttr =
xml.attribute(XMLUtils.EXT_NS_APACHE,
"suppressSchemaDefinitionWarnings").map { _.text }
val warnStrs: Seq[String] =
- optAttr.map { _.trim.split("\\s+").toSeq }.getOrElse { Seq.empty }
+ optAttr.map { w => ArraySeq.unsafeWrapArray(w.trim.split("\\s+"))
}.getOrElse {
+ Seq.empty
+ }
Review Comment:
Previous is probably preferred, not performance critical.
##########
daffodil-codegen-c/src/main/scala/org/apache/daffodil/codegen/c/DaffodilCCodeGenerator.scala:
##########
@@ -217,7 +218,7 @@ class DaffodilCCodeGenerator(root: Root) extends
api.CodeGenerator {
}
val command = commands.find(inPath)
if (command.isDefined)
- command.get.split(' ').toSeq
+ ArraySeq.unsafeWrapArray(command.get.split(' '))
Review Comment:
toSeq is probably preferred here. There's is not performance critical.
##########
daffodil-core/src/main/scala/org/apache/daffodil/runtime1/infoset/InfosetImpl.scala:
##########
@@ -1702,17 +1702,15 @@ sealed class DIComplex(override val erd:
ElementRuntimeData)
getChild(erd.dpathElementCompileInfo.namedQName, tunable)
}
- private def noQuerySupportCheck(nodes: Seq[DINode], nqn: NamedQName): Unit =
{
- if (nodes.length > 1) {
+ private def noQuerySupportCheck(nodes: Iterable[DINode], nqn: NamedQName):
Unit = {
+ if (nodes.size > 1) {
// might be more than one result
// but we have to rule out there being an empty DIArray
- val withoutEmptyArrays = nodes.filter { node =>
- node match {
- case a: DIArray if a.length == 0 => false
- case _ => true
- }
+ val withoutEmptyArrays = nodes.filter {
Review Comment:
This is old code, but this code is really just counting so `filter` is
adding unnecessary overhead since it will create a new list. Since this section
is already being changed, I think we can change the filter to count, e.g.
```scala
val nonEmptyNodesCount = nodes.count {
case a: DIArray if a.length == 0 => false
case _ => true
}
if (nonEmptyNodesCount > 1) ...
```
##########
daffodil-core/src/main/scala/org/apache/daffodil/runtime1/layers/LayerRuntimeCompiler.scala:
##########
@@ -75,10 +76,10 @@ class LayerRuntimeCompiler {
val allMethods = c.getMethods
val optParamSetter = allMethods.find { _.getName == varParamSetter }
val allVarResultGetters =
- ListSet(allMethods.filter { m =>
+ ListSet(ArraySeq.unsafeWrapArray(allMethods.filter { m =>
Review Comment:
toSeq is fine, not performance critical and harder to read without.
##########
daffodil-tdml-lib/src/main/scala/org/apache/daffodil/tdml/TDMLRunner.scala:
##########
@@ -422,12 +422,12 @@ class DFDLTestSuite private[tdml] (
val str = (ts \ "@defaultConfig").text
str
}
- lazy val defaultImplementations = {
+ lazy val defaultImplementations: Array[String] = {
val str = (ts \ "@defaultImplementations").text
if (str == "") defaultImplementationsDefault
else {
// parse the str to get a list of strings
- str.split("""\s+""").toSeq
Review Comment:
.toSeq is fine here for readability.
##########
daffodil-core/src/main/scala/org/apache/daffodil/runtime1/udf/UserDefinedFunctionService.scala:
##########
@@ -135,9 +135,9 @@ object UserDefinedFunctionService {
Logger.log.warn(
s"User Defined Function Provider ignored:
${provider.getClass.getName}. No User Defined Functions found."
)
- Seq()
+ Array.empty[Class[_]]
} else {
- functionClasses.toSeq
Review Comment:
.toSeq is probably fine here. Not performance critical and more clear.
##########
daffodil-core/src/main/scala/org/apache/daffodil/runtime1/infoset/InfosetImpl.scala:
##########
@@ -1892,12 +1890,12 @@ sealed class DIComplex(override val erd:
ElementRuntimeData)
* @return
*/
def findChild(qname: NamedQName, enableLinearSearchIfNotFound: Boolean):
Maybe[DINode] = {
- val fastSeq = nameToChildNodeLookup.get(qname)
- if (fastSeq != null) {
+ val fastBuf = nameToChildNodeLookup.get(qname)
+ if (fastBuf != null) {
// Daffodil does not support query expressions yet, so there should only
// be one item in the list
- noQuerySupportCheck(fastSeq.toSeq, qname)
Review Comment:
This is a good change. This section is all about being as fast as possible
during runtime, so avoiding the toSeq here might have some value and doesn't
hurt readability.
##########
daffodil-codegen-c/src/main/scala/org/apache/daffodil/codegen/c/DaffodilCCodeGenerator.scala:
##########
@@ -124,7 +125,7 @@ class DaffodilCCodeGenerator(root: Root) extends
api.CodeGenerator {
val cgState = new CodeGeneratorState(root)
DaffodilCCodeGenerator.generateCode(root.document, cgState)
diagnostics = new java.util.LinkedList[api.Diagnostic](diagnostics)
Review Comment:
Mentioned in another PR, but I'm not sure LinkedList the best choice for
diagnostics due to it's mutability. I think maybe the best approach might be to
use Seq internally for diagnostics (which gives us constant prepend and
immutability), and only when API users call getDiagnostics do we call .asJava
to wrap it with a more Java-friendly java.util.List.
Then this code would avoid asJava completely and just do a normal Seq
prepend.
##########
daffodil-core/src/main/scala/org/apache/daffodil/lib/util/SimpleNamedServiceLoader.scala:
##########
@@ -66,19 +67,16 @@ object SimpleNamedServiceLoader {
}
}
}
- val instancesFound: Map[String, Seq[T]] = instanceBuf.toSeq.groupBy {
_.name() }
- val instanceMap: java.util.Map[String, T] = instancesFound.toSeq
- .flatMap {
- case (name, Seq(lc)) => Some((name, lc))
- case (name, seq) => {
- Logger.log.warn(
- s"Duplicate classes for $thingName found. Ignored: ${seq.map {
Misc.getNameFromClass(_) }.mkString(", ")}."
- )
- None
- }
+ val instancesFound: Map[String, mutable.Buffer[T]] = instanceBuf.groupBy {
_.name() }
Review Comment:
I think .toSeq is fine here. This isn't performance critical. It's also
kindof nice since it switches to immutable data structure once we have found
all the instances.
##########
daffodil-tdml-lib/src/main/scala/org/apache/daffodil/tdml/RunnerFactory.scala:
##########
@@ -50,8 +49,7 @@ object Runner {
compileAllTopLevel: Boolean = false,
defaultRoundTripDefault: RoundTrip = defaultRoundTripDefaultDefault,
defaultValidationDefault: String = defaultValidationDefaultDefault,
- defaultImplementationsDefault: Seq[String] =
- ArraySeq.unsafeWrapArray(defaultImplementationsDefaultDefault)
+ defaultImplementationsDefault: Array[String] =
defaultImplementationsDefaultDefault
Review Comment:
This isn't performance critical, I'd suggest we change this back to a
Seq[String], and change defaultImmplementationsDefaultDefault to do something
like:
```scala
def defaultImplementationsDefaultDefault =
TDMLImplementation.values.map(_.toString).toSeq
```
The .toSeq is not a big deal in this case and it avoids the unsafewraparray.
##########
daffodil-core/src/main/scala/org/apache/daffodil/runtime1/infoset/InfosetImpl.scala:
##########
@@ -1702,17 +1702,15 @@ sealed class DIComplex(override val erd:
ElementRuntimeData)
getChild(erd.dpathElementCompileInfo.namedQName, tunable)
}
- private def noQuerySupportCheck(nodes: Seq[DINode], nqn: NamedQName): Unit =
{
- if (nodes.length > 1) {
+ private def noQuerySupportCheck(nodes: Iterable[DINode], nqn: NamedQName):
Unit = {
Review Comment:
Suggest we make this `ArrayBuffer[DINode]`. The fast lookup stuff
intentionally uses ArrayBuffers for performance, so let's be explicit about
this to avoid ever accidentally adding back a .toSeq.
##########
daffodil-tdml-processor/src/main/scala/org/apache/daffodil/processor/tdml/DaffodilTDMLDFDLProcessor.scala:
##########
@@ -414,21 +418,21 @@ class DaffodilTDMLDFDLProcessor private (private var dp:
api.DataProcessor)
}
private def verifySameDiagnostics(
- seqDiagExpected: Seq[String],
- seqDiagActual: Seq[String]
+ expected: java.util.List[String],
+ actual: java.util.List[String]
Review Comment:
Suggest we stick with Seq. More readable, and this is already going to be
slow so not a big deal.
##########
daffodil-tdml-processor/src/main/scala/org/apache/daffodil/processor/tdml/DaffodilTDMLDFDLProcessor.scala:
##########
@@ -319,8 +319,10 @@ class DaffodilTDMLDFDLProcessor private (private var dp:
api.DataProcessor)
if (!actual.isError && !errorHandler.isError) {
verifySameParseOutput(outputter.xmlStream, saxOutputStream)
}
- val dpParseDiag =
actual.getDiagnostics.asScala.map(_.toString()).toSeq
- val saxParseDiag =
errorHandler.getDiagnostics.asScala.map(_.toString()).toSeq
+ val dpParseDiag = new java.util.ArrayList[String]
+ actual.getDiagnostics.forEach(d => dpParseDiag.add(d.toString))
+ val saxParseDiag = new java.util.ArrayList[String]
+ errorHandler.getDiagnostics.forEach(d =>
saxParseDiag.add(d.toString))
Review Comment:
I think the previous code is fine and more readable here. This code is only
used for regression testing when DAFFODIL_TDML_API_INFOSETS is "all". When that
is enabled, we're going to parse with all infosets which will have a lots of
extra overhead, so this little bit of .toSeq doesn't matter. Readability is
more important here.
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]