This is an automated email from the ASF dual-hosted git repository. slawrence pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/daffodil.git
The following commit(s) were added to refs/heads/master by this push: new ae9ebd3 Improve path expression error handling ae9ebd3 is described below commit ae9ebd391aacad320917b60a34936a066acd072e Author: Steve Lawrence <slawre...@apache.org> AuthorDate: Tue Aug 17 11:01:50 2021 -0400 Improve path expression error handling - The context of a dfdl:occursCountKind expression evaluation is actually not the element that the property lives on, but on the elements parent. This is because the element isn't created until the number of occurrences are known. This means that such expressions must either be absolute, or start with an ".." upward move. We previously had an assertion that checked this, but there's nothing actually preventing expressions without upward moves. This assertion is instead changed to an SDE. - The conversions for path expressions were applied only on the last downward step. But this meant that if an expression path was made up of only upward steps, no conversions would occur. Such expressions are rarely reasonable, but are legal, and should usually result in an error. But because no conversions happened, where certain types of error checks occur, it meant it was possible to miss certain compile-time convertability checks, allowing for expressions to fail at runtime with a NullPointerExceptions. This modifies conversions to be applied to the entire path, instead of on the last downward step. This doesn't change what conversions are applied, only ensures that they are always applied. - Additional refactorings related to path steps to make it a little more clear what's going on. DAFFODIL-2553 --- .../org/apache/daffodil/dpath/Expression.scala | 90 +++++++------ .../section23/dfdl_expressions/expressions.tdml | 140 ++++++++++++++++++++- .../dfdl_expressions/TestDFDLExpressions.scala | 4 + 3 files changed, 186 insertions(+), 48 deletions(-) diff --git a/daffodil-core/src/main/scala/org/apache/daffodil/dpath/Expression.scala b/daffodil-core/src/main/scala/org/apache/daffodil/dpath/Expression.scala index 0aa0616..fa44ca0 100644 --- a/daffodil-core/src/main/scala/org/apache/daffodil/dpath/Expression.scala +++ b/daffodil-core/src/main/scala/org/apache/daffodil/dpath/Expression.scala @@ -716,7 +716,7 @@ case class UnaryExpression(op: String, exp: Expression) abstract class PathExpression() extends Expression { - def steps: Seq[StepExpression] + def steps: List[StepExpression] override def text = steps.map { _.text }.mkString("/") @@ -753,7 +753,7 @@ case class RootPathExpression(relPath: Option[RelativePathExpression]) override def text = "/" + super.text - lazy val steps = relPath.map { _.steps }.getOrElse(Nil) + override lazy val steps = relPath.map { _.steps }.getOrElse(Nil) override lazy val inherentType = { if (!(steps == Nil)) steps.last.inherentType @@ -779,63 +779,63 @@ case class RootPathExpression(relPath: Option[RelativePathExpression]) override lazy val compiledDPath = { val rel = relPath.map { rp => rp.compiledDPath.ops.toList }.getOrElse(Nil) + // note that no conversion is needed here since the compiledDPath of the + // relative path includes the necessary conversions val cdp = new CompiledDPath(ToRoot +: rel) cdp } } -case class RelativePathExpression(steps1: List[StepExpression], isEvaluatedAbove: Boolean) +case class RelativePathExpression(stepsRaw: List[StepExpression], isEvaluatedAbove: Boolean) extends PathExpression() { lazy val isAbsolutePath = { parent != null && parent.isInstanceOf[RootPathExpression] } - /** - * We must adjust the steps for the isEvaluatedAbove case. - * That's when something like dfdl:occursCount is written as { ../c }. - * In that case, it's written on an element decl as if it were accessing a - * peer, but in fact the expression is evaluated before any instances of - * the array are even allocated, so we must remove an ".." up move from - * every relative path contained inside the expression, except when - * part of an absolute path. - */ - private lazy val adjustedSteps = { - if (parent.isInstanceOf[RootPathExpression]) steps2 - else { - // not an absolute path. - if (isEvaluatedAbove) { - // in this case, the relative path must begin with ".." to be - // meaningful. - Assert.invariant(steps2(0).isInstanceOf[Up]) - steps2.tail // trim off the UP move at the start. - } else steps2 - } - } - override lazy val compiledDPath: CompiledDPath = { - val cps = adjustedSteps.map { - _.compiledDPath - } - val ops = cps.map { - _.ops.toList + + val stepsToEvaluate = { + if (parent.isInstanceOf[RootPathExpression] || !isEvaluatedAbove) steps + else { + // This expression a relative expression that is actually evaluated + // above the element it is defined on. This means it's something like + // dfdl:occursCount with an expression like { ../c }. In cases like + // this, the property is written on an element decl as if it were + // accessing a peer, but in fact the expression is evaluated before any + // instances of the array are even allocated, so the first up ".." move + // in this relative path is implied and so should not actually be + // evaluated. Such an expression must begin with an upward step to even + // be considered valid, so we also error if it doesn't exist. + if (!steps(0).isInstanceOf[Up]) { + SDE("""Path expression must be absolute or begin with a "../" upward step: %s""", this.text) + } + steps.tail + } } - val res = new CompiledDPath(ops.flatten) + + // All the steps are individual CompiledDPaths, let's optmize those out and + // just create a single CompiledDpath that contains all those flattened + // operations. + val ops = stepsToEvaluate.flatMap { _.compiledDPath.ops } + + // add the appropriate conversions based on the inherent type of this path + // expression. Individual steps do not need a conversion, we only need to + // convert the result of the path expression + val res = new CompiledDPath(ops ++ conversions) res } override lazy val steps = { - steps2 - } - - // remove any spurious "." in relative paths so "./.././.." becomes "../.." - // corner case "././././." should behave like "." - val steps2 = { - val noSelfSteps = steps1.filter { case Self(None) => false; case _ => true } - val res = - if (noSelfSteps.length == 0) List(Self(None)) // we need one "." - else noSelfSteps - res + // Optimize out all self steps, since those don't change the expression at all + val noSelfSteps = stepsRaw.filter { case Self(None) => false; case _ => true } + if (noSelfSteps.length == 0) { + // If this path expression was all self steps, all steps were removed. We + // still need at least one step, so replace it with a self step + List(Self(None)) + } else { + noSelfSteps + } } override lazy val children: List[Expression] = steps @@ -1051,7 +1051,7 @@ sealed abstract class DownStepExpression(s: String, predArg: Option[PredicateExp sealed abstract class SelfStepExpression(s: String, predArg: Option[PredicateExpression]) extends DownStepExpression(s, predArg) { - override lazy val compiledDPath = new CompiledDPath(SelfMove +: conversions) + override lazy val compiledDPath = new CompiledDPath(SelfMove) override def text = "." protected def stepElementDefs: Seq[DPathElementCompileInfo] = { @@ -1149,9 +1149,7 @@ case class NamedStep(s: String, predArg: Option[PredicateExpression]) override lazy val compiledDPath = { val d = downwardStep - val conv = conversions - val c = (if (isLastStep) conv else Nil) - val res = new CompiledDPath(d +: c) + val res = new CompiledDPath(d) res } diff --git a/daffodil-test/src/test/resources/org/apache/daffodil/section23/dfdl_expressions/expressions.tdml b/daffodil-test/src/test/resources/org/apache/daffodil/section23/dfdl_expressions/expressions.tdml index 7a68085..9b99d35 100644 --- a/daffodil-test/src/test/resources/org/apache/daffodil/section23/dfdl_expressions/expressions.tdml +++ b/daffodil-test/src/test/resources/org/apache/daffodil/section23/dfdl_expressions/expressions.tdml @@ -1314,7 +1314,67 @@ </xs:sequence> </xs:complexType> </xs:element> - + + <xs:element name="ocke5"> + <xs:complexType> + <xs:sequence> + <xs:element name="arr1" maxOccurs="unbounded"> + <xs:complexType> + <xs:sequence> + <xs:element name="field1" type="xs:string" /> + </xs:sequence> + </xs:complexType> + </xs:element> + <xs:element name="arr2" type="xs:string" maxOccurs="unbounded" + dfdl:occursCountKind="expression" + dfdl:occursCount="{ xs:unsignedLong(does/not/exist) }" /> + </xs:sequence> + </xs:complexType> + </xs:element> + + <xs:element name="ocke6"> + <xs:complexType> + <xs:sequence> + <xs:element name="arr1" maxOccurs="unbounded"> + <xs:complexType> + <xs:sequence> + <xs:element name="field1" type="xs:string" /> + </xs:sequence> + </xs:complexType> + </xs:element> + <xs:element name="arr2" type="xs:string" maxOccurs="unbounded" + dfdl:occursCountKind="expression" + dfdl:occursCount="{ xs:unsignedLong(../ex:arr1[does/not/exist]/field1) }" /> + </xs:sequence> + </xs:complexType> + </xs:element> + + <xs:element name="ocke7"> + <xs:complexType> + <xs:sequence> + <xs:element name="arr2" maxOccurs="unbounded" + dfdl:occursCountKind="expression" + dfdl:occursCount="{ xs:unsignedLong(ex:count) }"> + <xs:complexType> + <xs:sequence> + <xs:element name="count" type="xs:int" /> + </xs:sequence> + </xs:complexType> + </xs:element> + </xs:sequence> + </xs:complexType> + </xs:element> + + <xs:element name="ocke8"> + <xs:complexType> + <xs:sequence> + <xs:element name="arr" type="xs:string" maxOccurs="unbounded" + dfdl:occursCountKind="expression" + dfdl:occursCount="{ xs:unsignedLong(..) }" /> + </xs:sequence> + </xs:complexType> + </xs:element> + <xs:element name="expr_space" dfdl:lengthKind="implicit"> <xs:complexType> <xs:sequence dfdl:separator=","> @@ -1627,7 +1687,83 @@ </tdml:dfdlInfoset> </tdml:infoset> </tdml:parserTestCase> - + + <!-- + Test Name: ocke_step_dne + Schema: expressions-Embedded.dfdl.xsd + Purpose: This test demonstrates that we get an appropriate error message when an + occursCountKind expression references a non existent path step + --> + <tdml:parserTestCase name="ocke_step_dne" root="ocke5" + model="expressions-Embedded.dfdl.xsd" description="occursCountKind expression - DFDL-23-011R"> + + <tdml:document><![CDATA[]]></tdml:document> + <tdml:errors> + <tdml:error>Schema Definition Error</tdml:error> + <tdml:error>Path expression</tdml:error> + <tdml:error>absolute</tdml:error> + <tdml:error>upward step</tdml:error> + <tdml:error>does/not/exist</tdml:error> + </tdml:errors> + </tdml:parserTestCase> + + <!-- + Test Name: ocke_array_index_step_dne + Schema: expressions-Embedded.dfdl.xsd + Purpose: This test demonstrates that we get an appropriate error message when an + occursCountKind expression references a non existent path step inside + an array index + --> + <tdml:parserTestCase name="ocke_array_index_step_dne" root="ocke6" + model="expressions-Embedded.dfdl.xsd" description="occursCountKind expression - DFDL-23-011R"> + + <tdml:document><![CDATA[]]></tdml:document> + <tdml:errors> + <tdml:error>Schema Definition Error</tdml:error> + <tdml:error>Path expression</tdml:error> + <tdml:error>absolute</tdml:error> + <tdml:error>upward step</tdml:error> + <tdml:error>does/not/exist</tdml:error> + </tdml:errors> + </tdml:parserTestCase> + + <!-- + Test Name: ocke_non_upward + Schema: expressions-Embedded.dfdl.xsd + Purpose: This test demonstrates that we get an appropriate error message when an + occursCountKind expression uses a path that does not go upward + --> + <tdml:parserTestCase name="ocke_non_upward" root="ocke7" + model="expressions-Embedded.dfdl.xsd" description="occursCountKind expression - DFDL-23-011R"> + + <tdml:document><![CDATA[]]></tdml:document> + <tdml:errors> + <tdml:error>Schema Definition Error</tdml:error> + <tdml:error>Path expression</tdml:error> + <tdml:error>absolute</tdml:error> + <tdml:error>upward step</tdml:error> + <tdml:error>ex:count</tdml:error> + </tdml:errors> + </tdml:parserTestCase> + + <!-- + Test Name: ocke_single_upward + Schema: expressions-Embedded.dfdl.xsd + Purpose: This test demonstrates that we get an appropriate error message when an + occursCountKind expression uses a path that is only a single upward step + --> + <tdml:parserTestCase name="ocke_single_upward" root="ocke8" + model="expressions-Embedded.dfdl.xsd" description="occursCountKind expression - DFDL-23-011R"> + + <tdml:document><![CDATA[]]></tdml:document> + <tdml:errors> + <tdml:error>Schema Definition Error</tdml:error> + <tdml:error>Complex</tdml:error> + <tdml:error>cannot be converted</tdml:error> + <tdml:error>xs:unsignedLong</tdml:error> + </tdml:errors> + </tdml:parserTestCase> + <!-- Test Name: internal_space_preserved Schema: expressions-Embedded.dfdl.xsd diff --git a/daffodil-test/src/test/scala/org/apache/daffodil/section23/dfdl_expressions/TestDFDLExpressions.scala b/daffodil-test/src/test/scala/org/apache/daffodil/section23/dfdl_expressions/TestDFDLExpressions.scala index 97f48af..67a4574 100644 --- a/daffodil-test/src/test/scala/org/apache/daffodil/section23/dfdl_expressions/TestDFDLExpressions.scala +++ b/daffodil-test/src/test/scala/org/apache/daffodil/section23/dfdl_expressions/TestDFDLExpressions.scala @@ -233,6 +233,10 @@ class TestDFDLExpressions { @Test def test_ocke_rel2(): Unit = { runner.runOneTest("ocke_rel2") } @Test def test_ocke_rel3(): Unit = { runner.runOneTest("ocke_rel3") } @Test def test_ocke_rel4(): Unit = { runner.runOneTest("ocke_rel4") } + @Test def test_ocke_step_dne(): Unit = { runner.runOneTest("ocke_step_dne") } + @Test def test_ocke_array_index_step_dne(): Unit = { runner.runOneTest("ocke_array_index_step_dne") } + @Test def test_ocke_non_upward(): Unit = { runner.runOneTest("ocke_non_upward") } + @Test def test_ocke_single_upward(): Unit = { runner.runOneTest("ocke_single_upward") } @Test def test_internal_space_preserved(): Unit = { runner.runOneTest("internal_space_preserved") } @Test def test_internal_space_preserved2(): Unit = { runner.runOneTest("internal_space_preserved2") } @Test def test_internal_space_preserved3a(): Unit = { runner.runOneTest("internal_space_preserved3a") }