tuxji commented on code in PR #1192:
URL: https://github.com/apache/daffodil/pull/1192#discussion_r1539495695


##########
daffodil-core/src/main/scala/org/apache/daffodil/core/dsom/ElementBase.scala:
##########
@@ -960,12 +962,73 @@ trait ElementBase
     typeDef.optRestriction.flatMap { _.enumerationValues }
   }
 
+  final lazy val length: java.math.BigDecimal = {
+    Assert.invariant(hasLength)
+    schemaDefinitionUnless(
+      isSimpleType,
+      "Facet length is allowed only on simple types or types derived from 
simple types.",
+    )
+    typeDef match {
+      case _ if hasRepType => {
+        // this length is only used for the length of the representation. The
+        // values used for facet restrictions during limited validation come 
from elsewhere
+        repTypeElementDecl.length
+      }
+      case prim: PrimitiveType => {
+        val pt = prim.primType
+        schemaDefinitionWhen(
+          (pt == PrimType.String || pt == PrimType.HexBinary) && lengthKind == 
LengthKind.Implicit,
+          "Facet length must be defined for type %s with 
lengthKind='implicit'",
+          pt.name,
+        )
+        //
+        // We handle text numbers by getting a stringValue first, then
+        // we convert to the number type.
+        //
+        // This means we cannot check and SDE here on incorrect simple type.
+        zeroBD
+      }
+      case st: SimpleTypeDefBase if st.optRestriction.isDefined => {
+        val r = st.optRestriction.get
+        val pt = st.primType
+        val typeOK = pt == PrimType.String || pt == PrimType.HexBinary
+        schemaDefinitionWhen(
+          !typeOK && hasLength,
+          "Facet length is not allowed on types derived from type %s.\nIt is 
allowed only on types derived from string and hexBinary.",
+          pt.name,
+        )
+        val res = (hasLength, lengthKind) match {
+          case (false, LengthKind.Implicit) =>
+            SDE(
+              "When lengthKind='implicit', length facet must be specified.",
+            )
+          case (true, _) => r.lengthValue
+          case (false, _) => zeroBD
+          case _ => Assert.impossible()
+        }
+        res
+      }
+      case st: SimpleTypeDefBase => {
+        Assert.invariant(st.optRestriction.isEmpty)
+        zeroBD
+      }
+    }
+  }
+
   /**
    * Compute minLength and maxLength together to share error-checking
    * and case dispatch that would otherwise have to be repeated.
+   *
+   * Also set them to the value of length, in the case we've used the length 
facet
    */
-  final lazy val (minLength: java.math.BigDecimal, maxLength: 
java.math.BigDecimal) =
-    computeMinMaxLength
+  final lazy val (minLength: java.math.BigDecimal, maxLength: 
java.math.BigDecimal) = {
+    // if hasLength is true, and min/maxLength is queried, set it to length
+    if (hasLength) {
+      (length, length)
+    } else {
+      computeMinMaxLength
+    }
+  }
   // TODO: why are we using java.math.BigDecimal, when scala has a much
   // nicer decimal class?

Review Comment:
   While we're all reviewing this PR, can anyone explain why we're using 
java.math.BigDecimal, or should we change this comment to `// TODO: Consider 
replacing all uses of java.math.BigDecimal with Scala's much nicer decimal 
class`?



##########
daffodil-test/src/test/resources/org/apache/daffodil/section02/validation_errors/Validation.tdml:
##########
@@ -407,8 +424,33 @@
         </xs:restriction>
       </xs:simpleType>
     </xs:element>
-    
-    <xs:element name="e3_2" dfdl:lengthKind="delimited">
+
+    <xs:element name="e2_6" dfdl:lengthKind="delimited" 
dfdl:encoding="iso-8859-1">
+        <xs:simpleType>
+            <xs:restriction base="xs:hexBinary">
+                <xs:minLength value="7"/>
+                <xs:maxLength value="7"/>
+            </xs:restriction>
+        </xs:simpleType>
+    </xs:element>
+
+    <xs:element name="e2_7" dfdl:lengthKind="delimited" 
dfdl:encoding="iso-8859-1">
+        <xs:simpleType>
+            <xs:restriction base="xs:hexBinary">
+                <xs:length value="7"/>
+            </xs:restriction>
+        </xs:simpleType>
+    </xs:element>
+
+    <xs:element name="e2_8" dfdl:lengthKind="delimited" 
dfdl:encoding="iso-8859-1">
+        <xs:simpleType>
+            <xs:restriction base="xs:hexBinary">
+                <xs:length value="0"/>
+            </xs:restriction>
+        </xs:simpleType>
+    </xs:element>

Review Comment:
   Please note that e2_8 is a type derived from hexBinary like the others, but 
the comment on the test case that uses e2_8 says the type "int" which indicates 
either e2_8 should be a type derived from int or that comment is incorrect.  
   
   Also please note that element names like e2_2, etc., are not well chosen or 
easily maintainable names.  You can't insert new elements between them easily 
without having to renumber or tack on new numbers.  In fact, the numbers tacked 
on the original element names seem like they were intended to provide some 
information about the facets, not just give some kind of ordering to the 
elements, see this example below:
   
   ```scala
       <xs:element name="e2_2" dfdl:lengthKind="delimited">
         <xs:simpleType>
           <xs:restriction base="ex:stMinLength_1">
             <xs:minLength value="2" />
           </xs:restriction>
         </xs:simpleType>
       </xs:element>
   ```
   
   A name like `ex:stMinLength_1` is much more clear that it's defining a 
string with a minLength of 1.  So, these tests would become easier to maintain 
if they used similarly descriptive names such as `hex_length_0` instead of 
`e2_8`.



##########
daffodil-runtime1/src/main/scala/org/apache/daffodil/runtime1/processors/RuntimeData.scala:
##########
@@ -407,7 +416,32 @@ final class SimpleTypeRuntimeData(
     // Note: dont check occurs counts // if(!checkMinMaxOccurs(e, 
pstate.arrayIterationPos)) { return java.lang.Boolean.FALSE }
     OK
   }
+  private def checkLength(
+    diNode: DISimple,
+    lenValue: java.math.BigDecimal,
+    e: ThrowsSDE,
+    primType: PrimType,
+  ): java.lang.Boolean = {
+    val lenAsLong = lenValue.longValueExact()
+    primType match {
+      case PrimType.String => {
+        val data = diNode.dataValue.getString
+        val dataLen = data.length.toLong
+        val isDataLengthEqual = dataLen.compareTo(lenAsLong) == 0
+        if (isDataLengthEqual) java.lang.Boolean.TRUE
+        else java.lang.Boolean.FALSE
+      }
+      case PrimType.HexBinary => {
+        val data = diNode.dataValue.getByteArray
 
+        val dataLen = data.length.toLong
+        val isDataLengthEqual = dataLen.compareTo(lenAsLong) == 0
+        if (isDataLengthEqual) java.lang.Boolean.TRUE
+        else java.lang.Boolean.FALSE
+      }
+      case _ => e.SDE("Length facet is only valid for string and hexBinary.")

Review Comment:
   This line is not covered by tests, codecov says, so an earlier check 
prevents execution from coming here.  I'd still leave the check in for the 
fallback "match everything else" case, but put the word "Facet" first and spell 
length with a lowercase l.



##########
daffodil-test/src/test/resources/org/apache/daffodil/section02/validation_errors/Validation.tdml:
##########
@@ -643,7 +685,63 @@
 
                </tdml:validationErrors>
   </tdml:parserTestCase>
-  
+
+    <tdml:parserTestCase name="validation_testFacets_01"
+                         root="e2_6" model="TestFacets"
+                         description="validates hexBinary against 
min/maxLength facet"
+                         validation="limited">
+
+    <tdml:document>
+        <tdml:documentPart type="byte">deadbeefdafada</tdml:documentPart>
+    </tdml:document>
+
+        <tdml:infoset>
+            <tdml:dfdlInfoset>
+                <ex:e2_6>DEADBEEFDAFADA</ex:e2_6>
+            </tdml:dfdlInfoset>
+        </tdml:infoset>
+
+    </tdml:parserTestCase>
+
+    <tdml:parserTestCase name="validation_testFacets_02"
+                         root="e2_7" model="TestFacets"
+                         description="validates hexBinary against length facet"
+                         validation="limited">
+
+    <tdml:document>
+        <tdml:documentPart type="byte">deadbeefdafada</tdml:documentPart>
+    </tdml:document>
+
+        <tdml:infoset>
+            <tdml:dfdlInfoset>
+                <ex:e2_7>DEADBEEFDAFADA</ex:e2_7>
+            </tdml:dfdlInfoset>
+        </tdml:infoset>
+    </tdml:parserTestCase>
+
+    <tdml:parserTestCase name="validation_testFacets_03"
+                         root="e2_8" model="TestFacets"
+                         description="checks that int doesn't work with length 
facet"

Review Comment:
   Either this description or the entire test is incorrect since `e2_8` is 
really a `hex_length_0`.



##########
daffodil-core/src/main/scala/org/apache/daffodil/core/dsom/Facets.scala:
##########
@@ -93,8 +96,27 @@ trait Facets { self: Restriction =>
   final lazy val localMaxInclusiveValue: String = maxInclusive(xml)
   final lazy val localMinExclusiveValue: String = minExclusive(xml)
   final lazy val localMaxExclusiveValue: String = maxExclusive(xml)
-  final lazy val localMinLengthValue: String = minLength(xml)
-  final lazy val localMaxLengthValue: String = maxLength(xml)
+  final lazy val localLengthValue: String = length(xml)
+  final lazy val localMinLengthValue: String = {
+    val ml = minLength(xml)
+    // Xerces checks for the case where length and min/maxLength are used 
together,
+    // so we won't get to this code in those cases unless Xerces validation is 
turned off
+    Assert.usage(
+      ml.isEmpty || localLengthValue.isEmpty,
+      "Length and minLength cannot be specified together",

Review Comment:
   Agreed, and let's put the word "facets" first so we don't have to capitalize 
length (because the length facet always begins with the lowercase letter `l`):  
"Facets length and maxLength ..."



##########
daffodil-test/src/test/resources/org/apache/daffodil/section02/validation_errors/Validation.tdml:
##########
@@ -1921,7 +2019,139 @@
 
   </tdml:parserTestCase>
 
-  <tdml:parserTestCase name="floatExclusiveValid"
+    <tdml:parserTestCase name="validation_inputValueCalc_04"
+                         root="e2_3" model="inputValueCalc"
+                         description="Section 17 - the value created using 
inputValueCalc is validated using minLength/maxLength facets"
+                         validation="limited">
+
+        <tdml:document></tdml:document>
+
+        <tdml:infoset>
+            <tdml:dfdlInfoset>
+                <e2_3>string</e2_3>
+            </tdml:dfdlInfoset>
+        </tdml:infoset>
+
+        <tdml:validationErrors>
+            <tdml:error>Validation Error</tdml:error>
+            <tdml:error>failed facet checks due to: facet minLength 
(7)</tdml:error>
+        </tdml:validationErrors>
+
+    </tdml:parserTestCase>
+
+    <tdml:parserTestCase name="validation_inputValueCalc_05"
+                         root="e2_4" model="inputValueCalc"
+                         description="Section 17 - the value created using 
inputValueCalc is validated using length facets"
+                         validation="limited">
+
+        <tdml:document></tdml:document>
+
+        <tdml:infoset>
+            <tdml:dfdlInfoset>
+                <e2_4>string</e2_4>
+            </tdml:dfdlInfoset>
+        </tdml:infoset>
+
+        <tdml:validationErrors>
+            <tdml:error>Validation Error</tdml:error>
+            <tdml:error>failed facet checks due to: facet length 
(7)</tdml:error>
+        </tdml:validationErrors>
+
+    </tdml:parserTestCase>
+
+    <tdml:parserTestCase name="validation_inputValueCalc_06"
+                         root="e2_3" model="inputValueCalc"
+                         description="Section 17 - the value created using 
inputValueCalc is validated using minLength/maxLength facets"
+                         validation="on">
+
+        <tdml:document></tdml:document>
+
+        <tdml:infoset>
+            <tdml:dfdlInfoset>
+                <e2_3>string</e2_3>
+            </tdml:dfdlInfoset>
+        </tdml:infoset>
+
+        <tdml:validationErrors>
+            <tdml:error>Validation Error</tdml:error>
+            <tdml:error>failed facet checks due to: facet minLength 
(7)</tdml:error>
+        </tdml:validationErrors>
+
+    </tdml:parserTestCase>
+
+    <tdml:parserTestCase name="validation_inputValueCalc_07"
+                         root="e2_4" model="inputValueCalc"
+                         description="Section 17 - the value created using 
inputValueCalc is validated using length facets"
+                         validation="on">
+
+        <tdml:document></tdml:document>
+
+        <tdml:infoset>
+            <tdml:dfdlInfoset>
+                <e2_4>string</e2_4>
+            </tdml:dfdlInfoset>
+        </tdml:infoset>
+
+        <tdml:validationErrors>
+            <tdml:error>Validation Error</tdml:error>
+            <tdml:error>failed facet checks due to: facet length 
(7)</tdml:error>
+        </tdml:validationErrors>
+
+    </tdml:parserTestCase>
+
+    <tdml:defineSchema name="TestFacets2">
+        <xs:include 
schemaLocation="/org/apache/daffodil/xsd/DFDLGeneralFormat.dfdl.xsd"/>
+        <dfdl:format ref="ex:GeneralFormat" />
+        <xs:element name="e2_5" dfdl:lengthKind="delimited" 
dfdl:encoding="ISO-8859-1">
+            <xs:simpleType>
+                <xs:restriction base="xs:hexBinary">
+                    <xs:length value="7"/>
+                    <xs:minLength value="7"/>
+                    <xs:maxLength value="7"/>
+                </xs:restriction>
+            </xs:simpleType>
+        </xs:element>
+    </tdml:defineSchema>
+
+    <tdml:parserTestCase name="validation_testFacets2_01"
+                         root="e2_5" model="TestFacets2"
+                         description="checks that length cannot be used with 
min/maxLength"
+                         validation="limited">
+
+        <tdml:document>
+            <tdml:documentPart type="byte">deadbeef</tdml:documentPart>
+        </tdml:document>
+
+        <!-- this is a Xerces Error -->

Review Comment:
   Is Xerces used in both validation modes "limited" and "on"??  I thought the 
point of "limited" validation was to validate faster by using only Daffodil 
itself, skipping calling Xerces since the user doesn't want to wait for Xerces 
to validate the infoset.



##########
daffodil-core/src/main/scala/org/apache/daffodil/core/dsom/ElementBase.scala:
##########
@@ -974,7 +1037,7 @@ trait ElementBase
   private def computeMinMaxLength: (java.math.BigDecimal, 
java.math.BigDecimal) = {
     schemaDefinitionUnless(
       isSimpleType,
-      "Facets minLength and maxLength are allowed only on types string and 
hexBinary.",
+      "Facets minLength and maxLength are allowed only on simple types or 
types derived from simple types.",

Review Comment:
   I see the check for the correct type (string or hexBinary) and corresponding 
error message is performed on lines 994 and 1065 of this ElementBase.scala's 
class already (note: Steve already suggested merging all checks together into 
one function, computeMinMaxLength, so these two checks will be merged together 
later):
   
   ```scala
   // length at line 994
           val typeOK = pt == PrimType.String || pt == PrimType.HexBinary
           schemaDefinitionWhen(
             !typeOK && hasLength,
             "Facet length is not allowed on types derived from type %s.\nIt is 
allowed only on types derived from string and hexBinary.",
             pt.name,
           )
   // minLength, maxLength at line 1065
           val typeOK = pt == PrimType.String || pt == PrimType.HexBinary
           schemaDefinitionWhen(
             !typeOK && (hasMinLength || hasMaxLength),
             "Facets minLength and maxLength are not allowed on types derived 
from type %s.\nThey are allowed only on typed derived from string and 
hexBinary.",
             pt.name,
           )
   ```
   
   Therefore, what we have here is a preliminary check saying you can't use the 
min/max/length facets on a complex type.  Calling `isSimpleType` is what's 
being checked here.  I do agree with Steve, the error message should not say 
the facets are allowed on all simple types - a more correct message would say 
`Facets length, minLength, maxLength are not allowed on complex types.` 



##########
daffodil-core/src/main/scala/org/apache/daffodil/core/dsom/Facets.scala:
##########
@@ -93,8 +96,27 @@ trait Facets { self: Restriction =>
   final lazy val localMaxInclusiveValue: String = maxInclusive(xml)
   final lazy val localMinExclusiveValue: String = minExclusive(xml)
   final lazy val localMaxExclusiveValue: String = maxExclusive(xml)
-  final lazy val localMinLengthValue: String = minLength(xml)
-  final lazy val localMaxLengthValue: String = maxLength(xml)
+  final lazy val localLengthValue: String = length(xml)
+  final lazy val localMinLengthValue: String = {
+    val ml = minLength(xml)
+    // Xerces checks for the case where length and min/maxLength are used 
together,
+    // so we won't get to this code in those cases unless Xerces validation is 
turned off
+    Assert.usage(
+      ml.isEmpty || localLengthValue.isEmpty,
+      "Length and minLength cannot be specified together",
+    )
+    ml
+  }
+  final lazy val localMaxLengthValue: String = {
+    val ml = maxLength(xml)
+    // Xerces checks for the case where length and min/maxLength are used 
together,
+    // so we won't get to this code in those cases unless Xerces validation is 
turned off
+    Assert.usage(
+      ml.isEmpty || localLengthValue.isEmpty,
+      "Length and maxLength cannot be specified together",

Review Comment:
   Ditto, "Facets length and maxLength ..."



##########
daffodil-core/src/main/scala/org/apache/daffodil/core/dsom/ElementBase.scala:
##########
@@ -960,12 +962,73 @@ trait ElementBase
     typeDef.optRestriction.flatMap { _.enumerationValues }
   }
 
+  final lazy val length: java.math.BigDecimal = {

Review Comment:
   Agreed, something like:
   
   ```scala
   final lazy val (length: java.math.BigDecimal, minLength: 
java.math.BigDecimal, maxLength: java.math.BigDecimal) =
       computeMinMaxLength
   ```



##########
daffodil-core/src/main/scala/org/apache/daffodil/core/dsom/ElementBase.scala:
##########
@@ -711,8 +712,8 @@ trait ElementBase
       Assert.invariant(repElement.lengthKind =:= LengthKind.Implicit)
       // it's a string with implicit length. get from facets
       schemaDefinitionUnless(
-        repElement.hasMaxLength,
-        "String with dfdl:lengthKind='implicit' must have an XSD maxLength 
facet value.",
+        repElement.hasMaxLength || repElement.hasLength,
+        "String with dfdl:lengthKind='implicit' must have an XSD maxLength or 
XSD Length facet value.",

Review Comment:
   Even better, could dispense with XSD (some people won't know XSD means XML 
Schema Language) and shorten the rest to just `must have a length or maxLength 
facet value.`  



##########
daffodil-runtime1/src/main/scala/org/apache/daffodil/runtime1/processors/RuntimeData.scala:
##########
@@ -345,7 +346,15 @@ final class SimpleTypeRuntimeData(
         return Error("facet enumeration(s): 
%s".format(e.enumerationValues.mkString(",")))
       }
     }
-
+    // Check length
+    e.length.foreach { length =>
+      val lenAsLong = length.longValue()
+      val isLengthGreaterThanEqToZero = lenAsLong.compareTo(0L) >= 0
+      if (isLengthGreaterThanEqToZero) {

Review Comment:
   Codecov also warns that line 353 is not covered by tests, so that may be 
another clue that this code is redundant.



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]

Reply via email to