terminator alignment [daffodil]

via GitHub Fri, 19 Dec 2025 08:54:21 -0800


stevedlawrence commented on code in PR #1604:
URL: https://github.com/apache/daffodil/pull/1604#discussion_r2635332294



##########
daffodil-core/src/main/scala/org/apache/daffodil/core/grammar/AlignedMixin.scala:
##########
@@ -228,29 +235,161 @@ trait AlignedMixin extends GrammarMixin { self: Term =>
       }
     }.value
 
+  private lazy val separatorPrefixMTAApprox =
+    this.optLexicalParent match {
+      case Some(s: SequenceTermBase) if s.hasSeparator =>
+        import SeparatorPosition.*
+        s.separatorPosition match {
+          case Prefix | Infix => 
LengthMultipleOf(s.knownEncodingAlignmentInBits)

Review Comment:
   MTA is an alignment thing, so I think these MTA approx functions what to 
return `AligmentMultipleOf` instead of `LengthApprox`.



##########
daffodil-core/src/main/scala/org/apache/daffodil/core/grammar/AlignedMixin.scala:
##########
@@ -228,29 +235,161 @@ trait AlignedMixin extends GrammarMixin { self: Term =>
       }
     }.value
 
+  private lazy val separatorPrefixMTAApprox =
+    this.optLexicalParent match {
+      case Some(s: SequenceTermBase) if s.hasSeparator =>
+        import SeparatorPosition.*
+        s.separatorPosition match {
+          case Prefix | Infix => 
LengthMultipleOf(s.knownEncodingAlignmentInBits)
+          case Postfix => LengthMultipleOf(0)
+        }
+      case _ => LengthMultipleOf(0)
+    }
+
+  private lazy val separatorPostfixMTAApprox =
+    this.optLexicalParent match {
+      case Some(s: SequenceTermBase) if s.hasSeparator =>
+        import SeparatorPosition.*
+        s.separatorPosition match {
+          case Prefix | Infix => LengthMultipleOf(0)
+          case Postfix => LengthMultipleOf(s.knownEncodingAlignmentInBits)
+        }
+      case _ => LengthMultipleOf(0)
+    }
+
+  private lazy val separatorLengthApprox = this.optLexicalParent match {
+    case Some(s: SequenceTermBase) if s.hasSeparator =>
+      getEncodingLengthApprox(s)
+    case _ => LengthMultipleOf(0)

Review Comment:
   Instead of `LengthMultipleOf(0)`, it might be more clear to make this 
`LengthExact(0)`. I imigine all the math ends up the same so it probably 
doesn't really matter, but it feels a bit odd to say something has a length 
that is a multiple of zero.



##########
daffodil-core/src/main/scala/org/apache/daffodil/core/grammar/AlignedMixin.scala:
##########
@@ -228,29 +235,161 @@ trait AlignedMixin extends GrammarMixin { self: Term =>
       }
     }.value
 
+  private lazy val separatorPrefixMTAApprox =
+    this.optLexicalParent match {
+      case Some(s: SequenceTermBase) if s.hasSeparator =>
+        import SeparatorPosition.*
+        s.separatorPosition match {
+          case Prefix | Infix => 
LengthMultipleOf(s.knownEncodingAlignmentInBits)
+          case Postfix => LengthMultipleOf(0)
+        }
+      case _ => LengthMultipleOf(0)
+    }
+
+  private lazy val separatorPostfixMTAApprox =
+    this.optLexicalParent match {
+      case Some(s: SequenceTermBase) if s.hasSeparator =>
+        import SeparatorPosition.*
+        s.separatorPosition match {
+          case Prefix | Infix => LengthMultipleOf(0)
+          case Postfix => LengthMultipleOf(s.knownEncodingAlignmentInBits)
+        }
+      case _ => LengthMultipleOf(0)
+    }
+
+  private lazy val separatorLengthApprox = this.optLexicalParent match {
+    case Some(s: SequenceTermBase) if s.hasSeparator =>
+      getEncodingLengthApprox(s)
+    case _ => LengthMultipleOf(0)
+  }
+
+  private lazy val initiatorMTAApprox =
+    if (this.hasInitiator) {
+      AlignmentMultipleOf(this.knownEncodingAlignmentInBits)
+    } else {
+      AlignmentMultipleOf(0)
+    }
+
+  private lazy val initiatorLengthApprox = if (this.hasInitiator) {
+    getEncodingLengthApprox(this)
+  } else {
+    LengthMultipleOf(0)
+  }
+
+  private lazy val terminatorMTAApprox =
+    if (this.hasTerminator) {
+      AlignmentMultipleOf(this.knownEncodingAlignmentInBits)
+    } else {
+      AlignmentMultipleOf(0)
+    }
+
+  private lazy val terminatorLengthApprox = if (this.hasTerminator) {
+    getEncodingLengthApprox(this)
+  } else {
+    LengthMultipleOf(0)
+  }
+
+  private def getEncodingLengthApprox(t: Term) = {
+    if (t.isKnownEncoding) {
+      val dfdlCharset = t.charsetEv.optConstant.get
+      val fixedWidth =
+        if (dfdlCharset.maybeFixedWidth.isDefined) 
dfdlCharset.maybeFixedWidth.get else 8
+      LengthMultipleOf(fixedWidth)
+    } else {
+      // runtime encoding, which must be a byte-length encoding
+      LengthMultipleOf(8)
+    }
+  }
+
+  /**
+   * prior alignment with leading skip includes the prefix/infix separator MTA 
and length,
+   * any leading skips
+   */
   private lazy val priorAlignmentWithLeadingSkipApprox: AlignmentMultipleOf = {
-    priorAlignmentApprox + leadingSkipApprox
+    val priorAlignmentWithSeparatorApprox = priorAlignmentApprox
+      + separatorPrefixMTAApprox
+      + separatorLengthApprox
+    val leadingAlignmentApprox = (priorAlignmentWithSeparatorApprox
+      + leadingSkipApprox)
+    leadingAlignmentApprox
+  }
+
+  /**
+   * The length of the prefix length quasi element.
+   *
+   * This is only relevant for quasi elements, or prefixed terms.
+   */
+  private lazy val prefixLengthElementLength = {
+    this match {
+      case eb: ElementBase => {
+        if (eb.isInstanceOf[QuasiElementDeclBase])
+          LengthExact(
+            
eb.asInstanceOf[QuasiElementDeclBase].elementLengthInBitsEv.constValue.get
+          )
+        else if (eb.lengthKind == Prefixed)
+          LengthExact(
+            this
+              .asInstanceOf[ElementBase]
+              .prefixedLengthElementDecl
+              .elementLengthInBitsEv
+              .constValue
+              .get
+          )

Review Comment:
   This assumes quasi elements and prefix length elements have constant 
lengths, but I'm not sure that's always true. For example, I think we allow a 
prefix length to have a prefix length, in which case one of the prefix lenghts 
isn't constant. I think prefix lengths is already broken in a number of ways, 
so maybe this doesn't have to necessarily have be fixed as part of this.



##########
daffodil-core/src/main/scala/org/apache/daffodil/core/grammar/AlignedMixin.scala:
##########
@@ -228,29 +235,161 @@ trait AlignedMixin extends GrammarMixin { self: Term =>
       }
     }.value
 
+  private lazy val separatorPrefixMTAApprox =
+    this.optLexicalParent match {
+      case Some(s: SequenceTermBase) if s.hasSeparator =>
+        import SeparatorPosition.*
+        s.separatorPosition match {
+          case Prefix | Infix => 
LengthMultipleOf(s.knownEncodingAlignmentInBits)
+          case Postfix => LengthMultipleOf(0)
+        }
+      case _ => LengthMultipleOf(0)

Review Comment:
   I think this really wants to be AlignmentApprox(1) (MTA is alignment after 
all so using length isn't quite right) and then with the new `+` operator 
mentioned above it will work as expected. This is because 1 will always evenly 
divide the existing alignment so the existing alignment will be used. Same with 
the other MTA things.
   
   In fact, I *think* `AligmentApprox(0)` never wants to be used except for the 
root element, since it's kindof a 1-based thing.



##########
daffodil-core/src/main/scala/org/apache/daffodil/core/grammar/AlignedMixin.scala:
##########
@@ -260,37 +399,53 @@ trait AlignedMixin extends GrammarMixin { self: Term =>
         //
         val (lastChildren, couldBeLast) = mg.potentialLastChildren
         val lastApproxesConsideringChildren: Seq[AlignmentMultipleOf] = 
lastChildren.map { lc =>
-          //
-          // for each possible last child, add its ending alignment
-          // to our trailing skip to get where it would leave off were
-          // it the actual last child.
-          //
+          // for each possible last child, gather its endingAlignmentApprox
           val lceaa = lc.endingAlignmentApprox
-          val res = lceaa + trailingSkipApprox
-          res
+          lceaa
         }
         val optApproxIfNoChildren =
           //
           // gather possibilities for this item itself
           //
-          if (couldBeLast)
+          if (couldBeLast) {
             //
             // if this model group could be last, then consider
             // if none of its content was present.
-            // We'd just have the contentStart, nothing, and the trailing Skip.
+            // We'd just have the contentStart
             //
-            Some(contentStartAlignment + trailingSkipApprox)
-          else
+            val res = Some(contentStartAlignment)
+            res
+          } else
             // can't be last, no possibilities to gather.
             None
         val lastApproxes = lastApproxesConsideringChildren ++ 
optApproxIfNoChildren
-        Assert.invariant(lastApproxes.nonEmpty)
-        val res = lastApproxes.reduce { _ * _ }
+        // take all the gathered possibilities and add the ending alignment
+        // to terminator MTA/length and our trailing skip to get where it 
would leave off were
+        // each the actual last child.
+        val lastApproxesFinal = lastApproxes.map {
+          _ * terminatorMTAApprox
+            + terminatorLengthApprox
+            + trailingSkipApprox
+        }
+        Assert.invariant(lastApproxesFinal.nonEmpty)
+        val res = lastApproxesFinal.reduce {
+          _ * _

Review Comment:
   I think right here is the only place we really want to use the * operator. 
The goal of this is to combine a bunch of different alignemtns that could 
happen at the same place time due to choice or optional elements, so this finds 
the the common alignment they all have. 



##########
daffodil-core/src/main/scala/org/apache/daffodil/core/grammar/AlignedMixin.scala:
##########
@@ -228,29 +235,161 @@ trait AlignedMixin extends GrammarMixin { self: Term =>
       }
     }.value
 
+  private lazy val separatorPrefixMTAApprox =
+    this.optLexicalParent match {
+      case Some(s: SequenceTermBase) if s.hasSeparator =>
+        import SeparatorPosition.*
+        s.separatorPosition match {
+          case Prefix | Infix => 
LengthMultipleOf(s.knownEncodingAlignmentInBits)
+          case Postfix => LengthMultipleOf(0)
+        }
+      case _ => LengthMultipleOf(0)
+    }
+
+  private lazy val separatorPostfixMTAApprox =
+    this.optLexicalParent match {
+      case Some(s: SequenceTermBase) if s.hasSeparator =>
+        import SeparatorPosition.*
+        s.separatorPosition match {
+          case Prefix | Infix => LengthMultipleOf(0)
+          case Postfix => LengthMultipleOf(s.knownEncodingAlignmentInBits)
+        }
+      case _ => LengthMultipleOf(0)
+    }
+
+  private lazy val separatorLengthApprox = this.optLexicalParent match {
+    case Some(s: SequenceTermBase) if s.hasSeparator =>
+      getEncodingLengthApprox(s)
+    case _ => LengthMultipleOf(0)
+  }
+
+  private lazy val initiatorMTAApprox =
+    if (this.hasInitiator) {
+      AlignmentMultipleOf(this.knownEncodingAlignmentInBits)
+    } else {
+      AlignmentMultipleOf(0)
+    }
+
+  private lazy val initiatorLengthApprox = if (this.hasInitiator) {
+    getEncodingLengthApprox(this)
+  } else {
+    LengthMultipleOf(0)
+  }
+
+  private lazy val terminatorMTAApprox =
+    if (this.hasTerminator) {
+      AlignmentMultipleOf(this.knownEncodingAlignmentInBits)
+    } else {
+      AlignmentMultipleOf(0)
+    }
+
+  private lazy val terminatorLengthApprox = if (this.hasTerminator) {
+    getEncodingLengthApprox(this)
+  } else {
+    LengthMultipleOf(0)
+  }
+
+  private def getEncodingLengthApprox(t: Term) = {
+    if (t.isKnownEncoding) {
+      val dfdlCharset = t.charsetEv.optConstant.get
+      val fixedWidth =
+        if (dfdlCharset.maybeFixedWidth.isDefined) 
dfdlCharset.maybeFixedWidth.get else 8
+      LengthMultipleOf(fixedWidth)
+    } else {
+      // runtime encoding, which must be a byte-length encoding
+      LengthMultipleOf(8)
+    }
+  }
+
+  /**
+   * prior alignment with leading skip includes the prefix/infix separator MTA 
and length,
+   * any leading skips
+   */
   private lazy val priorAlignmentWithLeadingSkipApprox: AlignmentMultipleOf = {
-    priorAlignmentApprox + leadingSkipApprox
+    val priorAlignmentWithSeparatorApprox = priorAlignmentApprox
+      + separatorPrefixMTAApprox
+      + separatorLengthApprox
+    val leadingAlignmentApprox = (priorAlignmentWithSeparatorApprox
+      + leadingSkipApprox)
+    leadingAlignmentApprox
+  }
+
+  /**
+   * The length of the prefix length quasi element.
+   *
+   * This is only relevant for quasi elements, or prefixed terms.
+   */
+  private lazy val prefixLengthElementLength = {
+    this match {
+      case eb: ElementBase => {
+        if (eb.isInstanceOf[QuasiElementDeclBase])
+          LengthExact(
+            
eb.asInstanceOf[QuasiElementDeclBase].elementLengthInBitsEv.constValue.get
+          )
+        else if (eb.lengthKind == Prefixed)
+          LengthExact(
+            this
+              .asInstanceOf[ElementBase]
+              .prefixedLengthElementDecl
+              .elementLengthInBitsEv
+              .constValue
+              .get
+          )
+        else
+          LengthExact(0)
+      }
+      case _ => LengthExact(0)
+    }
   }
 
+  /**
+   * The alignment of the value of the term.
+   *
+   * This is only relevant for simple elements.
+   */
+  private lazy val valueMTAApprox = {
+    this match {
+      case eb: ElementBase if eb.isSimpleType => {
+        AlignmentMultipleOf(eb.knownEncodingAlignmentInBits)
+      }
+      case _ => AlignmentMultipleOf(0)
+    }
+  }
+
+  /**
+   * This alignment is made up of the alignment of the term itself,
+   * the initiator MTA and length, the prefix length quasi
+   * element length, and the value MTA (we add it for optimization
+   * but is extremely difficult to test, as other things such
+   * as unparsers will provide MTA, even elementSpecifiedLengthApprox
+   * considers the encoding also).
+   */
   protected lazy val contentStartAlignment: AlignmentMultipleOf = {
-    if (priorAlignmentWithLeadingSkipApprox % alignmentApprox == 0) {
-      // alignment won't be needed, continue using prior alignment as start 
alignment
-      priorAlignmentWithLeadingSkipApprox
-    } else {
-      // alignment will be needed, it will be forced to be aligned to 
alignmentApprox
+    val leftFramingApprox =
       alignmentApprox
-    }
+        * initiatorMTAApprox
+        + initiatorLengthApprox
+    val csa = (leftFramingApprox + prefixLengthElementLength) * valueMTAApprox

Review Comment:
   I think this also wants to use the new + operator.



##########
daffodil-core/src/main/scala/org/apache/daffodil/core/grammar/AlignedMixin.scala:
##########
@@ -21,17 +21,24 @@ import org.apache.daffodil.core.dsom.ElementBase
 import org.apache.daffodil.core.dsom.ModelGroup
 import org.apache.daffodil.core.dsom.QuasiElementDeclBase
 import org.apache.daffodil.core.dsom.Root
+import org.apache.daffodil.core.dsom.SequenceTermBase
 import org.apache.daffodil.core.dsom.Term
 import org.apache.daffodil.lib.exceptions.Assert
 import org.apache.daffodil.lib.schema.annotation.props.gen.AlignmentKind
 import org.apache.daffodil.lib.schema.annotation.props.gen.AlignmentUnits
 import org.apache.daffodil.lib.schema.annotation.props.gen.LengthKind
+import org.apache.daffodil.lib.schema.annotation.props.gen.LengthKind.Explicit
+import org.apache.daffodil.lib.schema.annotation.props.gen.LengthKind.Prefixed
 import org.apache.daffodil.lib.schema.annotation.props.gen.LengthUnits
+import org.apache.daffodil.lib.schema.annotation.props.gen.SeparatorPosition
 import org.apache.daffodil.lib.util.Math
 
 case class AlignmentMultipleOf(nBits: Long) {
+  // To combine AlignmentMultipleOfs, use *
   def *(that: AlignmentMultipleOf) = AlignmentMultipleOf(Math.gcd(nBits, 
that.nBits))
   def %(that: AlignmentMultipleOf) = nBits % that.nBits
+

Review Comment:
   Some of these changes are making me wonder if we need a `def +(that: 
AlignmentMultipleOf)` that has slightly different logic than gcd.
   
   I believe the intention of `*` was to combine alignments that could all 
happen at the same point in data (e.g. a choice of elements, optional siblings 
elements). 
   
   But in the new code, we are taking an existing approximate alignment, and 
potentially adding a new alignment to it, which is slightly different.
   
   For example, say we are currently at 2-byte alignment 
(`AlignmentMultipleof(16)`) and we are adding an element that needs to be 
1-byte (`AlignmentMultipleOf(8)`). In this case, we do not need to perform any 
alignment here because we are already byte align (we are actually 2-byte 
aligned), and our logic handles that correctly.
   
   But the contentStartAlignment of that new element still really wants to be 
2-byte aligned, so it should remain `AlignmentMultipleOf(16)`. But right now, 
using `*` it's alignment will be `AlignmentMultilpeOf(8)` since `gcd(16,8) => 
8`. So we actually have less information about our true alignment, and could 
miss out on future optimizations.
   
   So it feels like a `+` operation might want to be something like
   
   ```scala
   def *(that: AlignmentMultipleOf) = if (this.nBits % that.nBits == 0) this 
else that
   ```
   
   The idea is that if our new alignment (`that`) evenly divides our existing 
alignment (`this`), then no alignment is actually needed, and we can keep our 
more accurate current alignment. Otherwise we use the new alignment.  
   



##########
daffodil-core/src/main/scala/org/apache/daffodil/core/grammar/AlignedMixin.scala:
##########
@@ -228,29 +235,161 @@ trait AlignedMixin extends GrammarMixin { self: Term =>
       }
     }.value
 
+  private lazy val separatorPrefixMTAApprox =
+    this.optLexicalParent match {
+      case Some(s: SequenceTermBase) if s.hasSeparator =>
+        import SeparatorPosition.*
+        s.separatorPosition match {
+          case Prefix | Infix => 
LengthMultipleOf(s.knownEncodingAlignmentInBits)
+          case Postfix => LengthMultipleOf(0)
+        }
+      case _ => LengthMultipleOf(0)
+    }
+
+  private lazy val separatorPostfixMTAApprox =
+    this.optLexicalParent match {
+      case Some(s: SequenceTermBase) if s.hasSeparator =>
+        import SeparatorPosition.*
+        s.separatorPosition match {
+          case Prefix | Infix => LengthMultipleOf(0)
+          case Postfix => LengthMultipleOf(s.knownEncodingAlignmentInBits)
+        }
+      case _ => LengthMultipleOf(0)
+    }
+
+  private lazy val separatorLengthApprox = this.optLexicalParent match {
+    case Some(s: SequenceTermBase) if s.hasSeparator =>
+      getEncodingLengthApprox(s)
+    case _ => LengthMultipleOf(0)
+  }
+
+  private lazy val initiatorMTAApprox =
+    if (this.hasInitiator) {
+      AlignmentMultipleOf(this.knownEncodingAlignmentInBits)
+    } else {
+      AlignmentMultipleOf(0)
+    }
+
+  private lazy val initiatorLengthApprox = if (this.hasInitiator) {
+    getEncodingLengthApprox(this)
+  } else {
+    LengthMultipleOf(0)
+  }
+
+  private lazy val terminatorMTAApprox =
+    if (this.hasTerminator) {
+      AlignmentMultipleOf(this.knownEncodingAlignmentInBits)
+    } else {
+      AlignmentMultipleOf(0)
+    }
+
+  private lazy val terminatorLengthApprox = if (this.hasTerminator) {
+    getEncodingLengthApprox(this)
+  } else {
+    LengthMultipleOf(0)
+  }
+
+  private def getEncodingLengthApprox(t: Term) = {
+    if (t.isKnownEncoding) {
+      val dfdlCharset = t.charsetEv.optConstant.get
+      val fixedWidth =
+        if (dfdlCharset.maybeFixedWidth.isDefined) 
dfdlCharset.maybeFixedWidth.get else 8
+      LengthMultipleOf(fixedWidth)
+    } else {
+      // runtime encoding, which must be a byte-length encoding
+      LengthMultipleOf(8)
+    }
+  }
+
+  /**
+   * prior alignment with leading skip includes the prefix/infix separator MTA 
and length,
+   * any leading skips
+   */
   private lazy val priorAlignmentWithLeadingSkipApprox: AlignmentMultipleOf = {
-    priorAlignmentApprox + leadingSkipApprox
+    val priorAlignmentWithSeparatorApprox = priorAlignmentApprox
+      + separatorPrefixMTAApprox

Review Comment:
   Note that with the new + operator, this does want to stay a  `+` and not use 
the old `*` operator. Since this isn't combining potential alignments, this is 
taking an existing alignment and adding to it.



##########
daffodil-core/src/main/scala/org/apache/daffodil/core/grammar/AlignedMixin.scala:
##########
@@ -228,29 +235,161 @@ trait AlignedMixin extends GrammarMixin { self: Term =>
       }
     }.value
 
+  private lazy val separatorPrefixMTAApprox =
+    this.optLexicalParent match {
+      case Some(s: SequenceTermBase) if s.hasSeparator =>
+        import SeparatorPosition.*
+        s.separatorPosition match {
+          case Prefix | Infix => 
LengthMultipleOf(s.knownEncodingAlignmentInBits)
+          case Postfix => LengthMultipleOf(0)
+        }
+      case _ => LengthMultipleOf(0)
+    }
+
+  private lazy val separatorPostfixMTAApprox =
+    this.optLexicalParent match {
+      case Some(s: SequenceTermBase) if s.hasSeparator =>
+        import SeparatorPosition.*
+        s.separatorPosition match {
+          case Prefix | Infix => LengthMultipleOf(0)
+          case Postfix => LengthMultipleOf(s.knownEncodingAlignmentInBits)
+        }
+      case _ => LengthMultipleOf(0)
+    }
+
+  private lazy val separatorLengthApprox = this.optLexicalParent match {
+    case Some(s: SequenceTermBase) if s.hasSeparator =>
+      getEncodingLengthApprox(s)
+    case _ => LengthMultipleOf(0)
+  }
+
+  private lazy val initiatorMTAApprox =
+    if (this.hasInitiator) {
+      AlignmentMultipleOf(this.knownEncodingAlignmentInBits)
+    } else {
+      AlignmentMultipleOf(0)
+    }
+
+  private lazy val initiatorLengthApprox = if (this.hasInitiator) {
+    getEncodingLengthApprox(this)
+  } else {
+    LengthMultipleOf(0)
+  }
+
+  private lazy val terminatorMTAApprox =
+    if (this.hasTerminator) {
+      AlignmentMultipleOf(this.knownEncodingAlignmentInBits)
+    } else {
+      AlignmentMultipleOf(0)
+    }
+
+  private lazy val terminatorLengthApprox = if (this.hasTerminator) {
+    getEncodingLengthApprox(this)
+  } else {
+    LengthMultipleOf(0)
+  }
+
+  private def getEncodingLengthApprox(t: Term) = {
+    if (t.isKnownEncoding) {
+      val dfdlCharset = t.charsetEv.optConstant.get
+      val fixedWidth =
+        if (dfdlCharset.maybeFixedWidth.isDefined) 
dfdlCharset.maybeFixedWidth.get else 8
+      LengthMultipleOf(fixedWidth)
+    } else {
+      // runtime encoding, which must be a byte-length encoding
+      LengthMultipleOf(8)
+    }
+  }
+
+  /**
+   * prior alignment with leading skip includes the prefix/infix separator MTA 
and length,
+   * any leading skips
+   */
   private lazy val priorAlignmentWithLeadingSkipApprox: AlignmentMultipleOf = {
-    priorAlignmentApprox + leadingSkipApprox
+    val priorAlignmentWithSeparatorApprox = priorAlignmentApprox
+      + separatorPrefixMTAApprox
+      + separatorLengthApprox
+    val leadingAlignmentApprox = (priorAlignmentWithSeparatorApprox
+      + leadingSkipApprox)
+    leadingAlignmentApprox
+  }
+
+  /**
+   * The length of the prefix length quasi element.
+   *
+   * This is only relevant for quasi elements, or prefixed terms.
+   */
+  private lazy val prefixLengthElementLength = {
+    this match {
+      case eb: ElementBase => {
+        if (eb.isInstanceOf[QuasiElementDeclBase])
+          LengthExact(
+            
eb.asInstanceOf[QuasiElementDeclBase].elementLengthInBitsEv.constValue.get
+          )
+        else if (eb.lengthKind == Prefixed)
+          LengthExact(
+            this
+              .asInstanceOf[ElementBase]
+              .prefixedLengthElementDecl
+              .elementLengthInBitsEv
+              .constValue
+              .get
+          )
+        else
+          LengthExact(0)
+      }
+      case _ => LengthExact(0)
+    }
   }
 
+  /**
+   * The alignment of the value of the term.
+   *
+   * This is only relevant for simple elements.
+   */
+  private lazy val valueMTAApprox = {
+    this match {
+      case eb: ElementBase if eb.isSimpleType => {
+        AlignmentMultipleOf(eb.knownEncodingAlignmentInBits)
+      }
+      case _ => AlignmentMultipleOf(0)
+    }
+  }
+
+  /**
+   * This alignment is made up of the alignment of the term itself,
+   * the initiator MTA and length, the prefix length quasi
+   * element length, and the value MTA (we add it for optimization
+   * but is extremely difficult to test, as other things such
+   * as unparsers will provide MTA, even elementSpecifiedLengthApprox
+   * considers the encoding also).
+   */
   protected lazy val contentStartAlignment: AlignmentMultipleOf = {
-    if (priorAlignmentWithLeadingSkipApprox % alignmentApprox == 0) {
-      // alignment won't be needed, continue using prior alignment as start 
alignment
-      priorAlignmentWithLeadingSkipApprox
-    } else {
-      // alignment will be needed, it will be forced to be aligned to 
alignmentApprox
+    val leftFramingApprox =
       alignmentApprox
-    }
+        * initiatorMTAApprox
+        + initiatorLengthApprox
+    val csa = (leftFramingApprox + prefixLengthElementLength) * valueMTAApprox
+    csa
   }
 
+  /**
+   * Accounts for the start of the content alignment, element length,
+   * terminator MTA/Length and trailing skip
+   */
   protected lazy val endingAlignmentApprox: AlignmentMultipleOf = {
     this match {
       case eb: ElementBase => {
-        if (eb.isComplexType && eb.lengthKind == LengthKind.Implicit) {
-          eb.complexType.group.endingAlignmentApprox + trailingSkipApprox
+        val res = if (eb.isComplexType && eb.lengthKind == 
LengthKind.Implicit) {
+          eb.complexType.group.endingAlignmentApprox
         } else {
           // simple type or complex type with specified length
-          contentStartAlignment + (elementSpecifiedLengthApprox + 
trailingSkipApprox)
+          contentStartAlignment + elementSpecifiedLengthApprox
         }
+        val cea = res * terminatorMTAApprox

Review Comment:
   Wants to be the new + operator



##########
daffodil-core/src/main/scala/org/apache/daffodil/core/grammar/AlignedMixin.scala:
##########
@@ -228,29 +235,161 @@ trait AlignedMixin extends GrammarMixin { self: Term =>
       }
     }.value
 
+  private lazy val separatorPrefixMTAApprox =
+    this.optLexicalParent match {
+      case Some(s: SequenceTermBase) if s.hasSeparator =>
+        import SeparatorPosition.*
+        s.separatorPosition match {
+          case Prefix | Infix => 
LengthMultipleOf(s.knownEncodingAlignmentInBits)
+          case Postfix => LengthMultipleOf(0)
+        }
+      case _ => LengthMultipleOf(0)
+    }
+
+  private lazy val separatorPostfixMTAApprox =
+    this.optLexicalParent match {
+      case Some(s: SequenceTermBase) if s.hasSeparator =>
+        import SeparatorPosition.*
+        s.separatorPosition match {
+          case Prefix | Infix => LengthMultipleOf(0)
+          case Postfix => LengthMultipleOf(s.knownEncodingAlignmentInBits)
+        }
+      case _ => LengthMultipleOf(0)
+    }
+
+  private lazy val separatorLengthApprox = this.optLexicalParent match {
+    case Some(s: SequenceTermBase) if s.hasSeparator =>
+      getEncodingLengthApprox(s)
+    case _ => LengthMultipleOf(0)
+  }
+
+  private lazy val initiatorMTAApprox =
+    if (this.hasInitiator) {
+      AlignmentMultipleOf(this.knownEncodingAlignmentInBits)
+    } else {
+      AlignmentMultipleOf(0)
+    }
+
+  private lazy val initiatorLengthApprox = if (this.hasInitiator) {
+    getEncodingLengthApprox(this)
+  } else {
+    LengthMultipleOf(0)
+  }
+
+  private lazy val terminatorMTAApprox =
+    if (this.hasTerminator) {
+      AlignmentMultipleOf(this.knownEncodingAlignmentInBits)
+    } else {
+      AlignmentMultipleOf(0)
+    }
+
+  private lazy val terminatorLengthApprox = if (this.hasTerminator) {
+    getEncodingLengthApprox(this)
+  } else {
+    LengthMultipleOf(0)
+  }
+
+  private def getEncodingLengthApprox(t: Term) = {
+    if (t.isKnownEncoding) {
+      val dfdlCharset = t.charsetEv.optConstant.get
+      val fixedWidth =
+        if (dfdlCharset.maybeFixedWidth.isDefined) 
dfdlCharset.maybeFixedWidth.get else 8
+      LengthMultipleOf(fixedWidth)
+    } else {
+      // runtime encoding, which must be a byte-length encoding
+      LengthMultipleOf(8)
+    }
+  }
+
+  /**
+   * prior alignment with leading skip includes the prefix/infix separator MTA 
and length,
+   * any leading skips
+   */
   private lazy val priorAlignmentWithLeadingSkipApprox: AlignmentMultipleOf = {
-    priorAlignmentApprox + leadingSkipApprox
+    val priorAlignmentWithSeparatorApprox = priorAlignmentApprox
+      + separatorPrefixMTAApprox
+      + separatorLengthApprox
+    val leadingAlignmentApprox = (priorAlignmentWithSeparatorApprox
+      + leadingSkipApprox)
+    leadingAlignmentApprox
+  }
+
+  /**
+   * The length of the prefix length quasi element.
+   *
+   * This is only relevant for quasi elements, or prefixed terms.
+   */
+  private lazy val prefixLengthElementLength = {
+    this match {
+      case eb: ElementBase => {
+        if (eb.isInstanceOf[QuasiElementDeclBase])
+          LengthExact(
+            
eb.asInstanceOf[QuasiElementDeclBase].elementLengthInBitsEv.constValue.get
+          )
+        else if (eb.lengthKind == Prefixed)
+          LengthExact(
+            this
+              .asInstanceOf[ElementBase]
+              .prefixedLengthElementDecl
+              .elementLengthInBitsEv
+              .constValue
+              .get
+          )
+        else
+          LengthExact(0)
+      }
+      case _ => LengthExact(0)
+    }
   }
 
+  /**
+   * The alignment of the value of the term.
+   *
+   * This is only relevant for simple elements.
+   */
+  private lazy val valueMTAApprox = {
+    this match {
+      case eb: ElementBase if eb.isSimpleType => {

Review Comment:
   Doesn't this also only apply if representation is text? For example, if this 
is a binary number then there is no MTA. I'm not sure if there are other cases 
where it does/does not apply (e.g. zoned numbers have to be aligned, but maybe 
that's not MTA?)



##########
daffodil-core/src/main/scala/org/apache/daffodil/core/grammar/AlignedMixin.scala:
##########
@@ -228,29 +235,161 @@ trait AlignedMixin extends GrammarMixin { self: Term =>
       }
     }.value
 
+  private lazy val separatorPrefixMTAApprox =
+    this.optLexicalParent match {
+      case Some(s: SequenceTermBase) if s.hasSeparator =>
+        import SeparatorPosition.*
+        s.separatorPosition match {
+          case Prefix | Infix => 
LengthMultipleOf(s.knownEncodingAlignmentInBits)
+          case Postfix => LengthMultipleOf(0)
+        }
+      case _ => LengthMultipleOf(0)
+    }
+
+  private lazy val separatorPostfixMTAApprox =
+    this.optLexicalParent match {
+      case Some(s: SequenceTermBase) if s.hasSeparator =>
+        import SeparatorPosition.*
+        s.separatorPosition match {
+          case Prefix | Infix => LengthMultipleOf(0)
+          case Postfix => LengthMultipleOf(s.knownEncodingAlignmentInBits)
+        }
+      case _ => LengthMultipleOf(0)
+    }
+
+  private lazy val separatorLengthApprox = this.optLexicalParent match {
+    case Some(s: SequenceTermBase) if s.hasSeparator =>
+      getEncodingLengthApprox(s)
+    case _ => LengthMultipleOf(0)
+  }
+
+  private lazy val initiatorMTAApprox =
+    if (this.hasInitiator) {
+      AlignmentMultipleOf(this.knownEncodingAlignmentInBits)
+    } else {
+      AlignmentMultipleOf(0)
+    }
+
+  private lazy val initiatorLengthApprox = if (this.hasInitiator) {
+    getEncodingLengthApprox(this)
+  } else {
+    LengthMultipleOf(0)
+  }
+
+  private lazy val terminatorMTAApprox =
+    if (this.hasTerminator) {
+      AlignmentMultipleOf(this.knownEncodingAlignmentInBits)
+    } else {
+      AlignmentMultipleOf(0)
+    }
+
+  private lazy val terminatorLengthApprox = if (this.hasTerminator) {
+    getEncodingLengthApprox(this)
+  } else {
+    LengthMultipleOf(0)
+  }
+
+  private def getEncodingLengthApprox(t: Term) = {
+    if (t.isKnownEncoding) {
+      val dfdlCharset = t.charsetEv.optConstant.get
+      val fixedWidth =
+        if (dfdlCharset.maybeFixedWidth.isDefined) 
dfdlCharset.maybeFixedWidth.get else 8
+      LengthMultipleOf(fixedWidth)
+    } else {
+      // runtime encoding, which must be a byte-length encoding
+      LengthMultipleOf(8)
+    }
+  }
+
+  /**
+   * prior alignment with leading skip includes the prefix/infix separator MTA 
and length,
+   * any leading skips
+   */
   private lazy val priorAlignmentWithLeadingSkipApprox: AlignmentMultipleOf = {
-    priorAlignmentApprox + leadingSkipApprox
+    val priorAlignmentWithSeparatorApprox = priorAlignmentApprox
+      + separatorPrefixMTAApprox
+      + separatorLengthApprox
+    val leadingAlignmentApprox = (priorAlignmentWithSeparatorApprox
+      + leadingSkipApprox)
+    leadingAlignmentApprox
+  }
+
+  /**
+   * The length of the prefix length quasi element.
+   *
+   * This is only relevant for quasi elements, or prefixed terms.
+   */
+  private lazy val prefixLengthElementLength = {
+    this match {
+      case eb: ElementBase => {
+        if (eb.isInstanceOf[QuasiElementDeclBase])
+          LengthExact(
+            
eb.asInstanceOf[QuasiElementDeclBase].elementLengthInBitsEv.constValue.get
+          )
+        else if (eb.lengthKind == Prefixed)
+          LengthExact(
+            this
+              .asInstanceOf[ElementBase]
+              .prefixedLengthElementDecl
+              .elementLengthInBitsEv
+              .constValue
+              .get
+          )
+        else
+          LengthExact(0)
+      }
+      case _ => LengthExact(0)
+    }
   }
 
+  /**
+   * The alignment of the value of the term.
+   *
+   * This is only relevant for simple elements.
+   */
+  private lazy val valueMTAApprox = {
+    this match {
+      case eb: ElementBase if eb.isSimpleType => {
+        AlignmentMultipleOf(eb.knownEncodingAlignmentInBits)
+      }
+      case _ => AlignmentMultipleOf(0)
+    }
+  }
+
+  /**
+   * This alignment is made up of the alignment of the term itself,
+   * the initiator MTA and length, the prefix length quasi
+   * element length, and the value MTA (we add it for optimization
+   * but is extremely difficult to test, as other things such
+   * as unparsers will provide MTA, even elementSpecifiedLengthApprox
+   * considers the encoding also).
+   */
   protected lazy val contentStartAlignment: AlignmentMultipleOf = {
-    if (priorAlignmentWithLeadingSkipApprox % alignmentApprox == 0) {
-      // alignment won't be needed, continue using prior alignment as start 
alignment
-      priorAlignmentWithLeadingSkipApprox
-    } else {
-      // alignment will be needed, it will be forced to be aligned to 
alignmentApprox
+    val leftFramingApprox =
       alignmentApprox
-    }
+        * initiatorMTAApprox

Review Comment:
   This wants use the new + operator.



##########
daffodil-core/src/main/scala/org/apache/daffodil/core/grammar/AlignedMixin.scala:
##########
@@ -228,29 +235,161 @@ trait AlignedMixin extends GrammarMixin { self: Term =>
       }
     }.value
 
+  private lazy val separatorPrefixMTAApprox =
+    this.optLexicalParent match {
+      case Some(s: SequenceTermBase) if s.hasSeparator =>
+        import SeparatorPosition.*
+        s.separatorPosition match {
+          case Prefix | Infix => 
LengthMultipleOf(s.knownEncodingAlignmentInBits)
+          case Postfix => LengthMultipleOf(0)
+        }
+      case _ => LengthMultipleOf(0)
+    }
+
+  private lazy val separatorPostfixMTAApprox =
+    this.optLexicalParent match {
+      case Some(s: SequenceTermBase) if s.hasSeparator =>
+        import SeparatorPosition.*
+        s.separatorPosition match {
+          case Prefix | Infix => LengthMultipleOf(0)
+          case Postfix => LengthMultipleOf(s.knownEncodingAlignmentInBits)
+        }
+      case _ => LengthMultipleOf(0)
+    }
+
+  private lazy val separatorLengthApprox = this.optLexicalParent match {
+    case Some(s: SequenceTermBase) if s.hasSeparator =>
+      getEncodingLengthApprox(s)
+    case _ => LengthMultipleOf(0)
+  }
+
+  private lazy val initiatorMTAApprox =
+    if (this.hasInitiator) {
+      AlignmentMultipleOf(this.knownEncodingAlignmentInBits)
+    } else {
+      AlignmentMultipleOf(0)
+    }
+
+  private lazy val initiatorLengthApprox = if (this.hasInitiator) {
+    getEncodingLengthApprox(this)
+  } else {
+    LengthMultipleOf(0)
+  }
+
+  private lazy val terminatorMTAApprox =
+    if (this.hasTerminator) {
+      AlignmentMultipleOf(this.knownEncodingAlignmentInBits)
+    } else {
+      AlignmentMultipleOf(0)
+    }
+
+  private lazy val terminatorLengthApprox = if (this.hasTerminator) {
+    getEncodingLengthApprox(this)
+  } else {
+    LengthMultipleOf(0)
+  }
+
+  private def getEncodingLengthApprox(t: Term) = {
+    if (t.isKnownEncoding) {
+      val dfdlCharset = t.charsetEv.optConstant.get
+      val fixedWidth =
+        if (dfdlCharset.maybeFixedWidth.isDefined) 
dfdlCharset.maybeFixedWidth.get else 8
+      LengthMultipleOf(fixedWidth)
+    } else {
+      // runtime encoding, which must be a byte-length encoding
+      LengthMultipleOf(8)
+    }
+  }
+
+  /**
+   * prior alignment with leading skip includes the prefix/infix separator MTA 
and length,
+   * any leading skips
+   */
   private lazy val priorAlignmentWithLeadingSkipApprox: AlignmentMultipleOf = {
-    priorAlignmentApprox + leadingSkipApprox
+    val priorAlignmentWithSeparatorApprox = priorAlignmentApprox
+      + separatorPrefixMTAApprox
+      + separatorLengthApprox
+    val leadingAlignmentApprox = (priorAlignmentWithSeparatorApprox
+      + leadingSkipApprox)
+    leadingAlignmentApprox
+  }
+
+  /**
+   * The length of the prefix length quasi element.
+   *
+   * This is only relevant for quasi elements, or prefixed terms.
+   */
+  private lazy val prefixLengthElementLength = {
+    this match {
+      case eb: ElementBase => {
+        if (eb.isInstanceOf[QuasiElementDeclBase])
+          LengthExact(
+            
eb.asInstanceOf[QuasiElementDeclBase].elementLengthInBitsEv.constValue.get
+          )
+        else if (eb.lengthKind == Prefixed)
+          LengthExact(
+            this
+              .asInstanceOf[ElementBase]
+              .prefixedLengthElementDecl
+              .elementLengthInBitsEv
+              .constValue
+              .get
+          )
+        else
+          LengthExact(0)
+      }
+      case _ => LengthExact(0)
+    }
   }
 
+  /**
+   * The alignment of the value of the term.
+   *
+   * This is only relevant for simple elements.
+   */
+  private lazy val valueMTAApprox = {
+    this match {
+      case eb: ElementBase if eb.isSimpleType => {
+        AlignmentMultipleOf(eb.knownEncodingAlignmentInBits)
+      }
+      case _ => AlignmentMultipleOf(0)
+    }
+  }
+
+  /**
+   * This alignment is made up of the alignment of the term itself,
+   * the initiator MTA and length, the prefix length quasi
+   * element length, and the value MTA (we add it for optimization
+   * but is extremely difficult to test, as other things such
+   * as unparsers will provide MTA, even elementSpecifiedLengthApprox
+   * considers the encoding also).
+   */
   protected lazy val contentStartAlignment: AlignmentMultipleOf = {
-    if (priorAlignmentWithLeadingSkipApprox % alignmentApprox == 0) {
-      // alignment won't be needed, continue using prior alignment as start 
alignment
-      priorAlignmentWithLeadingSkipApprox
-    } else {
-      // alignment will be needed, it will be forced to be aligned to 
alignmentApprox
+    val leftFramingApprox =
       alignmentApprox

Review Comment:
   I think maybe this actually does want to take into account the 
prirorAlignmentWithLeadingSkip, but do so with the new `+` operator. For 
example, if the prior alignment was 16 and this.alignmentApprox is 8, we'll 
keep the 16-bit alignment moving forward, which is the more accurate alignment 
and still satisfies alignmentApprox.
   
   Note that the + operator has similar logic to what we used to have (i.e. if 
alignApprox divides priorAlignmentWithLeadingSkipApprox then we keep using the 
prior).



##########
daffodil-core/src/main/scala/org/apache/daffodil/core/grammar/AlignedMixin.scala:
##########
@@ -321,23 +476,24 @@ trait AlignedMixin extends GrammarMixin { self: Term =>
           case LengthKind.Delimited => encodingLengthApprox
           case LengthKind.Pattern => encodingLengthApprox
           case LengthKind.EndOfParent => LengthMultipleOf(1) // NYI
-          case LengthKind.Prefixed => LengthMultipleOf(1) // NYI
+          case LengthKind.Prefixed => {
+            val prefixElem = eb.prefixedLengthElementDecl
+            if (prefixElem.lengthKind == Explicit) {
+              LengthExact(
+                prefixElem.elementLengthInBitsEv.optConstant.get.get
+              ) + prefixLengthElementLength
+            } else {
+              getEncodingLengthApprox(prefixElem)

Review Comment:
   Prefix elements dont' necessarily have encoding. They could just be binary 
numbers. And I think it's possible for prefix lengths to have 
lengthKind="prefixed", in which case this would have to be 
`LengthMultipleOf(lengthUnits)` or something?



##########
daffodil-core/src/main/scala/org/apache/daffodil/core/grammar/AlignedMixin.scala:
##########
@@ -260,37 +399,53 @@ trait AlignedMixin extends GrammarMixin { self: Term =>
         //
         val (lastChildren, couldBeLast) = mg.potentialLastChildren
         val lastApproxesConsideringChildren: Seq[AlignmentMultipleOf] = 
lastChildren.map { lc =>
-          //
-          // for each possible last child, add its ending alignment
-          // to our trailing skip to get where it would leave off were
-          // it the actual last child.
-          //
+          // for each possible last child, gather its endingAlignmentApprox
           val lceaa = lc.endingAlignmentApprox
-          val res = lceaa + trailingSkipApprox
-          res
+          lceaa
         }
         val optApproxIfNoChildren =
           //
           // gather possibilities for this item itself
           //
-          if (couldBeLast)
+          if (couldBeLast) {
             //
             // if this model group could be last, then consider
             // if none of its content was present.
-            // We'd just have the contentStart, nothing, and the trailing Skip.
+            // We'd just have the contentStart
             //
-            Some(contentStartAlignment + trailingSkipApprox)
-          else
+            val res = Some(contentStartAlignment)
+            res
+          } else
             // can't be last, no possibilities to gather.
             None
         val lastApproxes = lastApproxesConsideringChildren ++ 
optApproxIfNoChildren
-        Assert.invariant(lastApproxes.nonEmpty)
-        val res = lastApproxes.reduce { _ * _ }
+        // take all the gathered possibilities and add the ending alignment
+        // to terminator MTA/length and our trailing skip to get where it 
would leave off were
+        // each the actual last child.
+        val lastApproxesFinal = lastApproxes.map {
+          _ * terminatorMTAApprox

Review Comment:
   This wants to be a +



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]

Re: [PR] Add logic for separator/initiator/prefixed/terminator alignment [daffodil]

Reply via email to