dev book.xml extensions.xml

keiron Thu, 07 Nov 2002 00:17:07 -0800

keiron      2002/11/07 00:15:01

  Modified:    src/documentation/content/xdocs book.xml
               src/documentation/content/xdocs/dev book.xml extensions.xml
  Added:       src/documentation/content/xdocs/design architecture.xml
                        areas.xml book.xml breakpos.xml embedding.xml
                        extending.xml fotree.xml index.xml layout.xml
                        optimise.xml properties.xml renderers.xml
                        status.xml useragent.xml
  Log:
  converted design docs to forrest
  needs updating
  
  Revision  Changes    Path
  1.5       +11 -3     xml-fop/src/documentation/content/xdocs/book.xml
  
  Index: book.xml
  ===================================================================
  RCS file: /home/cvs/xml-fop/src/documentation/content/xdocs/book.xml,v
  retrieving revision 1.4
  retrieving revision 1.5
  diff -u -r1.4 -r1.5
  --- book.xml  4 Nov 2002 16:20:51 -0000       1.4
  +++ book.xml  7 Nov 2002 08:15:01 -0000       1.5
  @@ -13,32 +13,40 @@
             <menu-item label="Download" href="download.html"/>
             <menu-item label="Release Notes" href="relnotes.html"/>
             <menu-item label="Getting Help" href="gethelp.html"/>
  +          <menu-item label="Examples" href="examples.html"/>
  +        </menu>
   
  +        <menu label="Project">
             <menu-item label="Status" href="status.html"/>
             <menu-item label="Changes" href="changes.html"/>
             <menu-item label="Todo" href="todo.html"/>
  +        </menu>
   
  +        <menu label="Using FOP">
             <menu-item label="Running" href="running.html"/>
             <menu-item label="Embedding" href="embedding.html"/>
             <menu-item label="Output Formats" href="output.html"/>
             <menu-item label="Implemented" href="implemented.html"/>
             <menu-item label="Limitations" href="limitations.html"/>
  +        </menu>
   
  +        <menu label="Extras">
             <menu-item label="SVG" href="svg.html"/>
             <menu-item label="Extensions" href="extensions.html"/>
             <menu-item label="Fonts" href="fonts.html"/>
             <menu-item label="Configuration" href="configuration.html"/>
  +        </menu>
   
  +        <menu label="Developing">
             <menu-item label="Getting Involved" href="involved.html"/>
             <menu-item label="Compiling" href="compiling.html"/>
             <menu-item label="Testing" href="testing.html"/>
  +        </menu>
   
  +        <menu label="Resources">
             <menu-item label="Bugs" href="bugs.html"/>
             <menu-item label="Resources" href="resources.html"/>
             <menu-item label="License" href="license.html"/>
  -
  -          <menu-item label="Examples" href="examples.html"/>
  -
             <external label="Patch queue" 
href="http://nagoya.apache.org/bugzilla/buglist.cgi?bug_status=NEW&amp;bug_status=ASSIGNED&amp;bug_status=REOPENED&amp;email1=&amp;emailtype1=substring&amp;emailassigned_to1=1&amp;email2=&amp;emailtype2=substring&amp;emailreporter2=1&amp;bugidtype=include&amp;bug_id=&amp;changedin=&amp;votes=&amp;chfieldfrom=&amp;chfieldto=Now&amp;chfieldvalue=&amp;product=Fop&amp;short_desc=%5BPATCH%5D&amp;short_desc_type=allwordssubstr&amp;long_desc=&amp;long_desc_type=allwordssubstr&amp;bug_file_loc=&amp;bug_file_loc_type=allwordssubstr&amp;keywords=&amp;keywords_type=anywords&amp;field0-0-0=noop&amp;type0-0-0=noop&amp;value0-0-0=&amp;namedcmd=Fop+all&amp;newqueryname=fop+patch+queue&amp;tofooter=1&amp;order=Reuse+same+sort+as+last+time"/>
           </menu>
       </book>
  
  
  
  1.1                  xml-fop/src/documentation/content/xdocs/design/architecture.xml
  
  Index: architecture.xml
  ===================================================================
  <?xml version="1.0" standalone="no"?>
  <!DOCTYPE document PUBLIC "-//APACHE//DTD Documentation V1.1//EN" "document-v11.dtd">
  
  <document>
      <header>
          <title>Architecture</title>
          <subtitle>Architecture information for FOP</subtitle>
          <authors>
              <person name="Arved Sandstrom" email=""/>
          </authors>
      </header>
  
      <body>
  
  <section>
    <title>FOP Mechanics</title>
  
  <section>
    <title>Introduction</title>
  <p>
  The overall process is controlled by <em>org.apache.fop.apps.Driver</em>.
  This class handles the FO Tree building, renderers, output and logging.
  </p>
  <p>
  The process in general is that the FO document is sent to the tree
  builder via SAX events. This creates an FO Tree. The FO Tree is then
  handled by the layout processor which converts the FO Tree into an area
  tree. This area tree is then given to the renderer and the renderer converts
  the area tree into a stream of data containing the output document.
  </p>
  </section>
  
  <section>
    <title>Formatting Object Tree</title>
  <p>
  The class <em>org.apache.fop.fo.FOTreeBuilder</em> is responsible for
  actually constructing the FO tree. The key SAX events used are </p>
  <p><code>startElement()</code>,</p>
  <p><code>endElement()</code> and <code>characters()</code>.</p>
  
  <p>All formatting objects derive from abstract class
  <em>org.apache.fop.fo.FONode</em>. The other FO classes inherit from
  <em>FONode</em> as follows:</p>
  
  </section>
  
  <section>
    <title>Rendering</title>
  <p>
  This is a separate process. The <code>render()</code> method in
  <em>Driver</em> is invoked (say,
  by <em>CommandLine</em>) with the laid-out <em>AreaTree</em> and a
  <em>PrintWriter</em> as arguments.
  This actually calls the <code>render()</code> method in a specific implementation of
  the <em>Renderer</em> interface, typically <em>PDFRenderer</em> or
  <em>AWTRenderer</em>.
  </p>
  </section>
  
  </section>
      </body>
  </document>
  
  
  
  
  1.1                  xml-fop/src/documentation/content/xdocs/design/areas.xml
  
  Index: areas.xml
  ===================================================================
  <?xml version="1.0" standalone="no"?> 
  <!DOCTYPE document PUBLIC "-//APACHE//DTD Documentation V1.1//EN" "document-v11.dtd">
  
  <document>
      <header>
          <title>Area Tree</title>
          <subtitle>Area Tree Design for FOP</subtitle>
          <authors>
              <person name="Keiron Liddle" email="[EMAIL PROTECTED]"/>
          </authors>
      </header>
  
      <body>
  <section>
    <title>Area Tree</title>
  <p>
  The code to implement the area tree will attempt to match the areas
  defined in the specification. A number of optimisations may be possible
  for similar areas and groups of areas.
    </p>
    <p> 
  Since the area tree will be used during the layout by the layout managers
  it will need to store information that affects the layout. The information
  such as spacing and keeps will be held in such a way that it can be
  discarded once the layout is finalised.
    </p>
  <section>
    <title>Structure</title>
  <p> 
  The area tree is a root element that has a list of page-viewport-areas.
  Each page viewport has a page-reference-area which holds the contents of
  the page. To handle the processing better FOP does not maintain a list
  at the root level but lets another class handle each page as it is added.
    </p>
    </section>
  <section>
    <title>Page</title>
  <p>
  A page is made up of five area regions. These are before, start, body,
  end and after. Each region has a viewport and contains the areas
  produced from the children in the FO object hierarchy.
    </p>
    <p>
  For the body area there are more subdivisions for before floats,
  footnotes and the main reference area. The main reference area is
  made from span areas which have normal flow reference areas as
  children. The flow areas are then created inside these normal flow
  reference areas.
    </p>
    <p>
  Since the layout is done inside a page, the page is created from the
  pagemaster with all the appropriate areas. The layout manager then
  uses the page to add areas into the normal flow reference areas
  and floats and footnotes. After the layout of the body region
  is complete then the other regions can be done.
    </p>
    </section>
  <section>
    <title>Block Areas</title>
  <p>
  Block areas are created and/or returned by all top level elements
  in the flow. These areas have keep and spacing information that
  needs to be retained until the page is finalised. A block area
  is stacked with other block areas in a particular direction, it
  has a size and it contains either line areas made from a group
  of inline areas or block areas.
    </p>
    <p>
  A block area can also be split into two block areas by splitting
  between two line areas or splitting between two block areas (or
  groups) that are stacked in the block progression direction of
  the page. The split may also be in a child block area.
    </p>
    </section>
  <section>
    <title>Line Areas</title>
  <p>
  A line area is simply a collection of inline areas that are stacked
  in the inline progression direction. A line area has a height and
  width. It also contains information about floats and footnotes
  that are associated with the inline areas.
    </p>
    <p>
  A line area gets a set of inline areas added until complete then
  it is justified and vertically aligned. If the line area contains
  unresolved areas it will retain the justification information
  until all areas are resolved.
    </p>
    </section>
  <section>
    <title>Inline Areas</title>
  <p>
  There are a few different types of inline areas. All inline areas
  have a height. Their width may be variable until the line is
  finalised.
    </p>
    <p>
  Unresolved areas can reserve some space to allow for possible
  sizes once they are resolved. Then the line can be re-justified
  and finalised.
    </p>
    </section>
  <section>
    <title>Cloning</title>
  <p>
  Any subtree of the area tree should be cloneable so that for
  areas that are repeated the area tree can simply be copied rather
  than going through the layout again. This will only work if the
  width is the same.
    </p>
    <p>
  Resolvable areas may be converted into an unresolved form.
    </p>
    </section>
  <section>
    <title>Classes</title>
  <p>
  The following class structure will be used to represent the area
  tree.
    </p>
  <section>
    <title>Page Area Classes</title>
  <p>
  The page area classes hold the top level layout of a page. The
  areas are created by the page master and should be ready to have
  flow areas added.
    </p>
    </section>
  <section>
    <title>Block Area Classes</title>
  <p>
  The block areas typically hold either a set of line areas or a set of
  block areas. The child areas are usually stacked in a particular
  direction.
    </p>
    <p>
  Areas for tables and lists have their child block areas stacked
  in different ways. Lists also can have spacing between the block
  areas.
    </p>
    </section>
  <section>
    <title>Inline Area Classes</title>
  <p>
  The inline areas are used to make up a line area. An inline area
  typically has a height, width and some content. The alignment is
  used for block progression direction displacement and to determine
  the height of a line.
    </p>
    </section>
    </section>
  
  <section>
    <title>Rendering Area Tree</title>
  <p>
  The rendering of an area tree is done by rendering each page
  to a suitable output. The regions are rendered in order and each
  region is contained by a viewport.
    </p>
    <p>
  The relevant structures that will need to be rendered are:
  Page
  Viewport
  Region
  Span
  Block
  Line
  Inline
    </p>
    <p>
  The renderer will need to be able to:
         <ul>
         <li><p>
  render each individual page
         </p></li>
         <li><p>
  clip and align child areas to a viewport
         </p></li>
         <li><p>
  handle all types of inline area, text, image etc.
         </p></li>
         <li><p>
  draw various lines and rectangles
         </p></li>
         </ul>
    </p>
    <p>
  An abstract renderer will be able to handle the generic positioning
  of child areas, iterating through areas that have child areas.
    </p>
    </section>
    </section>
  
      </body>
  </document>
  
  
  
  
  1.1                  xml-fop/src/documentation/content/xdocs/design/book.xml
  
  Index: book.xml
  ===================================================================
  <?xml version="1.0" encoding="UTF-8"?>
  <!DOCTYPE book PUBLIC "-//APACHE//DTD Cocoon Documentation Book V1.0//EN" 
"book-cocoon-v10.dtd">
  
  <book software="FOP"
      title="FOP Design"
      copyright="@year@ The Apache Software Foundation"
      xmlns:xlink="http://www.w3.org/1999/xlink">
  
      <menu label="About">
        <menu-item label="Index" href="index.html"/>
      </menu>
      <menu label="Overview">
        <menu-item label="Architecture" href="architecture.html"/>
      </menu>
      <menu label="Sections">
        <menu-item label="FO Tree" href="fotree.html"/>
        <menu-item label="Properties" href="properties.html"/>
        <menu-item label="Layout" href="layout.html"/>
        <menu-item label="Area Tree" href="areas.html"/>
        <menu-item label="Renderers" href="renderers.html"/>
      </menu>
      <menu label="Miscellaneous">
        <menu-item label="Embedding" href="embedding.html"/>
        <menu-item label="Extending" href="extending.html"/>
        <menu-item label="Break Possibility" href="breakpos.html"/>
        <menu-item label="Optimisations" href="optimise.html"/>
        <menu-item label="User Agent" href="useragent.html"/>
      </menu>
      <menu label="Status">
        <menu-item label="Status" href="status.html"/>
      </menu>
  </book>
  
  
  
  
  1.1                  xml-fop/src/documentation/content/xdocs/design/breakpos.xml
  
  Index: breakpos.xml
  ===================================================================
  <?xml version="1.0" standalone="no"?>
  <!DOCTYPE document PUBLIC "-//APACHE//DTD Documentation V1.1//EN" "document-v11.dtd">
  
  <document>
      <header>
          <title>Layout Managers</title>
          <subtitle>Break Possibility Proposal</subtitle>
          <authors>
              <person name="Karen Lease" email="[EMAIL PROTECTED]"/>
          </authors>
      </header>
  
      <body>
  <section>
    <title>Introduction</title>
  <p>
  As explained in <link href="layout.html">Layout</link>,
  the hierarchy of Layout Managers is responsible for building and placing
  areas. Each Layout Manager is responsible for creating and filling
  areas of a particular type, either inline or block. This document
  explains one potential algorithm for this process. It is based on
  the generation of <em>break possibilities</em> (BP for short). The
  Layout Managers (LM for short) will generate one or more BP and
  choose the best one. The BP is then used to generate the corresponding
  areas.
  </p>
  </section>
  <section>
    <title>Anatomy of a Break Possibility</title>
  <p>A break possibility is represented by the BreakPoss class. A
  BreakPoss contains size information in the stacking direction and in
  the
  non-stacking direction (at least for inline areas, it must have both). It also
  carries flags indicating various conditions (ISFIRST, ISLAST, CAN_BREAK_AFTER,
  FORCE_BREAK_AFTER, ANCHORS etc). A BreakPoss contains a reference to
  the top-level LayoutManager which generated it.
  </p> 
  <p>A BreakPoss contains an object implementing
  the BreakPoss.Position interface. This object is specific to the layout
  manager which created the BreakPoss. It should indicate where the
  break occurs and allow the LM to 
  create an area corresponding to the BP. A higher level LM Position
  must somehow reference or wrap the Position returned by its child LM in its
  BreakPoss object. The layout manager  modifies the flags and dimension
  information in the BP to reflect its own requirements. For example an
  inline FO layout manager might add space-start, space-end, border and
  padding values to the stacking or non-stacking dimensions. It might also
  modify the flags based on its keep properties.</p>
  </section>
  <section>
    <title>Turning Break Possibilities into Areas</title>
  <p>Once break possibilities have been generated, the galley-level
  layout manager selects the best one
  and passes it back to the LayoutManager which generated it to create
  the area. A LayoutManager is responsible for
  storing enough information in its Position objects to be able to
  create the corresponding areas.</p>
    </section>
  <section>
    <title>A walk-through</title>
  <p>Layout Managers are created from the top down. First the 
  page sequence creates a PageLM and a FlowLM. The PageLM will manage
  finding the right page model (with help from the PageSequenceMaster)
  and managing the balancing act between before-floats, footnotes and
  the normal text flow. The FlowLM will
  manage the normal content in the main flow. We can think of it as a
  <em>galley</em> manager.
  </p>
  <p>In general, each LM asks its child LMs to return successive
  break possibilities. It passes some
  information to the child in a flags object and it gets back
  a break possibility which contains the size in
  the stacking direction as well as information about such things as
  anchors, break conditions and span conditions which can change the
  reference area environment. This process continues down to the lowest
  level of the layout manager hierarchy which corresponds to atomic
  inline-level FOs such as characters or graphics.
  </p>
  <p>
  Each layout manager will repeatedly call getNextBreakPoss on its current
  child LM until the child returns a BP with the ISLAST
  flag set. Then the layout manager moves on to its next child LM (ie,
  it asks the next child FO to generate a layout manager.) Galley level
  layout managers which are Line and Flow will return to their parent
  layout managers either when they have finished their content or when
  they encounter a BP which will fill one of their areas.
  </p>
  <p>The break possibilities are generated from the bottom up.
  All inline content must first be broken into
  lines which are then stacked into block areas. This is done by the
  LineLayoutManager, which creates line areas.
  The LineLM asks its child LM to generate a break possibility, which
  represents a place where the line can end. This
  initially means each potential line-end (primarily spaces or forced
  linefeeds and a few other potential line-end characters such as hard
  hyphens.) The text LM returns an object which stores the size in the
  stacking direction as a MinOptMax triplet
  and a <em>cost</em>, which is based on how well this break
  would satisfy the constraints. The Text LM keeps track of its position in
  the text content and returns the total size of the text area it would
  create if it were to break at a given point. The returned BP
  object also contains information about whether the break is forced
  (linefeed) or whether this is the last area which can be generated by
  the LM (ISLAST flag). If a textFO ends on a non-break character, the
  ISLAST flag is set, but the CAN_BREAK_AFTER flag isn't, since we don't
  know if there is any following text in another inline object for
  example.
  </p>
  <p>Variable size content is taken into account from
  the bottom up. Each LM returns a range of sizes in the stacking
  direction, based on property values. For text, this comes from
  variable word-space values or letter-space values. For other inline
  objects, it may include variable space-start and space-end values
  (after calculation of the entire sequence of space specifiers at a
  particular break possibility.)</p>
  <p>The main constraint for laying out
  lines is the available inline-progression-dimension (IPD) for the line
  area to be created. This
  depends on the IPD of the reference area ancestor, on the indents of the
  containing fo:block, and on any side-floats which may be intruding on
  this line.</p>
  <note>See below <link href="#getRefIPD">Getting the Reference
  IPD</link>
  for discussion of how the reference area IPD is
  transmitted to the Line LM.</note>
  <p>For now, let's assume that only the LineLM knows about the IPD
  available to it. Therefore only it can make a decision about which BP
  is the best one; the lower level inline layout managers can only
  return potential break points.</p>
  <note>There are certainly optimizations to this model which can be
  examined later.</note>
  <p>So the Line LM will ask its child LM(s) for break possibilities until
  it gets back a BP whose stacking dimension <em>could</em> fill the
  line. This means that the BP.stackdim.max >= LineIPD.min. It can look
  for further BP, perhaps one whose stackdim.opt is closer to the
  LineIPD.opt. If it isn't happy with the choice of break possibilities,
  it can go past the end of the line to the next one, and then try to
  find a hyphenation point between the last one which fits and the first
  one which doesn't. If no possibility is found whose min/max values
  enclose the available IPD, some constraint will be violated (and
  reported in the log.) The actual strategy is up to the Line LM and
  should be able to be easily replaced without changing the architecture
  (Strategy pattern).
  </p>
  <p>The definition of a good break possibility depends on the
  properties at the block and inline level which govern things such as
  wrapping behavior and justification mode. For example, if lines are
  not to be wrapped, only an explicit linefeed can serve as a BP. If
  lines are wrapped but not justified then there is no requirement to
  completely fill the IPD on each line, but a sophisticated layout
  manager will try to achieve "aesthetic rag".
  </p>
  <p>Note that no areas have actually been created yet. Once the LineLM
  has found a potential break point for the inline content, it can
  calculate the total size of the line area which would be created. The
  size in the IPD is determined by the Line LM based on the chosen BP.
  The size of the line area in the block-progression-dimension
  depends on the size of the text (or other inline content). These
  values are set by the inline-level LM
  in their returned BP (in terms of ascender and descender heights with
  respect to the baseline). The LineLM adds spacing implied by the
  current line-stacking strategy and line-height property values. It
  stores a reference to the chosen inline BP and "wraps" that in its own
  Position object which it stores in the BP it returns to its parent LM
  (the block layout manager).
  </p><p>The block LM now has a potential break position after its
  first line. It assigns that possibility a cost, based on widow, orphan
  and keep properties. It can also calculate the total size of the block
  area it would create, were it to end the area after this line. It does
  this by adding any padding and border (taking into account
  conditionality). It also calculates space-before and space-after
  values, or contributes to building up a sequence of such values.
  With this information, the block LM creates a new BP (or
  updates the existing one). It stores a Position object in this
  BP which wraps the returned BP from its child Line LM.
  It returns the new BP to its parent and so on, back up to the
  FlowLM.</p>
  <p>Obviously there is more complicated logic involved when dealing
  with lists and tables. These cases need to be walked through in detail.</p>
  <p>The FlowLM sees if the returned stacking dimension will still
  fit in its available block-progression-dimension (BPD). It repeatedly calls
  getNextBreakPoss on its
  child LMs until it reaches the maximum BPD for the flow reference area
  or until there is no more content to lay out. If one child LM is
  finished, it moves on to the next until the last child LM has returned
  a BP with the ISLAST flag set. If any child LM returns a
  BP with a FORCE_BREAK_BEFORE or SPAN flag set, the FlowLM will
  force layout of any pending break possibilities and return to its
  parent (the PageLM) in order to handle the break or span condition.</p>
  <p>If the returned BP has any new before-float or footnote anchors in
  it (ANCHOR flag in the
  BP), the FlowLM will also return to the PageLM. The PageLM must then
  try to find space to place the floats, possibly asking the FlowLM for
  help if the body contains multiple columns.</p>
  </section>
  <section>
    <title>Some issues</title>
  <p>Following are a few remarks on specific issues.</p>
  <section>
    <title>Where Line Layout Managers are created</title>
  <p>If the first child FO in a block FO is an inline-level FO
  such as text, the block LM creates an intermediate level LineLM
  to lay out the
  sequence of inline content into Lines. Note that the whole sequence of
  inline FOs is managed by a single instance of LineLM. The LineLM
  becomes the parent to the various inline-level LM created by each
  individual inline FO.
  Since an fo:block can have both block and inline content, its LM
  may create a sequence of intermixed BlockLM and LineLM.</p>
  </section>
  <section id="getRefIPD">
    <title>Getting the reference IPD</title>
  <p>When the layout process starts, with the FlowLM asking its first
  child LM for a break possibility, the IPD isn't known, since we don't
  know whether
  the first FO might be spanning, or on which page it might start. (Of
  course, if all page masters in the sequence have the same region-body IPD
  and all have only a single column, the IPD will never change
  and could already be calculated before starting layout.)
  The FlowLM gets its
  first child LM and calls its getNextBreakPoss method. That is a child LM for
  some block-level FO. For now, suppose it's an fo:block. The BlockLM
  will create its first child LM, which may be another block-level LM in
  the case of nested blocks or a LineLM as explained above. (Question:
  do we need a START flag for layout status?)
  </p>
  <p>We keep calling getNextBreakPoss on lower level layout managers until we
  get down to the inline level or to a level which cannot have break-before
  properties, such as a list-item-label. At that point, we assume we are
  going to have to lay out some actual content. But we can't do that yet
  since we don't know the inline-progression-dimension. So we return a
  BP object which has 0 size in the stacking dimension, but which
  has flags set to signal to
  higher-level layout managers what needs to be done. If it has a break-before
  property or a span property, it stores these in the BP. If
  no reference IPD is yet defined, it sets a flag to get that. It then
  returns to its parent. The parent LM will inspect the BP object
  returned. In general, it "wraps" it with information about its own
  needs. If the returned BP is not actually returning any potential
  areas, the LM can still add information about its own break or span
  requirements. This return path continues back up to the PageLM. It
  will then check break and span requirements and create a new page
  if necessary using the appropriate page-master. At that point, the
  reference IPD for the main
  flow is known and is set in the flags object used for
  the next getNextBreakPoss call to the lower level LM.
  </p><p>Using this information, the BlockLM parent can now calculate
  the available IPD for its LineLM child, based on its indents.
  (If there are any
  side-floats, information about the intrusion must be passed down by the
  FlowLM to lower level managers.) The LineLM can now generate a series
  of BreakPoss objects, which it passes back to its parent LM.
  </p>
  </section>
  <section>
    <title>Hyphenation</title>
  <p>
  The LineLM is responsible for initiating hyphenation if it is allowed
  by the properties and if no satisfactory BP can be found without
  hyphenating. The hyphenation manager is passed two break
  possibilities, one whose IPD is less than the desired line area IPD
  and one whose IPD is greater. These break possibilities might have
  been generated by different inline-level layout managers (text + a
  wrapper with a color change for example), though
  frequently they represent two positions in a single text run.
  If hyphenation is successful, a new BP is
  returned. The LineLM may look for several intermediate BP
  based on the "cost" of the returned possibilities. If no intermediate
  BP is found, the line will be "short", the white-space stretch will be
  exceeded, or perhaps the content will be overflowed or clipped,
  depending on various property settings.</p>
  </section>
  <section>
    <title>Optimizing</title>
  <p>It obviously seems inefficient to go down to the lowest level
  LM and back up to the FlowLM for every possible line-break
  decision. It seems like it would be possible to optimize by letting
  the lower level layout managers run until they had exceeded the
  current limit in
  the stacking direction. They would then return control to the "galley"
  level (LineLM or FlowLM) which would fine-tune the break decision by
  asking the lower level LM to find a previous BP which would fit. At
  the inline level, this means hyphenation as described above.</p>
  <p>Another interesting question is at what point pending break
  possibilities can be turned into areas. The idea is to wait until we
  are sure we won't have to redo the breaking. This depends on the
  sophistication of the layout strategy. For example, if a
  linebreak can be considered final if the line is full and there are no
  anchors on the line, we could create the LineArea at that point. But
  if we are willing to change a previous line-end decision to get a
  better overall composition of a whole group of lines (to prevent multiple
  hyphens for example), we might wait until the LineLM had finished
  laying out all its material and then make all the Lines at once.</p>
  </section>
  </section>
      </body>
  </document>
  
  
  
  
  1.1                  xml-fop/src/documentation/content/xdocs/design/embedding.xml
  
  Index: embedding.xml
  ===================================================================
  <?xml version="1.0" standalone="no"?>
  <!DOCTYPE document PUBLIC "-//APACHE//DTD Documentation V1.1//EN" "document-v11.dtd">
  
  <document>
      <header>
          <title>Embedding Design</title>
          <subtitle>Design Approach to FOP</subtitle>
          <authors>
              <person name="Keiron Liddle" email="[EMAIL PROTECTED]"/>
          </authors>
      </header>
  
      <body>
  <section>
    <title>Introduction</title>
  <p>
  This is the design for the external interface when FOP is to be embedded
  inside another java application.
    </p>
    <p>
  Common places where FOP is embedded are a report production application
  or a server side application such as <jump 
href="http://xml.apache.org/cocoon/index.html">Cocoon</jump>.
     </p>
    </section>
  <section>
    <title>Settings</title>
  <section>
    <title>User Agent</title>
  <p>
  The user agent is responsible for supplying user or context
  specific information. The list of user agent values can be found on the
  <jump href="useragent.html">User Agent</jump> page.
     </p>
    </section>
  <section>
    <title>Logging</title>
  <p>
  <ul>
  <li>logging level</li>
  <li>logging messages of various levels</li>
  <li>error handling</li>
  <li>Logging setup (LogKit, Log4J, JDK14Logging)</li>
  </ul>
     </p>
    </section>
  <section>
    <title>XML input</title>
  <p>
  <ul>
  <li>various ways to supply FOP with the xsl:fo file, fo, xml+xsl</li>
  <li>sax handler</li>
  </ul>
     </p>
    </section>
  <section>
    <title>general options</title>
  <p>
  <ul>
  <li>base directory</li>
  <li>uri resolvers</li>
  <li>which implementation of a particular LayoutManager to use</li>
  </ul>
     </p>
    </section>
  <section>
    <title>Rendering Options</title>
  <p>
  <ul>
  <li>embedding fonts</li>
  <li>compression in pdf</li>
  <li>image embedding</li>
  </ul>
  </p>
  <p>
  for the PS renderer (eventually):
  <ul>
  <li>PostScript Level</li>
  <li>PPD to use</li>
  <li>binary/ascii switch</li>
  </ul>
     </p>
    </section>
  <section>
    <title>User Agent</title>
  <p>
  Output from FOP:
  - Generation statistics: Number of pages total, Number of pages of each
    page-sequence, page-master used for each page (could be used to
    control the paper bin to get paper from, important for me in
    conjunction with PS Renderer).
     </p>
    </section>
  <section>
    <title>Setting Up</title>
  <p>
  The Driver handles the XML input.
  The user agent information is through the FOUserAgent.
  We could handle logging through the user agent.
  Options could also be handled through the user agent, using mime type
  selection for renderer options.
  </p>
    </section>
  <section>
    <title>Others</title>
  <p>
  render to more than one renderer at once (maybe not from the command line).
  For example you could generate a PDF for the archive
  and the PS for the printer in one run. It would probably be faster than
  converting the PDF to PostScript afterwards.
  </p>
  <p>
  Several code pieces for resolving URLs and/or
  file locations are scattered all over FOP and Batik. These should
  be replaced with an URIResolver invocation to unify behaviour and
  remove redundancies.
     </p>
    </section>
  </section>
  
      </body>
  </document>
  
  
  
  
  1.1                  xml-fop/src/documentation/content/xdocs/design/extending.xml
  
  Index: extending.xml
  ===================================================================
  <?xml version="1.0" standalone="no"?>
  <!DOCTYPE document PUBLIC "-//APACHE//DTD Documentation V1.1//EN" "document-v11.dtd">
  
  <document>
      <header>
          <title>FOP Extensions</title>
          <subtitle>Adding extensions to FOP</subtitle>
          <authors>
              <person name="Keiron Liddle" email="[EMAIL PROTECTED]"/>
          </authors>
      </header> 
  
      <body>
  <section>
    <title>Extensions</title>
  <p>
  FOP provides an extension mechanism to add extra functionality. There
  are a number of different types of extensions that apply to different
  steps when converting FO into the rendered output.
    </p>
  <section>
    <title>Extensions</title>
    <p>
  SVG Graphic - This applies to svg and any other xml document that
  can be converted into svg in the output. All that is required is
  the element mapping for the xml and a converter that changes the
  document into svg. This conversion is done in the FO Tree. The
  conversion is done by the top level element of the namespace
  or in the case of an external image a Converter.
    </p>
    <p>
  XML Document - Instead of converting the document into svg it
  can be passed directly to the renderer. The renderer will need
  to have a handler for the xml document. This handler can add
  information directly to the output document.
    </p>
    <p>
  Output Document - This is used to add document level information
  to the output result. Such an extension will set information that
  is passed to the output document. There needs to be a handler for
  the output information which creates a document level result.
    </p>
    <p>
  FO Area - This is where an extension creates a normal area in
  the Area Tree. This is useful when the normal FO objects
  cannot create the area in the way that is needed.
    </p>
    <p>
  Resolveable - In some cases it may require information to be
  resolved for information such as page numbers. This can apply
  to the XML Document, FO Area or output document extensions.
     </p>
    <p>
  - Add a string ['(Continued)'] to a table header if the table spans
  multiple pages. These tables are part of the content and can start
  anywhere in the page.
     </p>
    <p>
  - Separate page number display for a subsection. ie. - master document
  is page 4 of 7, but subsection is page 2 of 3.
  
     </p>
  </section>
  <section>
    <title>Examples</title>
    <p>
  Plan - The plan extension is a simple SVG graphic extension.
  Given a plan document either inside an InstreamForeignObject
  or as an external graphic, it converts the plan document into
  an svg graphic. The svg graphic is then passed through the
  Area Tree to the Renderer. The Renderer then renders the svg
  graphic as normal.
     </p>
    <p>
  PDF Outline - This is an output document extension. If rendering to
  pdf and this extension is used then the bookmark information is
  passed to the pdf document. This information is then set on the
  document.
     </p>
    <p>
  PDF Additions - This can be done with an XML Document extension.
  A simple xml document is defined that provides the appropriate
  information. When the document is rendered a handler converts the
  document into PDF markup.
     </p>
    <p>
  eg.
  <source><![CDATA[<my:script-link 
script="app.execMenuItem('AcroSrch:Query');">Search</my:script-link>]]></source>
  
  to result in a text box referencing the following PDF action:
  <source><![CDATA[<< /S /JavaScript /JS (app.execMenuItem("AcroSrch:Query");) 
>>]]></source>
  
     </p>
  
  </section>
  
    </section>
  
      </body>
  </document>
  
  
  
  
  1.1                  xml-fop/src/documentation/content/xdocs/design/fotree.xml
  
  Index: fotree.xml
  ===================================================================
  <?xml version="1.0" standalone="no"?>
  <!DOCTYPE document PUBLIC "-//APACHE//DTD Documentation V1.1//EN" "document-v11.dtd">
  
  <document> 
      <header>
          <title>FO Tree</title>
          <subtitle>Design of FO Tree Structure</subtitle>
          <authors>
              <person name="Keiron Liddle" email="[EMAIL PROTECTED]"/>
          </authors>
      </header>
  
      <body>
  <section>
    <title>Introduction</title>
  <p> 
  The FO Tree is an internal representation of the input FO document.
  The tree is created by building the elements and attributes from
  the SAX events.
    </p>
    <p>
  The FO Tree is used as an intermediate structure which is converted
  into the area tree. The complete FO tree should not be held in memory
  since FOP should be able to handle FO documents of any size.
     </p>
  
  <section>
    <title>FONode</title>
  <p>
  The class inheritance described above only describes the nature of the
  content. Every FO in FOP also has a parent, and a Vector of children. The
  parent attribute (in the Java sense), in particular, is used to enforce
  constraints required by the FO hierarchy.
  </p>
  
  <p>
  FONode, among other things, ensures that FO's have a parent, that they
  have children, that they maintain a marker of where the layout was up to
  (for FObj's it is the child number, and for FOText's it is the character
  number), and that they have a <code>layout()</code> method.
  </p>
  </section>
  
  <section>
    <title>Making FO's</title>
  <p>
  An FO maker is read from a hashmap lookup using the namespace and
  element name. This maker is then used to create a new class that
  represents an FO element. This is then added to the FO tree as a child
  of the current parent.
  </p>
  
  
  <p>
  Properties (recall that FO's have properties, areas have traits, and XML
  nodes have attributes) are also a concern of <em>FOTreeBuilder</em>. It
  accomplishes this by using a <em>PropertyListBuilder</em>. There is a
  separate <em>PropertyListBuilder</em> for each namespace encountered
  while building the FO tree. Each Builder object contains a hash of
  property names and <ref>their</ref> respective makers. It may also
  contain element-specific property maker hashes; these are based on the
  <em>local name</em> of the flow object, ie. <em>table-row</em>, not
  <em>fo:table-row</em>. If an element-specific property mapping exists,
  it is preferred to the generic mapping.</p>
  <p>The base class for all
  properties is <em>Property</em>, and all the property makers extend
  <em>Property.Maker</em>. A more complete discussion of the property
  architecture may be found in <jump href="properties.html">Properties</jump>.
  </p>
  </section>
  
  <section>
    <title>Foreign XML</title>
  <p>
  FOP supports the handling of foreign XML.
  The XML is converted internally into a DOM, this is then available to
  the FO tree to convert the DOM into another format which can be rendered.
  In the case of SVG the DOM needs to be created with Batik, so an element
  mapping is used to read all elements in the SVG namespace and pass them
  into the Batik DOM.
  </p>
  </section>
  
  <section>
    <title>Extensions</title>
  <p>
  It is possible to add extensions to FOP so that you can extend the ability of
  FOP with respect to render output, document specific information or extended
  layout functionality.
  </p>
  </section>
  
    </section>
  
      </body>
  </document>
  
  
  
  
  1.1                  xml-fop/src/documentation/content/xdocs/design/index.xml
  
  Index: index.xml
  ===================================================================
  <?xml version="1.0" standalone="no"?>
  <!DOCTYPE document PUBLIC "-//APACHE//DTD Documentation V1.1//EN" "document-v11.dtd">
  
  <document>
      <header>
          <title>FOP Design</title>
          <subtitle>Design Approach to FOP</subtitle>
          <authors>
              <person name="Keiron Liddle" email="[EMAIL PROTECTED]"/>
          </authors>
      </header>
  
      <body>
  <section>
    <title>Introduction</title>
  <p>
  The information here describes the design and architecture details for FOP.
  Currently this is part of a redesign process for some of the core parts of
  FOP.
    </p>
    <p>
  The redesign is mainly focusing on some particular process involved
  with the layout process when converting the FO tree into the Area Tree.
    </p>
  <section>
    <title>Aims</title>
    <p>
  The main aim for FOP is to comply with the spec and to be able to
  process files of any size.
     </p>
    <p>
  In achieving this aim we need to understand the problem and break it
  into small problems that can be solved.
     </p>
    <p>
  <ul>
  <li>use SAX as input</li>
  <li>process FO elements ASAP</li>
  <li>dispose of unused memory, keep memory minimal</li>
  <li>layout handles floats, footnotes and keeps in a simple straight forward way</li>
  <li>id references are kept simple</li>
  <li>pages are rendered ASAP, can be cached until resolved</li>
  <li>renderers are totally responsible for their output format</li>
  <li>output is sent to a stream</li>
  </ul>
     </p>
  </section>
    </section>
  
      </body>
  </document>
  
  
  
  
  1.1                  xml-fop/src/documentation/content/xdocs/design/layout.xml
  
  Index: layout.xml
  ===================================================================
  <?xml version="1.0" standalone="no"?>
  <!DOCTYPE document PUBLIC "-//APACHE//DTD Documentation V1.1//EN" "document-v11.dtd">
  
  <document>
      <header>
          <title>Layout</title>
          <subtitle>Layout Process in FOP</subtitle>
          <authors>
              <person name="Keiron Liddle" email="[EMAIL PROTECTED]"/>
          </authors>
      </header>
  
      <body>
  <section>
    <title>FO Layout</title>
  <p>
  The aim of the layout system is to be self contained and allow for
  easy changes or extensions for future development. For example the
  line breaking should be decided at a particular point in the process
  that makes it easier to handle other languages.
    </p>
    <p>
  The layout begins once the hierarchy of FO objects has been constructed.
  Note: it may be possible to start immediately after a block formatting
  object has been added to the flow but this is not currently in the scope
  of the layout. It is also possible to layout all pages in a page sequence
  after each page sequence has been added from the xml.
    </p>
    <p>
  The layout process is handled by a set of layout managers. The block
  level layout managers are used to create the block areas which are
  added to the region area of a page.
    </p>
  <section>
    <title>Layout Managers</title>
    <p>
  The layout managers are set up from the hierarchy of the formatting
  object tree. A manager represents a hierarchy of area producing objects.
  A manager is able to handle the block area(s) that it creates and
  organise or split areas for page breaks.
    </p>
    <p>
  Normally any object that creates a block area will have an associated
  layout manager. Other cases are tables and lists, these objects will
  also have layout managers that will manage the group of layout managers
  that make up the object.
    </p>
    <p>
  A layout manager is also able to determine height (min/max/optimum)
  and keep status. This will be used when organising the layout on
  a page. The manager will be able to determine the next place a break
  can be made and then be able to organise the height.
    </p>
    <p>
  A layout manager is essentially a bridge between the formatting objects
  and the area tree. It will keep a list of line areas inside block areas.
  Each line area will contain a list of inline areas that is able to be
  adjusted if the need arises.
    </p>
    <p>
  The objects in the area tree that are organised by the manager will mostly
  contain the information about their layout such as spacing and keeps, this
  information will be thrown away once the layout for a page is finalised.
     </p>
    </section>
  <section>
    <title>Creating Managers</title>
    <p>
  The managers are created by the page sequence. The top level manager
  is the Page manager. This asks the flow to add all managers in this
  page sequence.
    </p>
    <p>
  For block level objects they have a layout manager. Neutral objects
  don't represent any areas but are used to contain a block level
  area and as such these objects will ask the appropriate child to
  create a layout manager.
    </p>
    <p>
  Any nested block areas or inline areas may be handled by the layout
  manager at a later stage.
     </p>
    </section>
  <section>
    <title>Using Managers</title>
    <p>
  Block area layout managers are used to create a block area, other block
  level managers may ask their child layout managers to create block areas
  which are then added to the area tree (subset).
    </p>
    <p>
  A manager is used to add areas to a page until the page is full,
  then the managers contain all the information necessary to make
  the decision about page break and spacing. A manager can split an
  area that it has created and will keep a status about what has been
  added to the current area tree.
     </p>
    </section>
  <section>
    <title>Page Layout</title>
    <p>
  Once the Page layout manager, belonging to the page sequence, is ready
  then we can start laying out each page. The page sequence will create
  the current page to put the page data, the next page and if it exists
  a last page.
    </p>
    <p>
  The current page will have the areas added to it from the block layout
  managers. The next page will be used when splitting a block that goes
  over the page break. Note: any page break overrides the layout decided
  here. The last page will be necessary if the last block area is added
  to this page. The size of the last page will be considered and the
  areas will be added to the last page instead.
    </p>
    <p>
  The first step is to add areas to the current page until the area is full
  and the lines of the last block area contain at least n(orphans) and at least
  n(orphans) + n(widows) in total. This will only be relevant for areas at
  the start or end of a particular reference area.
     </p>
    <p>
     <image src="page.svg"/>
    </p>
    <p>
  The spacing between the areas (including spacing in block areas inside
  an inline-container) will be set to the minimum values. This will allow
  the page to have at least all the information it needs to organise the
  page properly.
    </p>
    <p>
  This should handle the situation where there are keeps on some
  block areas that go over the end of the page better. It is possible that
  fitting the blocks on the page using a spacing between min and optimum
  would give a closer value to the optimum than putting the blocks on the
  next page and the spacing being between optimum and max. So if the objects
  are placed first at optimum then you will need to keep going to see if
  there is a lower keep further on that has a spacing that is closer to the
  optimum.
    </p>
    <p>
  The spacing and keep information is stored so that the area positions
  and sizes can be adjusted.
    </p>
    </section>
  <section>
    <title>Balancing Page</title>
    <p>
  The page is vertically justified so that it distributes the areas
  on the page for the best result when considering keeps and spacing.
    </p>
    </section>
  <section>
    <title>Finding Break</title>
    <p>
  First the keeps are checked. The available space on the page may have
  changed due to the presence of before floats or footnotes. The page break
  will need to be at a height &lt;= the available space on the page.
    </p>
    <p>
  A page break should be made at the first available position that
  has the lowest keep value when searching from the bottom. Once the first
  possible break is found then the next possible break, with equally low
  keep value, is considered. If the height of the page is closer to the
  optimal spacing then this break will be used instead.
    </p>
    <p>
  Keep values include implicit and explicit values when trying to
  split a block area into more than one area. Implicit keeps may
  be such things as widows/orphans.
    </p>
    <p>
  If the page contains before floats or footnotes then as each area or line
  area is removed the float/footnote should also be removed. This will
  change the available space and is a one way operation. The footnote
  should be removed first as a footnote may be placed on the next page.
  The lowest keep value may need to be reassessed as each conditional
  area is removed.
    </p>
    <p>
  The before float and footnote regions are managed so that the separator
  regions will be present if it contains at least one area.
    </p>
    </section>
  <section>
    <title>Optimising</title>
    <p>
  Once the areas for the page are finalised then the spacing will
  need to be adjusted. The available height on the page is compared
  with the min and max spacing. All of the spacing in all the areas
  on the page is then adjusted by the appropriate percentage value.
    </p>
    </section>
  <section>
    <title>Multi-Column Pages</title>
    <p>
  In the case of multi-column pages the column breaks and eventually
  the page break must be found in a slightly different way.
    </p>
    <p>
  The columns need to be laid out completely from first to last but
  this can only be done after a rough estimate of all the elements
  on the page in case of before floats or footnotes.
    </p>
    <p>
  So first the complete page is laid out with all columns filled
  with areas and the spacing at a minimum. Then if there are any
  before floats or footnotes then the available space is adjusted.
  Then the best break is found for each column starting from
  the first column. If any before floats or footnotes are removed
  as a result of the new breaks and optimised spacing then all the
  columns should still be laid out for the same column height.
    </p>
    </section>
  <section>
    <title>Completing Page</title>
    <p>
  After the region body has been finished the static areas can be
  laid out. The width of the static area is set and the height is
  infinite, that is all block areas should be placed in the area
  and their visibility is controlled by other factors.
    </p>
    <p>
  The area tree for the region body will contain the information
  about markers that may be necessary for the retrieve marker.
    </p>
    <p>
  The ordering of the area tree must be adjusted so that the areas are
  before, start, body, end and after in that order. The body region
  should be in the order before float, main then footnote.
    </p>
    </section>
  <section>
    <title>Line Areas</title>
    <p>
  Creating line areas uses a similar concept. Each inline area
  is placed across the available space until there is no room left.
  The line is then split by considering all keeps and spacing.
    </p>
    <p>
  Each word (group of adjacent character inline areas) will have keeps
  based on hyphenation. The line break is at the lowest keep value
  starting from the end of the line.
    </p>
    <p>
  Once a line has been laid out for a particular width
  then that line is fixed for the page (except for unresolved
  page references).
    </p>
    </section>
  <section>
    <title>Before Floats and Footnotes</title>
    <p>
  The before float region and footnote region are handled by the page
  layout manager. These regions will handle the addition and removal
  of the separator regions when before floats/footnotes are added
  and removed.
    </p>
    </section>
  <section>
    <title>Side Floats</title>
    <p>
  If a float anchor is present in a particular line area then the available
  space for that line (and others in the block) will be reduced. The side float
  adds to the height of the block area and this height also depends
  on the clear value of subsequent blocks. The keep status of the block is
  also affected as there must be enough space on the page to fit the
  side float.
    </p>
    <p>
  <image src="float.svg"/>
    </p>
    </section>
  <section>
    <title>Unresolved Areas</title>
    <p>
  Once the layout of the page is complete there may be unresolved areas.
    </p>
    <p>
  Page number citations and links may require following pages to be
  laid out before they can be resolved. These will remain in the
  area tree as unresolved areas.
    </p>
    <p>
  As each page is completed the list of unresolved id's will be checked
  and if the id can be resolved it will be. Once all id's are resolved
  then the page can be rendered.
    </p>
    <p>
  Each page contains a map of all unresolved id's and the corresponding
  areas.
    </p>
    <p>
  In the case of page number citations, the area reserves the equivalent
  of 3 number nines in the current font. When the area is resolved
  then the area is adjusted to its proper size and the line area is
  re-aligned to accommodate the change.
    </p>
    </section>
  <section>
    <title>ID and Link Areas</title>
    <p>
  Any formatting object that has an ID or any inline link defines an area
  that will be required when rendering and resolving id references.
    </p>
    <p>
  This area is stored in the parent area and may be a shape that exists
  in more than one page, for example over a page break. This shape consists
  of the boundary of all inline (or block) areas that the shape is defined
  for.
    </p>
    </section>
  <section>
    <title>Inline Areas</title>
    <p>
  This is the definition of all inline areas that will exist in the
  area.
    </p>
    </section>
  <section>
    <title>Fixed Areas</title>
    <p>
  instream-foreign-object, external-graphic, inline-container
    </p>
    <p>
  These areas have a fixed width and height. They also have a viewport.
    </p>
    </section>
  <section>
    <title>Stretch Areas</title>
    <p>
  leader, inline space
    </p>
    <p>
  These areas have a fixed height but the width may vary.
    </p>
    </section>
  <section>
    <title>Character Areas</title>
    <p>
  character
    </p>
    <p>
  This is a simple character that has fixed properties according to
  the current font. There are implicit keeps with adjacent characters.
    </p>
    </section>
  <section>
    <title>Anchor Areas</title>
    <p>
  float anchor, footnote anchor
    </p>
    <p>
  This area has no size. It keeps the position for footnotes and floats
  and has a keep with the associated inline area.
    </p>
    </section>
  <section>
    <title>Unresolved Page Numbers</title>
    <p>
  page-number-citation
    </p>
    <p>
  A page number area that needs resolving, behaves as a character and
  has the space of 3 normal characters reserved. The size will adjust
  when the value is resolved.
    </p>
    </section>
  <section>
    <title>Block Areas</title>
    <p>
  The block area has info about the following:
         <ul>
         <li><p>
  all anchors including which lines they are on
         </p></li>
         <li><p>
  unresolved page references with line info
         </p></li>
         <li><p>
  id and link areas
         </p></li>
         <li><p>
  height (min/max/optimum) or area including floats
         </p></li>
         <li><p>
  holds space before/after and keep information
         </p></li>
         <li><p>
  widows and orphans
         </p></li>
         </ul>
    </p>
    <p>
  Once the layout has been finalised then this information can be
  discarded.
    </p>
    </section>
  <section>
    <title>Page Areas</title>
    <p>
  Contains information about all the block areas in the body,
  before area and footer area.
    </p>
    <p>
  Has a list of the unresolved page references and a list of id references
  that can be used to obtain the area associated with that id.
    </p>
    </section>
  <section>
    <title>Test Cases</title>
    <p>
  Here a few layout possibilities are explored to determine how the
  layout process will handle these situations.
    </p>
  <section>
    <title>Simple Pages</title>
    <p>
  All blocks (including nested) are placed on the page with minimum spacing
  and the last block has the minimum number of lines past the page end.
  The lowest keep value is then found within the body area limits. Then the next
  equally low keep is found to determine if the spacing will be closer to
  the optimum values.
    </p>
    </section>
  <section>
    <title>Before Floats/Footnotes</title>
    <p>
  After filling the page with the block areas then the new body height
  is used to find the best position to break. Before each line area or block
  area is removed, any associated before floats and footnotes are removed.
  This will then adjust the available space on the page and may allow
  for a different breaking point. Areas are removed towards the new
  breaking point until the areas fit on the page. When finding the
  optimum spacing the removal of before floats and footnotes must also
  be considered.
    </p>
    </section>
  <section>
    <title>Multicolumn</title>
    <p>
  First the page is filled with all columns for the initial page area.
  Then each column is adjusted for the new height starting from the
  first column. The best break for the column is found then the next
  column is considered, any left over areas are prepended to the next
  column. Once all the columns are finished then all the columns are
  adjusted to fit in the same height columns. This handles the situation
  where before floats or footnotes may have been removed.
    </p>
    </section>
  <section>
    <title>Last Page</title>
    <p>
  If in the process of adding areas to a page it is found that there
  are no more areas in the flow then this page will need to be changed to
  the last page (if applicable). The areas are then placed on a last
  page.
    </p>
    </section>
    </section>
  
    </section>
  
      </body>
  </document>
  
  
  
  
  1.1                  xml-fop/src/documentation/content/xdocs/design/optimise.xml
  
  Index: optimise.xml
  ===================================================================
  <?xml version="1.0" standalone="no"?>
  <!DOCTYPE document PUBLIC "-//APACHE//DTD Documentation V1.1//EN" "document-v11.dtd">
  
  <document>
      <header>
          <title>FOP Optimisations</title>
          <subtitle>Notes for Optimising FOP</subtitle>
          <authors>
              <person name="Keiron Liddle" email="[EMAIL PROTECTED]"/>
          </authors>
      </header>
  
      <body>
  <section>
    <title>Process Optimisations</title>
  <p>
  FOP should be able to handle very large documents. A document can be
  supplied using SAX and the information should be passed entirely through
  the system, from fo elements to rendered output as soon as possible.
    </p>
    <p>
  A top level block area, immediately below the flow, can be added to the
  page layout as soon as the element is complete.
    </p>
    <p>
  The fo elements used to construct a page can be discarded as soon as the
  layout for the page is complete. Some information may be stored in the
  area tree of the page in order to handle unresolved page references
  and links.
    </p>
    <p>
  Once the layout of a page has been completed, all elements are fully
  resolved, then the page can be rendered. Some renderers may support
  out of order rendering of pages.
    </p>
    <p>
  The main problem that will remain is that any page with forward
  references will need to be stored until the reference is resolved.
  This means that the information contained in the page should be
  as minimal as possible.
    </p>
    <p>
  Line areas can be optimised once the layout for the line has
  been finalised. Consecutive characters with the same properties
  can be combined into a "word" to hold the information with
  limited overhead.
    </p>
    <p>
  If there are a large number of pages where forward references
  cannot be resolved then a method of writing a page onto disk
  could be used to save memory. The easiest way to achieve this
  is to make the page and all children serializable.
     </p>
    </section>
  
      </body>
  </document>
  
  
  
  
  1.1                  xml-fop/src/documentation/content/xdocs/design/properties.xml
  
  Index: properties.xml
  ===================================================================
  <?xml version="1.0" encoding="ISO-8859-1"?>
  <!DOCTYPE document PUBLIC "-//APACHE//DTD Documentation V1.1//EN" "document-v11.dtd">
  
  <document> 
      <header>
          <title>Properties</title>
          <subtitle>Properties overview</subtitle>
          <authors>
              <person name="Karen Lease" email=""/>
          </authors>
      </header>
  
      <body> 
  
  <section>
    <title>Properties</title>
    <section>
      <title>Property datatypes</title>
  <p>The property datatypes are defined in the
  org.apache.fop.datatypes package, except Number and String which are java
  primitives. The FOP datatypes are:</p> 
  <ul>
  <li>Number</li>
  <li>String</li>
  <li>ColorType</li>
  <li>Length (has several subclasses)</li>
  <li>CondLength (compound)</li>
  <li>LengthRange (compound)</li>
  <li>Space (compound)</li>
  <li>Keep (compound)</li>
  </ul>
  <p>The <em>org.apache.fop.fo.Property</em> class is the superclass for all
  Property subclasses. There is a subclass for each kind of property
  datatype. These are named using the datatype name plus the word
  Property, resulting in NumberProperty, StringProperty, and so
  on. There is also a class EnumProperty which uses an <code>int</code>
  primitive to hold enumerated values. There is no corresponding Enum
  datatype class.</p>
  <p>The Property class provides a "wrapper" around any possible
  property value. Code manipulating property values (in layout for
  example) usually knows what kind (or kinds) of datatypes are
  acceptable for a given property and will use the appropriate accessor.</p>
  <p>The base Property class defines accessor methods for all FO property
  datatypes, such as getNumber(), getColorType(), getSpace(), getEnum(),
  etc. It doesn't define
  accessors for SVG types, since these are handled separately (at least
  for now.) In the base Property class, all of these methods return
  null, except getEnum which returns 0. Individual subclasses return a value of the 
appropriate type,
  such as Length or ColorType. A subclass may also choose to return a
  reasonable value for other accessor types. For example, a
  SpaceProperty will return the optimum value if asked for a Length.</p>
    </section>
  
    <section>
      <title>Property Makers</title>
  <p>The Property class contains a nested class called
  <em>Maker</em>. This is the base class for all other property Makers. It
  provides basic framework functionality which is overridden by the
  code generated by properties.xsl from the *properties.xml files. In
  particular it provides basic expression evaluation, using the
  PropertyParser class in the org.apache.fop.fo.expr package.</p>
  <p>Other Property subclasses such as LengthProperty define their own
  nested Maker classes (subclasses of Property.Maker). These handle
  conversion from the Property subclass returned from expression
  evaluation into the appropriate subclass for the property.</p>
  <p>For each generic or specific property definition in the
  properties.xml files, a new subclass of one of the Maker classes is
  created. Note that no new Property subclasses are created, only new
  PropertyMaker subclasses. Once the property value has been parsed and
  stored, it has no specific functionality. Only the Maker code is
  specific. Maker subclasses define such aspects as keyword
  substitutions, whether the property can be inherited or not, which
  enumerated values are legal, default values, corresponding properties
  and specific datatype conversions.</p>
  </section>
    <section>
      <title>XML property specification format</title>
    <section>
      <title>Generic properties</title>
  <p>In the properties xml files, one can define generic property
  definitions which can serve as a basis for individual property
  definitions. There are currently several generic properties defined in
  foproperties.xml. An example is GenericColor, which defines basic properties
  for all ColorType properties. Since the generic specification doesn't include
  the inherited or default elements, these should be set in each property
  which is based on GenericColor. Here is an example:</p>
  <p>
  <code>
    &lt;property type='generic'>
      &lt;name>background-color&lt;/name>
      &lt;use-generic>GenericColor&lt;/use-generic>
      &lt;inherited>false&lt;/inherited>
      &lt;default>transparent&lt;/default>
    &lt;/property>
  </code></p>
  <p>A generic property specification can include all of the elements
  defined for the property element in the DTD, including the description
  of components for compound properties, and the specification of
  keyword shorthands.</p>
  
  <p>Generic property specifications can be based on other generic
  specifications.
  An example is the GenericCondPadding template which is based on the
  GenericCondLength definition but which extends it by adding an inherited
  element and a default value for the length component.</p>
  <p>
  Generic properties can specify enumerated values, as in the
  GenericBorderStyle template. This means that the list of values, which
  is used by 8 properties (the "absolute" and "writing-mode-relative"
  variants for each BorderStyle property) is only specified one time.</p>
  <p>
  When a property includes a "use-generic" element and includes no other
  elements (except the "name" element), then no class is generated for the
  property. Instead the generated mapping will associate this
  property directly with an instance of the generic Maker.</p>
  <p>
  A generic class may also be hand-coded, rather than generated from the
  properties file.
  Properties based on such a generic class are indicated by the
  attribute <code>ispropclass='true'</code> on the
  <em>use-generic</em> element.</p>
  <p> This is illustrated by the SVG properties, most of
  which use one of the Property subclasses defined in the
  <em>org.apache.fop.svg</em>
  package. Although all of these properties are now declared in
  svgproperties.xml, no specific classes are generated.  Classes are only
  generated for those SVG properties which are not based on generic
  classes defined in svg.</p>
    </section>
    <section>
      <title>Element-specific properties</title>
  <p>Properties may be defined for all flow objects or only for
  particular flow objects. A PropertyListBuilder object will always look
  first for a Property.Maker for the flow object before looking in the
  general list. These are specified in the
  <code>element-property-list</code> section of the properties.xml
  files. The <code>localname</code> element children of this element specify for
  which flow-object elements the property should be registered.</p>
  <p><em>NOTE</em>: All the properties for an object or set of objects
  must be specified in a single element-property-list element. If the
  same localname appears in several element lists, the later set of
  properties will hide the earlier ones! Use the <em>ref</em>
  functionality if the same property is to be used in different sets of
  element-specific mappings.
  </p>
    </section>
    <section>
      <title>Reference properties</title>
    <p>A property element may have a type attribute with the value
    <code>ref</code>. The
    content of the <em>name</em> child element is the name of the referenced
    property (not its class-name!). This indicates that the property
    specification has
    already been given, either in this same specification file or in a
    different one (indicated by the <code>family</code> attribute). The
    value of the family attribute is <em>XX</em> where the file
    <em>XXproperties.xml</em> defines the referenced property. For
    example, some SVG objects may have properties defined for FO. Rather
    than defining them again with a new name, the SVG properties simply
    reference the defined FO properties. The generated mapping for the
    SVG properties will use the FO Maker classes.</p>
    </section>
    <section>
      <title>Corresponding properties</title>
  <p>Some properties have both <em>absolute</em> and
  <em>writing-mode-relative</em> forms. In general, the absolute forms
  are equivalent to CSS properties, and the writing-mode-relative forms
  are based on DSSSL. FO files may use either or both forms. In
  FOP code, a request for an absolute form will retrieve that value if it
  was specified on the FO; otherwise the corresponding relative property
  will be used if it was specified. However, a request for a relative
  form will only use the specified relative value if the corresponding
  absolute value was <em>not</em> specified for that FO.
  </p>
  <p>Corresponding properties are specified in the properties.xml files
  using the element <code>corresponding</code>, which has at least one
  <code>propval</code> child and may have a <code>propexpr</code> child,
  if the corresponding
  value is calculated based on several other properties, as for
  <code>start-indent</code>.
  </p>
  <p><em>NOTE</em>: most current FOP code accesses the absolute variants
  of these properties, notably for padding, border, height and width
  attributes. However it does use start-indent and end-indent, rather
  than the "absolute" margin properties.
  </p>
  </section>
    </section>
  
    <section>
      <title>Mapping</title>
  <p>The XSL script <code>propmap.xsl</code> is used to generate
  property mappings based on
  both foproperties.xml and svgproperties.xml. The mapping classes
  in the main fop packages simply load these automatically generated
  mappings. The mapping code still uses the static
  "maker" function of the generated object to obtain a Maker
  object. However, for all generated classes, this method returns an
  instance of the class itself (which is a subclass of Property.Maker)
  and not an instance of a separate nested Maker class.</p>
  <p>For most SVG properties which use the SVG Property classes directly,
  the generated mapper code calls the "maker" method of the SVG Property
  class, which returns an instance of its nested Maker class.</p>
  <p>The property generation also handles element-specific property
  mappings as specified in the properties XML files.</p>
    </section>
  
    <section>
      <title>Enumerated values</title>
  <p>For any property whose datatype is <code>Enum</code> or which
  contains possible enumerated values, FOP code may need to access
  enumeration constants. These are defined in the interfaces whose name
  is the same as the generated class name for the property,
  for example <code>BorderBeforeStyle.NONE</code>. These interface classes
  are generated by the XSL script <code>enumgen.xsl</code>. A separate
  interface defining the enumeration constants is always generated for
  every property which uses the constants, even if the constants
  themselves are defined in a generic class, as in BorderStyle.</p>
  <p>If a subproperty or component of a compound property has enumerated
  values, the constants are defined in a nested interface whose name is
  the name of the subproperty (using appropriate capitalization
  rules). For example,
  the keep properties may have values of AUTO or FORCE or an integer
  value. These are defined for each kind of keep property. For example,
  the keep-together property is a compound property with the components
  within-line, within-column and within-page. Since each component may
  have the values AUTO or FORCE, the KeepTogether interface defines
  three nested interfaces, one for each component, and each defines
  these two constants. An example of a reference in code to the constant
  is <code>KeepTogether.WithinPage.AUTO</code>.</p>
  
    </section>
  
    <section>
      <title>Compound property types</title>
  <p>Some XSL FO properties are specified by compound datatypes. In the FO file,
  these are defined by a group of attributes, each having a name of the
  form <code>property.component</code>, for example
  <code>space-before.minimum</code>. There are several compound
  datatypes:</p>
  <ul>
  <li>LengthConditional, with components length and conditionality</li>
  <li>LengthRange, with components minimum, optimum, and maximum</li>
  <li>Space, with components minimum, optimum, maximum, precedence and
  conditionality </li>
  <li>Keep, with components within-line, within-column and within-page</li>
  </ul>
  <p>These are described in the properties.xml files using the element
  <code>compound</code> which has <code>subproperty</code> children. A subproperty 
element is much
  like a property element, although it may not have an <code>inherited</code> child
  element, as only a complete property object may be inherited.
  </p>
  <p>Specific datatype classes exist for each compound property. Each
  component of a compound datatype is itself stored as a Property
  object. Individual components may be accessed either by directly
  performing a get operation on the name, using the "dot" notation,
  eg. <code>get("space-before.optimum")</code>; or by using an accessor on the compound
  property, eg. <code>get("space-before").getOptimum()</code>.
  In either case,
  the result is a Property object, and the actual value may be accessed
  (in this example) by using the "getLength()" accessor.
  </p>
    </section>
  </section>
      </body>
  </document>
  
  
  
  
  1.1                  xml-fop/src/documentation/content/xdocs/design/renderers.xml
  
  Index: renderers.xml
  ===================================================================
  <?xml version="1.0" standalone="no"?>
  <!DOCTYPE document PUBLIC "-//APACHE//DTD Documentation V1.1//EN" "document-v11.dtd">
  
  <document>
      <header>
          <title>Renderers</title>
          <subtitle>Design of Renderers</subtitle>
          <authors>
              <person name="Keiron Liddle" email="[EMAIL PROTECTED]"/>
          </authors>
      </header>
  
      <body>
  <section>
    <title>Introduction</title>
  <p>
  A renderer is primarily designed to convert a given area tree into the output
  document format. It should be able to produce pages and fill the pages
  with the text and graphical content. Usually the output is sent to
  an output stream.
    </p>
    <p>
  Some output formats may support extra information that is not available
  from the area tree or depends on the destination of the document.
     </p>
    </section>
  
      </body>
  </document>
  
  
  
  
  1.1                  xml-fop/src/documentation/content/xdocs/design/status.xml
  
  Index: status.xml
  ===================================================================
  <?xml version="1.0" standalone="no"?>
  <!DOCTYPE document PUBLIC "-//APACHE//DTD Documentation V1.1//EN" "document-v11.dtd">
  
  <!-- Status of Design/Rewrite -->
  
  <document>
      <header>
          <title>Design Status</title>
          <subtitle>Current Status of FOP and Design</subtitle>
          <authors>
              <person name="Keiron Liddle" email="[EMAIL PROTECTED]"/>
          </authors>
      </header>
  
      <body>
  <section>
    <title>Status</title>
  <p>
  Currently some of FOP is being re-written so that the layout can be handled
  properly without the problems that have been encountered and to make
  it possible to handle keeps/breaks and spacing better.
    </p>
  <p>
  A number of issues have been dealt with in the redesigned code.
    </p>
    </section>
  <section>
    <title>Features TODO</title>
  <p>
  <ul>
  <li>avalon integration - logging, configuration, component management, caching, uri 
resolver</li>
  <li>improved interfaces
  <ul>
    <li>no threading/static problems</li>
    <li>simpler to use</li>
  </ul>
  </li>
  <li>better commandline handling</li>
  </ul>
    </p>
    </section>
  <section>
    <title>Done/Started Features</title>
  <p>
  Many of these changes will make FOP much better to use and develop.
  So if you help out with the development you can get to use these
  things sooner.
  <ul>
  <li><strong>better layout design</strong> - that can handle keeps and
  spacing better</li>
  <li><strong>better area tree handling</strong> - means it can run in
  less memory</li>
  <li><strong>better image handling</strong> - redone so it can use a
  cache and synchronizes properly only on the current image while loading</li>
  <li>svg now in an xml handler, FOP can be used without batik</li>
  <li>bookmark extension improved a bit - changed bookmark extension,
  now requires a wrapping element bookmark</li>
  <li>new interface for structured documents, rtf and mif</li>
  <li>better handling of unknown xml and xml from an unknown namespace</li>
  <li>Changed extensions to allow for external xml</li>
  <li>Can have a default element mapping for extensions</li>
  <li>mathml extension</li>
  <li>another xml -> svg extension</li>
  <li>svg text is drawn as normal text if that can be handled, otherwise it is
  stroked; this is done automatically</li>
  <li>new area tree model</li>
  <li>new renderer model</li>
  <li>added handlers for xml in renderer</li>
  <li>changed area tree xml format to match the area tree hierarchy</li>
  </ul>
  
    </p>
    </section>
  
      </body>
  </document>
  
  
  
  
  1.1                  xml-fop/src/documentation/content/xdocs/design/useragent.xml
  
  Index: useragent.xml
  ===================================================================
  <?xml version="1.0" standalone="no"?>
  <!DOCTYPE document PUBLIC "-//APACHE//DTD Documentation V1.1//EN" "document-v11.dtd">
  
  <document>
      <header>
          <title>FO User Agent</title>
          <subtitle>Design of FO User Agent</subtitle>
          <authors>
              <person name="Keiron Liddle" email="[EMAIL PROTECTED]"/>
          </authors>
      </header>
  
      <body>
  <section>
    <title>Purpose</title>
  <p>
  Technically the user agent is FOP in the role of determining the
  output format and when resolving various attributes. The user
  agent is represented by a class that is available to others to
  specify how FOP should behave.
    </p>
    <p>
  The user agent is used by the formatting process to determine
  certain user definable values.
    </p>
    <p>
  It will enable the customisation of values for generating and
  rendering the document.
    </p>
    <p>
  The user agent must be available to the layout processor and
  the renderer. Users can supply their own user agent or use
  the default one for a particular renderer.
    </p>
    <p>
  The user agent needs to be made available to the property
  resolution, the layout process and the renderer.
     </p>
  
    <p>
  Standard Features:
         <ul>
         <li><p>
  error handling, what to do if fo markup is invalid
         </p></li>
         <li><p>
  auto overflow value and handling error-if-overflow
         </p></li>
         <li><p>
  adjusting length values (eg. for borders) to renderable values
         </p></li>
         <li><p>
  available fonts
         </p></li>
         <li><p>
  converting cm/in to pt (dpi)
         </p></li>
         <li><p>
  active state for multi properties
         </p></li>
         <li><p>
  title, used to identify a set of pages (in a page sequence)
         </p></li>
         <li><p>
  the width (in inline-progression-dimension) of a character with
  treat-word-as-space true
         </p></li>
         <li><p>
  maximum space used by conditional areas from region-reference-area
         </p></li>
         <li><p>
  if there should be "hot links" to before floats or footnotes
         </p></li>
         <li><p>
  when to clear side floats if space in inline-progression-dimension
  is not enough
         </p></li>
         <li><p>
  placement of left over footnotes on a page with a region-body
         </p></li>
         <li><p>
  using color property as border colour
         </p></li>
         <li><p>
  interpreting all border styles (except outset) as solid
         </p></li>
         <li><p>
  thin, medium and thick values for border width
         </p></li>
         <li><p>
  initial font-family value
         </p></li>
         <li><p>
  absolute font sizes (eg. xx-small, x-small etc.)
         </p></li>
         <li><p>
  relative font sizes (eg. larger, smaller)
         </p></li>
         <li><p>
  small caps simulation
         </p></li>
         <li><p>
  font weight mapping
         </p></li>
         <li><p>
  baseline info for a font if not available
         </p></li>
         <li><p>
  determining sub/superscript when another baseline is dominant
         </p></li>
         <li><p>
  scaling method for external-graphic and instream-foreign-object
         </p></li>
         <li><p>
  the width of a replaced element
         </p></li>
         <li><p>
  "normal" line height value
         </p></li>
         <li><p>
  text alignment (stretching the line with letter and word spacing)
         </p></li>
         <li><p>
  text transform
         </p></li>
         <li><p>
  initial color
         </p></li>
         <li><p>
  rendering intent of auto
         </p></li>
         <li><p>
  dot character for leader
         </p></li>
         <li><p>
  line breaking with leaders, use optimum length when breaking the line
         </p></li>
         <li><p>
  page height/width of auto
         </p></li>
         <li><p>
  left and right caption widths
         </p></li>
         <li><p>
  glyph orientation vertical of auto
         </p></li>
         <li><p>
  rendering processor of content-type (mime type)
         </p></li>
         </ul>
    </p>
  
    <p>
  Interactive Features:
         <ul>
         <li><p>
  inline and block scroll amount
         </p></li>
         <li><p>
  dynamic effects, links and property sets
         </p></li>
         <li><p>
  initial "pause-after", "pause-before" and "voice-family" value
         </p></li>
         <li><p>
  treating fixed as scroll on background attachment
         </p></li>
         <li><p>
  media usage of auto
         </p></li>
         </ul>
    </p>
  
    </section>
  
      </body>
  </document>
  
  
  
  
  1.3       +8 -0      xml-fop/src/documentation/content/xdocs/dev/book.xml
  
  Index: book.xml
  ===================================================================
  RCS file: /home/cvs/xml-fop/src/documentation/content/xdocs/dev/book.xml,v
  retrieving revision 1.2
  retrieving revision 1.3
  diff -u -r1.2 -r1.3
  --- book.xml  31 Oct 2002 09:37:30 -0000      1.2
  +++ book.xml  7 Nov 2002 08:15:01 -0000       1.3
  @@ -6,12 +6,20 @@
         copyright="@year@ The Apache Software Foundation"
         xmlns:xlink="http://www.w3.org/1999/xlink">
   
  +    <menu label="Index">
  +      <menu-item label="FOP Index" href="../index.html"/>
  +    </menu>
       <menu label="About">
         <menu-item label="Index" href="index.html"/>
         <menu-item label="FAQs" href="faq.html"/>
         <menu-item label="Examples" href="examples.html"/>
         <menu-item label="SVG" href="svg.html"/>
  +    </menu>
  +    <menu label="Using">
         <menu-item label="Extensions" href="extensions.html"/>
         <menu-item label="Configuration" href="configuration.html"/>
  +    </menu>
  +    <menu label="Developers">
  +      <menu-item label="Design" href="../design/index.html"/>
       </menu>
   </book>
  
  
  
  1.2       +3 -2      xml-fop/src/documentation/content/xdocs/dev/extensions.xml
  
  Index: extensions.xml
  ===================================================================
  RCS file: /home/cvs/xml-fop/src/documentation/content/xdocs/dev/extensions.xml,v
  retrieving revision 1.1
  retrieving revision 1.2
  diff -u -r1.1 -r1.2
  --- extensions.xml    28 Oct 2002 08:41:00 -0000      1.1
  +++ extensions.xml    7 Nov 2002 08:15:01 -0000       1.2
  @@ -59,8 +59,9 @@
       <fox:outline>
     </fox:bookmarks>
   </fo:root>]]></source>
  -    <p>It works similarly to a basic-link. There is also an external-destination 
  -       property, but it isn't supported currently. See the <link 
href="pdfoutline.pdf">pdfoutline.pdf</link> for a more complete example.
  +    <p>It works similarly to a basic-link. There is also an external-destination
  +       property, but it isn't supported currently. The documents
  +       on the fop-site use this extension so you can see it in action.
               </p>
             </section>
             </section>


---------------------------------------------------------------------
To unsubscribe, e-mail: [EMAIL PROTECTED]
For additional commands, e-mail: [EMAIL PROTECTED]

cvs commit: xml-fop/src/documentation/content/xdocs/dev book.xml extensions.xml

Reply via email to