This patch enables the StAX parser to behave as a validating XML parser when
configured to do so.

That mostly wraps it up for now. Merry Christmas.

2005-12-24  Chris Burdess  <[EMAIL PROTECTED]>

        * gnu/xml/stream/SAXParser.java,
          gnu/xml/stream/XMLParser.java: DTD validation support for StAX parser.
-- 
Chris Burdess
  "They that can give up essential liberty to obtain a little safety
  deserve neither liberty nor safety." - Benjamin Franklin
Index: gnu/xml/stream/SAXParser.java
===================================================================
RCS file: /cvsroot/classpath/classpath/gnu/xml/stream/SAXParser.java,v
retrieving revision 1.6
diff -u -r1.6 SAXParser.java
--- gnu/xml/stream/SAXParser.java       24 Dec 2005 14:14:48 -0000      1.6
+++ gnu/xml/stream/SAXParser.java       24 Dec 2005 17:52:57 -0000
@@ -481,8 +481,9 @@
                         // Element decl
                         if (declHandler != null)
                           {
-                            String model = doctype.getElementModel(name);
-                            declHandler.elementDecl(name, model);
+                            XMLParser.ContentModel model =
+                              doctype.getElementModel(name);
+                            declHandler.elementDecl(name, model.text);
                           }
                       }
                     else if ('A' == c)
Index: gnu/xml/stream/XMLParser.java
===================================================================
RCS file: /cvsroot/classpath/classpath/gnu/xml/stream/XMLParser.java,v
retrieving revision 1.10
diff -u -r1.10 XMLParser.java
--- gnu/xml/stream/XMLParser.java       24 Dec 2005 14:14:48 -0000      1.10
+++ gnu/xml/stream/XMLParser.java       24 Dec 2005 17:52:58 -0000
@@ -131,6 +131,9 @@
   private StringBuffer nmtokenBuf = new StringBuffer();
   private StringBuffer literalBuf = new StringBuffer();
   private char[] tmpBuf = new char[1024];
+  
+  private ContentModel currentContentModel;
+  private LinkedList validationStack = new LinkedList();
 
   private String piTarget, piData;
 
@@ -141,7 +144,7 @@
   Doctype doctype;
   private boolean expandPE, peIsError;
 
-  private final boolean validating; // TODO
+  private final boolean validating;
   private final boolean stringInterning;
   private final boolean coalescing;
   private final boolean replaceERefs;
@@ -835,6 +838,8 @@
                   {
                     reset();
                     event = readCharData(null);
+                    if (validating)
+                      validatePCData(buf.toString());
                   }
               }
             break;
@@ -844,6 +849,8 @@
             buf.append(elementName);
             state = stack.isEmpty() ? MISC : CONTENT;
             event = XMLStreamConstants.END_ELEMENT;
+            if (validating)
+              endElementValidationHook();
             break;
           case INIT: // XMLDecl?
             if (tryRead(TEST_XML_DECL))
@@ -1586,11 +1593,12 @@
     throws IOException, XMLStreamException
   {
     if (tryRead("EMPTY"))
-      doctype.addElementDecl(elementName, "EMPTY");
+      doctype.addElementDecl(elementName, "EMPTY", new EmptyContentModel());
     else if (tryRead("ANY"))
-      doctype.addElementDecl(elementName, "ANY");
+      doctype.addElementDecl(elementName, "ANY", new AnyContentModel());
     else
       {
+        ContentModel model;
         StringBuffer acc = new StringBuffer();
         require('(');
         acc.append('(');
@@ -1599,11 +1607,17 @@
           {
             // mixed content
             acc.append("#PCDATA");
+            MixedContentModel mm = new MixedContentModel();
+            model = mm;
             skipWhitespace();
             if (tryRead(')'))
               {
-                acc.append(")*");
-                tryRead('*');
+                acc.append(")");
+                if (tryRead('*'))
+                  {
+                    mm.min = 0;
+                    mm.max = -1;
+                  }
               }
             else
               {
@@ -1612,27 +1626,32 @@
                     require('|');
                     acc.append('|');
                     skipWhitespace();
-                    acc.append(readNmtoken(true));
+                    String name = readNmtoken(true);
+                    acc.append(name);
+                    mm.addName(name);
                     skipWhitespace();
                   }
                 require('*');
                 acc.append(")*");
+                mm.min = 0;
+                mm.max = -1;
               }
           }
         else
-          readElements(acc);
-        doctype.addElementDecl(elementName, acc.toString());
+          model = readElements(acc);
+        doctype.addElementDecl(elementName, acc.toString(), model);
       }
   }
 
-  private void readElements(StringBuffer acc)
+  private ElementContentModel readElements(StringBuffer acc)
     throws IOException, XMLStreamException
   {
     char separator;
+    ElementContentModel model = new ElementContentModel();
     
     // Parse first content particle
     skipWhitespace();
-    readContentParticle(acc);
+    model.addContentParticle(readContentParticle(acc));
     // End or separator
     skipWhitespace();
     char c = readCh();
@@ -1644,15 +1663,25 @@
         c = readCh();
         switch (c)
           {
+          case '?':
+            acc.append(c);
+            model.min = 0;
+            model.max = 1;
+            break;
           case '*':
+            acc.append(c);
+            model.min = 0;
+            model.max = -1;
+            break;
           case '+':
-          case '?':
             acc.append(c);
+            model.min = 1;
+            model.max = -1;
             break;
           default:
             reset();
           }
-        return; // done
+        return model; // done
       case ',':
       case '|':
         separator = c;
@@ -1660,13 +1689,13 @@
         break;
       default:
         error("bad separator in content model", new Character(c));
-        return;
+        return model;
       }
     // Parse subsequent content particles
     while (true)
       {
         skipWhitespace();
-        readContentParticle(acc);
+        model.addContentParticle(readContentParticle(acc));
         skipWhitespace();
         c = readCh();
         if (c == ')')
@@ -1677,7 +1706,7 @@
         else if (c != separator)
           {
             error("bad separator in content model", new Character(c));
-            return;
+            return model;
           }
         else
           acc.append(c);
@@ -1688,40 +1717,64 @@
     switch (c)
       {
       case '?':
+        acc.append(c);
+        model.min = 0;
+        model.max = 1;
+        break;
       case '*':
+        acc.append(c);
+        model.min = 0;
+        model.max = -1;
+        break;
       case '+':
         acc.append(c);
-        return;
+        model.min = 1;
+        model.max = -1;
+        break;
       default:
         reset();
-        return;
       }
+    return model;
   }
 
-  private void readContentParticle(StringBuffer acc)
+  private ContentParticle readContentParticle(StringBuffer acc)
     throws IOException, XMLStreamException
   {
+    ContentParticle cp = new ContentParticle();
     if (tryRead('('))
       {
         acc.append('(');
-        readElements(acc);
+        cp.content = readElements(acc);
       }
     else
       {
-        acc.append(readNmtoken(true));
+        String name = readNmtoken(true);
+        acc.append(name);
+        cp.content = name;
         mark(1);
         char c = readCh();
         switch (c)
           {
           case '?':
+            acc.append(c);
+            cp.min = 0;
+            cp.max = 1;
+            break;
           case '*':
+            acc.append(c);
+            cp.min = 0;
+            cp.max = -1;
+            break;
           case '+':
             acc.append(c);
+            cp.min = 1;
+            cp.max = -1;
             break;
           default:
             reset();
           }
       }
+    return cp;
   }
 
   /**
@@ -2094,6 +2147,14 @@
               error("unbound attribute prefix", attr.prefix);
           }
       }
+    if (validating)
+      {
+        validateStartElement(elementName);
+        currentContentModel = doctype.getElementModel(elementName);
+        if (currentContentModel == null)
+          error("no element declaration", elementName);
+        validationStack.add(new LinkedList());
+      }
     // make element name available for read
     buf.setLength(0);
     buf.append(elementName);
@@ -2222,6 +2283,22 @@
     // Make element name available
     buf.setLength(0);
     buf.append(expected);
+    if (validating)
+      endElementValidationHook();
+  }
+
+  private void endElementValidationHook()
+    throws XMLStreamException
+  {
+    validateEndElement();
+    validationStack.removeLast();
+    if (stack.isEmpty())
+      currentContentModel = null;
+    else
+      {
+        String parent = (String) stack.getLast();
+        currentContentModel = doctype.getElementModel(parent);
+      }
   }
 
   /**
@@ -3227,6 +3304,136 @@
     throw new XMLStreamException(message);
   }
 
+  private void validateStartElement(String elementName)
+    throws XMLStreamException
+  {
+    if (currentContentModel == null)
+      return; // root element
+    if (doctype == null)
+      error("document does not specify a DTD");
+    switch (currentContentModel.type)
+      {
+      case ContentModel.EMPTY:
+        error("child element found in empty element", elementName);
+        break;
+      case ContentModel.ELEMENT:
+        LinkedList ctx = (LinkedList) validationStack.getLast();
+        ctx.add(elementName);
+        break;
+      case ContentModel.MIXED:
+        MixedContentModel mm = (MixedContentModel) currentContentModel;
+        if (!mm.containsName(elementName))
+          error("illegal element for content model", elementName);
+        break;
+      }
+  }
+
+  private void validateEndElement()
+    throws XMLStreamException
+  {
+    if (currentContentModel == null)
+      return; // root element
+    switch (currentContentModel.type)
+      {
+      case ContentModel.ELEMENT:
+        LinkedList ctx = (LinkedList) validationStack.getLast();
+        ElementContentModel ecm = (ElementContentModel) currentContentModel;
+        validateElementContent(ecm, ctx);
+        break;
+      }
+  }
+
+  private void validatePCData(String text)
+    throws XMLStreamException
+  {
+    switch (currentContentModel.type)
+      {
+      case ContentModel.EMPTY:
+        error("character data found in empty element", text);
+        break;
+      case ContentModel.ELEMENT:
+        boolean white = true;
+        int len = text.length();
+        for (int i = 0; i < len; i++)
+          {
+            char c = text.charAt(i);
+            if (c != ' ' && c != '\t' && c != '\n' && c != '\r')
+              {
+                white = false;
+                break;
+              }
+          }
+        if (!white)
+          error("character data found in element with element content", text);
+        break;
+      }
+  }
+
+  private void validateElementContent(ElementContentModel model,
+                                      LinkedList children)
+    throws XMLStreamException
+  {
+    // Use regular expression
+    StringBuffer buf = new StringBuffer();
+    for (Iterator i = children.iterator(); i.hasNext(); )
+      {
+        buf.append((String) i.next());
+        buf.append(' ');
+      }
+    String c = buf.toString();
+    String regex = createRegularExpression(model);
+    if (!c.matches(regex))
+      error("element content does not match expression "+regex, c);
+  }
+
+  private String createRegularExpression(ElementContentModel model)
+  {
+    if (model.regex == null)
+      {
+        StringBuffer buf = new StringBuffer();
+        buf.append('(');
+        for (Iterator i = model.contentParticles.iterator(); i.hasNext(); )
+          {
+            ContentParticle cp = (ContentParticle) i.next();
+            if (cp.content instanceof String)
+              {
+                buf.append('(');
+                buf.append((String) cp.content);
+                buf.append(' ');
+                buf.append(')');
+                if (cp.max == -1)
+                  {
+                    if (cp.min == 0)
+                      buf.append('*');
+                    else
+                      buf.append('+');
+                  }
+                else if (cp.min == 0)
+                  buf.append('?');
+              }
+            else
+              {
+                ElementContentModel ecm = (ElementContentModel) cp.content;
+                buf.append(createRegularExpression(ecm));
+              }
+            if (i.hasNext())
+              buf.append('|');
+          }
+        buf.append(')');
+        if (model.max == -1)
+          {
+            if (model.min == 0)
+              buf.append('*');
+            else
+              buf.append('+');
+          }
+        else if (model.min == 0)
+          buf.append('?');
+        model.regex = buf.toString();
+      }
+    return model.regex;
+  }
+
   public static void main(String[] args)
     throws Exception
   {
@@ -3235,7 +3442,7 @@
       xIncludeAware = true;
     XMLParser p = new XMLParser(new java.io.FileInputStream(args[0]),
                                 absolutize(null, args[0]),
-                                false, // validating
+                                true, // validating
                                 true, // namespaceAware
                                 true, // coalescing,
                                 true, // replaceERefs
@@ -3386,10 +3593,11 @@
       this.systemId = systemId;
     }
 
-    void addElementDecl(String name, String model)
+    void addElementDecl(String name, String text, ContentModel model)
     {
       if (elements.containsKey(name))
         return;
+      model.text = text;
       elements.put(name, model);
       entries.add("E" + name);
     }
@@ -3454,9 +3662,9 @@
       entries.add("p" + key);
     }
 
-    String getElementModel(String name)
+    ContentModel getElementModel(String name)
     {
-      return (String) elements.get(name);
+      return (ContentModel) elements.get(name);
     }
 
     AttributeDecl getAttributeDecl(String ename, String aname)
@@ -3522,6 +3730,106 @@
     String notationName;
   }
 
+  abstract class ContentModel
+  {
+    static final int EMPTY = 0;
+    static final int ANY = 1;
+    static final int ELEMENT = 2;
+    static final int MIXED = 3;
+    
+    int min;
+    int max;
+    final int type;
+    String text;
+
+    ContentModel(int type)
+    {
+      this.type = type;
+      min = 1;
+      max = 1;
+    }
+    
+  }
+
+  class EmptyContentModel
+    extends ContentModel
+  {
+    
+    EmptyContentModel()
+    {
+      super(ContentModel.EMPTY);
+      min = 0;
+      max = 0;
+    }
+    
+  }
+
+  class AnyContentModel
+    extends ContentModel
+  {
+    
+    AnyContentModel()
+    {
+      super(ContentModel.ANY);
+      min = 0;
+      max = -1;
+    }
+    
+  }
+
+  class ElementContentModel
+    extends ContentModel
+  {
+
+    LinkedList contentParticles;
+    String regex; // regular expression cache
+    
+    ElementContentModel()
+    {
+      super(ContentModel.ELEMENT);
+      contentParticles = new LinkedList();
+    }
+
+    void addContentParticle(ContentParticle cp)
+    {
+      contentParticles.add(cp);
+    }
+    
+  }
+
+  class ContentParticle
+  {
+
+    int min = 1;
+    int max = 1;
+    Object content; // Name (String) or ElementContentModel
+    
+  }
+
+  class MixedContentModel
+    extends ContentModel
+  {
+
+    private HashSet names;
+    
+    MixedContentModel()
+    {
+      super(ContentModel.MIXED);
+      names = new HashSet();
+    }
+
+    void addName(String name)
+    {
+      names.add(name);
+    }
+
+    boolean containsName(String name)
+    {
+      return names.contains(name);
+    }
+    
+  }
+
   class AttributeDecl
   {
     

Attachment: pgpb6Jt1veH6A.pgp
Description: PGP signature

_______________________________________________
Classpath-patches mailing list
[email protected]
http://lists.gnu.org/mailman/listinfo/classpath-patches

Reply via email to