I've just run into a problem I don't understand: when I parse a document that has a DTD, and the content following the DTD contains newlines ('\n'), the resulting tree gets an additional text node for each '\n'.

The inline example below shows this. test2 produces text nodes - <doc> has 3 direct children,
but <doc> from test1 has only one.

Note that without the DTD I get the same tree in both cases.

Is this the expected behavior? If so, how can I tell the parser not to create these text nodes?

P.S. I'm using libxml2 2.6.32 from Debian 5.

--- source ---
#include <stdio.h>
#include <libxml/parser.h>
#include <libxml/tree.h>

/*
 * Parse one in-memory XML document with XML_PARSE_NOBLANKS and print the
 * node type and the name of the root element's first child, one per line.
 *
 * xml - the document text
 * len - number of bytes of xml to parse; must NOT include the terminating
 *       NUL of a C string
 *
 * Returns 0 on success, -1 if the document could not be parsed, has no
 * root element, or the root element has no children.
 */
static int print_first_child(const char *xml, int len)
{
    xmlDocPtr doc;
    xmlNodePtr root;
    int rc = -1;

    doc = xmlReadMemory(xml, len, NULL, NULL, XML_PARSE_NOBLANKS);
    if (doc == NULL) {
        fprintf(stderr, "failed to parse document\n");
        return -1;
    }

    root = xmlDocGetRootElement(doc);
    if (root == NULL) {
        fprintf(stderr, "document has no root element\n");
    } else if (root->children == NULL) {
        fprintf(stderr, "root element has no children\n");
    } else {
        printf("%d\n", (int)root->children->type);
        /* name is const xmlChar * (unsigned char); cast for %s. */
        printf("%s\n", (const char *)root->children->name);
        rc = 0;
    }

    xmlFreeDoc(doc);
    return rc;
}

/*
 * Parses two documents that differ only in the newlines inside <doc> and
 * prints the first child of each root element so the resulting trees can
 * be compared.
 *
 * Returns 0 if both documents were processed, 1 otherwise.
 */
int main(void)
{
    static const char test1[] =
       "<?xml version=\"1.0\"?>"
       "<!DOCTYPE doc ["
       "<!ELEMENT doc ANY>"
       "<!ELEMENT elem ANY>"
       "<!ATTLIST elem attr CDATA #IMPLIED>"
       "]>"
       "<doc>"
       "<elem attr=\"blah\"/>"
       "</doc>";
    static const char test2[] =
       "<?xml version=\"1.0\"?>"
       "<!DOCTYPE doc ["
       "<!ELEMENT doc ANY>"
       "<!ELEMENT elem ANY>"
       "<!ATTLIST elem attr CDATA #IMPLIED>"
       "]>"
       "<doc>\n"
       "<elem attr=\"blah\"/>\n"
       "</doc>";
    int failed = 0;

    /*
     * sizeof counts the terminating NUL of the string literal, which is not
     * part of the XML document, so subtract one to get the document length.
     */
    if (print_first_child(test1, (int)(sizeof(test1) - 1)) != 0)
        failed = 1;
    if (print_first_child(test2, (int)(sizeof(test2) - 1)) != 0)
        failed = 1;

    /* Done with libxml2 for the rest of the process. */
    xmlCleanupParser();

    return failed;
}
--- source ---
_______________________________________________
xml mailing list, project page  http://xmlsoft.org/
xml@gnome.org
http://mail.gnome.org/mailman/listinfo/xml

Reply via email to