Hello everybody,
I would like to parse a fairly large XML file (about 180 MB).
I used the DOM interface because I need the tree for further
processing of the data the XML file contains. Of course, a lot
of memory is used while parsing the file, and I got an
"Out of memory" exception.
I noticed that a class DOMLSParserFilter comes with Xerces-C++ 3.0.1
(Win32), which makes it possible to filter the nodes during parsing.
That is perfect for me because one XML element in my large file
contains most of the data. This XML element is called DATA and
appears several times in my XML file.
So I had the idea to reject this XML element from the DOM tree
during parsing, to reduce the memory used, by using the method
startElement() of the DOMLSParserFilter class. After that I would
use a SAX parser and just get all DATA elements with their values.
But it does not work.
I integrated my code into the DOMPrint example which comes
with Xerces-C++ 3.0.1. The following error message occurred:
DOM Error during parsing:
'C:\Daten\2009-08-07_NewXercesc\3_0_1\xerces-c-3.0.1\Build\Win32\VC6\Debug\MyXML.xml'
DOMException code is: 3
Message is: attempt is made to insert a node where it is not permitted
Did I misunderstand the functionality of the DOMLSParserFilter class
and its method startElement()?
Is it possible to realize my idea with the help of this class? Did
I do something wrong in my code (please have a look below)?
I would be very grateful for any help.
Thanks in advance,
Mirko
DOMPrintFilter.hpp:
--------------------
// Filter derived from DOMLSParserFilter.
//   - startElement() is defined out of line and makes the per-element decision.
//   - acceptNode() accepts every node it is given.
//   - getWhatToShow() reports the ShowType mask supplied at construction.
class DOMParserFilter : public DOMLSParserFilter {
public:
    // whatToShow defaults to DOMNodeFilter::SHOW_ALL.
    DOMParserFilter(DOMNodeFilter::ShowType whatToShow = DOMNodeFilter::SHOW_ALL);

    ~DOMParserFilter() {}

    // Defined out of line.
    virtual FilterAction startElement(DOMElement* node);

    // Unconditionally accepts the node.
    virtual FilterAction acceptNode(DOMNode* node)
    {
        return DOMParserFilter::FILTER_ACCEPT;
    }

    // Returns the mask stored by the constructor.
    virtual DOMNodeFilter::ShowType getWhatToShow() const
    {
        return fWhatToShow;
    }

private:
    DOMNodeFilter::ShowType fWhatToShow;
};
DOMPrintFilter.cpp:
--------------------
// Stores the ShowType mask that getWhatToShow() later returns.
DOMParserFilter::DOMParserFilter(DOMNodeFilter::ShowType whatToShow)
    : fWhatToShow(whatToShow)
{
}
// Returns FILTER_REJECT when the element's node name compares equal to
// element_data, and FILTER_ACCEPT for every other element.
// NOTE(review): element_data is declared outside this snippet; per the
// original comment it is presumably the XMLCh string "DATA" - confirm.
DOMParserFilter::FilterAction DOMParserFilter::startElement(DOMElement* node)
{
    const bool isDataElement =
        XMLString::compareString(node->getNodeName(), element_data) == 0;

    return isDataElement ? DOMParserFilter::FILTER_REJECT
                         : DOMParserFilter::FILTER_ACCEPT;
}
DOMPrint.cpp:
---------------
// Null-terminated XMLCh string "LS": the feature name passed to the
// implementation registry to obtain a DOM implementation.
static const XMLCh gLS[] = { xercesc::chLatin_L, xercesc::chLatin_S,
                             xercesc::chNull };

xercesc::DOMImplementation *implParser =
    xercesc::DOMImplementationRegistry::getDOMImplementation(gLS);

// Create a synchronous LS parser (no schema type given).
xercesc::DOMLSParser* parser =
    static_cast<xercesc::DOMImplementationLS*>(implParser)->createLSParser(
        xercesc::DOMImplementationLS::MODE_SYNCHRONOUS, 0);

// Route parser errors to the tree error reporter.
DOMTreeErrorReporter *errReporter = new DOMTreeErrorReporter();
parser->getDomConfig()->setParameter(xercesc::XMLUni::fgDOMErrorHandler,
                                     errReporter);

// Install the filter so that it is consulted while the tree is being built.
DOMParserFilter * pDOMParserFilter = new DOMParserFilter();
parser->setFilter(pDOMParserFilter);

//
// Parse the XML file, catching any XML exceptions that might propagate
// out of it.
//
bool errorsOccured = false;
DOMDocument *doc = NULL;
try
{
    doc = parser->parseURI(gXmlFile);
}
catch (const OutOfMemoryException&)
{
    XERCES_STD_QUALIFIER cerr << "OutOfMemoryException" <<
        XERCES_STD_QUALIFIER endl;
    errorsOccured = true;
}
catch (const XMLException& e)
{
    // The literal was split across two physical lines, which does not
    // compile; it is written here as two adjacent literals instead.
    XERCES_STD_QUALIFIER cerr << "An error occurred during parsing\n"
                                 "   Message: "
        << StrX(e.getMessage()) << XERCES_STD_QUALIFIER endl;
    errorsOccured = true;
}
catch (const DOMException& e)
{
    // Buffer for the textual form of the DOM exception code.
    const unsigned int maxChars = 2047;
    XMLCh errText[maxChars + 1];

    XERCES_STD_QUALIFIER cerr << "\nDOM Error during parsing: '" << gXmlFile
        << "'\n"
        << "DOMException code is: " << e.code << XERCES_STD_QUALIFIER endl;

    // Print the message only if one could be loaded for this code.
    if (DOMImplementation::loadDOMExceptionMsg(e.code, errText, maxChars))
        XERCES_STD_QUALIFIER cerr << "Message is: " << StrX(errText) <<
            XERCES_STD_QUALIFIER endl;
    errorsOccured = true;
}
catch (...)
{
    XERCES_STD_QUALIFIER cerr << "An error occurred during parsing\n " <<
        XERCES_STD_QUALIFIER endl;
    errorsOccured = true;
}