[ https://issues.apache.org/jira/browse/XERCESC-2030?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel ]
Alberto Massari resolved XERCESC-2030. -------------------------------------- Resolution: Not a Problem Your code is doing a dangerous thing: using XMLString::transcode in the SAX callback. This tries to convert a Unicode string into a locale that you cannot control and that, in your case, is unable to represent non-European characters. If you really need to store non-Unicode strings in a stack, please convert them into UTF-8, and never use XMLString::transcode, unless you are preparing to print data to the console. > failed to do validation when there's Japanese words in the xml file > ------------------------------------------------------------------- > > Key: XERCESC-2030 > URL: https://issues.apache.org/jira/browse/XERCESC-2030 > Project: Xerces-C++ > Issue Type: Bug > Components: SAX/SAX2 > Environment: SunOS 5.10 Generic_139555-08 sun4u sparc > SUNW,Sun-Fire-V245 > xerces C++ 3.1.1 > Reporter: ocean_helen > > Hi owners, > I got a problem when using Xerces C++ 3.1.1 to do schema validation > which has Japanese words in the xml file. it raised FatalError: invalid > multi-byte sequence and stop validation. > Environment: Linux > Locale: > LANG= > LC_CTYPE=en_GB.ISO8859-1 > LC_NUMERIC=C > LC_TIME=en_GB.ISO8859-1 > LC_COLLATE=en_GB.ISO8859-1 > LC_MONETARY=en_GB.ISO8859-1 > LC_MESSAGES=C > LC_ALL= > The xml file is generated in linux and because of the business, we > couldn't change characterset from ISO8859-1 to UTF-8 from the system side, so > do we have any workaround to skip this kind of error, or is it possible to > modify characterset to pass the validation in C++? > All the source codes are attached at below, please let me know if you > need any more information. > Looking forward to your reply and thank you so much in advance. 
> Source Code: > a.xsd: > ============================================================ > <?xml version="1.0" encoding="UTF-8"?> > <xs:schema xmlns:xs="http://www.w3.org/2001/XMLSchema"> > <xs:element name="phonebook"> > <xs:complexType> > <xs:sequence> > <xs:element name="name" minOccurs="1" maxOccurs="1"> > <xs:complexType> > <xs:sequence> > <xs:element name="first" type="xs:string"/> > </xs:sequence> > </xs:complexType> > </xs:element> > </xs:sequence> > </xs:complexType> > </xs:element> > </xs:schema> > a.xml: > ============================================================ > <?xml version="1.0" encoding="UTF-8"?> > <phonebook xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" > xsi:noNamespaceSchemaLocation= > "gobitan.xsd"> > <name> > <first>円短期</first> > </name> > </phonebook> > val.cpp > ============================================================ > #include <xercesc/util/PlatformUtils.hpp> > #include <xercesc/validators/common/Grammar.hpp> > #include <xercesc/sax2/SAX2XMLReader.hpp> > #include <xercesc/util/XMLException.hpp> > #include <xercesc/util/OutOfMemoryException.hpp> > #include <xercesc/util/XMLString.hpp> > #include <xercesc/sax2/XMLReaderFactory.hpp> > #include <stdio.h> > #include "MyHandler.hpp" > #if defined(XERCES_NEW_IOSTREAMS) > #include <iostream> > #else > #include <iostream.h> > #endif > using namespace std; > using namespace xercesc; > //XERCES_CPP_NAMESPACE_USE > int main( int argc , char** argv ) > { > XMLPlatformUtils::Initialize(); //..... 
> SAX2XMLReader* parser = XMLReaderFactory::createXMLReader(); > parser->setFeature(XMLUni::fgSAX2CoreNameSpaces, true); > parser->setFeature(XMLUni::fgSAX2CoreNameSpacePrefixes, true); > parser->setFeature(XMLUni::fgXercesValidationErrorAsFatal, true); > parser->setFeature(XMLUni::fgSAX2CoreValidation, true); > parser->setFeature(XMLUni::fgXercesSchema, true); > parser->setFeature(XMLUni::fgXercesSchemaFullChecking, true); > parser->setFeature(XMLUni::fgXercesLoadSchema,true); > parser->setExitOnFirstFatalError(false); > parser->loadGrammar ("a.xsd", Grammar::SchemaGrammarType, true); > MyHandler* handler=new MyHandler(); > parser->setContentHandler(handler); > parser->setErrorHandler(handler); > try > { > parser->parse("a.xml"); > vector<string> errs=handler->getSchemaErrorContent(); > if(errs.size()>0) > { > cout<<"ERROR MESSAGE OF SCHEMA VALIDATION============="<<endl; > for (unsigned int i = 0; i < errs.size();i++) > { > cout<<errs.at(i)<<endl; > } > } > cout<<"END TRY"<<endl; > } > catch (const XMLException& toCatch) { > char* message = XMLString::transcode(toCatch.getMessage()); > cout << "Exception message is: \n" > << message << "\n"; > XMLString::release(&message); > return -1; > } > catch (const SAXParseException& toCatch) { > char* message = XMLString::transcode(toCatch.getMessage()); > cout << "Exception message is: \n" > << message << "\n"; > XMLString::release(&message); > return -1; > } > catch (...) 
{ > cout << "Unexpected Exception \n" ; > return -1; > } > cout<<"FINISH"<<endl; > XMLPlatformUtils::Terminate(); > return 0; > } > MyHandler.cpp > ============================================================ > #include "MyHandler.hpp" > #include <xercesc/sax2/Attributes.hpp> > #include <xercesc/sax/SAXParseException.hpp> > #include <xercesc/sax/SAXException.hpp> > #if defined(XERCES_NEW_IOSTREAMS) > #include <iostream> > #else > #include <iostream.h> > #endif > // --------------------------------------------------------------------------- > // MyHandler: Constructors and Destructor > // --------------------------------------------------------------------------- > MyHandler::MyHandler() : > fAttrCount(0) > , fCharacterCount(0) > , fElementCount(0) > , fSpaceCount(0) > , fSchemaErrors(false) > , fSystemException(false) > , eleName("") > , eleValue("") > , curElement("") > , curValue("") > , buf("") > { > } > MyHandler::~MyHandler() > { > } > // --------------------------------------------------------------------------- > // MyHandler: Implementation of the SAX DocumentHandler interface > // --------------------------------------------------------------------------- > void MyHandler::startElement(const XMLCh* const uri > , const XMLCh* const localname > , const XMLCh* const qname > , const Attributes& attrs) > { > curValue = ""; > curElement=""; > curElement=XMLString::transcode(localname); > elementList.push_back(curElement); > fElementCount++; > fAttrCount += attrs.getLength(); > } > void MyHandler::endElement( const XMLCh* const uri > , const XMLCh* const localname > , const XMLCh* const qname) > { > curElement = XMLString::transcode(localname); > elementList.remove(curElement); > } > void MyHandler::characters( const XMLCh* const chars > , const XMLSize_t length) > { > fCharacterCount += length; > curValue = StrUtil(chars); > } > void MyHandler::ignorableWhitespace( const XMLCh* const /* chars */ > , const XMLSize_t length) > { > fSpaceCount += length; > } > void 
MyHandler::startDocument() > { > fAttrCount = 0; > fCharacterCount = 0; > fElementCount = 0; > fSpaceCount = 0; > eleName=""; > eleValue=""; > curElement=""; > curValue=""; > elementList.clear(); > cout<<"Start to Parse File*****"<<endl; > } > void MyHandler::endDocument() > { > cout<<"Finish Parse File*****"<<endl; > } > // --------------------------------------------------------------------------- > // MyHandler: Overrides of the SAX ErrorHandler interface > // --------------------------------------------------------------------------- > void MyHandler::error(const SAXParseException& e) > { > string tmp; > string message = StrUtil(e.getMessage()); > tmp.append( "Error: " +message); > tmp.append( " curElement = [" + curElement + "] element, curValue = > ["+ curValue+ "]."); > vSchemaErrorContent.push_back(tmp); > cout<<"ERROR======================== msg = ["<<tmp<<"]."<<endl; > } > void MyHandler::fatalError(const SAXParseException& e) > { > fSchemaErrors = true; > char* message = XMLString::transcode(e.getMessage()); > cout << "Fatal Error: " << message << " at line: " << e.getLineNumber()<< > endl; > cout<<"FATAL ERROR============================ msg = ["<<message<<"]."<<endl; > XMLString::release(&message); > } > void MyHandler::warning(const SAXParseException& e) > { > char* message = XMLString::transcode(e.getMessage()); > cout << "Warning : " << message<< " at line: " << e.getLineNumber()<< > endl; > XMLString::release(&message); > } > void MyHandler::resetErrors() > { > fSchemaErrors = false; > fSystemException = false; > vSchemaErrorContent.clear(); > vSystemErrorContent.clear(); > } > MyHandler.hpp > ============================================================ > #include <xercesc/sax2/Attributes.hpp> > #include <xercesc/sax2/DefaultHandler.hpp> > #include <string> > #include <vector> > #include <list> > #include <sstream> > using namespace std; > XERCES_CPP_NAMESPACE_USE > class MyHandler : public DefaultHandler > { > public: > // > 
----------------------------------------------------------------------- > // Constructors and Destructor > // > ----------------------------------------------------------------------- > MyHandler(); > ~MyHandler(); > // > ----------------------------------------------------------------------- > // Getter methods > // > ----------------------------------------------------------------------- > XMLSize_t getElementCount() const > { > return fElementCount; > } > XMLSize_t getAttrCount() const > { > return fAttrCount; > } > XMLSize_t getCharacterCount() const > { > return fCharacterCount; > } > XMLSize_t getSpaceCount() const > { > return fSpaceCount; > } > bool hasfSchemaErrors() const{ > return fSchemaErrors; > } > bool hasfSystemException() const{ > return fSystemException; > } > vector<string> getSchemaErrorContent() const { > return vSchemaErrorContent; > } > vector<string> getSystemErrorContent() const { > return vSystemErrorContent; > } > void startElement(const XMLCh* const uri, const XMLCh* const > localname, const XMLCh* const qname, const Attributes& attrs); > void endElement(const XMLCh* const uri,const XMLCh* const localname,const > XMLCh* const qname ) ; > void characters(const XMLCh* const chars, const XMLSize_t length); > void ignorableWhitespace(const XMLCh* const chars, const XMLSize_t > length); > void startDocument(); > void endDocument(); > void warning(const SAXParseException& exc); > void error(const SAXParseException& exc); > void fatalError(const SAXParseException& exc); > void resetErrors(); > private: > XMLSize_t fAttrCount; > XMLSize_t fCharacterCount; > XMLSize_t fElementCount; > XMLSize_t fSpaceCount; > bool fSchemaErrors; > bool fSystemException; > vector<string> vSchemaErrorContent; > vector<string> vSystemErrorContent; > string curElement; > string curValue; > list<string> elementList; > }; -- This message was sent by Atlassian JIRA (v6.2#6252) --------------------------------------------------------------------- To unsubscribe, e-mail: 
c-dev-unsubscr...@xerces.apache.org For additional commands, e-mail: c-dev-h...@xerces.apache.org