ocean_helen created XERCESC-2030:
------------------------------------

             Summary: failed to do validation when there's Japanese words in 
the xml file
                 Key: XERCESC-2030
                 URL: https://issues.apache.org/jira/browse/XERCESC-2030
             Project: Xerces-C++
          Issue Type: Bug
          Components: SAX/SAX2
         Environment: SunOS 5.10 Generic_139555-08 sun4u sparc 
SUNW,Sun-Fire-V245
xerces C++ 3.1.1
            Reporter: ocean_helen


Hi owners,

     I ran into a problem when using Xerces-C++ 3.1.1 to schema-validate an XML 
file that contains Japanese words. The parser raised "FatalError: invalid 
multi-byte sequence" and stopped validating.
     Environment: Linux
     Locale:
        LANG=
        LC_CTYPE=en_GB.ISO8859-1
        LC_NUMERIC=C
        LC_TIME=en_GB.ISO8859-1
        LC_COLLATE=en_GB.ISO8859-1
        LC_MONETARY=en_GB.ISO8859-1
        LC_MESSAGES=C
        LC_ALL=

    The XML file is generated on Linux and, for business reasons, we cannot 
change the character set from ISO8859-1 to UTF-8 on the system side. Is there 
a workaround to skip this kind of error, or is it possible to change the 
character set from C++ so that validation passes?
     All the source code is attached below; please let me know if you need 
any more information.
     Looking forward to your reply, and thank you very much in advance.

Source Code: 
a.xsd:
============================================================
<?xml version="1.0" encoding="UTF-8"?>
<xs:schema xmlns:xs="http://www.w3.org/2001/XMLSchema">
 <xs:element name="phonebook">
    <xs:complexType>
      <xs:sequence>
        <xs:element name="name" minOccurs="1" maxOccurs="1">
          <xs:complexType>
            <xs:sequence>
              <xs:element name="first" type="xs:string"/>
            </xs:sequence>
          </xs:complexType>
        </xs:element>
      </xs:sequence>
    </xs:complexType>
 </xs:element>
</xs:schema>


a.xml:
============================================================
<?xml version="1.0" encoding="UTF-8"?>
<phonebook xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
xsi:noNamespaceSchemaLocation=
"gobitan.xsd">
 <name>
    <last>円短期</last>
</name>
</phonebook>


val.cpp
============================================================
#include <xercesc/util/PlatformUtils.hpp>
#include <xercesc/validators/common/Grammar.hpp>
#include <xercesc/sax2/SAX2XMLReader.hpp>
#include <xercesc/util/XMLException.hpp>
#include <xercesc/util/OutOfMemoryException.hpp>
#include <xercesc/util/XMLString.hpp>
#include <xercesc/sax2/XMLReaderFactory.hpp>
#include <stdio.h>
#include "MyHandler.hpp"
#if defined(XERCES_NEW_IOSTREAMS)
#include <iostream>
#else
#include <iostream.h>
#endif
using namespace std;
using namespace xercesc;
//XERCES_CPP_NAMESPACE_USE


int main( int argc , char** argv )
{
       XMLPlatformUtils::Initialize(); //.....
       SAX2XMLReader* parser = XMLReaderFactory::createXMLReader();

       parser->setFeature(XMLUni::fgSAX2CoreNameSpaces, true);
    parser->setFeature(XMLUni::fgSAX2CoreNameSpacePrefixes, true);
       parser->setFeature(XMLUni::fgXercesValidationErrorAsFatal, true);
       parser->setFeature(XMLUni::fgSAX2CoreValidation, true);
    parser->setFeature(XMLUni::fgXercesSchema, true);
    parser->setFeature(XMLUni::fgXercesSchemaFullChecking, true);
parser->setFeature(XMLUni::fgXercesLoadSchema,true);
parser->setExitOnFirstFatalError(false);
 parser->loadGrammar ("a.xsd", Grammar::SchemaGrammarType, true);

MyHandler* handler=new MyHandler();
        parser->setContentHandler(handler);
parser->setErrorHandler(handler);
           try
           {
       parser->parse("a.xml");
vector<string> errs=handler->getSchemaErrorContent();
if(errs.size()>0)
{
cout<<"ERROR MESSAGE OF SCHEMA VALIDATION============="<<endl;

for (unsigned int i = 0; i < errs.size();i++)
{
cout<<errs.at(i)<<endl;
}
}
cout<<"END TRY"<<endl;
 }
        catch (const XMLException& toCatch) {
            char* message = XMLString::transcode(toCatch.getMessage());
            cout << "Exception message is: \n"
                 << message << "\n";
            XMLString::release(&message);
            return -1;
        }
        catch (const SAXParseException& toCatch) {
            char* message = XMLString::transcode(toCatch.getMessage());
            cout << "Exception message is: \n"
                 << message << "\n";
            XMLString::release(&message);
            return -1;
        }
        catch (...) {
            cout << "Unexpected Exception \n" ;
            return -1;
        }

                cout<<"FINISH"<<endl;
       XMLPlatformUtils::Terminate();

       return 0;
}

MyHandler.cpp
============================================================
#include "MyHandler.hpp"
#include <xercesc/sax2/Attributes.hpp>
#include <xercesc/sax/SAXParseException.hpp>
#include <xercesc/sax/SAXException.hpp>
#if defined(XERCES_NEW_IOSTREAMS)
#include <iostream>
#else
#include <iostream.h>
#endif

// ---------------------------------------------------------------------------
//  MyHandler: Constructors and Destructor
// ---------------------------------------------------------------------------
// Builds a handler with every counter at zero, both error flags cleared and
// every string member empty.
MyHandler::MyHandler()
        : fAttrCount(0),
          fCharacterCount(0),
          fElementCount(0),
          fSpaceCount(0),
          fSchemaErrors(false),
          fSystemException(false),
          eleName(""),
          eleValue(""),
          curElement(""),
          curValue(""),
          buf("")
{
}

// Nothing to release explicitly: the body is intentionally empty.
MyHandler::~MyHandler() {}

// ---------------------------------------------------------------------------
//  MyHandler: Implementation of the SAX DocumentHandler interface
// ---------------------------------------------------------------------------
// Called at the start of every element.  Records the element's local name as
// the current element, pushes it onto elementList, clears the current text
// value and updates the element/attribute counters.
//
// uri, qname : unused.
// localname  : element name, transcoded to the local code page.
// attrs      : only its length is read.
void MyHandler::startElement(const XMLCh* const  uri
        , const XMLCh* const  localname
        , const XMLCh* const  qname
        , const Attributes& attrs)
{
        curValue = "";

        // XMLString::transcode() hands back a buffer the caller owns; copy it
        // into the std::string and release it, otherwise it leaks once per
        // element.
        char* name = XMLString::transcode(localname);
        if (name != 0)
        {
                curElement = name;
                XMLString::release(&name);
        }
        else
        {
                curElement = "";
        }

        elementList.push_back(curElement);
        fElementCount++;
        fAttrCount += attrs.getLength();
}

// Called at the end of every element.  Records the element's local name as
// the current element and removes it from elementList.
//
// uri, qname : unused.
// localname  : element name, transcoded to the local code page.
void MyHandler::endElement( const XMLCh* const uri
        , const XMLCh* const localname
        , const XMLCh* const qname)
{
        // XMLString::transcode() hands back a buffer the caller owns; copy it
        // into the std::string and release it, otherwise it leaks once per
        // element.
        char* name = XMLString::transcode(localname);
        if (name != 0)
        {
                curElement = name;
                XMLString::release(&name);
        }
        else
        {
                curElement = "";
        }

        // NOTE(review): list::remove() erases every entry equal to the name,
        // so a nested element with the same name as an ancestor also drops
        // the ancestor's entry -- confirm whether that is intended.
        elementList.remove(curElement);
}


// Character-data callback: grows the running character total by `length` and
// replaces the current value with the converted text.
// NOTE(review): StrUtil is defined outside this file and is given only the
// pointer, not `length` -- confirm it copes with the buffer it receives.
void MyHandler::characters(  const   XMLCh* const chars
        , const XMLSize_t length)
{
        fCharacterCount = fCharacterCount + length;
        curValue = StrUtil(chars);
}

// Ignorable-whitespace callback: only the amount is tracked, the characters
// themselves are not inspected.
void MyHandler::ignorableWhitespace( const   XMLCh* const /* chars */
        , const XMLSize_t length)
{
        fSpaceCount = fSpaceCount + length;
}

// Start-of-document callback: puts the per-document state (counters, current
// element/value strings, open-element list) back to its initial values and
// announces the start of parsing on stdout.  The collected error lists and
// error flags are left untouched.
void MyHandler::startDocument()
{
        // Counters.
        fAttrCount = fCharacterCount = fElementCount = fSpaceCount = 0;

        // Element bookkeeping.
        elementList.clear();
        eleName = "";
        eleValue = "";
        curElement = "";
        curValue = "";

        cout << "Start to Parse File*****" << endl;
}

// End-of-document callback: announces on stdout that parsing has finished.
void MyHandler::endDocument()
{
        cout << "Finish Parse File*****";
        cout << endl;
}


// ---------------------------------------------------------------------------
//  MyHandler: Overrides of the SAX ErrorHandler interface
// ---------------------------------------------------------------------------
void MyHandler::error(const SAXParseException& e)
{
string tmp;
 string message = StrUtil(e.getMessage());
    tmp.append( "Error: " +message);
         tmp.append( " curElement = [" + curElement + "] element, curValue = 
["+ curValue+ "].");
vSchemaErrorContent.push_back(tmp);
cout<<"ERROR======================== msg = ["<<tmp<<"]."<<endl;

}

void MyHandler::fatalError(const SAXParseException& e)
{
        fSchemaErrors = true;
 char* message = XMLString::transcode(e.getMessage());
    cout << "Fatal Error: " << message << " at line: " << e.getLineNumber()<< 
endl;
cout<<"FATAL ERROR============================ msg = ["<<message<<"]."<<endl;
 XMLString::release(&message);
}

// Warning callback: prints the parser's message and its line number on
// stdout; nothing is recorded in the handler.
void MyHandler::warning(const SAXParseException& e)
{
        // The transcoded text is owned here and released once printed.
        char* text = XMLString::transcode(e.getMessage());

        cout << "Warning : " << text;
        cout << " at line: " << e.getLineNumber() << endl;

        XMLString::release(&text);
}

// Forgets every recorded error: empties both message lists and lowers both
// error flags.
void MyHandler::resetErrors()
{
        vSystemErrorContent.clear();
        vSchemaErrorContent.clear();
        fSchemaErrors = fSystemException = false;
}


MyHandler.hpp
============================================================
#include <xercesc/sax2/Attributes.hpp>
#include <xercesc/sax2/DefaultHandler.hpp>
#include <string>
#include <vector>
#include <list>
#include <sstream>
using namespace std;
XERCES_CPP_NAMESPACE_USE
// SAX2 handler that counts elements, attributes, characters and ignorable
// whitespace while a document is parsed, remembers the element and text value
// currently being processed, and collects the messages of the errors the
// parser reports.
class MyHandler : public DefaultHandler
{
public:
        // -----------------------------------------------------------------------
        //  Constructors and Destructor
        // -----------------------------------------------------------------------
        MyHandler();
        ~MyHandler();


        // -----------------------------------------------------------------------
        //  Getter methods
        // -----------------------------------------------------------------------
        XMLSize_t getElementCount() const
        {
                return fElementCount;
        }

        XMLSize_t getAttrCount() const
        {
                return fAttrCount;
        }

        XMLSize_t getCharacterCount() const
        {
                return fCharacterCount;
        }

        XMLSize_t getSpaceCount() const
        {
                return fSpaceCount;
        }

        bool            hasfSchemaErrors() const
        {
                return fSchemaErrors;
        }

        bool            hasfSystemException() const
        {
                return fSystemException;
        }

        // Both accessors return a copy of the stored messages.
        vector<string>  getSchemaErrorContent() const
        {
                return vSchemaErrorContent;
        }

        vector<string>  getSystemErrorContent() const
        {
                return vSystemErrorContent;
        }

        // -----------------------------------------------------------------------
        //  SAX2 content-handler callbacks
        // -----------------------------------------------------------------------
        void startElement(const XMLCh* const uri, const XMLCh* const localname,
                          const XMLCh* const qname, const Attributes& attrs);
        void endElement(const XMLCh* const uri, const XMLCh* const localname,
                        const XMLCh* const qname);

        void characters(const XMLCh* const chars, const XMLSize_t length);
        void ignorableWhitespace(const XMLCh* const chars,
                                 const XMLSize_t length);
        void startDocument();
        void endDocument();

        // -----------------------------------------------------------------------
        //  SAX error-handler callbacks
        // -----------------------------------------------------------------------
        void warning(const SAXParseException& exc);
        void error(const SAXParseException& exc);
        void fatalError(const SAXParseException& exc);
        void resetErrors();


private:
        // Running totals, zeroed in startDocument().
        XMLSize_t     fAttrCount;
        XMLSize_t     fCharacterCount;
        XMLSize_t     fElementCount;
        XMLSize_t     fSpaceCount;

        // Error state, cleared by resetErrors().
        bool            fSchemaErrors;
        bool            fSystemException;
        vector<string>  vSchemaErrorContent;
        vector<string>  vSystemErrorContent;

        // eleName, eleValue and buf are initialised by the constructor (and
        // the first two reset in startDocument()) in MyHandler.cpp, so they
        // must be declared here.  The string members are kept in the same
        // relative order as the constructor's initialiser list.
        string eleName;
        string eleValue;
        string curElement;      // local name of the element last started/ended
        string curValue;        // text from the most recent characters() call
        string buf;
        list<string>    elementList;    // names pushed in startElement(), removed in endElement()
};





--
This message was sent by Atlassian JIRA
(v6.2#6252)

---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to