Author: amassari Date: Tue Jun 21 10:52:28 2011 New Revision: 1137953 URL: http://svn.apache.org/viewvc?rev=1137953&view=rev Log: The ReaderMgr now asks the stream if it knows which encoding has been used (XERCESC-1967)
Modified: xerces/c/trunk/src/xercesc/internal/ReaderMgr.cpp xerces/c/trunk/src/xercesc/util/BinInputStream.cpp xerces/c/trunk/src/xercesc/util/BinInputStream.hpp xerces/c/trunk/src/xercesc/util/NetAccessors/BinHTTPInputStreamCommon.cpp xerces/c/trunk/src/xercesc/util/NetAccessors/BinHTTPInputStreamCommon.hpp Modified: xerces/c/trunk/src/xercesc/internal/ReaderMgr.cpp URL: http://svn.apache.org/viewvc/xerces/c/trunk/src/xercesc/internal/ReaderMgr.cpp?rev=1137953&r1=1137952&r2=1137953&view=diff ============================================================================== --- xerces/c/trunk/src/xercesc/internal/ReaderMgr.cpp (original) +++ xerces/c/trunk/src/xercesc/internal/ReaderMgr.cpp Tue Jun 21 10:52:28 2011 @@ -397,14 +397,17 @@ XMLReader* ReaderMgr::createReader( cons // We need to handle this exception to avoid leak on newStream. try { - if (src.getEncoding()) + const XMLCh* encoding = src.getEncoding(); + if(encoding == 0) + encoding = newStream->getEncoding(); + if (encoding) { retVal = new (fMemoryManager) XMLReader ( src.getPublicId() , src.getSystemId() , newStream - , src.getEncoding() + , encoding , refFrom , type , source Modified: xerces/c/trunk/src/xercesc/util/BinInputStream.cpp URL: http://svn.apache.org/viewvc/xerces/c/trunk/src/xercesc/util/BinInputStream.cpp?rev=1137953&r1=1137952&r2=1137953&view=diff ============================================================================== --- xerces/c/trunk/src/xercesc/util/BinInputStream.cpp (original) +++ xerces/c/trunk/src/xercesc/util/BinInputStream.cpp Tue Jun 21 10:52:28 2011 @@ -42,4 +42,12 @@ BinInputStream::BinInputStream() { } +// --------------------------------------------------------------------------- +// BinInputStream: Default implementations +// --------------------------------------------------------------------------- +const XMLCh* BinInputStream::getEncoding() const +{ + return 0; +} + XERCES_CPP_NAMESPACE_END Modified: xerces/c/trunk/src/xercesc/util/BinInputStream.hpp URL: 
http://svn.apache.org/viewvc/xerces/c/trunk/src/xercesc/util/BinInputStream.hpp?rev=1137953&r1=1137952&r2=1137953&view=diff
==============================================================================
--- xerces/c/trunk/src/xercesc/util/BinInputStream.hpp (original)
+++ xerces/c/trunk/src/xercesc/util/BinInputStream.hpp Tue Jun 21 10:52:28 2011
@@ -66,6 +66,23 @@ public :
      */
     virtual const XMLCh* getContentType() const = 0;
 
+    /**
+     * Return the "out-of-band" encoding for the data supplied by this
+     * input stream. If no such encoding is provided for the data, 0 is
+     * returned. This function is expected to return the correct value at
+     * any time after the construction of the stream.
+     *
+     * An example of the stream that may return non-0 from this function is
+     * an HTTP stream with the value returned taken from the "Content-Type"
+     * HTTP header. Note also that if the encoding of the data is known
+     * to the application by some other means then the setEncoding function
+     * in the InputSource object should be used instead. The getEncoding
+     * function should only be used to return information that is intrinsic
+     * to the stream.
+     *
+     * @return The name of the encoding, or 0 if one is not available.
+     */
+    virtual const XMLCh *getEncoding() const;
 
 protected :
     // -----------------------------------------------------------------------

Modified: xerces/c/trunk/src/xercesc/util/NetAccessors/BinHTTPInputStreamCommon.cpp
URL: http://svn.apache.org/viewvc/xerces/c/trunk/src/xercesc/util/NetAccessors/BinHTTPInputStreamCommon.cpp?rev=1137953&r1=1137952&r2=1137953&view=diff
==============================================================================
--- xerces/c/trunk/src/xercesc/util/NetAccessors/BinHTTPInputStreamCommon.cpp (original)
+++ xerces/c/trunk/src/xercesc/util/NetAccessors/BinHTTPInputStreamCommon.cpp Tue Jun 21 10:52:28 2011
@@ -45,7 +45,9 @@ XERCES_CPP_NAMESPACE_BEGIN
 BinHTTPInputStreamCommon::BinHTTPInputStreamCommon(MemoryManager *manager)
       : fBytesProcessed(0)
       , fBuffer(1023, manager)
+      , fBufferPos(0)
       , fContentType(0)
+      , fEncoding(0)
       , fMemoryManager(manager)
 {
 }
@@ -54,6 +56,7 @@ BinHTTPInputStreamCommon::BinHTTPInputSt
 BinHTTPInputStreamCommon::~BinHTTPInputStreamCommon()
 {
     if(fContentType) fMemoryManager->deallocate(fContentType);
+    if(fEncoding) fMemoryManager->deallocate(fEncoding);
 }
 
 static const char *CRLF = "\r\n";
@@ -263,6 +266,78 @@ const XMLCh *BinHTTPInputStreamCommon::g
     return fContentType;
 }
 
+// ---------------------------------------------------------------------------
+//  Returns the encoding implied by the HTTP Content-Type header, or 0 when
+//  there is no such header or no encoding can be derived from it. A non-0
+//  result is cached in fEncoding, so a successful lookup is done only once.
+//    - An explicit "charset=" parameter is used when present.
+//    - Otherwise, a text/* type whose subtype has a '+'-separated part that
+//      is "xml" or starts with "xml-" defaults to US-ASCII.
+//    - Any other text/* type defaults to ISO-8859-1.
+// ---------------------------------------------------------------------------
+const XMLCh *BinHTTPInputStreamCommon::getEncoding() const
+{
+    if(fEncoding == 0) {
+        const XMLCh* contentTypeHeader = getContentType();
+        if(contentTypeHeader)
+        {
+            const XMLCh szCharsetEquals[] = {chLatin_c, chLatin_h, chLatin_a, chLatin_r, chLatin_s, chLatin_e, chLatin_t, chEqual, chNull };
+
+            BaseRefVectorOf<XMLCh>* tokens=XMLString::tokenizeString(contentTypeHeader, chSemiColon, fMemoryManager);
+            for(XMLSize_t i=0;i<tokens->size();i++)
+            {
+                XMLString::removeWS(tokens->elementAt(i), fMemoryManager);
+                if(XMLString::startsWithI(tokens->elementAt(i), szCharsetEquals))
+                {
+                    // fEncoding is a lazily filled cache, so it is written from this const method
+                    const XMLCh* encodingName=tokens->elementAt(i)+XMLString::stringLen(szCharsetEquals);
+                    const_cast<BinHTTPInputStreamCommon*>(this)->fEncoding = XMLString::replicate(encodingName, fMemoryManager);
+                    break;
+                }
+            }
+            // if the charset=value entry was not present, check if we should use a default value
+            if(fEncoding==0 && tokens->size()>0)
+            {
+                const XMLCh szTextSlash[] = { chLatin_t, chLatin_e, chLatin_x, chLatin_t, chForwardSlash, chNull };
+                const XMLCh szXml[] = {chLatin_x, chLatin_m, chLatin_l, chNull };
+                const XMLCh szXmlDash[] = {chLatin_x, chLatin_m, chLatin_l, chDash, chNull };
+
+                XMLBuffer contentType(XMLString::stringLen(contentTypeHeader), fMemoryManager);
+                contentType.set(tokens->elementAt(0));
+
+                XMLCh* strType = contentType.getRawBuffer();
+                XMLString::removeWS(strType, fMemoryManager);
+                if(XMLString::startsWithI(strType, szTextSlash))
+                {
+                    // text/* has a default encoding of iso-8859-1
+
+                    // text/xml, text/xml-external-parsed-entity, or a subtype like text/AnythingAtAll+xml
+                    // has a default encoding of us-ascii
+                    XMLCh* subType = strType+XMLString::stringLen(szTextSlash);
+                    XMLCh* cursor=subType;
+                    int plusPos;
+                    do
+                    {
+                        plusPos=XMLString::indexOf(cursor, chPlus);
+                        if(plusPos!=-1)
+                            *(cursor+plusPos)=0;
+                        if(XMLString::compareIStringASCII(cursor, szXml)==0 || XMLString::startsWithI(cursor, szXmlDash))
+                        {
+                            const_cast<BinHTTPInputStreamCommon*>(this)->fEncoding = XMLString::replicate(XMLUni::fgUSASCIIEncodingString, fMemoryManager);
+                            break;
+                        }
+                        cursor+=plusPos+1;
+                    } while(plusPos!=-1);   // keep going while another '+'-separated part follows; stop after the last one
+                    if(fEncoding==0)
+                        const_cast<BinHTTPInputStreamCommon*>(this)->fEncoding = XMLString::replicate(XMLUni::fgISO88591EncodingString, fMemoryManager);
+                }
+            }
+            delete tokens;
+        }
+    }
+    return fEncoding;
+}
+
 XMLSize_t BinHTTPInputStreamCommon::readBytes(XMLByte* const toFill,
                                     const XMLSize_t maxToRead)
 {

Modified: xerces/c/trunk/src/xercesc/util/NetAccessors/BinHTTPInputStreamCommon.hpp
URL: http://svn.apache.org/viewvc/xerces/c/trunk/src/xercesc/util/NetAccessors/BinHTTPInputStreamCommon.hpp?rev=1137953&r1=1137952&r2=1137953&view=diff
============================================================================== --- xerces/c/trunk/src/xercesc/util/NetAccessors/BinHTTPInputStreamCommon.hpp (original) +++ xerces/c/trunk/src/xercesc/util/NetAccessors/BinHTTPInputStreamCommon.hpp Tue Jun 21 10:52:28 2011 @@ -169,6 +169,7 @@ public : ); virtual const XMLCh *getContentType() const; + virtual const XMLCh *getEncoding() const; protected : BinHTTPInputStreamCommon(MemoryManager *manager); @@ -208,12 +209,17 @@ private : // fBufferPos // Pointers into fBuffer, showing start and end+1 of content // that readBytes must return. + // fContentType + // Holds the HTTP header for the Content-Type setting + // fEncoding + // Holds the encoding of this stream, extracted from the Content-Type setting // ----------------------------------------------------------------------- XMLSize_t fBytesProcessed; CharBuffer fBuffer; char * fBufferPos; XMLCh * fContentType; + XMLCh * fEncoding; MemoryManager* fMemoryManager; }; --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@xerces.apache.org For additional commands, e-mail: commits-h...@xerces.apache.org