Author: amassari
Date: Tue Jun 21 10:52:28 2011
New Revision: 1137953

URL: http://svn.apache.org/viewvc?rev=1137953&view=rev
Log:
The ReaderMgr now asks the stream if it knows which encoding has been used 
(XERCESC-1967)

Modified:
    xerces/c/trunk/src/xercesc/internal/ReaderMgr.cpp
    xerces/c/trunk/src/xercesc/util/BinInputStream.cpp
    xerces/c/trunk/src/xercesc/util/BinInputStream.hpp
    xerces/c/trunk/src/xercesc/util/NetAccessors/BinHTTPInputStreamCommon.cpp
    xerces/c/trunk/src/xercesc/util/NetAccessors/BinHTTPInputStreamCommon.hpp

Modified: xerces/c/trunk/src/xercesc/internal/ReaderMgr.cpp
URL: 
http://svn.apache.org/viewvc/xerces/c/trunk/src/xercesc/internal/ReaderMgr.cpp?rev=1137953&r1=1137952&r2=1137953&view=diff
==============================================================================
--- xerces/c/trunk/src/xercesc/internal/ReaderMgr.cpp (original)
+++ xerces/c/trunk/src/xercesc/internal/ReaderMgr.cpp Tue Jun 21 10:52:28 2011
@@ -397,14 +397,17 @@ XMLReader* ReaderMgr::createReader( cons
     // We need to handle this exception to avoid leak on newStream.
 
     try {
-        if (src.getEncoding())
+               const XMLCh* encoding = src.getEncoding();
+               if(encoding == 0)
+                       encoding = newStream->getEncoding();
+        if (encoding)
         {
             retVal = new (fMemoryManager) XMLReader
                 (
                 src.getPublicId()
                 , src.getSystemId()
                 , newStream
-                , src.getEncoding()
+                , encoding
                 , refFrom
                 , type
                 , source

Modified: xerces/c/trunk/src/xercesc/util/BinInputStream.cpp
URL: 
http://svn.apache.org/viewvc/xerces/c/trunk/src/xercesc/util/BinInputStream.cpp?rev=1137953&r1=1137952&r2=1137953&view=diff
==============================================================================
--- xerces/c/trunk/src/xercesc/util/BinInputStream.cpp (original)
+++ xerces/c/trunk/src/xercesc/util/BinInputStream.cpp Tue Jun 21 10:52:28 2011
@@ -42,4 +42,12 @@ BinInputStream::BinInputStream()
 {
 }
 
+// ---------------------------------------------------------------------------
+//  BinInputStream: Default implementations
+// ---------------------------------------------------------------------------
+const XMLCh* BinInputStream::getEncoding() const
+{
+    return 0; // default: this stream reports no out-of-band encoding
+}
+
 XERCES_CPP_NAMESPACE_END

Modified: xerces/c/trunk/src/xercesc/util/BinInputStream.hpp
URL: 
http://svn.apache.org/viewvc/xerces/c/trunk/src/xercesc/util/BinInputStream.hpp?rev=1137953&r1=1137952&r2=1137953&view=diff
==============================================================================
--- xerces/c/trunk/src/xercesc/util/BinInputStream.hpp (original)
+++ xerces/c/trunk/src/xercesc/util/BinInputStream.hpp Tue Jun 21 10:52:28 2011
@@ -66,6 +66,23 @@ public :
      */
     virtual const XMLCh* getContentType() const = 0;
 
+    /**
+     * Return the "out-of-band" encoding for the data supplied by this
+     * input stream. If no such encoding is provided for the data, 0 is
+     * returned. This function is expected to return the correct value at
+     * any time after the construction of the stream.
+     *
+     * An example of the stream that may return non-0 from this function is
+     * an HTTP stream with the value returned taken from the "Content-Type"
+     * HTTP header. Note also that if the encoding of the data is known
+     * to the application by some other means then the setEncoding function
+     * in the InputSource object should be used instead. The getEncoding
+     * function should only be used to return information that is intrinsic
+     * to the stream.
+     *
+     * @return The name of the encoding, or 0 if one is not available.
+     */
+    virtual const XMLCh *getEncoding() const;
 
 protected :
     // -----------------------------------------------------------------------

Modified: 
xerces/c/trunk/src/xercesc/util/NetAccessors/BinHTTPInputStreamCommon.cpp
URL: 
http://svn.apache.org/viewvc/xerces/c/trunk/src/xercesc/util/NetAccessors/BinHTTPInputStreamCommon.cpp?rev=1137953&r1=1137952&r2=1137953&view=diff
==============================================================================
--- xerces/c/trunk/src/xercesc/util/NetAccessors/BinHTTPInputStreamCommon.cpp 
(original)
+++ xerces/c/trunk/src/xercesc/util/NetAccessors/BinHTTPInputStreamCommon.cpp 
Tue Jun 21 10:52:28 2011
@@ -45,7 +45,9 @@ XERCES_CPP_NAMESPACE_BEGIN
 BinHTTPInputStreamCommon::BinHTTPInputStreamCommon(MemoryManager *manager)
       : fBytesProcessed(0)
       , fBuffer(1023, manager)
+         , fBufferPos(0)
       , fContentType(0)
+         , fEncoding(0)
       , fMemoryManager(manager)
 {
 }
@@ -54,6 +56,7 @@ BinHTTPInputStreamCommon::BinHTTPInputSt
 BinHTTPInputStreamCommon::~BinHTTPInputStreamCommon()
 {
     if(fContentType) fMemoryManager->deallocate(fContentType);
+    if(fEncoding) fMemoryManager->deallocate(fEncoding);
 }
 
 static const char *CRLF = "\r\n";
@@ -263,6 +266,69 @@ const XMLCh *BinHTTPInputStreamCommon::g
     return fContentType;
 }
 
+// Returns the encoding implied by the HTTP "Content-Type" header of this
+// stream, or 0 if it cannot be determined:
+//   - the value of an explicit "charset=" parameter, if one is present;
+//   - otherwise, for text/* media types only, a default: us-ascii for
+//     text/xml, text/xml-* and text/<anything>+xml, iso-8859-1 for the rest;
+//   - 0 when there is no Content-Type header or no rule above applies.
+// A non-0 result is computed once and cached in fEncoding.
+const XMLCh *BinHTTPInputStreamCommon::getEncoding() const
+{
+    if(fEncoding == 0) {
+        const XMLCh* contentTypeHeader = getContentType();
+        if(contentTypeHeader)
+        {
+            const XMLCh szCharsetEquals[] = {chLatin_c, chLatin_h, chLatin_a, chLatin_r, chLatin_s, chLatin_e, chLatin_t, chEqual, chNull };
+
+            // Split "type/subtype; param=value; ..." on ';' and look for charset=
+            BaseRefVectorOf<XMLCh>* tokens=XMLString::tokenizeString(contentTypeHeader, chSemiColon, fMemoryManager);
+            for(XMLSize_t i=0;i<tokens->size();i++)
+            {
+                XMLString::removeWS(tokens->elementAt(i), fMemoryManager);
+                if(XMLString::startsWithI(tokens->elementAt(i), szCharsetEquals))
+                {
+                    // fEncoding is a lazily filled cache, hence the const_cast
+                    // inside this const member function
+                    const XMLCh* encodingName=tokens->elementAt(i)+XMLString::stringLen(szCharsetEquals);
+                    const_cast<BinHTTPInputStreamCommon*>(this)->fEncoding = XMLString::replicate(encodingName, fMemoryManager);
+                    break;
+                }
+            }
+            // if the charset=value entry was not present, check if we should use a default value
+            if(fEncoding==0 && tokens->size()>0)
+            {
+                const XMLCh szTextSlash[] = { chLatin_t, chLatin_e, chLatin_x, chLatin_t, chForwardSlash, chNull };
+                const XMLCh szXml[] = {chLatin_x, chLatin_m, chLatin_l, chNull };
+                const XMLCh szXmlDash[] = {chLatin_x, chLatin_m, chLatin_l, chDash, chNull };
+
+                // Work on a private copy of the media type: the scan below
+                // overwrites each '+' with a terminator
+                XMLBuffer contentType(XMLString::stringLen(contentTypeHeader), fMemoryManager);
+                contentType.set(tokens->elementAt(0));
+
+                XMLCh* strType = contentType.getRawBuffer();
+                XMLString::removeWS(strType, fMemoryManager);
+                if(XMLString::startsWithI(strType, szTextSlash))
+                {
+                    // text/* has a default encoding of iso-8859-1
+
+                    // text/xml, text/xml-external-parsed-entity, or a subtype like text/AnythingAtAll+xml
+                    // has a default encoding of us-ascii
+                    XMLCh* subType = strType+XMLString::stringLen(szTextSlash);
+                    XMLCh* cursor=subType;
+                    int plusPos;
+                    do
+                    {
+                        // Isolate the next '+'-separated piece of the subtype
+                        plusPos=XMLString::indexOf(cursor, chPlus);
+                        if(plusPos!=-1)
+                            *(cursor+plusPos)=0;
+                        if(XMLString::compareIStringASCII(cursor, szXml)==0 || XMLString::startsWithI(cursor, szXmlDash))
+                        {
+                            const_cast<BinHTTPInputStreamCommon*>(this)->fEncoding = XMLString::replicate(XMLUni::fgUSASCIIEncodingString, fMemoryManager);
+                            break;
+                        }
+                        cursor+=plusPos+1;
+                    // Continue only while another piece follows the '+' just
+                    // consumed; when no '+' was found the scan is finished, so
+                    // the loop always terminates.
+                    } while(plusPos!=-1);
+                    if(fEncoding==0)
+                        const_cast<BinHTTPInputStreamCommon*>(this)->fEncoding = XMLString::replicate(XMLUni::fgISO88591EncodingString, fMemoryManager);
+                }
+            }
+            delete tokens;
+        }
+    }
+    return fEncoding;
+}
+
 XMLSize_t BinHTTPInputStreamCommon::readBytes(XMLByte* const    toFill,
                                               const XMLSize_t    maxToRead)
 {

Modified: 
xerces/c/trunk/src/xercesc/util/NetAccessors/BinHTTPInputStreamCommon.hpp
URL: 
http://svn.apache.org/viewvc/xerces/c/trunk/src/xercesc/util/NetAccessors/BinHTTPInputStreamCommon.hpp?rev=1137953&r1=1137952&r2=1137953&view=diff
==============================================================================
--- xerces/c/trunk/src/xercesc/util/NetAccessors/BinHTTPInputStreamCommon.hpp 
(original)
+++ xerces/c/trunk/src/xercesc/util/NetAccessors/BinHTTPInputStreamCommon.hpp 
Tue Jun 21 10:52:28 2011
@@ -169,6 +169,7 @@ public :
     );
 
     virtual const XMLCh *getContentType() const;
+    virtual const XMLCh *getEncoding() const;
 
 protected :
     BinHTTPInputStreamCommon(MemoryManager *manager);
@@ -208,12 +209,17 @@ private :
     //  fBufferPos
     //      Pointers into fBuffer, showing start and end+1 of content
     //      that readBytes must return.
+       //  fContentType
+       //      Holds the HTTP header for the Content-Type setting
+       //  fEncoding
+       //      Holds the encoding of this stream, extracted from the Content-Type setting
     // -----------------------------------------------------------------------
 
     XMLSize_t           fBytesProcessed;
     CharBuffer          fBuffer;
     char *              fBufferPos;
     XMLCh *             fContentType;
+    XMLCh *             fEncoding;
     MemoryManager*      fMemoryManager;
 };
 



---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscr...@xerces.apache.org
For additional commands, e-mail: commits-h...@xerces.apache.org

Reply via email to