maxfortun commented on code in PR #166:
URL: https://github.com/apache/xalan-java/pull/166#discussion_r1465447856
##########
serializer/src/main/java/org/apache/xml/serializer/ToStream.java:
##########
@@ -1595,23 +1599,40 @@ else if (m_encodingInfo.isInEncoding(ch)) {
// not in the normal ASCII range, we also
// just leave it get added on to the clean characters
}
- else if (Encodings.isHighUTF16Surrogate(ch) && i < end-1
&& Encodings.isLowUTF16Surrogate(chars[i+1])) {
- // So, this is a (valid) surrogate pair
- if (! m_encodingInfo.isInEncoding(ch, chars[i+1])) {
- int codepoint = Encodings.toCodePoint(ch,
chars[i+1]);
- writeOutCleanChars(chars, i,
lastDirtyCharProcessed);
- writer.write("&#");
- writer.write(Integer.toString(codepoint));
- writer.write(';');
- lastDirtyCharProcessed = i+1;
- }
- i++; // skip the low surrogate, too
+ else if (Encodings.isHighUTF16Surrogate(ch)) {
+ // Store for later processing. We may be at the end of
a buffer,
+ // and must wait till low
surrogate arrives
+ // before we can do anything
with this.
+ writeOutCleanChars(chars, i, lastDirtyCharProcessed);
+ m_highUTF16Surrogate = ch;
+ lastDirtyCharProcessed = i;
+ }
+ else if (m_highUTF16Surrogate != 0 &&
Encodings.isLowUTF16Surrogate(ch)) {
+ // The complete utf16 byte sequence is now available
and may be serialized.
+ if (! m_encodingInfo.isInEncoding(m_highUTF16Surrogate,
ch)) {
+ int codepoint =
Encodings.toCodePoint(m_highUTF16Surrogate, ch);
+ writer.write("&#");
+ writer.write(Integer.toString(codepoint));
+ writer.write(';');
+ } else {
+ writer.write(m_highUTF16Surrogate);
+ writer.write(ch);
+ }
+ lastDirtyCharProcessed = i;
+ m_highUTF16Surrogate = 0;
}
else {
// This is a fallback plan, we get here if the
// encoding doesn't contain ch and it's not part
// of a surrogate pair
// The right thing is to write out an entity
+ if(m_highUTF16Surrogate != 0) {
Review Comment:
By the way, I did encounter this scenario, which is why I coded this check.
In theory, though, it should not happen.
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]