This is an automated email from the ASF dual-hosted git repository. swebb2066 pushed a commit to branch xml_layout_char_neutralisation in repository https://gitbox.apache.org/repos/asf/logging-log4cxx.git
commit 15338e797a70d26eb0eab783419c454f851f0cc3 Author: Stephen Webb <[email protected]> AuthorDate: Mon Mar 16 17:30:37 2026 +1100 Use character references for control characters in XML and HTML output --- src/main/cpp/transform.cpp | 143 ++++++++++++++++----------- src/main/include/log4cxx/helpers/transform.h | 8 ++ 2 files changed, 95 insertions(+), 56 deletions(-) diff --git a/src/main/cpp/transform.cpp b/src/main/cpp/transform.cpp index 6cc7ca77..75ee8586 100644 --- a/src/main/cpp/transform.cpp +++ b/src/main/cpp/transform.cpp @@ -27,27 +27,36 @@ using namespace LOG4CXX_NS::helpers; void Transform::appendEscapingTags( LogString& buf, const LogString& input) { - //Check if the string is zero length -- if so, return - //what was sent in. - - if (input.length() == 0 ) - { - return; - } - logchar specials[] = { 0x22 /* " */, 0x26 /* & */, 0x3C /* < */, 0x3E /* > */, 0x00 }; size_t start = 0; - size_t special = input.find_first_of(specials, start); - - while (special != LogString::npos) + for (size_t index = 0; index < input.size(); ++index) { - if (special > start) + int ch = input[index]; + bool cdataEnd = false; + // Allowable XML 1.0 characters are: + // #x9 | #xA | #xD | [#x20-#xD7FF] | [#xE000-#xFFFD] | [#x10000-#x10FFFF] + if (0x20 <= ch && ch <= 0xD7FF) { - buf.append(input, start, special - start); + auto pSpecial = &specials[0]; + while (*pSpecial && *pSpecial != ch) + ++pSpecial; + if (!*pSpecial) + continue; + } + else if (0x9 == ch || 0xA == ch || 0xD == ch || + (0xE000 <= ch && ch <= 0xFFFD) || + (0x10000 <= ch && ch <= 0x10FFFF)) + { + continue; } - switch (input[special]) + if (start < index) + buf.append(input, start, index - start); + start = index + 1; + switch (ch) { + case 0: // Do not output a NUL character + break; case 0x22: buf.append(LOG4CXX_STR(""")); break; @@ -65,20 +74,9 @@ void Transform::appendEscapingTags( break; default: - buf.append(1, input[special]); + appendCharacterReference(buf, ch); break; } - - start = special + 1; - - if (special < input.size()) - { - special = input.find_first_of(specials, start); - } - else - { - special = LogString::npos; - } } if (start < input.size()) @@ -90,43 +88,76 @@ void Transform::appendEscapingTags( void Transform::appendEscapingCDATA( LogString& buf, const LogString& input) { - static const WideLife<LogString> CDATA_END(LOG4CXX_STR("]]>")); - static const WideLife<LogString> CDATA_EMBEDED_END(LOG4CXX_STR("]]>]]><![CDATA[")); - + static const LogString CDATA_END(LOG4CXX_STR("]]>")); const LogString::size_type CDATA_END_LEN = 3; - - - if (input.length() == 0 ) - { - return; - } - - LogString::size_type end = input.find(CDATA_END); - - if (end == LogString::npos) - { - buf.append(input); - return; - } - - LogString::size_type start = 0; - - while (end != LogString::npos) + static const LogString CDATA_EMBEDED_END(LOG4CXX_STR("]]><![CDATA[")); + size_t start = 0; + for (size_t index = 0; index < input.size(); ++index) { - buf.append(input, start, end - start); - buf.append(CDATA_EMBEDED_END); - start = end + CDATA_END_LEN; - - if (start < input.length()) + int ch = input[index]; + bool cdataEnd = false; + // Allowable XML 1.0 characters are: + // #x9 | #xA | #xD | [#x20-#xD7FF] | [#xE000-#xFFFD] | [#x10000-#x10FFFF] + if (0x20 <= ch && ch <= 0xD7FF) { - end = input.find(CDATA_END, start); + if (CDATA_END[0] == ch && + index + CDATA_END_LEN <= input.size() && + 0 == input.compare(index, CDATA_END_LEN, CDATA_END)) + { + index += CDATA_END_LEN; + cdataEnd = true; + } + else + { + continue; + } } - else + else if (0x9 == ch || 0xA == ch || 0xD == ch || + (0xE000 <= ch && ch <= 0xFFFD) || + (0x10000 <= ch && ch <= 0x10FFFF)) { - return; + continue; } + + if (start < index) + buf.append(input, start, index - start); + if (cdataEnd) + { + buf.append(CDATA_EMBEDED_END); + --index; + } + else if (0 != ch) + appendCharacterReference(buf, ch); + start = index + 1; } - buf.append(input, start, input.length() - start); + if (start < input.size()) + buf.append(input, start, input.size() - start); } +void Transform::appendCharacterReference(LogString& buf, int ch) +{ + auto toHexDigit = [](int ch) -> int + { + return (10 <= ch ? (0x61 - 10) : 0x30) + ch; + }; + buf.push_back('&'); + buf.push_back('#'); + buf.push_back('x'); + if (0xFFFFFFF < ch) + buf.push_back(toHexDigit((ch & 0x70000000) >> 28)); + if (0xFFFFFF < ch) + buf.push_back(toHexDigit((ch & 0xF000000) >> 24)); + if (0xFFFFF < ch) + buf.push_back(toHexDigit((ch & 0xF00000) >> 20)); + if (0xFFFF < ch) + buf.push_back(toHexDigit((ch & 0xF0000) >> 16)); + if (0xFFF < ch) + buf.push_back(toHexDigit((ch & 0xF000) >> 12)); + if (0xFF < ch) + buf.push_back(toHexDigit((ch & 0xF00) >> 8)); + if (0xF < ch) + buf.push_back(toHexDigit((ch & 0xF0) >> 4)); + buf.push_back(toHexDigit(ch & 0xF)); + buf.push_back(';'); +} diff --git a/src/main/include/log4cxx/helpers/transform.h b/src/main/include/log4cxx/helpers/transform.h index 4ae67aac..1a24caf9 100644 --- a/src/main/include/log4cxx/helpers/transform.h +++ b/src/main/include/log4cxx/helpers/transform.h @@ -54,6 +54,14 @@ class LOG4CXX_EXPORT Transform */ static void appendEscapingCDATA( LogString& buf, const LogString& input); + + /** + * Add \c ch to \c buf as an XML character reference. + * + * @param buf output stream holding the XML data to this point. + * @param ch the value to encode as a XML character reference + */ + static void appendCharacterReference(LogString& buf, int ch); }; // class Transform } // namespace helpers } //namespace log4cxx
