tools/source/misc/json_writer.cxx | 18 +++++++++++++++--- 1 file changed, 15 insertions(+), 3 deletions(-)
New commits: commit 972c619f4f058d86c0cd0ed388bf141b94d9a912 Author: Tor Lillqvist <t...@collabora.com> AuthorDate: Fri Jan 15 03:03:41 2021 +0200 Commit: Stephan Bergmann <sberg...@redhat.com> CommitDate: Fri Jan 22 11:42:15 2021 +0100 Make JsonWriter::writeEscapedOUString() handle surrogate pairs properly It is wrong to iterate over UTF-16 code units one by one. We have OUString::iterateCodePoints() to iterate over Unicode code points. The two UTF-16 code units of a surrogate pair (for a non-BMP code point) should not be encoded separately to UTF-8 bytes. It is the code point that should be encoded (to four bytes). Change-Id: Ica4341308deb6618c9c2da8dcee8a11ef4e8238d Reviewed-on: https://gerrit.libreoffice.org/c/core/+/109318 Tested-by: Jenkins Reviewed-by: Noel Grandin <noel.gran...@collabora.co.uk> Reviewed-by: Tor Lillqvist <t...@collabora.com> Reviewed-on: https://gerrit.libreoffice.org/c/core/+/109474 Reviewed-by: Stephan Bergmann <sberg...@redhat.com> diff --git a/tools/source/misc/json_writer.cxx b/tools/source/misc/json_writer.cxx index 1ccee8569480..0b13a63fc038 100644 --- a/tools/source/misc/json_writer.cxx +++ b/tools/source/misc/json_writer.cxx @@ -136,9 +136,10 @@ void JsonWriter::put(const char* pPropName, const OUString& rPropVal) mPos += 4; // Convert from UTF-16 to UTF-8 and perform escaping - for (int i = 0; i < rPropVal.getLength(); ++i) + sal_Int32 i = 0; + while (i < rPropVal.getLength()) { - sal_Unicode ch = rPropVal[i]; + sal_uInt32 ch = rPropVal.iterateCodePoints(&i); if (ch == '\\') { *mPos = static_cast<char>(ch); @@ -165,7 +166,7 @@ void JsonWriter::put(const char* pPropName, const OUString& rPropVal) *mPos = 0x80 | (ch & 0x3F); /* 10xxxxxx */ ++mPos; } - else + else if (ch <= 0xFFFF) { *mPos = 0xE0 | (ch >> 12); /* 1110xxxx */ ++mPos; @@ -174,6 +175,17 @@ void JsonWriter::put(const char* pPropName, const OUString& rPropVal) *mPos = 0x80 | (ch & 0x3F); /* 10xxxxxx */ ++mPos; } + else + { + *mPos = 0xF0 | (ch >> 18); /* 11110xxx */ + ++mPos; + *mPos = 0x80 | ((ch >> 12) & 0x3F); /* 10xxxxxx */ + ++mPos; + *mPos = 0x80 | ((ch >> 6) & 0x3F); /* 10xxxxxx */ + ++mPos; + *mPos = 0x80 | (ch & 0x3F); /* 10xxxxxx */ + ++mPos; + } } *mPos = '"'; _______________________________________________ Libreoffice-commits mailing list libreoffice-comm...@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/libreoffice-commits