tools/source/misc/json_writer.cxx |   18 +++++++++++++++---
 1 file changed, 15 insertions(+), 3 deletions(-)

New commits:
commit 972c619f4f058d86c0cd0ed388bf141b94d9a912
Author:     Tor Lillqvist <t...@collabora.com>
AuthorDate: Fri Jan 15 03:03:41 2021 +0200
Commit:     Stephan Bergmann <sberg...@redhat.com>
CommitDate: Fri Jan 22 11:42:15 2021 +0100

    Make JsonWriter::writeEscapedOUString() handle surrogate pairs properly
    
    It is wrong to iterate over UTF-16 code units one by one. We have
    OUString::iterateCodePoints() to iterate over Unicode code points.
    
    The two UTF-16 code units of a surrogate pair (for a non-BMP code
    point) should not be encoded separately to UTF-8 bytes. It is the code
    point that should be encoded (to four bytes).
    
    Change-Id: Ica4341308deb6618c9c2da8dcee8a11ef4e8238d
    Reviewed-on: https://gerrit.libreoffice.org/c/core/+/109318
    Tested-by: Jenkins
    Reviewed-by: Noel Grandin <noel.gran...@collabora.co.uk>
    Reviewed-by: Tor Lillqvist <t...@collabora.com>
    Reviewed-on: https://gerrit.libreoffice.org/c/core/+/109474
    Reviewed-by: Stephan Bergmann <sberg...@redhat.com>

diff --git a/tools/source/misc/json_writer.cxx b/tools/source/misc/json_writer.cxx
index 1ccee8569480..0b13a63fc038 100644
--- a/tools/source/misc/json_writer.cxx
+++ b/tools/source/misc/json_writer.cxx
@@ -136,9 +136,10 @@ void JsonWriter::put(const char* pPropName, const OUString& rPropVal)
     mPos += 4;
 
     // Convert from UTF-16 to UTF-8 and perform escaping
-    for (int i = 0; i < rPropVal.getLength(); ++i)
+    sal_Int32 i = 0;
+    while (i < rPropVal.getLength())
     {
-        sal_Unicode ch = rPropVal[i];
+        sal_uInt32 ch = rPropVal.iterateCodePoints(&i);
         if (ch == '\\')
         {
             *mPos = static_cast<char>(ch);
@@ -165,7 +166,7 @@ void JsonWriter::put(const char* pPropName, const OUString& rPropVal)
             *mPos = 0x80 | (ch & 0x3F); /* 10xxxxxx */
             ++mPos;
         }
-        else
+        else if (ch <= 0xFFFF)
         {
             *mPos = 0xE0 | (ch >> 12); /* 1110xxxx */
             ++mPos;
@@ -174,6 +175,17 @@ void JsonWriter::put(const char* pPropName, const OUString& rPropVal)
             *mPos = 0x80 | (ch & 0x3F); /* 10xxxxxx */
             ++mPos;
         }
+        else
+        {
+            *mPos = 0xF0 | (ch >> 18); /* 11110xxx */
+            ++mPos;
+            *mPos = 0x80 | ((ch >> 12) & 0x3F); /* 10xxxxxx */
+            ++mPos;
+            *mPos = 0x80 | ((ch >> 6) & 0x3F); /* 10xxxxxx */
+            ++mPos;
+            *mPos = 0x80 | (ch & 0x3F); /* 10xxxxxx */
+            ++mPos;
+        }
     }
 
     *mPos = '"';
_______________________________________________
Libreoffice-commits mailing list
libreoffice-comm...@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/libreoffice-commits

Reply via email to