Author: centic
Date: Tue Mar 24 10:58:16 2020
New Revision: 1875571

URL: http://svn.apache.org/viewvc?rev=1875571&view=rev
Log:
Bug 64244: Take the replacement of RichText strings into account when 
computing length of strings

Modified:
    
poi/trunk/src/ooxml/java/org/apache/poi/xssf/usermodel/XSSFRichTextString.java
    
poi/trunk/src/ooxml/testcases/org/apache/poi/xssf/usermodel/TestXSSFRichTextString.java

Modified: 
poi/trunk/src/ooxml/java/org/apache/poi/xssf/usermodel/XSSFRichTextString.java
URL: 
http://svn.apache.org/viewvc/poi/trunk/src/ooxml/java/org/apache/poi/xssf/usermodel/XSSFRichTextString.java?rev=1875571&r1=1875570&r2=1875571&view=diff
==============================================================================
--- 
poi/trunk/src/ooxml/java/org/apache/poi/xssf/usermodel/XSSFRichTextString.java 
(original)
+++ 
poi/trunk/src/ooxml/java/org/apache/poi/xssf/usermodel/XSSFRichTextString.java 
Tue Mar 24 10:58:16 2020
@@ -201,7 +201,7 @@ public class XSSFRichTextString implemen
         CTRElt lt = st.addNewR();
         lt.setT(text);
         preserveSpaces(lt.xgetT());
-        
+
         if (font != null) {
             CTRPrElt pr = lt.addNewRPr();
             setRunAttributes(font.getCTFont(), pr);
@@ -247,7 +247,7 @@ public class XSSFRichTextString implemen
     }
 
     /**
-     * Does this string have any explicit formatting applied, or is 
+     * Does this string have any explicit formatting applied, or is
      *  it just text in the default style?
      */
     public boolean hasFormatting() {
@@ -496,6 +496,32 @@ public class XSSFRichTextString implemen
     }
 
     /**
+     * Optimized counting of actual length of a string
+     * considering the replacement of _xHHHH_ that needs
+     * to be applied to rich-text strings.
+     *
+     * @param value The string
+     * @return The length of the string, 0 if the string is null.
+     */
+    static int utfLength(String value) {
+        if(value == null) {
+            return 0;
+        }
+        if (!value.contains("_x")) {
+            return value.length();
+        }
+
+        Matcher matcher = utfPtrn.matcher(value);
+        int count = 0;
+        while (matcher.find()) {
+            count++;
+        }
+
+        // Length of pattern is 7 (_xHHHH_), and we replace it with one character
+        return value.length() - (count * 6);
+    }
+
+    /**
      * For all characters which cannot be represented in XML as defined by the XML 1.0 specification,
      * the characters are escaped using the Unicode numerical character representation escape character
      * format _xHHHH_, where H represents a hexadecimal character in the character's value.
@@ -512,7 +538,7 @@ public class XSSFRichTextString implemen
         if(value == null || !value.contains("_x")) {
             return value;
         }
-        
+
         StringBuilder buf = new StringBuilder();
         Matcher m = utfPtrn.matcher(value);
         int idx = 0;
@@ -528,13 +554,13 @@ public class XSSFRichTextString implemen
 
             idx = m.end();
         }
-        
-        // small optimization: don't go via StringBuilder if not necessary, 
-        // the encodings are very rare, so we should almost always go via this shortcut. 
+
+        // small optimization: don't go via StringBuilder if not necessary,
+        // the encodings are very rare, so we should almost always go via this shortcut.
         if(idx == 0) {
             return value;
         }
-        
+
         buf.append(value.substring(idx));
         return buf.toString();
     }
@@ -577,7 +603,7 @@ public class XSSFRichTextString implemen
             String txt = r.getT();
             CTRPrElt fmt = r.getRPr();
 
-            length += txt.length();
+            length += utfLength(txt);
             formats.put(length, fmt);
         }
         return formats;
@@ -605,7 +631,7 @@ public class XSSFRichTextString implemen
         }
         return stf;
     }
-    
+
     private ThemesTable getThemesTable() {
        if(styles == null) return null;
        return styles.getTheme();

Modified: 
poi/trunk/src/ooxml/testcases/org/apache/poi/xssf/usermodel/TestXSSFRichTextString.java
URL: 
http://svn.apache.org/viewvc/poi/trunk/src/ooxml/testcases/org/apache/poi/xssf/usermodel/TestXSSFRichTextString.java?rev=1875571&r1=1875570&r2=1875571&view=diff
==============================================================================
--- 
poi/trunk/src/ooxml/testcases/org/apache/poi/xssf/usermodel/TestXSSFRichTextString.java
 (original)
+++ 
poi/trunk/src/ooxml/testcases/org/apache/poi/xssf/usermodel/TestXSSFRichTextString.java
 Tue Mar 24 10:58:16 2020
@@ -563,4 +563,29 @@ public final class TestXSSFRichTextStrin
         assertEquals("Rich Text\r\nTest", 
wb.getSheetAt(0).getRow(1).getCell(1).getRichStringCellValue().getString());
         wb.close();
     }
+
+    @Test
+    public void testUtfDecode_withApplyFont() {
+        XSSFFont font = new XSSFFont();
+        font.setBold(true);
+        font.setFontHeightInPoints((short) 14);
+
+        CTRst st = CTRst.Factory.newInstance();
+        st.setT("abc_x000D_2ef_x000D_");
+        XSSFRichTextString rt = new XSSFRichTextString(st);
+        rt.applyFont(font);
+        assertEquals("abc\r2ef\r", rt.getString());
+    }
+
+    @Test
+    public void testUtfLength() {
+        assertEquals(0, XSSFRichTextString.utfLength(null));
+        assertEquals(0, XSSFRichTextString.utfLength(""));
+
+        assertEquals(3, XSSFRichTextString.utfLength("abc"));
+        assertEquals(3, XSSFRichTextString.utfLength("ab_x0032_"));
+        assertEquals(3, XSSFRichTextString.utfLength("a_x0032__x0032_"));
+        assertEquals(3, XSSFRichTextString.utfLength("_x0032_a_x0032_"));
+        assertEquals(3, XSSFRichTextString.utfLength("_x0032__x0032_a"));
+    }
 }



---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscr...@poi.apache.org
For additional commands, e-mail: commits-h...@poi.apache.org

Reply via email to