Author: fanningpj
Date: Fri Mar 17 23:35:33 2023
New Revision: 1908458
URL: http://svn.apache.org/viewvc?rev=1908458&view=rev
Log:
[bug-66532] more performant way to iterate over codepoints. Thanks to Matthias
Raschhofer
Modified:
poi/trunk/poi-ooxml/src/main/java/org/apache/poi/xssf/streaming/SheetDataWriter.java
poi/trunk/poi/src/main/java/org/apache/poi/util/CodepointsUtil.java
poi/trunk/poi/src/test/java/org/apache/poi/util/TestCodepointsUtil.java
Modified:
poi/trunk/poi-ooxml/src/main/java/org/apache/poi/xssf/streaming/SheetDataWriter.java
URL:
http://svn.apache.org/viewvc/poi/trunk/poi-ooxml/src/main/java/org/apache/poi/xssf/streaming/SheetDataWriter.java?rev=1908458&r1=1908457&r2=1908458&view=diff
==============================================================================
---
poi/trunk/poi-ooxml/src/main/java/org/apache/poi/xssf/streaming/SheetDataWriter.java
(original)
+++
poi/trunk/poi-ooxml/src/main/java/org/apache/poi/xssf/streaming/SheetDataWriter.java
Fri Mar 17 23:35:33 2023
@@ -31,6 +31,7 @@ import java.io.OutputStreamWriter;
import java.io.Writer;
import java.nio.charset.StandardCharsets;
import java.util.Iterator;
+import java.util.PrimitiveIterator;
import org.apache.logging.log4j.LogManager;
import org.apache.logging.log4j.Logger;
@@ -397,37 +398,39 @@ public class SheetDataWriter implements
return;
}
- for (Iterator<String> iter = CodepointsUtil.iteratorFor(s);
iter.hasNext(); ) {
- String codepoint = iter.next();
+ int codepoint;
+ for (PrimitiveIterator.OfInt iter =
CodepointsUtil.primitiveIterator(s); iter.hasNext(); ) {
+ codepoint = iter.nextInt();
switch (codepoint) {
- case "<":
+ case '<':
_out.write("&lt;");
break;
- case ">":
+ case '>':
_out.write("&gt;");
break;
- case "&":
+ case '&':
_out.write("&amp;");
break;
- case "\"":
+ case '\"':
_out.write("&quot;");
break;
// Special characters
- case "\n":
+ case '\n':
_out.write("&#xa;");
break;
- case "\r":
+ case '\r':
_out.write("&#xd;");
break;
- case "\t":
+ case '\t':
_out.write("&#x9;");
break;
- case "\u00A0": // NO-BREAK SPACE
+ case '\u00A0': // NO-BREAK SPACE
_out.write("&#xa0;");
break;
default:
- if (codepoint.length() == 1) {
- char c = codepoint.charAt(0);
+ final char[] chars = Character.toChars(codepoint);
+ if (chars.length == 1) {
+ char c = chars[0];
// YK: XmlBeans silently replaces all ISO control characters ( < 32) with question marks.
// the same rule applies to "not a character" symbols.
if (replaceWithQuestionMark(c)) {
@@ -436,7 +439,7 @@ public class SheetDataWriter implements
_out.write(c);
}
} else {
- _out.write(codepoint);
+ _out.write(chars);
}
break;
}
Modified: poi/trunk/poi/src/main/java/org/apache/poi/util/CodepointsUtil.java
URL:
http://svn.apache.org/viewvc/poi/trunk/poi/src/main/java/org/apache/poi/util/CodepointsUtil.java?rev=1908458&r1=1908457&r2=1908458&view=diff
==============================================================================
--- poi/trunk/poi/src/main/java/org/apache/poi/util/CodepointsUtil.java
(original)
+++ poi/trunk/poi/src/main/java/org/apache/poi/util/CodepointsUtil.java Fri Mar
17 23:35:33 2023
@@ -18,12 +18,28 @@
package org.apache.poi.util;
import java.util.Iterator;
+import java.util.PrimitiveIterator;
@Internal
public class CodepointsUtil {
+
+ /**
+ * @param text to iterate over
+ * @return iterator with Strings representing the codepoints
+ * @see #primitiveIterator(String) a more performant iterator
+ */
public static Iterator<String> iteratorFor(String text) {
return text.codePoints()
.mapToObj(codePoint -> new
String(Character.toChars(codePoint)))
.iterator();
}
+
+ /**
+ * @param text to iterate over
+ * @return iterator with ints representing the codepoints
+ * @since POI 5.2.4
+ */
+ public static PrimitiveIterator.OfInt primitiveIterator(String text) {
+ return text.codePoints().iterator();
+ }
}
\ No newline at end of file
Modified:
poi/trunk/poi/src/test/java/org/apache/poi/util/TestCodepointsUtil.java
URL:
http://svn.apache.org/viewvc/poi/trunk/poi/src/test/java/org/apache/poi/util/TestCodepointsUtil.java?rev=1908458&r1=1908457&r2=1908458&view=diff
==============================================================================
--- poi/trunk/poi/src/test/java/org/apache/poi/util/TestCodepointsUtil.java
(original)
+++ poi/trunk/poi/src/test/java/org/apache/poi/util/TestCodepointsUtil.java Fri
Mar 17 23:35:33 2023
@@ -23,6 +23,7 @@ import static org.junit.jupiter.api.Asse
import java.util.ArrayList;
import java.util.Iterator;
import java.util.List;
+import java.util.function.IntConsumer;
import org.junit.jupiter.api.Test;
@@ -38,8 +39,22 @@ class TestCodepointsUtil {
List<String> codePoints = new ArrayList<>();
CodepointsUtil.iteratorFor(unicodeSurrogates).forEachRemaining(codePoints::add);
assertEquals(17, codePoints.size());
- for(String point : codePoints){
- assertTrue(point.length() >=1 && point.length() <= 2, "codepoint "
+ point + "is wrong size");
+ for (String point : codePoints) {
+ assertTrue(point.length() >= 1 && point.length() <= 2, "codepoint " + point + "is wrong size");
+ }
+ }
+
+ @Test
+ void testPrimitiveIterator() {
+ final String unicodeSurrogates =
"\uD835\uDF4A\uD835\uDF4B\uD835\uDF4C\uD835\uDF4D\uD835\uDF4E"
+ + "abcdef123456";
+ List<String> codePoints = new ArrayList<>();
+
CodepointsUtil.primitiveIterator(unicodeSurrogates).forEachRemaining((IntConsumer)
(i) -> {
+ codePoints.add(new String(Character.toChars(i)));
+ });
+ assertEquals(17, codePoints.size());
+ for (String point : codePoints) {
+ assertTrue(point.length() >= 1 && point.length() <= 2, "codepoint " + point + "is wrong size");
}
}
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]