This is an automated email from the ASF dual-hosted git repository. jtao pushed a commit to branch unicode in repository https://gitbox.apache.org/repos/asf/pinot.git
commit 7a91e70cca98bfc369fe175c7955153ea14dcdaa
Author: jtao15 <[email protected]>
AuthorDate: Fri Aug 15 09:40:40 2025 -0700

    add test
---
 .../FixedByteValueReaderWriterTest.java | 36 ++++++++++++++++++++++
 1 file changed, 36 insertions(+)

diff --git a/pinot-segment-local/src/test/java/org/apache/pinot/segment/local/segment/index/readerwriter/FixedByteValueReaderWriterTest.java b/pinot-segment-local/src/test/java/org/apache/pinot/segment/local/segment/index/readerwriter/FixedByteValueReaderWriterTest.java
index 65b09df5644..5e47acf8bca 100644
--- a/pinot-segment-local/src/test/java/org/apache/pinot/segment/local/segment/index/readerwriter/FixedByteValueReaderWriterTest.java
+++ b/pinot-segment-local/src/test/java/org/apache/pinot/segment/local/segment/index/readerwriter/FixedByteValueReaderWriterTest.java
@@ -74,4 +74,40 @@ public class FixedByteValueReaderWriterTest implements PinotBuffersAfterMethodCh
       }
     }
   }
+
+  @Test(dataProvider = "params")
+  public void testFixedByteValueReaderWriterNonAscii(int maxStringLength, int configuredMaxLength, ByteOrder byteOrder)
+      throws IOException {
+    byte[] bytes = new byte[configuredMaxLength];
+    // Use a multi-byte UTF-8 character (é = 0xC3 0xA9)
+    byte[] nonAsciiChar = "é".getBytes(StandardCharsets.UTF_8);
+
+    try (PinotDataBuffer buffer = PinotDataBuffer.allocateDirect(configuredMaxLength * 1000L, byteOrder,
+        "testFixedByteValueReaderWriterNonAscii")) {
+      FixedByteValueReaderWriter readerWriter = new FixedByteValueReaderWriter(buffer);
+      List<String> inputs = new ArrayList<>(1000);
+
+      for (int i = 0; i < 1000; i++) {
+        // number of *characters* to write
+        int charCount = ThreadLocalRandom.current().nextInt(maxStringLength);
+        int byteCount = charCount * nonAsciiChar.length;
+        if (byteCount > configuredMaxLength) {
+          byteCount = configuredMaxLength - (configuredMaxLength % nonAsciiChar.length); // fit whole chars
+          charCount = byteCount / nonAsciiChar.length;
+        }
+
+        Arrays.fill(bytes, (byte) 0);
+        for (int pos = 0; pos < byteCount; pos += nonAsciiChar.length) {
+          System.arraycopy(nonAsciiChar, 0, bytes, pos, nonAsciiChar.length);
+        }
+
+        readerWriter.writeBytes(i, configuredMaxLength, bytes);
+        inputs.add("é".repeat(charCount));
+      }
+
+      for (int i = 0; i < 1000; i++) {
+        assertEquals(readerWriter.getUnpaddedString(i, configuredMaxLength, bytes), inputs.get(i));
+      }
+    }
+  }
 }

---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]
