This is an automated email from the ASF dual-hosted git repository.
abhishek pushed a commit to branch 31.0.0
in repository https://gitbox.apache.org/repos/asf/druid.git
The following commit(s) were added to refs/heads/31.0.0 by this push:
new f6940669653 When removeNullBytes is set, length calculations did not
take into account null bytes. (#17232) (#17266)
f6940669653 is described below
commit f6940669653b70d9d13e93899ed975d52aef773d
Author: Kashif Faraz <[email protected]>
AuthorDate: Mon Oct 7 20:51:54 2024 +0530
When removeNullBytes is set, length calculations did not take into account
null bytes. (#17232) (#17266)
* When replaceNullBytes is set, length calculations did not take into
account null bytes.
Co-authored-by: Karan Kumar <[email protected]>
---
.../druid/frame/field/StringFieldWriter.java | 4 +-
.../apache/druid/frame/write/FrameWriterUtils.java | 22 +++--
.../druid/frame/field/StringFieldWriterTest.java | 98 ++++++++++++++++++----
3 files changed, 102 insertions(+), 22 deletions(-)
diff --git
a/processing/src/main/java/org/apache/druid/frame/field/StringFieldWriter.java
b/processing/src/main/java/org/apache/druid/frame/field/StringFieldWriter.java
index 2ffb79a12da..5b304f8a053 100644
---
a/processing/src/main/java/org/apache/druid/frame/field/StringFieldWriter.java
+++
b/processing/src/main/java/org/apache/druid/frame/field/StringFieldWriter.java
@@ -128,14 +128,14 @@ public class StringFieldWriter implements FieldWriter
written++;
if (len > 0) {
- FrameWriterUtils.copyByteBufferToMemoryDisallowingNullBytes(
+ int lenWritten =
FrameWriterUtils.copyByteBufferToMemoryDisallowingNullBytes(
utf8Datum,
memory,
position + written,
len,
removeNullBytes
);
- written += len;
+ written += lenWritten;
}
}
diff --git
a/processing/src/main/java/org/apache/druid/frame/write/FrameWriterUtils.java
b/processing/src/main/java/org/apache/druid/frame/write/FrameWriterUtils.java
index 9ba2b29fe51..a480767f111 100644
---
a/processing/src/main/java/org/apache/druid/frame/write/FrameWriterUtils.java
+++
b/processing/src/main/java/org/apache/druid/frame/write/FrameWriterUtils.java
@@ -212,9 +212,11 @@ public class FrameWriterUtils
/**
* Copies {@code src} to {@code dst}, disallowing null bytes to be written
to the destination. If {@code removeNullBytes}
- * is true, the method will drop the null bytes, and if it is false, the
method will throw an exception.
+ * is true, the method will drop the null bytes, and if it is false, the
method will throw an exception. The written bytes
+ * can be less than "len" if the null bytes are dropped, and the callers
must evaluate the return value to see the actual
+ * length of the buffer that is copied
*/
- public static void copyByteBufferToMemoryDisallowingNullBytes(
+ public static int copyByteBufferToMemoryDisallowingNullBytes(
final ByteBuffer src,
final WritableMemory dst,
final long dstPosition,
@@ -222,11 +224,16 @@ public class FrameWriterUtils
final boolean removeNullBytes
)
{
- copyByteBufferToMemory(src, dst, dstPosition, len, false, removeNullBytes);
+ return copyByteBufferToMemory(src, dst, dstPosition, len, false,
removeNullBytes);
}
/**
- * Copies "len" bytes from {@code src.position()} to "dstPosition" in
"memory". Does not update the position of src.
+ * Tries to copy "len" bytes from {@code src.position()} to "dstPosition" in
"memory". If removeNullBytes is set to true,
+ * it will remove the U+0000 bytes from the src buffer, and the written
bytes will be less than "len". It is imperative that the
+ * callers check the number of written bytes when "removeNullBytes" can be
set to true, i.e. this method is invoked via
+ * {@link #copyByteBufferToMemoryDisallowingNullBytes}
+ * <p>
+ * Does not update the position of src.
* <p>
* Whenever "allowNullBytes" is true, "removeNullBytes" must be false. Use
the methods {@link #copyByteBufferToMemoryAllowingNullBytes}
* and {@link #copyByteBufferToMemoryDisallowingNullBytes} to copy between
the memory
@@ -234,7 +241,7 @@ public class FrameWriterUtils
*
* @throws InvalidNullByteException if "allowNullBytes" and
"removeNullBytes" is false and a null byte is encountered
*/
- private static void copyByteBufferToMemory(
+ private static int copyByteBufferToMemory(
final ByteBuffer src,
final WritableMemory dst,
final long dstPosition,
@@ -251,6 +258,7 @@ public class FrameWriterUtils
}
final int srcEnd = src.position() + len;
+ int writtenLength = 0;
if (allowNullBytes) {
if (src.hasArray()) {
@@ -264,6 +272,8 @@ public class FrameWriterUtils
dst.putByte(q, b);
}
}
+ // The method does not alter the length of the memory copied if null
bytes are allowed
+ writtenLength = len;
} else {
long q = dstPosition;
for (int p = src.position(); p < srcEnd; p++) {
@@ -282,9 +292,11 @@ public class FrameWriterUtils
} else {
dst.putByte(q, b);
q++;
+ writtenLength++;
}
}
}
+ return writtenLength;
}
/**
diff --git
a/processing/src/test/java/org/apache/druid/frame/field/StringFieldWriterTest.java
b/processing/src/test/java/org/apache/druid/frame/field/StringFieldWriterTest.java
index 0108e772d94..adde9f89cbd 100644
---
a/processing/src/test/java/org/apache/druid/frame/field/StringFieldWriterTest.java
+++
b/processing/src/test/java/org/apache/druid/frame/field/StringFieldWriterTest.java
@@ -21,6 +21,7 @@ package org.apache.druid.frame.field;
import org.apache.datasketches.memory.WritableMemory;
import org.apache.druid.common.config.NullHandling;
+import org.apache.druid.frame.write.InvalidNullByteException;
import org.apache.druid.java.util.common.ISE;
import org.apache.druid.java.util.common.StringUtils;
import org.apache.druid.segment.ColumnValueSelector;
@@ -40,9 +41,11 @@ import org.mockito.junit.MockitoRule;
import org.mockito.quality.Strictness;
import java.nio.ByteBuffer;
+import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
import java.util.List;
+import java.util.stream.Collectors;
public class StringFieldWriterTest extends InitializedNullHandlingTest
{
@@ -57,9 +60,12 @@ public class StringFieldWriterTest extends
InitializedNullHandlingTest
@Mock
public DimensionSelector selectorUtf8;
+
private WritableMemory memory;
private FieldWriter fieldWriter;
private FieldWriter fieldWriterUtf8;
+ private FieldWriter fieldWriterRemoveNull;
+ private FieldWriter fieldWriterUtf8RemoveNull;
@Before
public void setUp()
@@ -67,13 +73,32 @@ public class StringFieldWriterTest extends
InitializedNullHandlingTest
memory = WritableMemory.allocate(1000);
fieldWriter = new StringFieldWriter(selector, false);
fieldWriterUtf8 = new StringFieldWriter(selectorUtf8, false);
+ fieldWriterRemoveNull = new StringFieldWriter(selector, true);
+ fieldWriterUtf8RemoveNull = new StringFieldWriter(selectorUtf8, true);
}
@After
public void tearDown()
{
- fieldWriter.close();
- fieldWriterUtf8.close();
+ for (FieldWriter fw : getFieldWriter(FieldWritersType.ALL)) {
+ try {
+ fw.close();
+ }
+ catch (Exception ignore) {
+ }
+ }
+ }
+
+
+ private List<FieldWriter> getFieldWriter(FieldWritersType fieldWritersType)
+ {
+ if (fieldWritersType == FieldWritersType.NULL_REPLACING) {
+ return Arrays.asList(fieldWriterRemoveNull, fieldWriterUtf8RemoveNull);
+ } else if (fieldWritersType == FieldWritersType.ALL) {
+ return Arrays.asList(fieldWriter, fieldWriterUtf8,
fieldWriterRemoveNull, fieldWriterUtf8RemoveNull);
+ } else {
+ throw new ISE("Handler missing for type:[%s]", fieldWritersType);
+ }
}
@Test
@@ -100,31 +125,63 @@ public class StringFieldWriterTest extends
InitializedNullHandlingTest
doTest(Arrays.asList("foo", "bar"));
}
+
@Test
public void testMultiValueStringContainingNulls()
{
doTest(Arrays.asList("foo", NullHandling.emptyToNullIfNeeded(""), "bar",
null));
}
+ @Test
+ public void testNullByteReplacement()
+ {
+ doTest(
+ Arrays.asList("abc\u0000", "foo" +
NullHandling.emptyToNullIfNeeded("") + "bar", "def"),
+ FieldWritersType.NULL_REPLACING
+ );
+ }
+
+ @Test
+ public void testNullByteNotReplaced()
+ {
+ mockSelectors(Arrays.asList("abc\u0000", "foo" +
NullHandling.emptyToNullIfNeeded("") + "bar", "def"));
+ Assert.assertThrows(InvalidNullByteException.class, () -> {
+ doTestWithSpecificFieldWriter(fieldWriter);
+ });
+ Assert.assertThrows(InvalidNullByteException.class, () -> {
+ doTestWithSpecificFieldWriter(fieldWriterUtf8);
+ });
+ }
+
private void doTest(final List<String> values)
+ {
+ doTest(values, FieldWritersType.ALL);
+ }
+
+ private void doTest(final List<String> values, FieldWritersType
fieldWritersType)
{
mockSelectors(values);
- // Non-UTF8 test
- {
- final long written = writeToMemory(fieldWriter);
- final Object[] valuesRead = readFromMemory(written);
- Assert.assertEquals("values read (non-UTF8)", values,
Arrays.asList(valuesRead));
+ List<FieldWriter> fieldWriters = getFieldWriter(fieldWritersType);
+ for (FieldWriter fw : fieldWriters) {
+ final Object[] valuesRead = doTestWithSpecificFieldWriter(fw);
+ List<String> expectedResults = new ArrayList<>(values);
+ if (fieldWritersType == FieldWritersType.NULL_REPLACING) {
+ expectedResults = expectedResults.stream()
+ .map(val -> StringUtils.replace(val,
"\u0000", ""))
+ .collect(Collectors.toList());
+ }
+ Assert.assertEquals("values read", expectedResults,
Arrays.asList(valuesRead));
}
+ }
- // UTF8 test
- {
- final long writtenUtf8 = writeToMemory(fieldWriterUtf8);
- final Object[] valuesReadUtf8 = readFromMemory(writtenUtf8);
- Assert.assertEquals("values read (UTF8)", values,
Arrays.asList(valuesReadUtf8));
- }
+ private Object[] doTestWithSpecificFieldWriter(FieldWriter fieldWriter)
+ {
+ final long written = writeToMemory(fieldWriter);
+ return readFromMemory(written);
}
+
private void mockSelectors(final List<String> values)
{
final RangeIndexedInts row = new RangeIndexedInts();
@@ -183,9 +240,20 @@ public class StringFieldWriterTest extends
InitializedNullHandlingTest
memory.getByteArray(MEMORY_POSITION, bytes, 0, (int) written);
final FieldReader fieldReader =
FieldReaders.create("columnNameDoesntMatterHere", ColumnType.STRING_ARRAY);
- final ColumnValueSelector<?> selector =
- fieldReader.makeColumnValueSelector(memory, new
ConstantFieldPointer(MEMORY_POSITION, -1));
+ final ColumnValueSelector<?> selector =
fieldReader.makeColumnValueSelector(
+ memory,
+ new ConstantFieldPointer(
+ MEMORY_POSITION,
+ -1
+ )
+ );
return (Object[]) selector.getObject();
}
+
+ private enum FieldWritersType
+ {
+ NULL_REPLACING, // include null replacing writers only
+ ALL // include all writers
+ }
}
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]