This is an automated email from the ASF dual-hosted git repository.
yqm pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/druid.git
The following commit(s) were added to refs/heads/master by this push:
new 6afb7dc0fab Add size check in
DictionaryBuildingSingleValuedRowBasedKeySerdeHelper in putToKeyBuffer (#18541)
6afb7dc0fab is described below
commit 6afb7dc0fabc3fa1fdaaa7fc77fe407cb344b852
Author: Cece Mei <[email protected]>
AuthorDate: Tue Oct 7 18:32:52 2025 -0700
Add size check in DictionaryBuildingSingleValuedRowBasedKeySerdeHelper in
putToKeyBuffer (#18541)
* oom
* size
* map
* structured-data
* format
* wrap
* object
* ignore-result
* ignore-result
* format
* format2
* revert-hash
* revert-hash
* revert-equal
* trigger ci / empty commit
---
.../epinephelinae/RowBasedGrouperHelper.java | 39 ++++--
.../druid/segment/column/TypeStrategies.java | 147 ++++++++++++++++++---
.../segment/nested/NestedDataComplexTypeSerde.java | 108 ++-------------
.../druid/segment/nested/StructuredData.java | 37 ++++--
.../druid/segment/column/TypeStrategiesTest.java | 35 +++++
.../druid/segment/nested/StructuredDataTest.java | 7 -
6 files changed, 226 insertions(+), 147 deletions(-)
diff --git
a/processing/src/main/java/org/apache/druid/query/groupby/epinephelinae/RowBasedGrouperHelper.java
b/processing/src/main/java/org/apache/druid/query/groupby/epinephelinae/RowBasedGrouperHelper.java
index 665eadf7de6..6ea439d7a90 100644
---
a/processing/src/main/java/org/apache/druid/query/groupby/epinephelinae/RowBasedGrouperHelper.java
+++
b/processing/src/main/java/org/apache/druid/query/groupby/epinephelinae/RowBasedGrouperHelper.java
@@ -78,6 +78,7 @@ import org.apache.druid.segment.RowBasedColumnSelectorFactory;
import org.apache.druid.segment.column.ColumnCapabilities;
import org.apache.druid.segment.column.ColumnCapabilitiesImpl;
import org.apache.druid.segment.column.ColumnType;
+import org.apache.druid.segment.column.NullableTypeStrategy;
import org.apache.druid.segment.column.RowSignature;
import org.apache.druid.segment.column.TypeSignature;
import org.apache.druid.segment.column.TypeStrategies;
@@ -158,13 +159,13 @@ public class RowBasedGrouperHelper
/**
* Create a {@link Grouper} that groups according to the dimensions and
aggregators in "query", along with
* an {@link Accumulator} that accepts ResultRows and forwards them to the
grouper.
- *
+ * <p>
* The pair will operate in one of two modes:
- *
+ * <p>
* 1) Combining mode (used if "subquery" is null). In this mode, filters
from the "query" are ignored, and
* its aggregators are converted into combining form. The input ResultRows
are assumed to be partially-grouped
* results originating from the provided "query".
- *
+ * <p>
* 2) Subquery mode (used if "subquery" is nonnull). In this mode, filters
from the "query" (both intervals
* and dim filters) are respected, and its aggregators are used in standard
(not combining) form. The input
* ResultRows are assumed to be results originating from the provided
"subquery".
@@ -758,7 +759,10 @@ public class RowBasedGrouperHelper
case COMPLEX:
return (InputRawSupplierColumnSelectorStrategy<ColumnValueSelector>)
columnSelector ->
- () ->
DimensionHandlerUtils.convertObjectToType(columnSelector.getObject(),
capabilities.toColumnType());
+ () -> DimensionHandlerUtils.convertObjectToType(
+ columnSelector.getObject(),
+ capabilities.toColumnType()
+ );
default:
throw new IAE("Cannot create query type helper from invalid type
[%s]", capabilities.asTypeString());
}
@@ -1402,7 +1406,11 @@ public class RowBasedGrouperHelper
jp.currentToken() != JsonToken.END_ARRAY,
"Unexpected end of array when deserializing timestamp from the
spilled files"
);
- objects[dimsReadSoFar] =
JacksonUtils.readObjectUsingDeserializationContext(jp, deserializationContext,
Long.class);
+ objects[dimsReadSoFar] =
JacksonUtils.readObjectUsingDeserializationContext(
+ jp,
+ deserializationContext,
+ Long.class
+ );
++dimsReadSoFar;
jp.nextToken();
@@ -1519,6 +1527,7 @@ public class RowBasedGrouperHelper
case STRING:
return new ArrayStringRowBasedKeySerdeHelper(
keyBufferPosition,
+ valueType.getNullableStrategy(),
stringComparator
);
case LONG:
@@ -1601,10 +1610,15 @@ public class RowBasedGrouperHelper
private abstract class
DictionaryBuildingSingleValuedRowBasedKeySerdeHelper implements
RowBasedKeySerdeHelper
{
private final int keyBufferPosition;
+ private final NullableTypeStrategy nullableTypeStrategy;
- public DictionaryBuildingSingleValuedRowBasedKeySerdeHelper(final int
keyBufferPosition)
+ public DictionaryBuildingSingleValuedRowBasedKeySerdeHelper(
+ final int keyBufferPosition,
+ final NullableTypeStrategy nullableTypeStrategy
+ )
{
this.keyBufferPosition = keyBufferPosition;
+ this.nullableTypeStrategy = nullableTypeStrategy;
}
@Override
@@ -1619,6 +1633,11 @@ public class RowBasedGrouperHelper
final Object obj = key.getKey()[idx];
int id = getReverseDictionary().getInt(obj);
if (id == DimensionDictionary.ABSENT_VALUE_ID) {
+ int size = nullableTypeStrategy.estimateSizeBytes(obj);
+ if (currentEstimatedSize + size > maxDictionarySize) {
+ return false;
+ }
+ currentEstimatedSize += size;
id = getDictionary().size();
getReverseDictionary().put(obj, id);
getDictionary().add(obj);
@@ -1660,7 +1679,7 @@ public class RowBasedGrouperHelper
ColumnType columnType
)
{
- super(keyBufferPosition);
+ super(keyBufferPosition, columnType.getNullableStrategy());
validateColumnType(columnType);
this.columnTypeName = columnType.asTypeString();
this.dictionary = genericDictionaries.computeIfAbsent(
@@ -1730,7 +1749,7 @@ public class RowBasedGrouperHelper
ColumnType arrayType
)
{
- super(keyBufferPosition);
+ super(keyBufferPosition, arrayType.getNullableStrategy());
final TypeSignature<ValueType> elementType =
arrayType.getElementType();
this.dictionary = getDictionaryForType(elementType);
this.reverseDictionary = getReverseDictionaryForType(elementType);
@@ -1812,10 +1831,11 @@ public class RowBasedGrouperHelper
ArrayStringRowBasedKeySerdeHelper(
int keyBufferPosition,
+ NullableTypeStrategy nullableTypeStrategy,
@Nullable StringComparator stringComparator
)
{
- super(keyBufferPosition);
+ super(keyBufferPosition, nullableTypeStrategy);
final Comparator<Object[]> comparator;
if (useNaturalStringArrayComparator(stringComparator)) {
comparator = ColumnType.STRING_ARRAY.getNullableStrategy();
@@ -1936,7 +1956,6 @@ public class RowBasedGrouperHelper
* this returns -1.
*
* @param s a string
- *
* @return id for this string, or -1
*/
private int addToDictionary(final String s)
diff --git
a/processing/src/main/java/org/apache/druid/segment/column/TypeStrategies.java
b/processing/src/main/java/org/apache/druid/segment/column/TypeStrategies.java
index 7099c7b7891..07585f86dd7 100644
---
a/processing/src/main/java/org/apache/druid/segment/column/TypeStrategies.java
+++
b/processing/src/main/java/org/apache/druid/segment/column/TypeStrategies.java
@@ -19,25 +19,31 @@
package org.apache.druid.segment.column;
+import com.fasterxml.jackson.core.JsonProcessingException;
import com.google.common.base.Preconditions;
import com.google.common.collect.Ordering;
import com.google.common.primitives.Floats;
import com.google.common.primitives.Longs;
+import org.apache.druid.error.DruidException;
import org.apache.druid.java.util.common.IAE;
import org.apache.druid.java.util.common.ISE;
import org.apache.druid.java.util.common.StringUtils;
+import org.apache.druid.segment.nested.StructuredData;
+import org.apache.druid.segment.serde.ColumnSerializerUtils;
import javax.annotation.Nullable;
+import java.io.IOException;
import java.nio.ByteBuffer;
import java.util.Arrays;
import java.util.Comparator;
+import java.util.Objects;
import java.util.concurrent.ConcurrentHashMap;
public class TypeStrategies
{
public static final byte IS_NULL_BYTE = (byte) 1;
public static final byte IS_NOT_NULL_BYTE = (byte) 0;
-
+
public static final int VALUE_OFFSET = Byte.BYTES;
public static final int NULLABLE_LONG_SIZE = Byte.BYTES + Long.BYTES;
public static final int NULLABLE_DOUBLE_SIZE = Byte.BYTES + Double.BYTES;
@@ -47,6 +53,7 @@ public class TypeStrategies
public static final FloatTypeStrategy FLOAT = new FloatTypeStrategy();
public static final DoubleTypeStrategy DOUBLE = new DoubleTypeStrategy();
public static final StringTypeStrategy STRING = new StringTypeStrategy();
+ public static final NestedDataTypeStrategy NESTED = new
NestedDataTypeStrategy();
public static final ConcurrentHashMap<String, TypeStrategy<?>>
COMPLEX_STRATEGIES = new ConcurrentHashMap<>();
/**
@@ -63,9 +70,9 @@ public class TypeStrategies
/**
* hmm... this might look familiar... (see ComplexMetrics)
- *
+ * <p>
* Register a complex type name -> {@link TypeStrategy} mapping.
- *
+ * <p>
* If the specified type name is already used and the supplied {@link
TypeStrategy} is not of the
* same type as the existing value in the map for said key, an {@link ISE}
is thrown.
*
@@ -96,10 +103,10 @@ public class TypeStrategies
* Clear and set the 'null' byte of a nullable value to {@link
TypeStrategies#IS_NULL_BYTE} to a {@link ByteBuffer} at
* the supplied position. This method does not change the buffer position,
limit, or mark, because it does not expect
* to own the buffer given to it (i.e. buffer aggs)
- *
+ * <p>
* Nullable types are stored with a leading byte to indicate if the value is
null, followed by the value bytes
* (if not null)
- *
+ * <p>
* layout: | null (byte) | value |
*
* @return number of bytes written (always 1)
@@ -114,10 +121,10 @@ public class TypeStrategies
* Checks if a 'nullable' value's null byte is set to {@link
TypeStrategies#IS_NULL_BYTE}. This method will mask the
* value of the null byte to only check if the null bit is set, meaning that
the higher bits can be utilized for
* flags as necessary (e.g. using high bits to indicate if the value has
been set or not for aggregators).
- *
+ * <p>
* Note that writing nullable values with the methods of {@link Types} will
always clear and set the null byte to
* either {@link TypeStrategies#IS_NULL_BYTE} or {@link
TypeStrategies#IS_NOT_NULL_BYTE}, losing any flag bits.
- *
+ * <p>
* layout: | null (byte) | value |
*/
public static boolean isNullableNull(ByteBuffer buffer, int offset)
@@ -129,9 +136,9 @@ public class TypeStrategies
/**
* Write a non-null long value to a {@link ByteBuffer} at the supplied
offset. The first byte is always cleared and
* set to {@link TypeStrategies#IS_NOT_NULL_BYTE}, the long value is written
in the next 8 bytes.
- *
+ * <p>
* layout: | null (byte) | long |
- *
+ * <p>
* This method does not change the buffer position, limit, or mark, because
it does not expect to own the buffer
* given to it (i.e. buffer aggs)
*
@@ -147,9 +154,9 @@ public class TypeStrategies
/**
* Reads a non-null long value from a {@link ByteBuffer} at the supplied
offset. This method should only be called
* if and only if {@link #isNullableNull} for the same offset returns false.
- *
+ * <p>
* layout: | null (byte) | long |
- *
+ * <p>
* This method does not change the buffer position, limit, or mark, because
it does not expect to own the buffer
* given to it (i.e. buffer aggs)
*/
@@ -161,9 +168,9 @@ public class TypeStrategies
/**
* Write a non-null double value to a {@link ByteBuffer} at the supplied
offset. The first byte is always cleared and
* set to {@link TypeStrategies#IS_NOT_NULL_BYTE}, the double value is
written in the next 8 bytes.
- *
+ * <p>
* layout: | null (byte) | double |
- *
+ * <p>
* This method does not change the buffer position, limit, or mark, because
it does not expect to own the buffer
* given to it (i.e. buffer aggs)
*
@@ -179,9 +186,9 @@ public class TypeStrategies
/**
* Reads a non-null double value from a {@link ByteBuffer} at the supplied
offset. This method should only be called
* if and only if {@link #isNullableNull} for the same offset returns false.
- *
+ * <p>
* layout: | null (byte) | double |
- *
+ * <p>
* This method does not change the buffer position, limit, or mark, because
it does not expect to own the buffer
* given to it (i.e. buffer aggs)
*/
@@ -193,9 +200,9 @@ public class TypeStrategies
/**
* Write a non-null float value to a {@link ByteBuffer} at the supplied
offset. The first byte is always cleared and
* set to {@link TypeStrategies#IS_NOT_NULL_BYTE}, the float value is
written in the next 4 bytes.
- *
+ * <p>
* layout: | null (byte) | float |
- *
+ * <p>
* This method does not change the buffer position, limit, or mark, because
it does not expect to own the buffer
* given to it (i.e. buffer aggs)
*
@@ -211,9 +218,9 @@ public class TypeStrategies
/**
* Reads a non-null float value from a {@link ByteBuffer} at the supplied
offset. This method should only be called
* if and only if {@link #isNullableNull} for the same offset returns false.
- *
+ * <p>
* layout: | null (byte) | float |
- *
+ * <p>
* This method does not change the buffer position, limit, or mark, because
it does not expect to own the buffer
* given to it (i.e. buffer aggs)
*/
@@ -463,7 +470,7 @@ public class TypeStrategies
/**
* Read and write non-null UTF8 encoded String values. Encodes the length in
bytes as an integer prefix followed by
* the actual encoded value bytes.
- *
+ * <p>
* format: | length (int) | bytes |
*/
public static final class StringTypeStrategy implements TypeStrategy<String>
@@ -551,7 +558,7 @@ public class TypeStrategies
* Read and write a non-null ARRAY which is permitted to have null elements
(all elements are always read and written
* with a {@link NullableTypeStrategy} wrapper on the {@link TypeStrategy}
of the
* {@link TypeSignature#getElementType()}.
- *
+ * <p>
* Encodes the number of elements in the array as an integer prefix followed
by the actual encoded value bytes of
* each element serially.
*/
@@ -665,6 +672,7 @@ public class TypeStrategies
return result;
}
}
+
/**
* Implements {@link Arrays#equals} but the element equality uses the
element's type strategy
*/
@@ -697,4 +705,101 @@ public class TypeStrategies
return Object[].class;
}
}
+
+ public static final class NestedDataTypeStrategy implements
TypeStrategy<Object>
+ {
+ @Override
+ public int estimateSizeBytes(Object value)
+ {
+ return
Objects.requireNonNull(StructuredData.wrap(value)).getSizeEstimate();
+ }
+
+ @Override
+ public Object read(ByteBuffer buffer)
+ {
+ final int len = buffer.getInt();
+ return fromByteBuffer(buffer, len);
+ }
+
+ @Nullable
+ public StructuredData fromByteBuffer(ByteBuffer buffer, int numBytes)
+ {
+ if (numBytes == 0) {
+ return null;
+ }
+
+ final byte[] bytes = new byte[numBytes];
+ buffer.get(bytes, 0, numBytes);
+ try {
+ return ColumnSerializerUtils.SMILE_MAPPER.readValue(bytes, 0,
bytes.length, StructuredData.class);
+ }
+ catch (IOException e) {
+ throw DruidException.defensive(e, "Unable to deserialize value");
+ }
+ }
+
+ @Override
+ public boolean readRetainsBufferReference()
+ {
+ return false;
+ }
+
+ @Override
+ public int write(ByteBuffer buffer, Object value, int maxSizeBytes)
+ {
+ TypeStrategies.checkMaxSize(buffer.remaining(), maxSizeBytes,
ColumnType.NESTED_DATA);
+ final byte[] bytes = toBytes(value);
+ final int sizeBytes = Integer.BYTES + bytes.length;
+ if (sizeBytes > maxSizeBytes) {
+ return maxSizeBytes - sizeBytes;
+ }
+ buffer.putInt(bytes.length);
+ buffer.put(bytes);
+ return sizeBytes;
+ }
+
+ @Nullable
+ public byte[] toBytes(@Nullable Object val)
+ {
+ if (val == null) {
+ return new byte[0];
+ }
+ try {
+ return ColumnSerializerUtils.SMILE_MAPPER.writeValueAsBytes(val);
+ }
+ catch (JsonProcessingException e) {
+ throw DruidException.defensive(e, "Unable to serialize value [%s]",
val);
+ }
+ }
+
+ @Override
+ public int compare(Object o1, Object o2)
+ {
+ return StructuredData.COMPARATOR.compare(StructuredData.wrap(o1),
StructuredData.wrap(o2));
+ }
+
+ @Override
+ public boolean groupable()
+ {
+ return true;
+ }
+
+ @Override
+ public int hashCode(Object o)
+ {
+ return Objects.hashCode(StructuredData.wrap(o));
+ }
+
+ @Override
+ public boolean equals(Object a, Object b)
+ {
+ return Objects.equals(StructuredData.wrap(a), StructuredData.wrap(b));
+ }
+
+ @Override
+ public Class<?> getClazz()
+ {
+ return StructuredData.class;
+ }
+ }
}
diff --git
a/processing/src/main/java/org/apache/druid/segment/nested/NestedDataComplexTypeSerde.java
b/processing/src/main/java/org/apache/druid/segment/nested/NestedDataComplexTypeSerde.java
index e3abfff8014..a1496dd8dcc 100644
---
a/processing/src/main/java/org/apache/druid/segment/nested/NestedDataComplexTypeSerde.java
+++
b/processing/src/main/java/org/apache/druid/segment/nested/NestedDataComplexTypeSerde.java
@@ -19,11 +19,7 @@
package org.apache.druid.segment.nested;
-import com.fasterxml.jackson.core.JsonProcessingException;
-import it.unimi.dsi.fastutil.Hash;
import org.apache.druid.data.input.impl.DimensionSchema;
-import org.apache.druid.error.DruidException;
-import org.apache.druid.java.util.common.guava.Comparators;
import org.apache.druid.segment.DimensionHandler;
import org.apache.druid.segment.IndexSpec;
import org.apache.druid.segment.NestedCommonFormatColumnHandler;
@@ -34,7 +30,7 @@ import org.apache.druid.segment.column.ColumnCapabilitiesImpl;
import org.apache.druid.segment.column.ColumnConfig;
import org.apache.druid.segment.column.ColumnFormat;
import org.apache.druid.segment.column.ColumnType;
-import org.apache.druid.segment.column.ObjectStrategyComplexTypeStrategy;
+import org.apache.druid.segment.column.TypeStrategies;
import org.apache.druid.segment.column.TypeStrategy;
import org.apache.druid.segment.data.ObjectStrategy;
import org.apache.druid.segment.serde.ColumnSerializerUtils;
@@ -42,7 +38,6 @@ import org.apache.druid.segment.serde.ComplexMetricExtractor;
import org.apache.druid.segment.serde.ComplexMetricSerde;
import javax.annotation.Nullable;
-import java.io.IOException;
import java.nio.ByteBuffer;
public class NestedDataComplexTypeSerde extends ComplexMetricSerde
@@ -97,131 +92,54 @@ public class NestedDataComplexTypeSerde extends
ComplexMetricSerde
}
@Override
- public ObjectStrategy getObjectStrategy()
+ public ObjectStrategy<Object> getObjectStrategy()
{
return new ObjectStrategy<>()
{
@Override
public int compare(Object o1, Object o2)
{
- return Comparators.<StructuredData>naturalNullsFirst()
- .compare(StructuredData.wrap(o1),
StructuredData.wrap(o2));
+ return TypeStrategies.NESTED.compare(o1, o2);
}
@Override
public Class<? extends Object> getClazz()
{
- return StructuredData.class;
+ return TypeStrategies.NESTED.getClazz();
}
@Nullable
@Override
- public Object fromByteBuffer(ByteBuffer buffer, int numBytes)
+ public StructuredData fromByteBuffer(ByteBuffer buffer, int numBytes)
{
- return deserializeBuffer(buffer, numBytes);
+ return TypeStrategies.NESTED.fromByteBuffer(buffer, numBytes);
}
@Nullable
@Override
public byte[] toBytes(@Nullable Object val)
{
- return serializeToBytes(val);
+ return TypeStrategies.NESTED.toBytes(val);
}
@Override
public boolean readRetainsBufferReference()
{
- return false;
+ return TypeStrategies.NESTED.readRetainsBufferReference();
}
};
}
- /**
- * Reads numBytes from the position to the limit of the byte buffer argument
and deserailizes it into
- * a {@link StructuredData} object using {@link
ColumnSerializerUtils#SMILE_MAPPER}.
- */
- public static StructuredData deserializeBuffer(ByteBuffer buf)
- {
- return deserializeBuffer(buf, buf.remaining());
- }
-
- /**
- * Reads numBytes from the byte buffer argument and deserailizes it into a
{@link StructuredData} object
- * using {@link ColumnSerializerUtils#SMILE_MAPPER}.
- */
- public static StructuredData deserializeBuffer(ByteBuffer buf, int numBytes)
- {
- if (numBytes == 0) {
- return null;
- }
-
- final byte[] bytes = new byte[numBytes];
- buf.get(bytes, 0, numBytes);
- return deserializeBytes(bytes);
- }
-
- /**
- * Converts the bytes array into a {@link StructuredData} object using
{@link ColumnSerializerUtils#SMILE_MAPPER}.
- */
- public static StructuredData deserializeBytes(byte[] bytes)
- {
- return deserializeBytes(bytes, 0, bytes.length);
- }
-
- /**
- * Reads the bytes between offset and len from the byte array and
deserializes a {@link StructuredData} object from
- * it, using {@link ColumnSerializerUtils#SMILE_MAPPER}.
- */
- public static StructuredData deserializeBytes(byte[] bytes, int offset, int
len)
- {
- if (len == 0) {
- return null;
- }
- try {
- return ColumnSerializerUtils.SMILE_MAPPER.readValue(bytes, offset, len,
StructuredData.class);
- }
- catch (IOException e) {
- throw DruidException.defensive(e, "Unable to deserialize value");
- }
- }
-
- /**
- * Returns a byte array containing the val as serialized by {@link
ColumnSerializerUtils#SMILE_MAPPER}.
- */
- public static byte[] serializeToBytes(@Nullable Object val)
+ @Override
+ public TypeStrategy<Object> getTypeStrategy()
{
- if (val == null) {
- return new byte[0];
- }
- try {
- return ColumnSerializerUtils.SMILE_MAPPER.writeValueAsBytes(val);
- }
- catch (JsonProcessingException e) {
- throw DruidException.defensive(e, "Unable to serialize value [%s]", val);
- }
+ return TypeStrategies.NESTED;
}
@Override
- public <T extends Comparable<T>> TypeStrategy<T> getTypeStrategy()
+ public byte[] toBytes(@Nullable Object val)
{
- return new ObjectStrategyComplexTypeStrategy<>(
- getObjectStrategy(),
- ColumnType.ofComplex(TYPE_NAME),
- new Hash.Strategy<>()
- {
- @Override
- public int hashCode(Object o)
- {
- return StructuredData.wrap(o).equalityHash();
- }
-
- @Override
- public boolean equals(Object a, Object b)
- {
- return StructuredData.wrap(a).compareTo(StructuredData.wrap(b)) ==
0;
- }
- }
- );
+ return getObjectStrategy().toBytes(val);
}
public static class NestedColumnFormatV4 implements ColumnFormat
diff --git
a/processing/src/main/java/org/apache/druid/segment/nested/StructuredData.java
b/processing/src/main/java/org/apache/druid/segment/nested/StructuredData.java
index 37fad571c33..ac593a07365 100644
---
a/processing/src/main/java/org/apache/druid/segment/nested/StructuredData.java
+++
b/processing/src/main/java/org/apache/druid/segment/nested/StructuredData.java
@@ -23,6 +23,7 @@ import com.fasterxml.jackson.annotation.JsonCreator;
import com.fasterxml.jackson.core.JsonProcessingException;
import com.fasterxml.jackson.databind.ObjectWriter;
import com.fasterxml.jackson.databind.SerializationFeature;
+import com.google.common.base.Preconditions;
import com.google.common.primitives.Longs;
import net.jpountz.xxhash.XXHash64;
import net.jpountz.xxhash.XXHashFactory;
@@ -45,17 +46,16 @@ public class StructuredData implements
Comparable<StructuredData>
public static final Comparator<StructuredData> COMPARATOR =
Comparators.naturalNullsFirst();
- /** SerializationFeature.ORDER_MAP_ENTRIES_BY_KEYS is required so that hash
computations for JSON objects that
- * have different key orders but are otherwise equivalent will be
consistent. See
- * {@link StructuredDataTest#testCompareToWithDifferentJSONOrder()} for an
example
+ /**
+ * SerializationFeature.ORDER_MAP_ENTRIES_BY_KEYS is required so that hash
computations for JSON objects that
+ * have different key orders but are otherwise equivalent will be consistent.
*/
private static final ObjectWriter WRITER =
ColumnSerializerUtils.SMILE_MAPPER.writer(SerializationFeature.ORDER_MAP_ENTRIES_BY_KEYS);
- private static long computeHash(StructuredData data)
+ private static byte[] serialized(StructuredData data)
{
try {
- final byte[] bytes = WRITER.writeValueAsBytes(data.value);
- return HASH_FUNCTION.hash(bytes, 0, bytes.length, SEED);
+ return WRITER.writeValueAsBytes(data.value);
}
catch (JsonProcessingException e) {
throw new RuntimeException(e);
@@ -86,13 +86,17 @@ public class StructuredData implements
Comparable<StructuredData>
return new StructuredData(value);
}
-
private final Object value;
private volatile boolean hashInitialized = false;
private volatile long hashValue;
+ private volatile int sizeEstimate = -1;
private final LongSupplier hash = () -> {
if (!hashInitialized) {
- hashValue = computeHash(this);
+ final byte[] bytes = serialized(this);
+ // compute the size estimate, note it's not an accurate representation
of the heap size
+ sizeEstimate = bytes.length + Integer.BYTES; // add 4 bytes for the
length prefix
+ // compute the hash, we might use it for comparison later
+ hashValue = HASH_FUNCTION.hash(bytes, 0, bytes.length, SEED);
hashInitialized = true;
}
return hashValue;
@@ -133,12 +137,23 @@ public class StructuredData implements
Comparable<StructuredData>
return (Number) value;
}
+ @SuppressWarnings("ReturnValueIgnored")
+ public int getSizeEstimate()
+ {
+ if (sizeEstimate < 0) {
+ hash.getAsLong(); // trigger hash computation which also sets
sizeEstimate
+ }
+ Preconditions.checkState(sizeEstimate >= 0, "sizeEstimate not
initialized");
+ return sizeEstimate;
+ }
+
@Override
public int compareTo(StructuredData o)
{
if (this.equals(o)) {
return 0;
}
+
if (isNull()) {
return -1;
}
@@ -191,12 +206,6 @@ public class StructuredData implements
Comparable<StructuredData>
@Override
public int hashCode()
- {
- return Objects.hash(value);
- }
-
- // hashCode that relies on the object equality. Translates the hashcode to
an integer as well
- public int equalityHash()
{
return Longs.hashCode(hash.getAsLong());
}
diff --git
a/processing/src/test/java/org/apache/druid/segment/column/TypeStrategiesTest.java
b/processing/src/test/java/org/apache/druid/segment/column/TypeStrategiesTest.java
index 670da7a5cf6..857a6d6a854 100644
---
a/processing/src/test/java/org/apache/druid/segment/column/TypeStrategiesTest.java
+++
b/processing/src/test/java/org/apache/druid/segment/column/TypeStrategiesTest.java
@@ -21,10 +21,12 @@ package org.apache.druid.segment.column;
import com.google.common.collect.Ordering;
import com.google.common.primitives.Longs;
+import org.apache.druid.guice.BuiltInTypesModule;
import org.apache.druid.java.util.common.IAE;
import org.apache.druid.java.util.common.Pair;
import org.apache.druid.java.util.common.guava.Comparators;
import org.apache.druid.math.expr.ExpressionType;
+import org.apache.druid.segment.nested.StructuredData;
import org.junit.Assert;
import org.junit.BeforeClass;
import org.junit.Rule;
@@ -34,6 +36,8 @@ import org.junit.rules.ExpectedException;
import javax.annotation.Nullable;
import java.nio.ByteBuffer;
import java.util.Arrays;
+import java.util.HashMap;
+import java.util.Map;
import static org.junit.Assert.assertNull;
@@ -49,6 +53,7 @@ public class TypeStrategiesTest
@BeforeClass
public static void setup()
{
+ BuiltInTypesModule.registerHandlersAndSerde();
TypeStrategies.registerComplex(NULLABLE_TEST_PAIR_TYPE.getComplexTypeName(),
new NullableLongPairTypeStrategy());
}
@@ -331,6 +336,25 @@ public class TypeStrategiesTest
);
}
+ @Test
+ public void testJsonComparator()
+ {
+ TypeStrategy<StructuredData> strategy =
ColumnType.NESTED_DATA.getStrategy();
+ Assert.assertEquals(-1, strategy.compare(StructuredData.wrap(null),
StructuredData.wrap(Map.of("key", "val"))));
+
+ NullableTypeStrategy<StructuredData> nullableTypeStrategy =
ColumnType.NESTED_DATA.getNullableStrategy();
+ Assert.assertEquals(-1, nullableTypeStrategy.compare(null,
StructuredData.wrap(Map.of("key", "val"))));
+ Assert.assertEquals(0, nullableTypeStrategy.compare(
+ StructuredData.wrap(Map.of("key1", Map.of("sub-key1", "sub-val1"),
"key2", "val2")),
+ StructuredData.wrap(Map.of("key1", Map.of("sub-key1", "sub-val1"),
"key2", "val2"))
+ ));
+ // hash value is computed based on serialized bytes
+ Assert.assertEquals(-1, nullableTypeStrategy.compare(
+ StructuredData.wrap(Map.of("key1", Map.of("sub-key1",
"sub-val1-different"), "key2", "val2")),
+ StructuredData.wrap(Map.of("key1", Map.of("sub-key1", "sub-val1"),
"key2", "val2"))
+ ));
+ }
+
@Test
public void testNulls()
{
@@ -455,6 +479,17 @@ public class TypeStrategiesTest
assertStrategy(strategy, new NullableLongPair(1234L, null));
}
+ @Test
+ public void testComplexJsonTypeStrategy()
+ {
+ final TypeStrategy strategy =
TypeStrategies.getComplex(ColumnType.NESTED_DATA.getComplexTypeName());
+ Map<String, Object> nested = new HashMap<>();
+ nested.put("key1", "val");
+ assertStrategy(strategy, StructuredData.wrap(nested));
+ nested.put("key2", null);
+ assertStrategy(strategy, StructuredData.wrap(nested));
+ }
+
@Test
public void testArrayTypeStrategy()
{
diff --git
a/processing/src/test/java/org/apache/druid/segment/nested/StructuredDataTest.java
b/processing/src/test/java/org/apache/druid/segment/nested/StructuredDataTest.java
index 1642107a6b1..b53746cbd09 100644
---
a/processing/src/test/java/org/apache/druid/segment/nested/StructuredDataTest.java
+++
b/processing/src/test/java/org/apache/druid/segment/nested/StructuredDataTest.java
@@ -20,7 +20,6 @@
package org.apache.druid.segment.nested;
import com.google.common.collect.ImmutableMap;
-import nl.jqno.equalsverifier.EqualsVerifier;
import org.junit.Assert;
import org.junit.Test;
@@ -106,10 +105,4 @@ public class StructuredDataTest
Assert.assertEquals(1, sd1.compareTo(sd2));
Assert.assertEquals(0, sd0.compareTo(sd1));
}
-
- @Test
- public void testEqualsAndHashcode()
- {
-
EqualsVerifier.forClass(StructuredData.class).withIgnoredFields("hashInitialized",
"hashValue", "hash").usingGetClass().verify();
- }
}
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]