(druid) branch master updated: Add size check in DictionaryBuildingSingleValuedRowBasedKeySerdeHelper in putToKeyBuffer (#18541)

yqm Sat, 18 Oct 2025 05:51:08 -0700

This is an automated email from the ASF dual-hosted git repository.

yqm pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/druid.git



The following commit(s) were added to refs/heads/master by this push:
     new 6afb7dc0fab Add size check in 
DictionaryBuildingSingleValuedRowBasedKeySerdeHelper in putToKeyBuffer (#18541)
6afb7dc0fab is described below

commit 6afb7dc0fabc3fa1fdaaa7fc77fe407cb344b852
Author: Cece Mei <[email protected]>
AuthorDate: Tue Oct 7 18:32:52 2025 -0700

    Add size check in DictionaryBuildingSingleValuedRowBasedKeySerdeHelper in 
putToKeyBuffer (#18541)
    
    * oom
    
    * size
    
    * map
    
    * structured-data
    
    * format
    
    * wrap
    
    * object
    
    * ignore-result
    
    * ignore-result
    
    * format
    
    * format2
    
    * revert-hash
    
    * revert-hash
    
    * revert-equal
    
    * trigger ci / empty commit
---
 .../epinephelinae/RowBasedGrouperHelper.java       |  39 ++++--
 .../druid/segment/column/TypeStrategies.java       | 147 ++++++++++++++++++---
 .../segment/nested/NestedDataComplexTypeSerde.java | 108 ++-------------
 .../druid/segment/nested/StructuredData.java       |  37 ++++--
 .../druid/segment/column/TypeStrategiesTest.java   |  35 +++++
 .../druid/segment/nested/StructuredDataTest.java   |   7 -
 6 files changed, 226 insertions(+), 147 deletions(-)

diff --git 
a/processing/src/main/java/org/apache/druid/query/groupby/epinephelinae/RowBasedGrouperHelper.java
 
b/processing/src/main/java/org/apache/druid/query/groupby/epinephelinae/RowBasedGrouperHelper.java
index 665eadf7de6..6ea439d7a90 100644
--- 
a/processing/src/main/java/org/apache/druid/query/groupby/epinephelinae/RowBasedGrouperHelper.java
+++ 
b/processing/src/main/java/org/apache/druid/query/groupby/epinephelinae/RowBasedGrouperHelper.java
@@ -78,6 +78,7 @@ import org.apache.druid.segment.RowBasedColumnSelectorFactory;
 import org.apache.druid.segment.column.ColumnCapabilities;
 import org.apache.druid.segment.column.ColumnCapabilitiesImpl;
 import org.apache.druid.segment.column.ColumnType;
+import org.apache.druid.segment.column.NullableTypeStrategy;
 import org.apache.druid.segment.column.RowSignature;
 import org.apache.druid.segment.column.TypeSignature;
 import org.apache.druid.segment.column.TypeStrategies;
@@ -158,13 +159,13 @@ public class RowBasedGrouperHelper
   /**
    * Create a {@link Grouper} that groups according to the dimensions and 
aggregators in "query", along with
    * an {@link Accumulator} that accepts ResultRows and forwards them to the 
grouper.
-   *
+   * <p>
    * The pair will operate in one of two modes:
-   *
+   * <p>
    * 1) Combining mode (used if "subquery" is null). In this mode, filters 
from the "query" are ignored, and
    * its aggregators are converted into combining form. The input ResultRows 
are assumed to be partially-grouped
    * results originating from the provided "query".
-   *
+   * <p>
    * 2) Subquery mode (used if "subquery" is nonnull). In this mode, filters 
from the "query" (both intervals
    * and dim filters) are respected, and its aggregators are used in standard 
(not combining) form. The input
    * ResultRows are assumed to be results originating from the provided 
"subquery".
@@ -758,7 +759,10 @@ public class RowBasedGrouperHelper
         case COMPLEX:
           return (InputRawSupplierColumnSelectorStrategy<ColumnValueSelector>)
               columnSelector ->
-                  () -> 
DimensionHandlerUtils.convertObjectToType(columnSelector.getObject(), 
capabilities.toColumnType());
+                  () -> DimensionHandlerUtils.convertObjectToType(
+                      columnSelector.getObject(),
+                      capabilities.toColumnType()
+                  );
         default:
           throw new IAE("Cannot create query type helper from invalid type 
[%s]", capabilities.asTypeString());
       }
@@ -1402,7 +1406,11 @@ public class RowBasedGrouperHelper
                 jp.currentToken() != JsonToken.END_ARRAY,
                 "Unexpected end of array when deserializing timestamp from the 
spilled files"
             );
-            objects[dimsReadSoFar] = 
JacksonUtils.readObjectUsingDeserializationContext(jp, deserializationContext, 
Long.class);
+            objects[dimsReadSoFar] = 
JacksonUtils.readObjectUsingDeserializationContext(
+                jp,
+                deserializationContext,
+                Long.class
+            );
 
             ++dimsReadSoFar;
             jp.nextToken();
@@ -1519,6 +1527,7 @@ public class RowBasedGrouperHelper
             case STRING:
               return new ArrayStringRowBasedKeySerdeHelper(
                   keyBufferPosition,
+                  valueType.getNullableStrategy(),
                   stringComparator
               );
             case LONG:
@@ -1601,10 +1610,15 @@ public class RowBasedGrouperHelper
     private abstract class 
DictionaryBuildingSingleValuedRowBasedKeySerdeHelper implements 
RowBasedKeySerdeHelper
     {
       private final int keyBufferPosition;
+      private final NullableTypeStrategy nullableTypeStrategy;
 
-      public DictionaryBuildingSingleValuedRowBasedKeySerdeHelper(final int 
keyBufferPosition)
+      public DictionaryBuildingSingleValuedRowBasedKeySerdeHelper(
+          final int keyBufferPosition,
+          final NullableTypeStrategy nullableTypeStrategy
+      )
       {
         this.keyBufferPosition = keyBufferPosition;
+        this.nullableTypeStrategy = nullableTypeStrategy;
       }
 
       @Override
@@ -1619,6 +1633,11 @@ public class RowBasedGrouperHelper
         final Object obj = key.getKey()[idx];
         int id = getReverseDictionary().getInt(obj);
         if (id == DimensionDictionary.ABSENT_VALUE_ID) {
+          int size = nullableTypeStrategy.estimateSizeBytes(obj);
+          if (currentEstimatedSize + size > maxDictionarySize) {
+            return false;
+          }
+          currentEstimatedSize += size;
           id = getDictionary().size();
           getReverseDictionary().put(obj, id);
           getDictionary().add(obj);
@@ -1660,7 +1679,7 @@ public class RowBasedGrouperHelper
           ColumnType columnType
       )
       {
-        super(keyBufferPosition);
+        super(keyBufferPosition, columnType.getNullableStrategy());
         validateColumnType(columnType);
         this.columnTypeName = columnType.asTypeString();
         this.dictionary = genericDictionaries.computeIfAbsent(
@@ -1730,7 +1749,7 @@ public class RowBasedGrouperHelper
           ColumnType arrayType
       )
       {
-        super(keyBufferPosition);
+        super(keyBufferPosition, arrayType.getNullableStrategy());
         final TypeSignature<ValueType> elementType = 
arrayType.getElementType();
         this.dictionary = getDictionaryForType(elementType);
         this.reverseDictionary = getReverseDictionaryForType(elementType);
@@ -1812,10 +1831,11 @@ public class RowBasedGrouperHelper
 
       ArrayStringRowBasedKeySerdeHelper(
           int keyBufferPosition,
+          NullableTypeStrategy nullableTypeStrategy,
           @Nullable StringComparator stringComparator
       )
       {
-        super(keyBufferPosition);
+        super(keyBufferPosition, nullableTypeStrategy);
         final Comparator<Object[]> comparator;
         if (useNaturalStringArrayComparator(stringComparator)) {
           comparator = ColumnType.STRING_ARRAY.getNullableStrategy();
@@ -1936,7 +1956,6 @@ public class RowBasedGrouperHelper
        * this returns -1.
        *
        * @param s a string
-       *
        * @return id for this string, or -1
        */
       private int addToDictionary(final String s)
diff --git 
a/processing/src/main/java/org/apache/druid/segment/column/TypeStrategies.java 
b/processing/src/main/java/org/apache/druid/segment/column/TypeStrategies.java
index 7099c7b7891..07585f86dd7 100644
--- 
a/processing/src/main/java/org/apache/druid/segment/column/TypeStrategies.java
+++ 
b/processing/src/main/java/org/apache/druid/segment/column/TypeStrategies.java
@@ -19,25 +19,31 @@
 
 package org.apache.druid.segment.column;
 
+import com.fasterxml.jackson.core.JsonProcessingException;
 import com.google.common.base.Preconditions;
 import com.google.common.collect.Ordering;
 import com.google.common.primitives.Floats;
 import com.google.common.primitives.Longs;
+import org.apache.druid.error.DruidException;
 import org.apache.druid.java.util.common.IAE;
 import org.apache.druid.java.util.common.ISE;
 import org.apache.druid.java.util.common.StringUtils;
+import org.apache.druid.segment.nested.StructuredData;
+import org.apache.druid.segment.serde.ColumnSerializerUtils;
 
 import javax.annotation.Nullable;
+import java.io.IOException;
 import java.nio.ByteBuffer;
 import java.util.Arrays;
 import java.util.Comparator;
+import java.util.Objects;
 import java.util.concurrent.ConcurrentHashMap;
 
 public class TypeStrategies
 {
   public static final byte IS_NULL_BYTE = (byte) 1;
   public static final byte IS_NOT_NULL_BYTE = (byte) 0;
-  
+
   public static final int VALUE_OFFSET = Byte.BYTES;
   public static final int NULLABLE_LONG_SIZE = Byte.BYTES + Long.BYTES;
   public static final int NULLABLE_DOUBLE_SIZE = Byte.BYTES + Double.BYTES;
@@ -47,6 +53,7 @@ public class TypeStrategies
   public static final FloatTypeStrategy FLOAT = new FloatTypeStrategy();
   public static final DoubleTypeStrategy DOUBLE = new DoubleTypeStrategy();
   public static final StringTypeStrategy STRING = new StringTypeStrategy();
+  public static final NestedDataTypeStrategy NESTED = new 
NestedDataTypeStrategy();
   public static final ConcurrentHashMap<String, TypeStrategy<?>> 
COMPLEX_STRATEGIES = new ConcurrentHashMap<>();
 
   /**
@@ -63,9 +70,9 @@ public class TypeStrategies
 
   /**
    * hmm... this might look familiar... (see ComplexMetrics)
-   *
+   * <p>
    * Register a complex type name -> {@link TypeStrategy} mapping.
-   *
+   * <p>
    * If the specified type name is already used and the supplied {@link 
TypeStrategy} is not of the
    * same type as the existing value in the map for said key, an {@link ISE} 
is thrown.
    *
@@ -96,10 +103,10 @@ public class TypeStrategies
    * Clear and set the 'null' byte of a nullable value to {@link 
TypeStrategies#IS_NULL_BYTE} to a {@link ByteBuffer} at
    * the supplied position. This method does not change the buffer position, 
limit, or mark, because it does not expect
    * to own the buffer given to it (i.e. buffer aggs)
-   *
+   * <p>
    * Nullable types are stored with a leading byte to indicate if the value is 
null, followed by the value bytes
    * (if not null)
-   *
+   * <p>
    * layout: | null (byte) | value |
    *
    * @return number of bytes written (always 1)
@@ -114,10 +121,10 @@ public class TypeStrategies
    * Checks if a 'nullable' value's null byte is set to {@link 
TypeStrategies#IS_NULL_BYTE}. This method will mask the
    * value of the null byte to only check if the null bit is set, meaning that 
the higher bits can be utilized for
    * flags as necessary (e.g. using high bits to indicate if the value has 
been set or not for aggregators).
-   *
+   * <p>
    * Note that writing nullable values with the methods of {@link Types} will 
always clear and set the null byte to
    * either {@link TypeStrategies#IS_NULL_BYTE} or {@link 
TypeStrategies#IS_NOT_NULL_BYTE}, losing any flag bits.
-   *
+   * <p>
    * layout: | null (byte) | value |
    */
   public static boolean isNullableNull(ByteBuffer buffer, int offset)
@@ -129,9 +136,9 @@ public class TypeStrategies
   /**
    * Write a non-null long value to a {@link ByteBuffer} at the supplied 
offset. The first byte is always cleared and
    * set to {@link TypeStrategies#IS_NOT_NULL_BYTE}, the long value is written 
in the next 8 bytes.
-   *
+   * <p>
    * layout: | null (byte) | long |
-   *
+   * <p>
    * This method does not change the buffer position, limit, or mark, because 
it does not expect to own the buffer
    * given to it (i.e. buffer aggs)
    *
@@ -147,9 +154,9 @@ public class TypeStrategies
   /**
    * Reads a non-null long value from a {@link ByteBuffer} at the supplied 
offset. This method should only be called
    * if and only if {@link #isNullableNull} for the same offset returns false.
-   *
+   * <p>
    * layout: | null (byte) | long |
-   *
+   * <p>
    * This method does not change the buffer position, limit, or mark, because 
it does not expect  to own the buffer
    * given to it (i.e. buffer aggs)
    */
@@ -161,9 +168,9 @@ public class TypeStrategies
   /**
    * Write a non-null double value to a {@link ByteBuffer} at the supplied 
offset. The first byte is always cleared and
    * set to {@link TypeStrategies#IS_NOT_NULL_BYTE}, the double value is 
written in the next 8 bytes.
-   *
+   * <p>
    * layout: | null (byte) | double |
-   *
+   * <p>
    * This method does not change the buffer position, limit, or mark, because 
it does not expect to own the buffer
    * given to it (i.e. buffer aggs)
    *
@@ -179,9 +186,9 @@ public class TypeStrategies
   /**
    * Reads a non-null double value from a {@link ByteBuffer} at the supplied 
offset. This method should only be called
    * if and only if {@link #isNullableNull} for the same offset returns false.
-   *
+   * <p>
    * layout: | null (byte) | double |
-   *
+   * <p>
    * This method does not change the buffer position, limit, or mark, because 
it does not expect to own the buffer
    * given to it (i.e. buffer aggs)
    */
@@ -193,9 +200,9 @@ public class TypeStrategies
   /**
    * Write a non-null float value to a {@link ByteBuffer} at the supplied 
offset. The first byte is always cleared and
    * set to {@link TypeStrategies#IS_NOT_NULL_BYTE}, the float value is 
written in the next 4 bytes.
-   *
+   * <p>
    * layout: | null (byte) | float |
-   *
+   * <p>
    * This method does not change the buffer position, limit, or mark, because 
it does not expect to own the buffer
    * given to it (i.e. buffer aggs)
    *
@@ -211,9 +218,9 @@ public class TypeStrategies
   /**
    * Reads a non-null float value from a {@link ByteBuffer} at the supplied 
offset. This method should only be called
    * if and only if {@link #isNullableNull} for the same offset returns false.
-   *
+   * <p>
    * layout: | null (byte) | float |
-   *
+   * <p>
    * This method does not change the buffer position, limit, or mark, because 
it does not expect to own the buffer
    * given to it (i.e. buffer aggs)
    */
@@ -463,7 +470,7 @@ public class TypeStrategies
   /**
    * Read and write non-null UTF8 encoded String values. Encodes the length in 
bytes as an integer prefix followed by
    * the actual encoded value bytes.
-   *
+   * <p>
    * format: | length (int) | bytes |
    */
   public static final class StringTypeStrategy implements TypeStrategy<String>
@@ -551,7 +558,7 @@ public class TypeStrategies
    * Read and write a non-null ARRAY which is permitted to have null elements 
(all elements are always read and written
    * with a {@link NullableTypeStrategy} wrapper on the {@link TypeStrategy} 
of the
    * {@link TypeSignature#getElementType()}.
-   *
+   * <p>
    * Encodes the number of elements in the array as an integer prefix followed 
by the actual encoded value bytes of
    * each element serially.
    */
@@ -665,6 +672,7 @@ public class TypeStrategies
         return result;
       }
     }
+
     /**
      * Implements {@link Arrays#equals} but the element equality uses the 
element's type strategy
      */
@@ -697,4 +705,101 @@ public class TypeStrategies
       return Object[].class;
     }
   }
+
+  public static final class NestedDataTypeStrategy implements 
TypeStrategy<Object>
+  {
+    @Override
+    public int estimateSizeBytes(Object value)
+    {
+      return 
Objects.requireNonNull(StructuredData.wrap(value)).getSizeEstimate();
+    }
+
+    @Override
+    public Object read(ByteBuffer buffer)
+    {
+      final int len = buffer.getInt();
+      return fromByteBuffer(buffer, len);
+    }
+
+    @Nullable
+    public StructuredData fromByteBuffer(ByteBuffer buffer, int numBytes)
+    {
+      if (numBytes == 0) {
+        return null;
+      }
+
+      final byte[] bytes = new byte[numBytes];
+      buffer.get(bytes, 0, numBytes);
+      try {
+        return ColumnSerializerUtils.SMILE_MAPPER.readValue(bytes, 0, 
bytes.length, StructuredData.class);
+      }
+      catch (IOException e) {
+        throw DruidException.defensive(e, "Unable to deserialize value");
+      }
+    }
+
+    @Override
+    public boolean readRetainsBufferReference()
+    {
+      return false;
+    }
+
+    @Override
+    public int write(ByteBuffer buffer, Object value, int maxSizeBytes)
+    {
+      TypeStrategies.checkMaxSize(buffer.remaining(), maxSizeBytes, 
ColumnType.NESTED_DATA);
+      final byte[] bytes = toBytes(value);
+      final int sizeBytes = Integer.BYTES + bytes.length;
+      if (sizeBytes > maxSizeBytes) {
+        return maxSizeBytes - sizeBytes;
+      }
+      buffer.putInt(bytes.length);
+      buffer.put(bytes);
+      return sizeBytes;
+    }
+
+    @Nullable
+    public byte[] toBytes(@Nullable Object val)
+    {
+      if (val == null) {
+        return new byte[0];
+      }
+      try {
+        return ColumnSerializerUtils.SMILE_MAPPER.writeValueAsBytes(val);
+      }
+      catch (JsonProcessingException e) {
+        throw DruidException.defensive(e, "Unable to serialize value [%s]", 
val);
+      }
+    }
+
+    @Override
+    public int compare(Object o1, Object o2)
+    {
+      return StructuredData.COMPARATOR.compare(StructuredData.wrap(o1), 
StructuredData.wrap(o2));
+    }
+
+    @Override
+    public boolean groupable()
+    {
+      return true;
+    }
+
+    @Override
+    public int hashCode(Object o)
+    {
+      return Objects.hashCode(StructuredData.wrap(o));
+    }
+
+    @Override
+    public boolean equals(Object a, Object b)
+    {
+      return Objects.equals(StructuredData.wrap(a), StructuredData.wrap(b));
+    }
+
+    @Override
+    public Class<?> getClazz()
+    {
+      return StructuredData.class;
+    }
+  }
 }
diff --git 
a/processing/src/main/java/org/apache/druid/segment/nested/NestedDataComplexTypeSerde.java
 
b/processing/src/main/java/org/apache/druid/segment/nested/NestedDataComplexTypeSerde.java
index e3abfff8014..a1496dd8dcc 100644
--- 
a/processing/src/main/java/org/apache/druid/segment/nested/NestedDataComplexTypeSerde.java
+++ 
b/processing/src/main/java/org/apache/druid/segment/nested/NestedDataComplexTypeSerde.java
@@ -19,11 +19,7 @@
 
 package org.apache.druid.segment.nested;
 
-import com.fasterxml.jackson.core.JsonProcessingException;
-import it.unimi.dsi.fastutil.Hash;
 import org.apache.druid.data.input.impl.DimensionSchema;
-import org.apache.druid.error.DruidException;
-import org.apache.druid.java.util.common.guava.Comparators;
 import org.apache.druid.segment.DimensionHandler;
 import org.apache.druid.segment.IndexSpec;
 import org.apache.druid.segment.NestedCommonFormatColumnHandler;
@@ -34,7 +30,7 @@ import org.apache.druid.segment.column.ColumnCapabilitiesImpl;
 import org.apache.druid.segment.column.ColumnConfig;
 import org.apache.druid.segment.column.ColumnFormat;
 import org.apache.druid.segment.column.ColumnType;
-import org.apache.druid.segment.column.ObjectStrategyComplexTypeStrategy;
+import org.apache.druid.segment.column.TypeStrategies;
 import org.apache.druid.segment.column.TypeStrategy;
 import org.apache.druid.segment.data.ObjectStrategy;
 import org.apache.druid.segment.serde.ColumnSerializerUtils;
@@ -42,7 +38,6 @@ import org.apache.druid.segment.serde.ComplexMetricExtractor;
 import org.apache.druid.segment.serde.ComplexMetricSerde;
 
 import javax.annotation.Nullable;
-import java.io.IOException;
 import java.nio.ByteBuffer;
 
 public class NestedDataComplexTypeSerde extends ComplexMetricSerde
@@ -97,131 +92,54 @@ public class NestedDataComplexTypeSerde extends 
ComplexMetricSerde
   }
 
   @Override
-  public ObjectStrategy getObjectStrategy()
+  public ObjectStrategy<Object> getObjectStrategy()
   {
     return new ObjectStrategy<>()
     {
       @Override
       public int compare(Object o1, Object o2)
       {
-        return Comparators.<StructuredData>naturalNullsFirst()
-                          .compare(StructuredData.wrap(o1), 
StructuredData.wrap(o2));
+        return TypeStrategies.NESTED.compare(o1, o2);
       }
 
       @Override
       public Class<? extends Object> getClazz()
       {
-        return StructuredData.class;
+        return TypeStrategies.NESTED.getClazz();
       }
 
       @Nullable
       @Override
-      public Object fromByteBuffer(ByteBuffer buffer, int numBytes)
+      public StructuredData fromByteBuffer(ByteBuffer buffer, int numBytes)
       {
-        return deserializeBuffer(buffer, numBytes);
+        return TypeStrategies.NESTED.fromByteBuffer(buffer, numBytes);
       }
 
       @Nullable
       @Override
       public byte[] toBytes(@Nullable Object val)
       {
-        return serializeToBytes(val);
+        return TypeStrategies.NESTED.toBytes(val);
       }
 
       @Override
       public boolean readRetainsBufferReference()
       {
-        return false;
+        return TypeStrategies.NESTED.readRetainsBufferReference();
       }
     };
   }
 
-  /**
-   * Reads numBytes from the position to the limit of the byte buffer argument 
and deserailizes it into
-   * a {@link StructuredData} object using {@link 
ColumnSerializerUtils#SMILE_MAPPER}.
-   */
-  public static StructuredData deserializeBuffer(ByteBuffer buf)
-  {
-    return deserializeBuffer(buf, buf.remaining());
-  }
-
-  /**
-   * Reads numBytes from the byte buffer argument and deserailizes it into a 
{@link StructuredData} object
-   * using {@link ColumnSerializerUtils#SMILE_MAPPER}.
-   */
-  public static StructuredData deserializeBuffer(ByteBuffer buf, int numBytes)
-  {
-    if (numBytes == 0) {
-      return null;
-    }
-
-    final byte[] bytes = new byte[numBytes];
-    buf.get(bytes, 0, numBytes);
-    return deserializeBytes(bytes);
-  }
-
-  /**
-   * Converts the bytes array into a {@link StructuredData} object using 
{@link ColumnSerializerUtils#SMILE_MAPPER}.
-   */
-  public static StructuredData deserializeBytes(byte[] bytes)
-  {
-    return deserializeBytes(bytes, 0, bytes.length);
-  }
-
-  /**
-   * Reads the bytes between offset and len from the byte array and 
deserializes a {@link StructuredData} object from
-   * it, using {@link ColumnSerializerUtils#SMILE_MAPPER}.
-   */
-  public static StructuredData deserializeBytes(byte[] bytes, int offset, int 
len)
-  {
-    if (len == 0) {
-      return null;
-    }
-    try {
-      return ColumnSerializerUtils.SMILE_MAPPER.readValue(bytes, offset, len, 
StructuredData.class);
-    }
-    catch (IOException e) {
-      throw DruidException.defensive(e, "Unable to deserialize value");
-    }
-  }
-
-  /**
-   * Returns a byte array containing the val as serialized by {@link 
ColumnSerializerUtils#SMILE_MAPPER}.
-   */
-  public static byte[] serializeToBytes(@Nullable Object val)
+  @Override
+  public TypeStrategy<Object> getTypeStrategy()
   {
-    if (val == null) {
-      return new byte[0];
-    }
-    try {
-      return ColumnSerializerUtils.SMILE_MAPPER.writeValueAsBytes(val);
-    }
-    catch (JsonProcessingException e) {
-      throw DruidException.defensive(e, "Unable to serialize value [%s]", val);
-    }
+    return TypeStrategies.NESTED;
   }
 
   @Override
-  public <T extends Comparable<T>> TypeStrategy<T> getTypeStrategy()
+  public byte[] toBytes(@Nullable Object val)
   {
-    return new ObjectStrategyComplexTypeStrategy<>(
-        getObjectStrategy(),
-        ColumnType.ofComplex(TYPE_NAME),
-        new Hash.Strategy<>()
-        {
-          @Override
-          public int hashCode(Object o)
-          {
-            return StructuredData.wrap(o).equalityHash();
-          }
-
-          @Override
-          public boolean equals(Object a, Object b)
-          {
-            return StructuredData.wrap(a).compareTo(StructuredData.wrap(b)) == 
0;
-          }
-        }
-    );
+    return getObjectStrategy().toBytes(val);
   }
 
   public static class NestedColumnFormatV4 implements ColumnFormat
diff --git 
a/processing/src/main/java/org/apache/druid/segment/nested/StructuredData.java 
b/processing/src/main/java/org/apache/druid/segment/nested/StructuredData.java
index 37fad571c33..ac593a07365 100644
--- 
a/processing/src/main/java/org/apache/druid/segment/nested/StructuredData.java
+++ 
b/processing/src/main/java/org/apache/druid/segment/nested/StructuredData.java
@@ -23,6 +23,7 @@ import com.fasterxml.jackson.annotation.JsonCreator;
 import com.fasterxml.jackson.core.JsonProcessingException;
 import com.fasterxml.jackson.databind.ObjectWriter;
 import com.fasterxml.jackson.databind.SerializationFeature;
+import com.google.common.base.Preconditions;
 import com.google.common.primitives.Longs;
 import net.jpountz.xxhash.XXHash64;
 import net.jpountz.xxhash.XXHashFactory;
@@ -45,17 +46,16 @@ public class StructuredData implements 
Comparable<StructuredData>
 
   public static final Comparator<StructuredData> COMPARATOR = 
Comparators.naturalNullsFirst();
 
-  /** SerializationFeature.ORDER_MAP_ENTRIES_BY_KEYS is required so that hash 
computations for JSON objects that
-   *  have different key orders but are otherwise equivalent will be 
consistent. See
-   *  {@link StructuredDataTest#testCompareToWithDifferentJSONOrder()} for an 
example
+  /**
+   * SerializationFeature.ORDER_MAP_ENTRIES_BY_KEYS is required so that hash 
computations for JSON objects that
+   * have different key orders but are otherwise equivalent will be consistent.
    */
   private static final ObjectWriter WRITER = 
ColumnSerializerUtils.SMILE_MAPPER.writer(SerializationFeature.ORDER_MAP_ENTRIES_BY_KEYS);
 
-  private static long computeHash(StructuredData data)
+  private static byte[] serialized(StructuredData data)
   {
     try {
-      final byte[] bytes = WRITER.writeValueAsBytes(data.value);
-      return HASH_FUNCTION.hash(bytes, 0, bytes.length, SEED);
+      return WRITER.writeValueAsBytes(data.value);
     }
     catch (JsonProcessingException e) {
       throw new RuntimeException(e);
@@ -86,13 +86,17 @@ public class StructuredData implements 
Comparable<StructuredData>
     return new StructuredData(value);
   }
 
-
   private final Object value;
   private volatile boolean hashInitialized = false;
   private volatile long hashValue;
+  private volatile int sizeEstimate = -1;
   private final LongSupplier hash = () -> {
     if (!hashInitialized) {
-      hashValue = computeHash(this);
+      final byte[] bytes = serialized(this);
+      // compute the size estimate, note it's not an accurate representation 
of the heap size
+      sizeEstimate = bytes.length + Integer.BYTES; // add 4 bytes for the 
length prefix
+      // compute the hash, we might use it for comparison later
+      hashValue = HASH_FUNCTION.hash(bytes, 0, bytes.length, SEED);
       hashInitialized = true;
     }
     return hashValue;
@@ -133,12 +137,23 @@ public class StructuredData implements 
Comparable<StructuredData>
     return (Number) value;
   }
 
+  @SuppressWarnings("ReturnValueIgnored")
+  public int getSizeEstimate()
+  {
+    if (sizeEstimate < 0) {
+      hash.getAsLong(); // trigger hash computation which also sets 
sizeEstimate
+    }
+    Preconditions.checkState(sizeEstimate >= 0, "sizeEstimate not 
initialized");
+    return sizeEstimate;
+  }
+
   @Override
   public int compareTo(StructuredData o)
   {
     if (this.equals(o)) {
       return 0;
     }
+
     if (isNull()) {
       return -1;
     }
@@ -191,12 +206,6 @@ public class StructuredData implements 
Comparable<StructuredData>
 
   @Override
   public int hashCode()
-  {
-    return Objects.hash(value);
-  }
-
-  // hashCode that relies on the object equality. Translates the hashcode to 
an integer as well
-  public int equalityHash()
   {
     return Longs.hashCode(hash.getAsLong());
   }
diff --git 
a/processing/src/test/java/org/apache/druid/segment/column/TypeStrategiesTest.java
 
b/processing/src/test/java/org/apache/druid/segment/column/TypeStrategiesTest.java
index 670da7a5cf6..857a6d6a854 100644
--- 
a/processing/src/test/java/org/apache/druid/segment/column/TypeStrategiesTest.java
+++ 
b/processing/src/test/java/org/apache/druid/segment/column/TypeStrategiesTest.java
@@ -21,10 +21,12 @@ package org.apache.druid.segment.column;
 
 import com.google.common.collect.Ordering;
 import com.google.common.primitives.Longs;
+import org.apache.druid.guice.BuiltInTypesModule;
 import org.apache.druid.java.util.common.IAE;
 import org.apache.druid.java.util.common.Pair;
 import org.apache.druid.java.util.common.guava.Comparators;
 import org.apache.druid.math.expr.ExpressionType;
+import org.apache.druid.segment.nested.StructuredData;
 import org.junit.Assert;
 import org.junit.BeforeClass;
 import org.junit.Rule;
@@ -34,6 +36,8 @@ import org.junit.rules.ExpectedException;
 import javax.annotation.Nullable;
 import java.nio.ByteBuffer;
 import java.util.Arrays;
+import java.util.HashMap;
+import java.util.Map;
 
 import static org.junit.Assert.assertNull;
 
@@ -49,6 +53,7 @@ public class TypeStrategiesTest
   @BeforeClass
   public static void setup()
   {
+    BuiltInTypesModule.registerHandlersAndSerde();
     
TypeStrategies.registerComplex(NULLABLE_TEST_PAIR_TYPE.getComplexTypeName(), 
new NullableLongPairTypeStrategy());
   }
 
@@ -331,6 +336,25 @@ public class TypeStrategiesTest
     );
   }
 
+  @Test
+  public void testJsonComparator()
+  {
+    TypeStrategy<StructuredData> strategy = 
ColumnType.NESTED_DATA.getStrategy();
+    Assert.assertEquals(-1, strategy.compare(StructuredData.wrap(null), 
StructuredData.wrap(Map.of("key", "val"))));
+
+    NullableTypeStrategy<StructuredData> nullableTypeStrategy = 
ColumnType.NESTED_DATA.getNullableStrategy();
+    Assert.assertEquals(-1, nullableTypeStrategy.compare(null, 
StructuredData.wrap(Map.of("key", "val"))));
+    Assert.assertEquals(0, nullableTypeStrategy.compare(
+        StructuredData.wrap(Map.of("key1", Map.of("sub-key1", "sub-val1"), 
"key2", "val2")),
+        StructuredData.wrap(Map.of("key1", Map.of("sub-key1", "sub-val1"), 
"key2", "val2"))
+    ));
+    // hash value is computed based on serialized bytes
+    Assert.assertEquals(-1, nullableTypeStrategy.compare(
+        StructuredData.wrap(Map.of("key1", Map.of("sub-key1", 
"sub-val1-different"), "key2", "val2")),
+        StructuredData.wrap(Map.of("key1", Map.of("sub-key1", "sub-val1"), 
"key2", "val2"))
+    ));
+  }
+
   @Test
   public void testNulls()
   {
@@ -455,6 +479,17 @@ public class TypeStrategiesTest
     assertStrategy(strategy, new NullableLongPair(1234L, null));
   }
 
+  @Test
+  public void testComplexJsonTypeStrategy()
+  {
+    final TypeStrategy strategy = 
TypeStrategies.getComplex(ColumnType.NESTED_DATA.getComplexTypeName());
+    Map<String, Object> nested = new HashMap<>();
+    nested.put("key1", "val");
+    assertStrategy(strategy, StructuredData.wrap(nested));
+    nested.put("key2", null);
+    assertStrategy(strategy, StructuredData.wrap(nested));
+  }
+
   @Test
   public void testArrayTypeStrategy()
   {
diff --git 
a/processing/src/test/java/org/apache/druid/segment/nested/StructuredDataTest.java
 
b/processing/src/test/java/org/apache/druid/segment/nested/StructuredDataTest.java
index 1642107a6b1..b53746cbd09 100644
--- 
a/processing/src/test/java/org/apache/druid/segment/nested/StructuredDataTest.java
+++ 
b/processing/src/test/java/org/apache/druid/segment/nested/StructuredDataTest.java
@@ -20,7 +20,6 @@
 package org.apache.druid.segment.nested;
 
 import com.google.common.collect.ImmutableMap;
-import nl.jqno.equalsverifier.EqualsVerifier;
 import org.junit.Assert;
 import org.junit.Test;
 
@@ -106,10 +105,4 @@ public class StructuredDataTest
     Assert.assertEquals(1, sd1.compareTo(sd2));
     Assert.assertEquals(0, sd0.compareTo(sd1));
   }
-
-  @Test
-  public void testEqualsAndHashcode()
-  {
-    
EqualsVerifier.forClass(StructuredData.class).withIgnoredFields("hashInitialized",
 "hashValue", "hash").usingGetClass().verify();
-  }
 }


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

(druid) branch master updated: Add size check in DictionaryBuildingSingleValuedRowBasedKeySerdeHelper in putToKeyBuffer (#18541)

Reply via email to