This is an automated email from the ASF dual-hosted git repository.

gianm pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/druid.git


The following commit(s) were added to refs/heads/master by this push:
     new 953b505d57e refactor: add ColumnFormat implementation for string columns (#19410)
953b505d57e is described below

commit 953b505d57ee3a124999706580118fc5f3a09e64
Author: Jay Kanakiya <[email protected]>
AuthorDate: Tue May 19 19:50:00 2026 -0700

    refactor: add ColumnFormat implementation for string columns (#19410)
---
 .../druid/indexing/common/task/CompactionTask.java |   6 +-
 .../data/input/impl/StringDimensionSchema.java     |   2 +-
 .../druid/segment/StringDimensionHandler.java      |  23 +++-
 .../druid/segment/StringDimensionIndexer.java      |  32 +++++
 .../druid/segment/StringDimensionMergerV9.java     |  10 +-
 .../segment/column/CapabilitiesBasedFormat.java    |  11 ++
 .../StringDictionaryEncodedColumnFormat.java       | 150 +++++++++++++++++++++
 .../serde/DictionaryEncodedColumnPartSerde.java    |  36 ++++-
 .../druid/segment/StringDimensionIndexerTest.java  |  23 ++++
 .../StringDictionaryEncodedColumnFormatTest.java   |  98 ++++++++++++++
 .../DictionaryEncodedColumnPartSerdeTest.java      |  26 ++++
 11 files changed, 409 insertions(+), 8 deletions(-)

diff --git a/indexing-service/src/main/java/org/apache/druid/indexing/common/task/CompactionTask.java b/indexing-service/src/main/java/org/apache/druid/indexing/common/task/CompactionTask.java
index 82310272b36..5804507f5d5 100644
--- a/indexing-service/src/main/java/org/apache/druid/indexing/common/task/CompactionTask.java
+++ b/indexing-service/src/main/java/org/apache/druid/indexing/common/task/CompactionTask.java
@@ -1180,7 +1180,8 @@ public class CompactionTask extends 
AbstractBatchIndexTask implements PendingSeg
               schema = new StringDimensionSchema(
                   schema.getName(),
                   DimensionSchema.MultiValueHandling.ARRAY,
-                  schema.hasBitmapIndex()
+                  schema.hasBitmapIndex(),
+                  ((StringDimensionSchema) schema).getColumnFormatSpec()
               );
             }
             dimensionSchemaMap.put(
@@ -1258,7 +1259,8 @@ public class CompactionTask extends 
AbstractBatchIndexTask implements PendingSeg
                     new StringDimensionSchema(
                         columnSchema.getName(),
                         DimensionSchema.MultiValueHandling.ARRAY,
-                        columnSchema.hasBitmapIndex()
+                        columnSchema.hasBitmapIndex(),
+                        ((StringDimensionSchema) 
columnSchema).getColumnFormatSpec()
                     )
                 );
               } else {
diff --git a/processing/src/main/java/org/apache/druid/data/input/impl/StringDimensionSchema.java b/processing/src/main/java/org/apache/druid/data/input/impl/StringDimensionSchema.java
index 20daa347664..860f5c59354 100644
--- a/processing/src/main/java/org/apache/druid/data/input/impl/StringDimensionSchema.java
+++ b/processing/src/main/java/org/apache/druid/data/input/impl/StringDimensionSchema.java
@@ -130,7 +130,7 @@ public class StringDimensionSchema extends DimensionSchema
       }
       maxStringLength = columnFormatSpec.getMaxStringLength();
     }
-    return new StringDimensionHandler(getName(), mvh, bitmap, false, 
maxStringLength);
+    return new StringDimensionHandler(getName(), mvh, bitmap, false, 
maxStringLength, columnFormatSpec);
   }
 
   @Override
diff --git a/processing/src/main/java/org/apache/druid/segment/StringDimensionHandler.java b/processing/src/main/java/org/apache/druid/segment/StringDimensionHandler.java
index 8deb4aca0ed..ee39aca79d8 100644
--- a/processing/src/main/java/org/apache/druid/segment/StringDimensionHandler.java
+++ b/processing/src/main/java/org/apache/druid/segment/StringDimensionHandler.java
@@ -108,6 +108,8 @@ public class StringDimensionHandler implements 
DimensionHandler<Integer, int[],
   private final boolean hasSpatialIndexes;
   @Nullable
   private final Integer maxStringLength;
+  @Nullable
+  private final StringColumnFormatSpec columnFormatSpec;
 
   public StringDimensionHandler(
       String dimensionName,
@@ -126,12 +128,25 @@ public class StringDimensionHandler implements 
DimensionHandler<Integer, int[],
       boolean hasSpatialIndexes,
       @Nullable Integer maxStringLength
   )
+  {
+    this(dimensionName, multiValueHandling, hasBitmapIndexes, 
hasSpatialIndexes, maxStringLength, null);
+  }
+
+  public StringDimensionHandler(
+      String dimensionName,
+      MultiValueHandling multiValueHandling,
+      boolean hasBitmapIndexes,
+      boolean hasSpatialIndexes,
+      @Nullable Integer maxStringLength,
+      @Nullable StringColumnFormatSpec columnFormatSpec
+  )
   {
     this.dimensionName = dimensionName;
     this.multiValueHandling = multiValueHandling;
     this.hasBitmapIndexes = hasBitmapIndexes;
     this.hasSpatialIndexes = hasSpatialIndexes;
     this.maxStringLength = maxStringLength;
+    this.columnFormatSpec = columnFormatSpec;
   }
 
   @Override
@@ -146,6 +161,9 @@ public class StringDimensionHandler implements 
DimensionHandler<Integer, int[],
     if (hasSpatialIndexes) {
       return new NewSpatialDimensionSchema(dimensionName, 
Collections.singletonList(dimensionName));
     }
+    if (columnFormatSpec != null) {
+      return new StringDimensionSchema(dimensionName, multiValueHandling, 
hasBitmapIndexes, columnFormatSpec);
+    }
     return new StringDimensionSchema(dimensionName, multiValueHandling, 
hasBitmapIndexes);
   }
 
@@ -176,7 +194,7 @@ public class StringDimensionHandler implements 
DimensionHandler<Integer, int[],
   @Override
   public DimensionIndexer<Integer, int[], String> makeIndexer()
   {
-    return new StringDimensionIndexer(multiValueHandling, hasBitmapIndexes, 
hasSpatialIndexes, maxStringLength);
+    return new StringDimensionIndexer(multiValueHandling, hasBitmapIndexes, 
hasSpatialIndexes, maxStringLength, columnFormatSpec);
   }
 
   @Override
@@ -207,7 +225,8 @@ public class StringDimensionHandler implements 
DimensionHandler<Integer, int[],
         capabilities,
         progress,
         segmentBaseDir,
-        closer
+        closer,
+        columnFormatSpec
     );
   }
 }
diff --git a/processing/src/main/java/org/apache/druid/segment/StringDimensionIndexer.java b/processing/src/main/java/org/apache/druid/segment/StringDimensionIndexer.java
index 8c7a59e2c24..da8dccb4287 100644
--- a/processing/src/main/java/org/apache/druid/segment/StringDimensionIndexer.java
+++ b/processing/src/main/java/org/apache/druid/segment/StringDimensionIndexer.java
@@ -38,9 +38,12 @@ import org.apache.druid.query.filter.DruidPredicateMatch;
 import org.apache.druid.query.filter.StringPredicateDruidPredicateFactory;
 import org.apache.druid.query.filter.ValueMatcher;
 import org.apache.druid.query.monomorphicprocessing.RuntimeShapeInspector;
+import org.apache.druid.segment.column.CapabilitiesBasedFormat;
 import org.apache.druid.segment.column.ColumnCapabilities;
 import org.apache.druid.segment.column.ColumnCapabilitiesImpl;
+import org.apache.druid.segment.column.ColumnFormat;
 import org.apache.druid.segment.column.ColumnType;
+import org.apache.druid.segment.column.StringDictionaryEncodedColumnFormat;
 import org.apache.druid.segment.data.ArrayBasedIndexedInts;
 import org.apache.druid.segment.data.IndexedInts;
 import org.apache.druid.segment.incremental.IncrementalIndex;
@@ -60,6 +63,8 @@ public class StringDimensionIndexer extends 
DictionaryEncodedColumnIndexer<int[]
   private final boolean hasSpatialIndexes;
   @Nullable
   private final Integer maxStringLength;
+  @Nullable
+  private final StringColumnFormatSpec columnFormatSpec;
   private volatile boolean hasMultipleValues = false;
 
   public StringDimensionIndexer(
@@ -77,12 +82,39 @@ public class StringDimensionIndexer extends 
DictionaryEncodedColumnIndexer<int[]
       boolean hasSpatialIndexes,
       @Nullable Integer maxStringLength
   )
+  {
+    this(multiValueHandling, hasBitmapIndexes, hasSpatialIndexes, 
maxStringLength, null);
+  }
+
+  public StringDimensionIndexer(
+      @Nullable MultiValueHandling multiValueHandling,
+      boolean hasBitmapIndexes,
+      boolean hasSpatialIndexes,
+      @Nullable Integer maxStringLength,
+      @Nullable StringColumnFormatSpec columnFormatSpec
+  )
   {
     super(new StringDimensionDictionary());
     this.multiValueHandling = multiValueHandling == null ? 
MultiValueHandling.ofDefault() : multiValueHandling;
     this.hasBitmapIndexes = hasBitmapIndexes;
     this.hasSpatialIndexes = hasSpatialIndexes;
     this.maxStringLength = maxStringLength;
+    this.columnFormatSpec = columnFormatSpec;
+  }
+
+  @Override
+  public ColumnFormat getFormat()
+  {
+    if (columnFormatSpec != null) {
+      return new StringDictionaryEncodedColumnFormat(
+          hasMultipleValues,
+          dimLookup.getIdForNull() != DimensionDictionary.ABSENT_VALUE_ID,
+          hasBitmapIndexes,
+          hasSpatialIndexes,
+          columnFormatSpec
+      );
+    }
+    return CapabilitiesBasedFormat.forColumnIndexer(getColumnCapabilities());
   }
 
   /**
diff --git a/processing/src/main/java/org/apache/druid/segment/StringDimensionMergerV9.java b/processing/src/main/java/org/apache/druid/segment/StringDimensionMergerV9.java
index cc731e8e618..e5edde897a8 100644
--- a/processing/src/main/java/org/apache/druid/segment/StringDimensionMergerV9.java
+++ b/processing/src/main/java/org/apache/druid/segment/StringDimensionMergerV9.java
@@ -63,6 +63,8 @@ public class StringDimensionMergerV9 extends 
DictionaryEncodedColumnMerger<Strin
 
   @Nullable
   private ByteBufferWriter<ImmutableRTree> spatialWriter;
+  @Nullable
+  private final StringColumnFormatSpec columnFormatSpec;
 
   /**
    * @param dimensionName         column name
@@ -76,6 +78,7 @@ public class StringDimensionMergerV9 extends 
DictionaryEncodedColumnMerger<Strin
    * @param progress              hook to update status of what this merger is 
doing during segment persist and merging
    * @param closer                resource closer if this merger needs to 
attach any closables that should be cleaned up
    *                              when the segment is finished writing
+   * @param columnFormatSpec      string column format spec to persist in 
segment metadata
    */
   public StringDimensionMergerV9(
       String dimensionName,
@@ -85,10 +88,12 @@ public class StringDimensionMergerV9 extends 
DictionaryEncodedColumnMerger<Strin
       ColumnCapabilities capabilities,
       ProgressIndicator progress,
       File segmentBaseDir,
-      Closer closer
+      Closer closer,
+      @Nullable StringColumnFormatSpec columnFormatSpec
   )
   {
     super(dimensionName, outputName, indexSpec, segmentWriteOutMedium, 
capabilities, progress, segmentBaseDir, closer);
+    this.columnFormatSpec = columnFormatSpec;
   }
 
   @Override
@@ -156,7 +161,8 @@ public class StringDimensionMergerV9 extends 
DictionaryEncodedColumnMerger<Strin
         .withBitmapSerdeFactory(bitmapSerdeFactory)
         .withBitmapIndex(bitmapWriter)
         .withSpatialIndex(spatialWriter)
-        .withByteOrder(IndexIO.BYTE_ORDER);
+        .withByteOrder(IndexIO.BYTE_ORDER)
+        .withColumnFormatSpec(columnFormatSpec);
 
     if (writeDictionary) {
       partBuilder = partBuilder.withDictionary(dictionaryWriter);
diff --git a/processing/src/main/java/org/apache/druid/segment/column/CapabilitiesBasedFormat.java b/processing/src/main/java/org/apache/druid/segment/column/CapabilitiesBasedFormat.java
index 0faf7970121..3831870e01e 100644
--- a/processing/src/main/java/org/apache/druid/segment/column/CapabilitiesBasedFormat.java
+++ b/processing/src/main/java/org/apache/druid/segment/column/CapabilitiesBasedFormat.java
@@ -102,6 +102,17 @@ public class CapabilitiesBasedFormat implements 
ColumnFormat
       return this;
     }
 
+    if (otherFormat instanceof StringDictionaryEncodedColumnFormat) {
+      if (!this.capabilities.is(ValueType.STRING)) {
+        throw new ISE(
+            "Cannot merge columns of type[%s] and [%s]",
+            this.capabilities.asTypeString(),
+            otherFormat.getLogicalType()
+        );
+      }
+      return otherFormat.merge(this);
+    }
+
     ColumnCapabilitiesImpl merged = 
ColumnCapabilitiesImpl.copyOf(this.toColumnCapabilities());
     ColumnCapabilitiesImpl otherSnapshot = 
ColumnCapabilitiesImpl.copyOf(otherFormat.toColumnCapabilities());
     final String mergedType = merged.getType() == null ? null : 
merged.asTypeString();
diff --git a/processing/src/main/java/org/apache/druid/segment/column/StringDictionaryEncodedColumnFormat.java b/processing/src/main/java/org/apache/druid/segment/column/StringDictionaryEncodedColumnFormat.java
new file mode 100644
index 00000000000..7d0eec0eeee
--- /dev/null
+++ b/processing/src/main/java/org/apache/druid/segment/column/StringDictionaryEncodedColumnFormat.java
@@ -0,0 +1,150 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.druid.segment.column;
+
+import org.apache.druid.data.input.impl.DimensionSchema;
+import org.apache.druid.data.input.impl.DimensionSchema.MultiValueHandling;
+import org.apache.druid.data.input.impl.NewSpatialDimensionSchema;
+import org.apache.druid.data.input.impl.StringDimensionSchema;
+import org.apache.druid.java.util.common.ISE;
+import org.apache.druid.segment.DimensionHandler;
+import org.apache.druid.segment.StringColumnFormatSpec;
+import org.apache.druid.segment.StringDimensionHandler;
+
+import javax.annotation.Nullable;
+import java.util.Collections;
+
+public class StringDictionaryEncodedColumnFormat implements ColumnFormat
+{
+  private final boolean hasMultipleValues;
+  private final boolean hasNulls;
+  private final boolean hasBitmapIndexes;
+  private final boolean hasSpatialIndexes;
+  @Nullable
+  private final StringColumnFormatSpec columnFormatSpec;
+
+  public StringDictionaryEncodedColumnFormat(
+      boolean hasMultipleValues,
+      boolean hasNulls,
+      boolean hasBitmapIndexes,
+      boolean hasSpatialIndexes,
+      @Nullable StringColumnFormatSpec columnFormatSpec
+  )
+  {
+    this.hasMultipleValues = hasMultipleValues;
+    this.hasNulls = hasNulls;
+    this.hasBitmapIndexes = hasBitmapIndexes;
+    this.hasSpatialIndexes = hasSpatialIndexes;
+    this.columnFormatSpec = columnFormatSpec;
+  }
+
+  @Override
+  public ColumnType getLogicalType()
+  {
+    return ColumnType.STRING;
+  }
+
+  @Override
+  public ColumnCapabilities toColumnCapabilities()
+  {
+    return ColumnCapabilitiesImpl.createDefault()
+                                 .setType(ColumnType.STRING)
+                                 .setDictionaryEncoded(true)
+                                 .setDictionaryValuesSorted(true)
+                                 .setDictionaryValuesUnique(true)
+                                 .setHasMultipleValues(hasMultipleValues)
+                                 .setHasNulls(hasNulls)
+                                 .setHasBitmapIndexes(hasBitmapIndexes)
+                                 .setHasSpatialIndexes(hasSpatialIndexes);
+  }
+
+  @Override
+  public DimensionHandler getColumnHandler(String columnName)
+  {
+    Integer maxStringLength = columnFormatSpec != null ? 
columnFormatSpec.getMaxStringLength() : null;
+    MultiValueHandling mvh = (columnFormatSpec != null && 
columnFormatSpec.getMultiValueHandling() != null)
+        ? columnFormatSpec.getMultiValueHandling()
+        : MultiValueHandling.ofDefault();
+    return new StringDimensionHandler(
+        columnName,
+        mvh,
+        hasBitmapIndexes,
+        hasSpatialIndexes,
+        maxStringLength,
+        columnFormatSpec
+    );
+  }
+
+  @Override
+  public DimensionSchema getColumnSchema(String columnName)
+  {
+    if (hasSpatialIndexes) {
+      return new NewSpatialDimensionSchema(columnName, 
Collections.singletonList(columnName));
+    }
+    return new StringDimensionSchema(columnName, null, hasBitmapIndexes, 
columnFormatSpec);
+  }
+
+  @Override
+  public ColumnFormat merge(@Nullable ColumnFormat otherFormat)
+  {
+    if (otherFormat == null) {
+      return this;
+    }
+
+    if (otherFormat instanceof StringDictionaryEncodedColumnFormat) {
+      final StringDictionaryEncodedColumnFormat other = 
(StringDictionaryEncodedColumnFormat) otherFormat;
+      return new StringDictionaryEncodedColumnFormat(
+          hasMultipleValues || other.hasMultipleValues,
+          hasNulls || other.hasNulls,
+          hasBitmapIndexes && other.hasBitmapIndexes,
+          hasSpatialIndexes || other.hasSpatialIndexes,
+          columnFormatSpec != null ? columnFormatSpec : other.columnFormatSpec
+      );
+    }
+
+    if (otherFormat instanceof CapabilitiesBasedFormat) {
+      final ColumnCapabilities otherCaps = otherFormat.toColumnCapabilities();
+      if (!otherCaps.is(ValueType.STRING)) {
+        throw new ISE(
+            "Cannot merge columns of type[%s] and format[%s] with type[%s] and 
format[%s]",
+            ColumnType.STRING,
+            this.getClass().getName(),
+            otherFormat.getLogicalType(),
+            otherFormat.getClass().getName()
+        );
+      }
+      return new StringDictionaryEncodedColumnFormat(
+          hasMultipleValues || otherCaps.hasMultipleValues().isMaybeTrue(),
+          hasNulls || otherCaps.hasNulls().isMaybeTrue(),
+          hasBitmapIndexes && otherCaps.hasBitmapIndexes(),
+          hasSpatialIndexes || otherCaps.hasSpatialIndexes(),
+          columnFormatSpec
+      );
+    }
+
+    throw new ISE(
+        "Cannot merge columns of type[%s] and format[%s] and with [%s] and 
[%s]",
+        ColumnType.STRING,
+        this.getClass().getName(),
+        otherFormat.getLogicalType(),
+        otherFormat.getClass().getName()
+    );
+  }
+}
diff --git a/processing/src/main/java/org/apache/druid/segment/serde/DictionaryEncodedColumnPartSerde.java b/processing/src/main/java/org/apache/druid/segment/serde/DictionaryEncodedColumnPartSerde.java
index 504e474deb6..6015613062a 100644
--- a/processing/src/main/java/org/apache/druid/segment/serde/DictionaryEncodedColumnPartSerde.java
+++ b/processing/src/main/java/org/apache/druid/segment/serde/DictionaryEncodedColumnPartSerde.java
@@ -20,6 +20,7 @@
 package org.apache.druid.segment.serde;
 
 import com.fasterxml.jackson.annotation.JsonCreator;
+import com.fasterxml.jackson.annotation.JsonInclude;
 import com.fasterxml.jackson.annotation.JsonProperty;
 import com.google.common.base.Preconditions;
 import com.google.common.base.Supplier;
@@ -28,10 +29,12 @@ import org.apache.druid.collections.bitmap.ImmutableBitmap;
 import org.apache.druid.collections.spatial.ImmutableRTree;
 import org.apache.druid.io.Channels;
 import org.apache.druid.java.util.common.IAE;
+import org.apache.druid.segment.StringColumnFormatSpec;
 import org.apache.druid.segment.column.ColumnBuilder;
 import org.apache.druid.segment.column.ColumnConfig;
 import org.apache.druid.segment.column.ColumnHolder;
 import org.apache.druid.segment.column.SelectableColumn;
+import org.apache.druid.segment.column.StringDictionaryEncodedColumnFormat;
 import org.apache.druid.segment.column.StringEncodingStrategies;
 import org.apache.druid.segment.column.ValueType;
 import org.apache.druid.segment.data.BitmapSerde;
@@ -106,28 +109,34 @@ public class DictionaryEncodedColumnPartSerde implements 
ColumnPartSerde
   @JsonCreator
   public static DictionaryEncodedColumnPartSerde createDeserializer(
       @JsonProperty("bitmapSerdeFactory") @Nullable BitmapSerdeFactory 
bitmapSerdeFactory,
-      @NotNull @JsonProperty("byteOrder") ByteOrder byteOrder
+      @NotNull @JsonProperty("byteOrder") ByteOrder byteOrder,
+      @JsonProperty("columnFormatSpec") @Nullable StringColumnFormatSpec 
columnFormatSpec
   )
   {
     return new DictionaryEncodedColumnPartSerde(
         byteOrder,
         bitmapSerdeFactory != null ? bitmapSerdeFactory : new 
BitmapSerde.LegacyBitmapSerdeFactory(),
+        columnFormatSpec,
         null
     );
   }
 
   private final ByteOrder byteOrder;
   private final BitmapSerdeFactory bitmapSerdeFactory;
+  @Nullable
+  private final StringColumnFormatSpec columnFormatSpec;
   private final Serializer serializer;
 
   private DictionaryEncodedColumnPartSerde(
       ByteOrder byteOrder,
       BitmapSerdeFactory bitmapSerdeFactory,
+      @Nullable StringColumnFormatSpec columnFormatSpec,
       @Nullable Serializer serializer
   )
   {
     this.byteOrder = byteOrder;
     this.bitmapSerdeFactory = bitmapSerdeFactory;
+    this.columnFormatSpec = columnFormatSpec;
     this.serializer = serializer;
   }
 
@@ -143,6 +152,14 @@ public class DictionaryEncodedColumnPartSerde implements 
ColumnPartSerde
     return byteOrder;
   }
 
+  @Nullable
+  @JsonProperty
+  @JsonInclude(JsonInclude.Include.NON_NULL)
+  public StringColumnFormatSpec getColumnFormatSpec()
+  {
+    return columnFormatSpec;
+  }
+
   public static SerializerBuilder serializerBuilder()
   {
     return new SerializerBuilder();
@@ -166,6 +183,8 @@ public class DictionaryEncodedColumnPartSerde implements 
ColumnPartSerde
     private ByteBufferWriter<ImmutableRTree> spatialIndexWriter = null;
     @Nullable
     private ByteOrder byteOrder = null;
+    @Nullable
+    private StringColumnFormatSpec columnFormatSpec = null;
 
     public SerializerBuilder withDictionary(DictionaryWriter<String> 
dictionaryWriter)
     {
@@ -203,6 +222,12 @@ public class DictionaryEncodedColumnPartSerde implements 
ColumnPartSerde
       return this;
     }
 
+    public SerializerBuilder withColumnFormatSpec(@Nullable 
StringColumnFormatSpec columnFormatSpec)
+    {
+      this.columnFormatSpec = columnFormatSpec;
+      return this;
+    }
+
     public SerializerBuilder withValue(ColumnarIntsSerializer valueWriter, 
boolean hasMultiValue, boolean compressed)
     {
       this.valueWriter = valueWriter;
@@ -234,6 +259,7 @@ public class DictionaryEncodedColumnPartSerde implements 
ColumnPartSerde
       return new DictionaryEncodedColumnPartSerde(
           byteOrder,
           bitmapSerdeFactory,
+          columnFormatSpec,
           new Serializer()
           {
             @Override
@@ -380,6 +406,14 @@ public class DictionaryEncodedColumnPartSerde implements 
ColumnPartSerde
               rSpatialIndex != null
           );
         }
+
+        builder.setColumnFormat(new StringDictionaryEncodedColumnFormat(
+            hasMultipleValues,
+            hasNulls,
+            rBitmaps != null,
+            rSpatialIndex != null,
+            columnFormatSpec
+        ));
       }
 
       private WritableSupplier<ColumnarInts> readSingleValuedColumn(
diff --git a/processing/src/test/java/org/apache/druid/segment/StringDimensionIndexerTest.java b/processing/src/test/java/org/apache/druid/segment/StringDimensionIndexerTest.java
index 77b386fc589..ba6a2618f86 100644
--- a/processing/src/test/java/org/apache/druid/segment/StringDimensionIndexerTest.java
+++ b/processing/src/test/java/org/apache/druid/segment/StringDimensionIndexerTest.java
@@ -21,6 +21,7 @@ package org.apache.druid.segment;
 
 import org.apache.druid.data.input.impl.DimensionSchema;
 import org.apache.druid.java.util.common.StringUtils;
+import org.apache.druid.segment.column.StringDictionaryEncodedColumnFormat;
 import org.apache.druid.testing.InitializedNullHandlingTest;
 import org.junit.jupiter.api.Assertions;
 import org.junit.jupiter.api.Test;
@@ -198,6 +199,28 @@ public class StringDimensionIndexerTest extends 
InitializedNullHandlingTest
     );
   }
 
+  @Test
+  public void testGetFormatHasNullsAfterProcessingNull()
+  {
+    final StringColumnFormatSpec spec = new StringColumnFormatSpec(null, null, 
100);
+    final StringDimensionIndexer indexer = new StringDimensionIndexer(
+        DimensionSchema.MultiValueHandling.SORTED_ARRAY,
+        true,
+        false,
+        100,
+        spec
+    );
+
+    StringDictionaryEncodedColumnFormat format =
+        (StringDictionaryEncodedColumnFormat) indexer.getFormat();
+    Assertions.assertFalse(format.toColumnCapabilities().hasNulls().isTrue());
+
+    indexer.processRowValsToUnsortedEncodedKeyComponent(null, false);
+
+    format = (StringDictionaryEncodedColumnFormat) indexer.getFormat();
+    Assertions.assertTrue(format.toColumnCapabilities().hasNulls().isTrue());
+  }
+
   private long verifyEncodedValues(
       StringDimensionIndexer indexer,
       Object dimensionValues,
diff --git a/processing/src/test/java/org/apache/druid/segment/column/StringDictionaryEncodedColumnFormatTest.java b/processing/src/test/java/org/apache/druid/segment/column/StringDictionaryEncodedColumnFormatTest.java
new file mode 100644
index 00000000000..3fbb3c4711f
--- /dev/null
+++ b/processing/src/test/java/org/apache/druid/segment/column/StringDictionaryEncodedColumnFormatTest.java
@@ -0,0 +1,98 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.druid.segment.column;
+
+import org.apache.druid.data.input.impl.DimensionSchema;
+import org.apache.druid.data.input.impl.StringDimensionSchema;
+import org.apache.druid.segment.StringColumnFormatSpec;
+import org.junit.jupiter.api.Assertions;
+import org.junit.jupiter.api.Test;
+
+public class StringDictionaryEncodedColumnFormatTest
+{
+  private static final StringColumnFormatSpec SPEC = 
StringColumnFormatSpec.builder()
+      .setMaxStringLength(50)
+      .build();
+
+  @Test
+  public void testGetColumnSchemaWithSpec()
+  {
+    StringDictionaryEncodedColumnFormat format = new 
StringDictionaryEncodedColumnFormat(
+        false,
+        false,
+        true,
+        false,
+        SPEC
+    );
+    DimensionSchema schema = format.getColumnSchema("city");
+    StringDimensionSchema stringSchema = (StringDimensionSchema) schema;
+    Assertions.assertNotNull(stringSchema.getColumnFormatSpec());
+    Assertions.assertEquals(Integer.valueOf(50), 
stringSchema.getColumnFormatSpec().getMaxStringLength());
+  }
+
+  @Test
+  public void testMergeTwoFormatsKeepsSpec()
+  {
+    StringDictionaryEncodedColumnFormat formatWithSpec = new 
StringDictionaryEncodedColumnFormat(
+        false,
+        false,
+        true,
+        false,
+        SPEC
+    );
+    StringDictionaryEncodedColumnFormat formatWithoutSpec = new 
StringDictionaryEncodedColumnFormat(
+        false,
+        true,
+        true,
+        false,
+        null
+    );
+    ColumnFormat merged = formatWithSpec.merge(formatWithoutSpec);
+
+    DimensionSchema schema = merged.getColumnSchema("city");
+    Assertions.assertEquals(Integer.valueOf(50), ((StringDimensionSchema) 
schema).getColumnFormatSpec().getMaxStringLength());
+  }
+
+  @Test
+  public void testCapabilitiesBasedFormatMergesDelegatesToStringFormat()
+  {
+    StringDictionaryEncodedColumnFormat formatWithSpec = new 
StringDictionaryEncodedColumnFormat(
+        false,
+        false,
+        true,
+        false,
+        SPEC
+    );
+    ColumnCapabilities caps = ColumnCapabilitiesImpl.createDefault()
+        .setType(ColumnType.STRING)
+        .setDictionaryEncoded(true)
+        .setDictionaryValuesSorted(true)
+        .setDictionaryValuesUnique(true)
+        .setHasMultipleValues(false)
+        .setHasNulls(false)
+        .setHasBitmapIndexes(true);
+    CapabilitiesBasedFormat capFormat = new CapabilitiesBasedFormat(caps);
+
+    ColumnFormat merged = capFormat.merge(formatWithSpec);
+    Assertions.assertInstanceOf(StringDictionaryEncodedColumnFormat.class, 
merged);
+    DimensionSchema schema = merged.getColumnSchema("city");
+    Assertions.assertNotNull(((StringDimensionSchema) 
schema).getColumnFormatSpec());
+  }
+}
diff --git a/processing/src/test/java/org/apache/druid/segment/serde/DictionaryEncodedColumnPartSerdeTest.java b/processing/src/test/java/org/apache/druid/segment/serde/DictionaryEncodedColumnPartSerdeTest.java
index b276d56b086..81b4386c480 100644
--- a/processing/src/test/java/org/apache/druid/segment/serde/DictionaryEncodedColumnPartSerdeTest.java
+++ b/processing/src/test/java/org/apache/druid/segment/serde/DictionaryEncodedColumnPartSerdeTest.java
@@ -70,4 +70,30 @@ public class DictionaryEncodedColumnPartSerdeTest
     Assertions.assertEquals(ByteOrder.LITTLE_ENDIAN, serde.getByteOrder());
     Assertions.assertTrue(serde.getBitmapSerdeFactory() instanceof 
RoaringBitmapSerdeFactory);
   }
+
+  @Test
+  public void testSerdeWithColumnFormatSpec() throws Exception
+  {
+    String json = "{\n"
+                  + "\"type\": \"stringDictionary\",\n"
+                  + "\"byteOrder\": \"LITTLE_ENDIAN\",\n"
+                  + "\"bitmapSerdeFactory\": { \"type\": \"roaring\" },\n"
+                  + "\"columnFormatSpec\": { \"maxStringLength\": 100 }\n"
+                  + "}";
+
+    ObjectMapper mapper = TestHelper.makeJsonMapper();
+
+    DictionaryEncodedColumnPartSerde serde = 
(DictionaryEncodedColumnPartSerde) mapper.readValue(
+        mapper.writeValueAsString(
+            mapper.readValue(json, ColumnPartSerde.class)
+        ),
+        ColumnPartSerde.class
+    );
+
+    Assertions.assertEquals(ByteOrder.LITTLE_ENDIAN, serde.getByteOrder());
+    Assertions.assertTrue(serde.getBitmapSerdeFactory() instanceof 
RoaringBitmapSerdeFactory);
+    Assertions.assertNotNull(serde.getColumnFormatSpec());
+    Assertions.assertEquals(Integer.valueOf(100), 
serde.getColumnFormatSpec().getMaxStringLength());
+  }
+
 }


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to