This is an automated email from the ASF dual-hosted git repository.
gianm pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/druid.git
The following commit(s) were added to refs/heads/master by this push:
new 953b505d57e refactor: add ColumnFormat implementation for string columns (#19410)
953b505d57e is described below
commit 953b505d57ee3a124999706580118fc5f3a09e64
Author: Jay Kanakiya <[email protected]>
AuthorDate: Tue May 19 19:50:00 2026 -0700
refactor: add ColumnFormat implementation for string columns (#19410)
---
.../druid/indexing/common/task/CompactionTask.java | 6 +-
.../data/input/impl/StringDimensionSchema.java | 2 +-
.../druid/segment/StringDimensionHandler.java | 23 +++-
.../druid/segment/StringDimensionIndexer.java | 32 +++++
.../druid/segment/StringDimensionMergerV9.java | 10 +-
.../segment/column/CapabilitiesBasedFormat.java | 11 ++
.../StringDictionaryEncodedColumnFormat.java | 150 +++++++++++++++++++++
.../serde/DictionaryEncodedColumnPartSerde.java | 36 ++++-
.../druid/segment/StringDimensionIndexerTest.java | 23 ++++
.../StringDictionaryEncodedColumnFormatTest.java | 98 ++++++++++++++
.../DictionaryEncodedColumnPartSerdeTest.java | 26 ++++
11 files changed, 409 insertions(+), 8 deletions(-)
diff --git a/indexing-service/src/main/java/org/apache/druid/indexing/common/task/CompactionTask.java b/indexing-service/src/main/java/org/apache/druid/indexing/common/task/CompactionTask.java
index 82310272b36..5804507f5d5 100644
--- a/indexing-service/src/main/java/org/apache/druid/indexing/common/task/CompactionTask.java
+++ b/indexing-service/src/main/java/org/apache/druid/indexing/common/task/CompactionTask.java
@@ -1180,7 +1180,8 @@ public class CompactionTask extends AbstractBatchIndexTask implements PendingSeg
schema = new StringDimensionSchema(
schema.getName(),
DimensionSchema.MultiValueHandling.ARRAY,
- schema.hasBitmapIndex()
+ schema.hasBitmapIndex(),
+ ((StringDimensionSchema) schema).getColumnFormatSpec()
);
}
dimensionSchemaMap.put(
@@ -1258,7 +1259,8 @@ public class CompactionTask extends AbstractBatchIndexTask implements PendingSeg
new StringDimensionSchema(
columnSchema.getName(),
DimensionSchema.MultiValueHandling.ARRAY,
- columnSchema.hasBitmapIndex()
+ columnSchema.hasBitmapIndex(),
+ ((StringDimensionSchema) columnSchema).getColumnFormatSpec()
)
);
} else {
diff --git a/processing/src/main/java/org/apache/druid/data/input/impl/StringDimensionSchema.java b/processing/src/main/java/org/apache/druid/data/input/impl/StringDimensionSchema.java
index 20daa347664..860f5c59354 100644
--- a/processing/src/main/java/org/apache/druid/data/input/impl/StringDimensionSchema.java
+++ b/processing/src/main/java/org/apache/druid/data/input/impl/StringDimensionSchema.java
@@ -130,7 +130,7 @@ public class StringDimensionSchema extends DimensionSchema
}
maxStringLength = columnFormatSpec.getMaxStringLength();
}
- return new StringDimensionHandler(getName(), mvh, bitmap, false, maxStringLength);
+ return new StringDimensionHandler(getName(), mvh, bitmap, false, maxStringLength, columnFormatSpec);
}
@Override
diff --git a/processing/src/main/java/org/apache/druid/segment/StringDimensionHandler.java b/processing/src/main/java/org/apache/druid/segment/StringDimensionHandler.java
index 8deb4aca0ed..ee39aca79d8 100644
--- a/processing/src/main/java/org/apache/druid/segment/StringDimensionHandler.java
+++ b/processing/src/main/java/org/apache/druid/segment/StringDimensionHandler.java
@@ -108,6 +108,8 @@ public class StringDimensionHandler implements DimensionHandler<Integer, int[],
private final boolean hasSpatialIndexes;
@Nullable
private final Integer maxStringLength;
+ @Nullable
+ private final StringColumnFormatSpec columnFormatSpec;
public StringDimensionHandler(
String dimensionName,
@@ -126,12 +128,25 @@ public class StringDimensionHandler implements DimensionHandler<Integer, int[],
boolean hasSpatialIndexes,
@Nullable Integer maxStringLength
)
+ {
+ this(dimensionName, multiValueHandling, hasBitmapIndexes, hasSpatialIndexes, maxStringLength, null);
+ }
+
+ public StringDimensionHandler(
+ String dimensionName,
+ MultiValueHandling multiValueHandling,
+ boolean hasBitmapIndexes,
+ boolean hasSpatialIndexes,
+ @Nullable Integer maxStringLength,
+ @Nullable StringColumnFormatSpec columnFormatSpec
+ )
{
this.dimensionName = dimensionName;
this.multiValueHandling = multiValueHandling;
this.hasBitmapIndexes = hasBitmapIndexes;
this.hasSpatialIndexes = hasSpatialIndexes;
this.maxStringLength = maxStringLength;
+ this.columnFormatSpec = columnFormatSpec;
}
@Override
@@ -146,6 +161,9 @@ public class StringDimensionHandler implements DimensionHandler<Integer, int[],
if (hasSpatialIndexes) {
return new NewSpatialDimensionSchema(dimensionName,
Collections.singletonList(dimensionName));
}
+ if (columnFormatSpec != null) {
+ return new StringDimensionSchema(dimensionName, multiValueHandling, hasBitmapIndexes, columnFormatSpec);
+ }
return new StringDimensionSchema(dimensionName, multiValueHandling,
hasBitmapIndexes);
}
@@ -176,7 +194,7 @@ public class StringDimensionHandler implements DimensionHandler<Integer, int[],
@Override
public DimensionIndexer<Integer, int[], String> makeIndexer()
{
- return new StringDimensionIndexer(multiValueHandling, hasBitmapIndexes, hasSpatialIndexes, maxStringLength);
+ return new StringDimensionIndexer(multiValueHandling, hasBitmapIndexes, hasSpatialIndexes, maxStringLength, columnFormatSpec);
}
@Override
@@ -207,7 +225,8 @@ public class StringDimensionHandler implements DimensionHandler<Integer, int[],
capabilities,
progress,
segmentBaseDir,
- closer
+ closer,
+ columnFormatSpec
);
}
}
diff --git a/processing/src/main/java/org/apache/druid/segment/StringDimensionIndexer.java b/processing/src/main/java/org/apache/druid/segment/StringDimensionIndexer.java
index 8c7a59e2c24..da8dccb4287 100644
--- a/processing/src/main/java/org/apache/druid/segment/StringDimensionIndexer.java
+++ b/processing/src/main/java/org/apache/druid/segment/StringDimensionIndexer.java
@@ -38,9 +38,12 @@ import org.apache.druid.query.filter.DruidPredicateMatch;
import org.apache.druid.query.filter.StringPredicateDruidPredicateFactory;
import org.apache.druid.query.filter.ValueMatcher;
import org.apache.druid.query.monomorphicprocessing.RuntimeShapeInspector;
+import org.apache.druid.segment.column.CapabilitiesBasedFormat;
import org.apache.druid.segment.column.ColumnCapabilities;
import org.apache.druid.segment.column.ColumnCapabilitiesImpl;
+import org.apache.druid.segment.column.ColumnFormat;
import org.apache.druid.segment.column.ColumnType;
+import org.apache.druid.segment.column.StringDictionaryEncodedColumnFormat;
import org.apache.druid.segment.data.ArrayBasedIndexedInts;
import org.apache.druid.segment.data.IndexedInts;
import org.apache.druid.segment.incremental.IncrementalIndex;
@@ -60,6 +63,8 @@ public class StringDimensionIndexer extends DictionaryEncodedColumnIndexer<int[]
private final boolean hasSpatialIndexes;
@Nullable
private final Integer maxStringLength;
+ @Nullable
+ private final StringColumnFormatSpec columnFormatSpec;
private volatile boolean hasMultipleValues = false;
public StringDimensionIndexer(
@@ -77,12 +82,39 @@ public class StringDimensionIndexer extends DictionaryEncodedColumnIndexer<int[]
boolean hasSpatialIndexes,
@Nullable Integer maxStringLength
)
+ {
+ this(multiValueHandling, hasBitmapIndexes, hasSpatialIndexes, maxStringLength, null);
+ }
+
+ public StringDimensionIndexer(
+ @Nullable MultiValueHandling multiValueHandling,
+ boolean hasBitmapIndexes,
+ boolean hasSpatialIndexes,
+ @Nullable Integer maxStringLength,
+ @Nullable StringColumnFormatSpec columnFormatSpec
+ )
{
super(new StringDimensionDictionary());
this.multiValueHandling = multiValueHandling == null ?
MultiValueHandling.ofDefault() : multiValueHandling;
this.hasBitmapIndexes = hasBitmapIndexes;
this.hasSpatialIndexes = hasSpatialIndexes;
this.maxStringLength = maxStringLength;
+ this.columnFormatSpec = columnFormatSpec;
+ }
+
+ @Override
+ public ColumnFormat getFormat()
+ {
+ if (columnFormatSpec != null) {
+ return new StringDictionaryEncodedColumnFormat(
+ hasMultipleValues,
+ dimLookup.getIdForNull() != DimensionDictionary.ABSENT_VALUE_ID,
+ hasBitmapIndexes,
+ hasSpatialIndexes,
+ columnFormatSpec
+ );
+ }
+ return CapabilitiesBasedFormat.forColumnIndexer(getColumnCapabilities());
}
/**
diff --git a/processing/src/main/java/org/apache/druid/segment/StringDimensionMergerV9.java b/processing/src/main/java/org/apache/druid/segment/StringDimensionMergerV9.java
index cc731e8e618..e5edde897a8 100644
--- a/processing/src/main/java/org/apache/druid/segment/StringDimensionMergerV9.java
+++ b/processing/src/main/java/org/apache/druid/segment/StringDimensionMergerV9.java
@@ -63,6 +63,8 @@ public class StringDimensionMergerV9 extends DictionaryEncodedColumnMerger<Strin
@Nullable
private ByteBufferWriter<ImmutableRTree> spatialWriter;
+ @Nullable
+ private final StringColumnFormatSpec columnFormatSpec;
/**
* @param dimensionName column name
@@ -76,6 +78,7 @@ public class StringDimensionMergerV9 extends DictionaryEncodedColumnMerger<Strin
* @param progress hook to update status of what this merger is doing during segment persist and merging
* @param closer resource closer if this merger needs to attach any closables that should be cleaned up
* when the segment is finished writing
+ * @param columnFormatSpec string column format spec to persist in segment metadata
*/
public StringDimensionMergerV9(
String dimensionName,
@@ -85,10 +88,12 @@ public class StringDimensionMergerV9 extends DictionaryEncodedColumnMerger<Strin
ColumnCapabilities capabilities,
ProgressIndicator progress,
File segmentBaseDir,
- Closer closer
+ Closer closer,
+ @Nullable StringColumnFormatSpec columnFormatSpec
)
{
super(dimensionName, outputName, indexSpec, segmentWriteOutMedium,
capabilities, progress, segmentBaseDir, closer);
+ this.columnFormatSpec = columnFormatSpec;
}
@Override
@@ -156,7 +161,8 @@ public class StringDimensionMergerV9 extends DictionaryEncodedColumnMerger<Strin
.withBitmapSerdeFactory(bitmapSerdeFactory)
.withBitmapIndex(bitmapWriter)
.withSpatialIndex(spatialWriter)
- .withByteOrder(IndexIO.BYTE_ORDER);
+ .withByteOrder(IndexIO.BYTE_ORDER)
+ .withColumnFormatSpec(columnFormatSpec);
if (writeDictionary) {
partBuilder = partBuilder.withDictionary(dictionaryWriter);
diff --git a/processing/src/main/java/org/apache/druid/segment/column/CapabilitiesBasedFormat.java b/processing/src/main/java/org/apache/druid/segment/column/CapabilitiesBasedFormat.java
index 0faf7970121..3831870e01e 100644
--- a/processing/src/main/java/org/apache/druid/segment/column/CapabilitiesBasedFormat.java
+++ b/processing/src/main/java/org/apache/druid/segment/column/CapabilitiesBasedFormat.java
@@ -102,6 +102,17 @@ public class CapabilitiesBasedFormat implements ColumnFormat
return this;
}
+ if (otherFormat instanceof StringDictionaryEncodedColumnFormat) {
+ if (!this.capabilities.is(ValueType.STRING)) {
+ throw new ISE(
+ "Cannot merge columns of type[%s] and [%s]",
+ this.capabilities.asTypeString(),
+ otherFormat.getLogicalType()
+ );
+ }
+ return otherFormat.merge(this);
+ }
+
ColumnCapabilitiesImpl merged =
ColumnCapabilitiesImpl.copyOf(this.toColumnCapabilities());
ColumnCapabilitiesImpl otherSnapshot =
ColumnCapabilitiesImpl.copyOf(otherFormat.toColumnCapabilities());
final String mergedType = merged.getType() == null ? null :
merged.asTypeString();
diff --git a/processing/src/main/java/org/apache/druid/segment/column/StringDictionaryEncodedColumnFormat.java b/processing/src/main/java/org/apache/druid/segment/column/StringDictionaryEncodedColumnFormat.java
new file mode 100644
index 00000000000..7d0eec0eeee
--- /dev/null
+++ b/processing/src/main/java/org/apache/druid/segment/column/StringDictionaryEncodedColumnFormat.java
@@ -0,0 +1,150 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.druid.segment.column;
+
+import org.apache.druid.data.input.impl.DimensionSchema;
+import org.apache.druid.data.input.impl.DimensionSchema.MultiValueHandling;
+import org.apache.druid.data.input.impl.NewSpatialDimensionSchema;
+import org.apache.druid.data.input.impl.StringDimensionSchema;
+import org.apache.druid.java.util.common.ISE;
+import org.apache.druid.segment.DimensionHandler;
+import org.apache.druid.segment.StringColumnFormatSpec;
+import org.apache.druid.segment.StringDimensionHandler;
+
+import javax.annotation.Nullable;
+import java.util.Collections;
+
+public class StringDictionaryEncodedColumnFormat implements ColumnFormat
+{
+ private final boolean hasMultipleValues;
+ private final boolean hasNulls;
+ private final boolean hasBitmapIndexes;
+ private final boolean hasSpatialIndexes;
+ @Nullable
+ private final StringColumnFormatSpec columnFormatSpec;
+
+ public StringDictionaryEncodedColumnFormat(
+ boolean hasMultipleValues,
+ boolean hasNulls,
+ boolean hasBitmapIndexes,
+ boolean hasSpatialIndexes,
+ @Nullable StringColumnFormatSpec columnFormatSpec
+ )
+ {
+ this.hasMultipleValues = hasMultipleValues;
+ this.hasNulls = hasNulls;
+ this.hasBitmapIndexes = hasBitmapIndexes;
+ this.hasSpatialIndexes = hasSpatialIndexes;
+ this.columnFormatSpec = columnFormatSpec;
+ }
+
+ @Override
+ public ColumnType getLogicalType()
+ {
+ return ColumnType.STRING;
+ }
+
+ @Override
+ public ColumnCapabilities toColumnCapabilities()
+ {
+ return ColumnCapabilitiesImpl.createDefault()
+ .setType(ColumnType.STRING)
+ .setDictionaryEncoded(true)
+ .setDictionaryValuesSorted(true)
+ .setDictionaryValuesUnique(true)
+ .setHasMultipleValues(hasMultipleValues)
+ .setHasNulls(hasNulls)
+ .setHasBitmapIndexes(hasBitmapIndexes)
+ .setHasSpatialIndexes(hasSpatialIndexes);
+ }
+
+ @Override
+ public DimensionHandler getColumnHandler(String columnName)
+ {
+ Integer maxStringLength = columnFormatSpec != null ? columnFormatSpec.getMaxStringLength() : null;
+ MultiValueHandling mvh = (columnFormatSpec != null && columnFormatSpec.getMultiValueHandling() != null)
+ ? columnFormatSpec.getMultiValueHandling()
+ : MultiValueHandling.ofDefault();
+ return new StringDimensionHandler(
+ columnName,
+ mvh,
+ hasBitmapIndexes,
+ hasSpatialIndexes,
+ maxStringLength,
+ columnFormatSpec
+ );
+ }
+
+ @Override
+ public DimensionSchema getColumnSchema(String columnName)
+ {
+ if (hasSpatialIndexes) {
+ return new NewSpatialDimensionSchema(columnName, Collections.singletonList(columnName));
+ }
+ return new StringDimensionSchema(columnName, null, hasBitmapIndexes, columnFormatSpec);
+ }
+
+ @Override
+ public ColumnFormat merge(@Nullable ColumnFormat otherFormat)
+ {
+ if (otherFormat == null) {
+ return this;
+ }
+
+ if (otherFormat instanceof StringDictionaryEncodedColumnFormat) {
+ final StringDictionaryEncodedColumnFormat other = (StringDictionaryEncodedColumnFormat) otherFormat;
+ return new StringDictionaryEncodedColumnFormat(
+ hasMultipleValues || other.hasMultipleValues,
+ hasNulls || other.hasNulls,
+ hasBitmapIndexes && other.hasBitmapIndexes,
+ hasSpatialIndexes || other.hasSpatialIndexes,
+ columnFormatSpec != null ? columnFormatSpec : other.columnFormatSpec
+ );
+ }
+
+ if (otherFormat instanceof CapabilitiesBasedFormat) {
+ final ColumnCapabilities otherCaps = otherFormat.toColumnCapabilities();
+ if (!otherCaps.is(ValueType.STRING)) {
+ throw new ISE(
+ "Cannot merge columns of type[%s] and format[%s] with type[%s] and format[%s]",
+ ColumnType.STRING,
+ this.getClass().getName(),
+ otherFormat.getLogicalType(),
+ otherFormat.getClass().getName()
+ );
+ }
+ return new StringDictionaryEncodedColumnFormat(
+ hasMultipleValues || otherCaps.hasMultipleValues().isMaybeTrue(),
+ hasNulls || otherCaps.hasNulls().isMaybeTrue(),
+ hasBitmapIndexes && otherCaps.hasBitmapIndexes(),
+ hasSpatialIndexes || otherCaps.hasSpatialIndexes(),
+ columnFormatSpec
+ );
+ }
+
+ throw new ISE(
+ "Cannot merge columns of type[%s] and format[%s] and with [%s] and [%s]",
+ ColumnType.STRING,
+ this.getClass().getName(),
+ otherFormat.getLogicalType(),
+ otherFormat.getClass().getName()
+ );
+ }
+}
diff --git a/processing/src/main/java/org/apache/druid/segment/serde/DictionaryEncodedColumnPartSerde.java b/processing/src/main/java/org/apache/druid/segment/serde/DictionaryEncodedColumnPartSerde.java
index 504e474deb6..6015613062a 100644
--- a/processing/src/main/java/org/apache/druid/segment/serde/DictionaryEncodedColumnPartSerde.java
+++ b/processing/src/main/java/org/apache/druid/segment/serde/DictionaryEncodedColumnPartSerde.java
@@ -20,6 +20,7 @@
package org.apache.druid.segment.serde;
import com.fasterxml.jackson.annotation.JsonCreator;
+import com.fasterxml.jackson.annotation.JsonInclude;
import com.fasterxml.jackson.annotation.JsonProperty;
import com.google.common.base.Preconditions;
import com.google.common.base.Supplier;
@@ -28,10 +29,12 @@ import org.apache.druid.collections.bitmap.ImmutableBitmap;
import org.apache.druid.collections.spatial.ImmutableRTree;
import org.apache.druid.io.Channels;
import org.apache.druid.java.util.common.IAE;
+import org.apache.druid.segment.StringColumnFormatSpec;
import org.apache.druid.segment.column.ColumnBuilder;
import org.apache.druid.segment.column.ColumnConfig;
import org.apache.druid.segment.column.ColumnHolder;
import org.apache.druid.segment.column.SelectableColumn;
+import org.apache.druid.segment.column.StringDictionaryEncodedColumnFormat;
import org.apache.druid.segment.column.StringEncodingStrategies;
import org.apache.druid.segment.column.ValueType;
import org.apache.druid.segment.data.BitmapSerde;
@@ -106,28 +109,34 @@ public class DictionaryEncodedColumnPartSerde implements ColumnPartSerde
@JsonCreator
public static DictionaryEncodedColumnPartSerde createDeserializer(
@JsonProperty("bitmapSerdeFactory") @Nullable BitmapSerdeFactory bitmapSerdeFactory,
- @NotNull @JsonProperty("byteOrder") ByteOrder byteOrder
+ @NotNull @JsonProperty("byteOrder") ByteOrder byteOrder,
+ @JsonProperty("columnFormatSpec") @Nullable StringColumnFormatSpec columnFormatSpec
)
{
return new DictionaryEncodedColumnPartSerde(
byteOrder,
bitmapSerdeFactory != null ? bitmapSerdeFactory : new BitmapSerde.LegacyBitmapSerdeFactory(),
+ columnFormatSpec,
null
);
}
private final ByteOrder byteOrder;
private final BitmapSerdeFactory bitmapSerdeFactory;
+ @Nullable
+ private final StringColumnFormatSpec columnFormatSpec;
private final Serializer serializer;
private DictionaryEncodedColumnPartSerde(
ByteOrder byteOrder,
BitmapSerdeFactory bitmapSerdeFactory,
+ @Nullable StringColumnFormatSpec columnFormatSpec,
@Nullable Serializer serializer
)
{
this.byteOrder = byteOrder;
this.bitmapSerdeFactory = bitmapSerdeFactory;
+ this.columnFormatSpec = columnFormatSpec;
this.serializer = serializer;
}
@@ -143,6 +152,14 @@ public class DictionaryEncodedColumnPartSerde implements ColumnPartSerde
return byteOrder;
}
+ @Nullable
+ @JsonProperty
+ @JsonInclude(JsonInclude.Include.NON_NULL)
+ public StringColumnFormatSpec getColumnFormatSpec()
+ {
+ return columnFormatSpec;
+ }
+
public static SerializerBuilder serializerBuilder()
{
return new SerializerBuilder();
@@ -166,6 +183,8 @@ public class DictionaryEncodedColumnPartSerde implements ColumnPartSerde
private ByteBufferWriter<ImmutableRTree> spatialIndexWriter = null;
@Nullable
private ByteOrder byteOrder = null;
+ @Nullable
+ private StringColumnFormatSpec columnFormatSpec = null;
public SerializerBuilder withDictionary(DictionaryWriter<String> dictionaryWriter)
{
@@ -203,6 +222,12 @@ public class DictionaryEncodedColumnPartSerde implements ColumnPartSerde
return this;
}
+ public SerializerBuilder withColumnFormatSpec(@Nullable StringColumnFormatSpec columnFormatSpec)
+ {
+ this.columnFormatSpec = columnFormatSpec;
+ return this;
+ }
+
public SerializerBuilder withValue(ColumnarIntsSerializer valueWriter,
boolean hasMultiValue, boolean compressed)
{
this.valueWriter = valueWriter;
@@ -234,6 +259,7 @@ public class DictionaryEncodedColumnPartSerde implements ColumnPartSerde
return new DictionaryEncodedColumnPartSerde(
byteOrder,
bitmapSerdeFactory,
+ columnFormatSpec,
new Serializer()
{
@Override
@@ -380,6 +406,14 @@ public class DictionaryEncodedColumnPartSerde implements ColumnPartSerde
rSpatialIndex != null
);
}
+
+ builder.setColumnFormat(new StringDictionaryEncodedColumnFormat(
+ hasMultipleValues,
+ hasNulls,
+ rBitmaps != null,
+ rSpatialIndex != null,
+ columnFormatSpec
+ ));
}
private WritableSupplier<ColumnarInts> readSingleValuedColumn(
diff --git a/processing/src/test/java/org/apache/druid/segment/StringDimensionIndexerTest.java b/processing/src/test/java/org/apache/druid/segment/StringDimensionIndexerTest.java
index 77b386fc589..ba6a2618f86 100644
--- a/processing/src/test/java/org/apache/druid/segment/StringDimensionIndexerTest.java
+++ b/processing/src/test/java/org/apache/druid/segment/StringDimensionIndexerTest.java
@@ -21,6 +21,7 @@ package org.apache.druid.segment;
import org.apache.druid.data.input.impl.DimensionSchema;
import org.apache.druid.java.util.common.StringUtils;
+import org.apache.druid.segment.column.StringDictionaryEncodedColumnFormat;
import org.apache.druid.testing.InitializedNullHandlingTest;
import org.junit.jupiter.api.Assertions;
import org.junit.jupiter.api.Test;
@@ -198,6 +199,28 @@ public class StringDimensionIndexerTest extends InitializedNullHandlingTest
);
}
+ @Test
+ public void testGetFormatHasNullsAfterProcessingNull()
+ {
+ final StringColumnFormatSpec spec = new StringColumnFormatSpec(null, null, 100);
+ final StringDimensionIndexer indexer = new StringDimensionIndexer(
+ DimensionSchema.MultiValueHandling.SORTED_ARRAY,
+ true,
+ false,
+ 100,
+ spec
+ );
+
+ StringDictionaryEncodedColumnFormat format =
+ (StringDictionaryEncodedColumnFormat) indexer.getFormat();
+ Assertions.assertFalse(format.toColumnCapabilities().hasNulls().isTrue());
+
+ indexer.processRowValsToUnsortedEncodedKeyComponent(null, false);
+
+ format = (StringDictionaryEncodedColumnFormat) indexer.getFormat();
+ Assertions.assertTrue(format.toColumnCapabilities().hasNulls().isTrue());
+ }
+
private long verifyEncodedValues(
StringDimensionIndexer indexer,
Object dimensionValues,
diff --git a/processing/src/test/java/org/apache/druid/segment/column/StringDictionaryEncodedColumnFormatTest.java b/processing/src/test/java/org/apache/druid/segment/column/StringDictionaryEncodedColumnFormatTest.java
new file mode 100644
index 00000000000..3fbb3c4711f
--- /dev/null
+++ b/processing/src/test/java/org/apache/druid/segment/column/StringDictionaryEncodedColumnFormatTest.java
@@ -0,0 +1,98 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.druid.segment.column;
+
+import org.apache.druid.data.input.impl.DimensionSchema;
+import org.apache.druid.data.input.impl.StringDimensionSchema;
+import org.apache.druid.segment.StringColumnFormatSpec;
+import org.junit.jupiter.api.Assertions;
+import org.junit.jupiter.api.Test;
+
+public class StringDictionaryEncodedColumnFormatTest
+{
+ private static final StringColumnFormatSpec SPEC = StringColumnFormatSpec.builder()
+ .setMaxStringLength(50)
+ .build();
+
+ @Test
+ public void testGetColumnSchemaWithSpec()
+ {
+ StringDictionaryEncodedColumnFormat format = new StringDictionaryEncodedColumnFormat(
+ false,
+ false,
+ true,
+ false,
+ SPEC
+ );
+ DimensionSchema schema = format.getColumnSchema("city");
+ StringDimensionSchema stringSchema = (StringDimensionSchema) schema;
+ Assertions.assertNotNull(stringSchema.getColumnFormatSpec());
+ Assertions.assertEquals(Integer.valueOf(50), stringSchema.getColumnFormatSpec().getMaxStringLength());
+ }
+
+ @Test
+ public void testMergeTwoFormatsKeepsSpec()
+ {
+ StringDictionaryEncodedColumnFormat formatWithSpec = new StringDictionaryEncodedColumnFormat(
+ false,
+ false,
+ true,
+ false,
+ SPEC
+ );
+ StringDictionaryEncodedColumnFormat formatWithoutSpec = new StringDictionaryEncodedColumnFormat(
+ false,
+ true,
+ true,
+ false,
+ null
+ );
+ ColumnFormat merged = formatWithSpec.merge(formatWithoutSpec);
+
+ DimensionSchema schema = merged.getColumnSchema("city");
+ Assertions.assertEquals(Integer.valueOf(50), ((StringDimensionSchema) schema).getColumnFormatSpec().getMaxStringLength());
+ }
+
+ @Test
+ public void testCapabilitiesBasedFormatMergesDelegatesToStringFormat()
+ {
+ StringDictionaryEncodedColumnFormat formatWithSpec = new StringDictionaryEncodedColumnFormat(
+ false,
+ false,
+ true,
+ false,
+ SPEC
+ );
+ ColumnCapabilities caps = ColumnCapabilitiesImpl.createDefault()
+ .setType(ColumnType.STRING)
+ .setDictionaryEncoded(true)
+ .setDictionaryValuesSorted(true)
+ .setDictionaryValuesUnique(true)
+ .setHasMultipleValues(false)
+ .setHasNulls(false)
+ .setHasBitmapIndexes(true);
+ CapabilitiesBasedFormat capFormat = new CapabilitiesBasedFormat(caps);
+
+ ColumnFormat merged = capFormat.merge(formatWithSpec);
+ Assertions.assertInstanceOf(StringDictionaryEncodedColumnFormat.class, merged);
+ DimensionSchema schema = merged.getColumnSchema("city");
+ Assertions.assertNotNull(((StringDimensionSchema) schema).getColumnFormatSpec());
+ }
+}
diff --git a/processing/src/test/java/org/apache/druid/segment/serde/DictionaryEncodedColumnPartSerdeTest.java b/processing/src/test/java/org/apache/druid/segment/serde/DictionaryEncodedColumnPartSerdeTest.java
index b276d56b086..81b4386c480 100644
--- a/processing/src/test/java/org/apache/druid/segment/serde/DictionaryEncodedColumnPartSerdeTest.java
+++ b/processing/src/test/java/org/apache/druid/segment/serde/DictionaryEncodedColumnPartSerdeTest.java
@@ -70,4 +70,30 @@ public class DictionaryEncodedColumnPartSerdeTest
Assertions.assertEquals(ByteOrder.LITTLE_ENDIAN, serde.getByteOrder());
Assertions.assertTrue(serde.getBitmapSerdeFactory() instanceof RoaringBitmapSerdeFactory);
}
+
+ @Test
+ public void testSerdeWithColumnFormatSpec() throws Exception
+ {
+ String json = "{\n"
+ + "\"type\": \"stringDictionary\",\n"
+ + "\"byteOrder\": \"LITTLE_ENDIAN\",\n"
+ + "\"bitmapSerdeFactory\": { \"type\": \"roaring\" },\n"
+ + "\"columnFormatSpec\": { \"maxStringLength\": 100 }\n"
+ + "}";
+
+ ObjectMapper mapper = TestHelper.makeJsonMapper();
+
+ DictionaryEncodedColumnPartSerde serde = (DictionaryEncodedColumnPartSerde) mapper.readValue(
+ mapper.writeValueAsString(
+ mapper.readValue(json, ColumnPartSerde.class)
+ ),
+ ColumnPartSerde.class
+ );
+
+ Assertions.assertEquals(ByteOrder.LITTLE_ENDIAN, serde.getByteOrder());
+ Assertions.assertTrue(serde.getBitmapSerdeFactory() instanceof RoaringBitmapSerdeFactory);
+ Assertions.assertNotNull(serde.getColumnFormatSpec());
+ Assertions.assertEquals(Integer.valueOf(100), serde.getColumnFormatSpec().getMaxStringLength());
+ }
+
}
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]