This is an automated email from the ASF dual-hosted git repository.
jackie pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/pinot.git
The following commit(s) were added to refs/heads/master by this push:
new b284154731 StarTree non deterministic ordering of functions in
startree_index_map and other places (#10231)
b284154731 is described below
commit b284154731d2f658eb9ddfc14d25ea509c76919d
Author: Saurabh Dubey <[email protected]>
AuthorDate: Thu Feb 9 00:42:22 2023 +0530
StarTree non deterministic ordering of functions in startree_index_map and
other places (#10231)
---
.../startree/v2/builder/MultipleTreesBuilder.java | 4 +--
.../startree/v2/builder/StarTreeIndexCombiner.java | 15 ++++----
.../v2/builder/StarTreeV2BuilderConfig.java | 8 ++---
.../startree/v2/store/StarTreeIndexMapUtils.java | 19 +++++++---
.../segment/store/StarTreeIndexReaderTest.java | 41 +++++++++++-----------
.../startree/AggregationFunctionColumnPair.java | 10 +++++-
6 files changed, 57 insertions(+), 40 deletions(-)
diff --git
a/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/startree/v2/builder/MultipleTreesBuilder.java
b/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/startree/v2/builder/MultipleTreesBuilder.java
index 50f04abe0b..ec40df5f97 100644
---
a/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/startree/v2/builder/MultipleTreesBuilder.java
+++
b/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/startree/v2/builder/MultipleTreesBuilder.java
@@ -24,12 +24,12 @@ import java.io.File;
import java.io.FileNotFoundException;
import java.util.ArrayList;
import java.util.List;
-import java.util.Map;
import javax.annotation.Nullable;
import org.apache.commons.collections.CollectionUtils;
import org.apache.commons.configuration.Configuration;
import org.apache.commons.configuration.PropertiesConfiguration;
import org.apache.commons.io.FileUtils;
+import org.apache.commons.lang3.tuple.Pair;
import
org.apache.pinot.segment.local.indexsegment.immutable.ImmutableSegmentLoader;
import org.apache.pinot.segment.local.startree.StarTreeBuilderUtils;
import org.apache.pinot.segment.local.startree.v2.store.StarTreeIndexMapUtils;
@@ -130,7 +130,7 @@ public class MultipleTreesBuilder implements Closeable {
File starTreeIndexDir = new File(_segmentDirectory,
StarTreeV2Constants.STAR_TREE_TEMP_DIR);
FileUtils.forceMkdir(starTreeIndexDir);
_metadataProperties.addProperty(MetadataKey.STAR_TREE_COUNT,
numStarTrees);
- List<Map<IndexKey, IndexValue>> indexMaps = new
ArrayList<>(numStarTrees);
+ List<List<Pair<IndexKey, IndexValue>>> indexMaps = new
ArrayList<>(numStarTrees);
// Build all star-trees
for (int i = 0; i < numStarTrees; i++) {
diff --git
a/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/startree/v2/builder/StarTreeIndexCombiner.java
b/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/startree/v2/builder/StarTreeIndexCombiner.java
index dbe44c7a13..4d4c805d39 100644
---
a/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/startree/v2/builder/StarTreeIndexCombiner.java
+++
b/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/startree/v2/builder/StarTreeIndexCombiner.java
@@ -24,9 +24,10 @@ import java.io.File;
import java.io.IOException;
import java.io.RandomAccessFile;
import java.nio.channels.FileChannel;
-import java.util.HashMap;
-import java.util.Map;
+import java.util.ArrayList;
+import java.util.List;
import org.apache.commons.io.FileUtils;
+import org.apache.commons.lang3.tuple.Pair;
import org.apache.pinot.segment.spi.V1Constants;
import
org.apache.pinot.segment.spi.index.startree.AggregationFunctionColumnPair;
import org.apache.pinot.segment.spi.index.startree.StarTreeV2Constants;
@@ -52,19 +53,19 @@ public class StarTreeIndexCombiner implements Closeable {
/**
* Combines the index files inside the given directory into the single index
file, then cleans the directory.
*/
- public Map<IndexKey, IndexValue> combine(StarTreeV2BuilderConfig
builderConfig, File starTreeIndexDir)
+ public List<Pair<IndexKey, IndexValue>> combine(StarTreeV2BuilderConfig
builderConfig, File starTreeIndexDir)
throws IOException {
- Map<IndexKey, IndexValue> indexMap = new HashMap<>();
+ List<Pair<IndexKey, IndexValue>> indexMap = new ArrayList<>();
// Write star-tree index
File starTreeIndexFile = new File(starTreeIndexDir,
StarTreeV2Constants.STAR_TREE_INDEX_FILE_NAME);
- indexMap.put(STAR_TREE_INDEX_KEY, writeFile(starTreeIndexFile));
+ indexMap.add(Pair.of(STAR_TREE_INDEX_KEY, writeFile(starTreeIndexFile)));
// Write dimension indexes
for (String dimension : builderConfig.getDimensionsSplitOrder()) {
File dimensionIndexFile =
new File(starTreeIndexDir, dimension +
V1Constants.Indexes.UNSORTED_SV_FORWARD_INDEX_FILE_EXTENSION);
- indexMap.put(new IndexKey(IndexType.FORWARD_INDEX, dimension),
writeFile(dimensionIndexFile));
+ indexMap.add(Pair.of(new IndexKey(IndexType.FORWARD_INDEX, dimension),
writeFile(dimensionIndexFile)));
}
// Write metric (function-column pair) indexes
@@ -72,7 +73,7 @@ public class StarTreeIndexCombiner implements Closeable {
String metric = functionColumnPair.toColumnName();
File metricIndexFile =
new File(starTreeIndexDir, metric +
V1Constants.Indexes.RAW_SV_FORWARD_INDEX_FILE_EXTENSION);
- indexMap.put(new IndexKey(IndexType.FORWARD_INDEX, metric),
writeFile(metricIndexFile));
+ indexMap.add(Pair.of(new IndexKey(IndexType.FORWARD_INDEX, metric),
writeFile(metricIndexFile)));
}
FileUtils.cleanDirectory(starTreeIndexDir);
diff --git
a/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/startree/v2/builder/StarTreeV2BuilderConfig.java
b/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/startree/v2/builder/StarTreeV2BuilderConfig.java
index 87ea5214cd..56e778a38b 100644
---
a/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/startree/v2/builder/StarTreeV2BuilderConfig.java
+++
b/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/startree/v2/builder/StarTreeV2BuilderConfig.java
@@ -21,10 +21,10 @@ package org.apache.pinot.segment.local.startree.v2.builder;
import com.google.common.base.Preconditions;
import java.util.ArrayList;
import java.util.Collections;
-import java.util.HashSet;
import java.util.List;
import java.util.Objects;
import java.util.Set;
+import java.util.TreeSet;
import org.apache.commons.lang3.builder.ToStringBuilder;
import org.apache.commons.lang3.builder.ToStringStyle;
import org.apache.pinot.segment.spi.AggregationFunctionType;
@@ -56,7 +56,7 @@ public class StarTreeV2BuilderConfig {
Set<String> skipStarNodeCreationForDimensions;
if (indexConfig.getSkipStarNodeCreationForDimensions() != null) {
- skipStarNodeCreationForDimensions = new
HashSet<>(indexConfig.getSkipStarNodeCreationForDimensions());
+ skipStarNodeCreationForDimensions = new
TreeSet<>(indexConfig.getSkipStarNodeCreationForDimensions());
Preconditions.checkArgument(dimensionsSplitOrder.containsAll(skipStarNodeCreationForDimensions),
"Can not skip star-node creation for dimensions not in the split
order, dimensionsSplitOrder: %s, "
+ "skipStarNodeCreationForDimensions: %s",
@@ -65,7 +65,7 @@ public class StarTreeV2BuilderConfig {
skipStarNodeCreationForDimensions = Collections.emptySet();
}
- Set<AggregationFunctionColumnPair> functionColumnPairs = new HashSet<>();
+ Set<AggregationFunctionColumnPair> functionColumnPairs = new TreeSet<>();
for (String functionColumnPair : indexConfig.getFunctionColumnPairs()) {
functionColumnPairs.add(AggregationFunctionColumnPair.fromColumnName(functionColumnPair));
}
@@ -145,7 +145,7 @@ public class StarTreeV2BuilderConfig {
}
Preconditions.checkState(!dimensionsSplitOrder.isEmpty(), "No qualified
dimension found for star-tree split order");
- Set<AggregationFunctionColumnPair> functionColumnPairs = new HashSet<>();
+ Set<AggregationFunctionColumnPair> functionColumnPairs = new TreeSet<>();
functionColumnPairs.add(AggregationFunctionColumnPair.COUNT_STAR);
for (String numericMetric : numericMetrics) {
functionColumnPairs.add(new
AggregationFunctionColumnPair(AggregationFunctionType.SUM, numericMetric));
diff --git
a/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/startree/v2/store/StarTreeIndexMapUtils.java
b/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/startree/v2/store/StarTreeIndexMapUtils.java
index d50e507372..3b989d3c79 100644
---
a/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/startree/v2/store/StarTreeIndexMapUtils.java
+++
b/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/startree/v2/store/StarTreeIndexMapUtils.java
@@ -22,6 +22,7 @@ import com.google.common.base.Preconditions;
import java.io.File;
import java.io.InputStream;
import java.util.ArrayList;
+import java.util.Comparator;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
@@ -30,9 +31,9 @@ import javax.annotation.Nonnull;
import javax.annotation.Nullable;
import org.apache.commons.configuration.PropertiesConfiguration;
import org.apache.commons.lang3.StringUtils;
+import org.apache.commons.lang3.tuple.Pair;
import org.apache.pinot.spi.env.CommonsConfigurationUtils;
-
/**
* The {@code StarTreeIndexMapUtils} class is a utility class to store/load
star-tree index map to/from file.
* <p>
@@ -81,7 +82,7 @@ public class StarTreeIndexMapUtils {
/**
* Key of the index map.
*/
- public static class IndexKey {
+ public static class IndexKey implements Comparable<IndexKey> {
public final IndexType _indexType;
// For star-tree index, column will be null
public final String _column;
@@ -112,6 +113,14 @@ public class StarTreeIndexMapUtils {
return false;
}
}
+
+ @Override
+ public int compareTo(IndexKey other) {
+ return Comparator
+ .comparing((IndexKey i) -> i._column,
Comparator.nullsLast(Comparator.naturalOrder()))
+ .thenComparing((IndexKey i) -> i._indexType)
+ .compare(this, other);
+ }
}
/**
@@ -138,14 +147,14 @@ public class StarTreeIndexMapUtils {
/**
* Stores the index maps for multiple star-trees into a file.
*/
- public static void storeToFile(List<Map<IndexKey, IndexValue>> indexMaps,
File indexMapFile) {
+ public static void storeToFile(List<List<Pair<IndexKey, IndexValue>>>
indexMaps, File indexMapFile) {
Preconditions.checkState(!indexMapFile.exists(), "Star-tree index map file
already exists");
PropertiesConfiguration configuration =
CommonsConfigurationUtils.fromFile(indexMapFile);
int numStarTrees = indexMaps.size();
for (int i = 0; i < numStarTrees; i++) {
- Map<IndexKey, IndexValue> indexMap = indexMaps.get(i);
- for (Map.Entry<IndexKey, IndexValue> entry : indexMap.entrySet()) {
+ List<Pair<IndexKey, IndexValue>> indexMap = indexMaps.get(i);
+ for (Pair<IndexKey, IndexValue> entry : indexMap) {
IndexKey key = entry.getKey();
IndexValue value = entry.getValue();
configuration.addProperty(key.getPropertyName(i, OFFSET_SUFFIX),
value._offset);
diff --git
a/pinot-segment-local/src/test/java/org/apache/pinot/segment/local/segment/store/StarTreeIndexReaderTest.java
b/pinot-segment-local/src/test/java/org/apache/pinot/segment/local/segment/store/StarTreeIndexReaderTest.java
index c2eaba1142..8a60975940 100644
---
a/pinot-segment-local/src/test/java/org/apache/pinot/segment/local/segment/store/StarTreeIndexReaderTest.java
+++
b/pinot-segment-local/src/test/java/org/apache/pinot/segment/local/segment/store/StarTreeIndexReaderTest.java
@@ -24,10 +24,9 @@ import java.nio.ByteOrder;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
-import java.util.HashMap;
import java.util.List;
-import java.util.Map;
import org.apache.commons.io.FileUtils;
+import org.apache.commons.lang3.tuple.Pair;
import org.apache.pinot.segment.local.startree.v2.store.StarTreeIndexMapUtils;
import org.apache.pinot.segment.spi.AggregationFunctionType;
import org.apache.pinot.segment.spi.creator.SegmentVersion;
@@ -88,26 +87,26 @@ public class StarTreeIndexReaderTest {
Collections.singleton(new
AggregationFunctionColumnPair(AggregationFunctionType.SUM, "dimX")));
when(_segmentMetadata.getStarTreeV2MetadataList()).thenReturn(Arrays.asList(stMeta1,
stMeta2));
// Mock the offset/sizes for the index buffers.
- List<Map<StarTreeIndexMapUtils.IndexKey,
StarTreeIndexMapUtils.IndexValue>> indexMaps = new ArrayList<>();
- Map<StarTreeIndexMapUtils.IndexKey, StarTreeIndexMapUtils.IndexValue>
indexMap = new HashMap<>();
- indexMap.put(new
StarTreeIndexMapUtils.IndexKey(StarTreeIndexMapUtils.IndexType.STAR_TREE, null),
- new StarTreeIndexMapUtils.IndexValue(0, 1));
- indexMap.put(new
StarTreeIndexMapUtils.IndexKey(StarTreeIndexMapUtils.IndexType.FORWARD_INDEX,
"dim0"),
- new StarTreeIndexMapUtils.IndexValue(1, 1));
- indexMap.put(new
StarTreeIndexMapUtils.IndexKey(StarTreeIndexMapUtils.IndexType.FORWARD_INDEX,
"dim1"),
- new StarTreeIndexMapUtils.IndexValue(2, 1));
- indexMap.put(new
StarTreeIndexMapUtils.IndexKey(StarTreeIndexMapUtils.IndexType.FORWARD_INDEX,
"count__*"),
- new StarTreeIndexMapUtils.IndexValue(3, 1));
+ List<List<Pair<StarTreeIndexMapUtils.IndexKey,
StarTreeIndexMapUtils.IndexValue>>> indexMaps = new ArrayList<>();
+ List<Pair<StarTreeIndexMapUtils.IndexKey,
StarTreeIndexMapUtils.IndexValue>> indexMap = new ArrayList<>();
+ indexMap.add(Pair.of(new
StarTreeIndexMapUtils.IndexKey(StarTreeIndexMapUtils.IndexType.STAR_TREE, null),
+ new StarTreeIndexMapUtils.IndexValue(0, 1)));
+ indexMap.add(Pair.of(new
StarTreeIndexMapUtils.IndexKey(StarTreeIndexMapUtils.IndexType.FORWARD_INDEX,
"dim0"),
+ new StarTreeIndexMapUtils.IndexValue(1, 1)));
+ indexMap.add(Pair.of(new
StarTreeIndexMapUtils.IndexKey(StarTreeIndexMapUtils.IndexType.FORWARD_INDEX,
"dim1"),
+ new StarTreeIndexMapUtils.IndexValue(2, 1)));
+ indexMap.add(Pair.of(new
StarTreeIndexMapUtils.IndexKey(StarTreeIndexMapUtils.IndexType.FORWARD_INDEX,
"count__*"),
+ new StarTreeIndexMapUtils.IndexValue(3, 1)));
indexMaps.add(indexMap);
- indexMap = new HashMap<>();
- indexMap.put(new
StarTreeIndexMapUtils.IndexKey(StarTreeIndexMapUtils.IndexType.STAR_TREE, null),
- new StarTreeIndexMapUtils.IndexValue(10, 3));
- indexMap.put(new
StarTreeIndexMapUtils.IndexKey(StarTreeIndexMapUtils.IndexType.FORWARD_INDEX,
"dimX"),
- new StarTreeIndexMapUtils.IndexValue(13, 3));
- indexMap.put(new
StarTreeIndexMapUtils.IndexKey(StarTreeIndexMapUtils.IndexType.FORWARD_INDEX,
"dimY"),
- new StarTreeIndexMapUtils.IndexValue(16, 3));
- indexMap.put(new
StarTreeIndexMapUtils.IndexKey(StarTreeIndexMapUtils.IndexType.FORWARD_INDEX,
"sum__dimX"),
- new StarTreeIndexMapUtils.IndexValue(19, 3));
+ indexMap = new ArrayList<>();
+ indexMap.add(Pair.of(new
StarTreeIndexMapUtils.IndexKey(StarTreeIndexMapUtils.IndexType.STAR_TREE, null),
+ new StarTreeIndexMapUtils.IndexValue(10, 3)));
+ indexMap.add(Pair.of(new
StarTreeIndexMapUtils.IndexKey(StarTreeIndexMapUtils.IndexType.FORWARD_INDEX,
"dimX"),
+ new StarTreeIndexMapUtils.IndexValue(13, 3)));
+ indexMap.add(Pair.of(new
StarTreeIndexMapUtils.IndexKey(StarTreeIndexMapUtils.IndexType.FORWARD_INDEX,
"dimY"),
+ new StarTreeIndexMapUtils.IndexValue(16, 3)));
+ indexMap.add(Pair.of(new
StarTreeIndexMapUtils.IndexKey(StarTreeIndexMapUtils.IndexType.FORWARD_INDEX,
"sum__dimX"),
+ new StarTreeIndexMapUtils.IndexValue(19, 3)));
indexMaps.add(indexMap);
File indexMapFile = new File(TEMP_DIR,
StarTreeV2Constants.INDEX_MAP_FILE_NAME);
StarTreeIndexMapUtils.storeToFile(indexMaps, indexMapFile);
diff --git
a/pinot-segment-spi/src/main/java/org/apache/pinot/segment/spi/index/startree/AggregationFunctionColumnPair.java
b/pinot-segment-spi/src/main/java/org/apache/pinot/segment/spi/index/startree/AggregationFunctionColumnPair.java
index 86f0cc7cd1..e77c65d028 100644
---
a/pinot-segment-spi/src/main/java/org/apache/pinot/segment/spi/index/startree/AggregationFunctionColumnPair.java
+++
b/pinot-segment-spi/src/main/java/org/apache/pinot/segment/spi/index/startree/AggregationFunctionColumnPair.java
@@ -18,10 +18,11 @@
*/
package org.apache.pinot.segment.spi.index.startree;
+import java.util.Comparator;
import org.apache.pinot.segment.spi.AggregationFunctionType;
-public class AggregationFunctionColumnPair {
+public class AggregationFunctionColumnPair implements
Comparable<AggregationFunctionColumnPair> {
public static final String DELIMITER = "__";
public static final String STAR = "*";
public static final AggregationFunctionColumnPair COUNT_STAR =
@@ -86,4 +87,11 @@ public class AggregationFunctionColumnPair {
public String toString() {
return toColumnName();
}
+
+ @Override
+ public int compareTo(AggregationFunctionColumnPair other) {
+ return Comparator.comparing((AggregationFunctionColumnPair o) -> o._column)
+ .thenComparing((AggregationFunctionColumnPair o) -> o._functionType)
+ .compare(this, other);
+ }
}
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]