This is an automated email from the ASF dual-hosted git repository.

lakshsingla pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/druid.git


The following commit(s) were added to refs/heads/master by this push:
     new ab0d6eb620f Fix string array grouping comparator (#17183)
ab0d6eb620f is described below

commit ab0d6eb620ffc4783153349c1c297662e053f25a
Author: Clint Wylie <[email protected]>
AuthorDate: Tue Oct 8 00:17:28 2024 -0400

    Fix string array grouping comparator (#17183)
---
 .../epinephelinae/RowBasedGrouperHelper.java       | 47 ++++++++++++++++------
 .../query/groupby/NestedGroupByArrayQueryTest.java | 40 ++++++++++++++++++
 .../druid/sql/calcite/CalciteArraysQueryTest.java  | 45 +++++++++++++++++++++
 3 files changed, 119 insertions(+), 13 deletions(-)

diff --git 
a/processing/src/main/java/org/apache/druid/query/groupby/epinephelinae/RowBasedGrouperHelper.java
 
b/processing/src/main/java/org/apache/druid/query/groupby/epinephelinae/RowBasedGrouperHelper.java
index 8c42bb935d8..9de8ec17f76 100644
--- 
a/processing/src/main/java/org/apache/druid/query/groupby/epinephelinae/RowBasedGrouperHelper.java
+++ 
b/processing/src/main/java/org/apache/druid/query/groupby/epinephelinae/RowBasedGrouperHelper.java
@@ -1112,12 +1112,17 @@ public class RowBasedGrouperHelper
               DimensionHandlerUtils.convertObjectToType(rhs, fieldType)
           );
         } else if (fieldType.equals(ColumnType.STRING_ARRAY)) {
-          cmp = new DimensionComparisonUtils.ArrayComparator<String>(
-              comparator == null ? StringComparators.LEXICOGRAPHIC : comparator
-          ).compare(
-              DimensionHandlerUtils.coerceToStringArray(lhs),
-              DimensionHandlerUtils.coerceToStringArray(rhs)
-          );
+          if (useNaturalStringArrayComparator(comparator)) {
+            cmp = fieldType.getNullableStrategy().compare(
+                DimensionHandlerUtils.coerceToStringArray(lhs),
+                DimensionHandlerUtils.coerceToStringArray(rhs)
+            );
+          } else {
+            cmp = new 
DimensionComparisonUtils.ArrayComparator<>(comparator).compare(
+                DimensionHandlerUtils.coerceToStringArray(lhs),
+                DimensionHandlerUtils.coerceToStringArray(rhs)
+            );
+          }
         } else if (fieldType.equals(ColumnType.LONG_ARRAY)
                    || fieldType.equals(ColumnType.DOUBLE_ARRAY)) {
           cmp = fieldType.getNullableStrategy().compare(
@@ -1806,13 +1811,17 @@ public class RowBasedGrouperHelper
       )
       {
         super(keyBufferPosition);
+        final Comparator<Object[]> comparator;
+        if (useNaturalStringArrayComparator(stringComparator)) {
+          comparator = ColumnType.STRING_ARRAY.getNullableStrategy();
+        } else {
+          comparator = new 
DimensionComparisonUtils.ArrayComparator<>(stringComparator);
+        }
         bufferComparator = (lhsBuffer, rhsBuffer, lhsPosition, rhsPosition) ->
-            new DimensionComparisonUtils.ArrayComparator<>(
-                stringComparator == null ? StringComparators.LEXICOGRAPHIC : 
stringComparator)
-                .compare(
-                    stringArrayDictionary.get(lhsBuffer.getInt(lhsPosition + 
keyBufferPosition)),
-                    stringArrayDictionary.get(rhsBuffer.getInt(rhsPosition + 
keyBufferPosition))
-                );
+            comparator.compare(
+                stringArrayDictionary.get(lhsBuffer.getInt(lhsPosition + 
keyBufferPosition)),
+                stringArrayDictionary.get(rhsBuffer.getInt(rhsPosition + 
keyBufferPosition))
+            );
       }
 
       @Override
@@ -1859,7 +1868,7 @@ public class RowBasedGrouperHelper
               rankOfDictionaryIds[rhsBuffer.getInt(rhsPosition + 
keyBufferPosition)]
           );
         } else {
-          final StringComparator realComparator = stringComparator == null ?
+          final StringComparator realComparator = 
useNaturalStringArrayComparator(stringComparator) ?
                                                   
StringComparators.LEXICOGRAPHIC :
                                                   stringComparator;
           bufferComparator = (lhsBuffer, rhsBuffer, lhsPosition, rhsPosition) 
-> {
@@ -2182,4 +2191,16 @@ public class RowBasedGrouperHelper
       }
     }
   }
+
+  /**
+   * Check if the {@link StringComparator} is the 'natural' {@link 
ColumnType#STRING_ARRAY} comparator. If so,
+   * callers can safely use the column type to compare values. If false, the 
{@link StringComparator} must be called
+   * against each array element for the comparison of values.
+   */
+  private static boolean useNaturalStringArrayComparator(@Nullable 
StringComparator stringComparator)
+  {
+    return stringComparator == null
+           || StringComparators.NATURAL.equals(stringComparator)
+           || StringComparators.LEXICOGRAPHIC.equals(stringComparator);
+  }
 }
diff --git 
a/processing/src/test/java/org/apache/druid/query/groupby/NestedGroupByArrayQueryTest.java
 
b/processing/src/test/java/org/apache/druid/query/groupby/NestedGroupByArrayQueryTest.java
index f0e581291d3..dd93978b6fd 100644
--- 
a/processing/src/test/java/org/apache/druid/query/groupby/NestedGroupByArrayQueryTest.java
+++ 
b/processing/src/test/java/org/apache/druid/query/groupby/NestedGroupByArrayQueryTest.java
@@ -32,6 +32,9 @@ import org.apache.druid.query.QueryContexts;
 import org.apache.druid.query.aggregation.AggregationTestHelper;
 import org.apache.druid.query.aggregation.CountAggregatorFactory;
 import org.apache.druid.query.dimension.DefaultDimensionSpec;
+import org.apache.druid.query.groupby.orderby.DefaultLimitSpec;
+import org.apache.druid.query.groupby.orderby.OrderByColumnSpec;
+import org.apache.druid.query.ordering.StringComparators;
 import org.apache.druid.segment.Segment;
 import org.apache.druid.segment.column.ColumnType;
 import org.apache.druid.segment.column.RowSignature;
@@ -147,6 +150,43 @@ public class NestedGroupByArrayQueryTest
     );
   }
 
+  @Test
+  public void testGroupByRootArrayStringOrderAndLimit()
+  {
+    GroupByQuery groupQuery = GroupByQuery.builder()
+                                          .setDataSource("test_datasource")
+                                          .setGranularity(Granularities.ALL)
+                                          .setInterval(Intervals.ETERNITY)
+                                          
.setDimensions(DefaultDimensionSpec.of("arrayString", ColumnType.STRING_ARRAY))
+                                          .setAggregatorSpecs(new 
CountAggregatorFactory("count"))
+                                          .setContext(getContext())
+                                          .setLimitSpec(
+                                              new DefaultLimitSpec(
+                                                  ImmutableList.of(
+                                                      new OrderByColumnSpec(
+                                                          "arrayString",
+                                                          
OrderByColumnSpec.Direction.ASCENDING,
+                                                          
StringComparators.NATURAL
+                                                      )
+                                                  ),
+                                                  100
+                                              )
+                                          )
+                                          .build();
+
+
+    runResults(
+        groupQuery,
+        ImmutableList.of(
+            new Object[]{null, 8L},
+            new Object[]{new Object[]{"a", "b"}, 8L},
+            new Object[]{new Object[]{"a", "b", "c"}, 4L},
+            new Object[]{new Object[]{"b", "c"}, 4L},
+            new Object[]{new Object[]{"d", "e"}, 4L}
+        )
+    );
+  }
+
   @Test
   public void testGroupByRootArrayLong()
   {
diff --git 
a/sql/src/test/java/org/apache/druid/sql/calcite/CalciteArraysQueryTest.java 
b/sql/src/test/java/org/apache/druid/sql/calcite/CalciteArraysQueryTest.java
index d98d5759b81..f4816621134 100644
--- a/sql/src/test/java/org/apache/druid/sql/calcite/CalciteArraysQueryTest.java
+++ b/sql/src/test/java/org/apache/druid/sql/calcite/CalciteArraysQueryTest.java
@@ -7481,4 +7481,49 @@ public class CalciteArraysQueryTest extends 
BaseCalciteQueryTest
         )
     );
   }
+
+  @Test
+  public void testArrayGroupStringArrayColumnLimit()
+  {
+    cannotVectorize();
+    testQuery(
+        "SELECT arrayStringNulls, SUM(cnt) FROM druid.arrays GROUP BY 1 ORDER 
BY 1 DESC LIMIT 10",
+        QUERY_CONTEXT_NO_STRINGIFY_ARRAY,
+        ImmutableList.of(
+            GroupByQuery.builder()
+                        .setDataSource(CalciteTests.ARRAYS_DATASOURCE)
+                        .setInterval(querySegmentSpec(Filtration.eternity()))
+                        .setGranularity(Granularities.ALL)
+                        .setDimensions(
+                            dimensions(
+                                new DefaultDimensionSpec("arrayStringNulls", 
"d0", ColumnType.STRING_ARRAY)
+                            )
+                        )
+                        .setAggregatorSpecs(aggregators(new 
LongSumAggregatorFactory("a0", "cnt")))
+                        .setLimitSpec(
+                            new DefaultLimitSpec(
+                                ImmutableList.of(
+                                    new OrderByColumnSpec(
+                                        "d0",
+                                        OrderByColumnSpec.Direction.DESCENDING,
+                                        StringComparators.NATURAL
+                                    )
+                                ),
+                                10
+                            )
+                        )
+                        .setContext(QUERY_CONTEXT_NO_STRINGIFY_ARRAY)
+                        .build()
+        ),
+        ImmutableList.of(
+            new Object[]{Arrays.asList("d", null, "b"), 2L},
+            new Object[]{Arrays.asList("b", "b"), 2L},
+            new Object[]{Arrays.asList("a", "b"), 3L},
+            new Object[]{Arrays.asList(null, "b"), 2L},
+            new Object[]{Collections.singletonList(null), 1L},
+            new Object[]{Collections.emptyList(), 1L},
+            new Object[]{null, 3L}
+        )
+    );
+  }
 }


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to