gianm commented on code in PR #18207:
URL: https://github.com/apache/druid/pull/18207#discussion_r2206083624


##########
embedded-tests/src/test/java/org/apache/druid/testing/embedded/compact/AutoCompactionTest.java:
##########
@@ -85,68 +88,141 @@
 import org.joda.time.Interval;
 import org.joda.time.Period;
 import org.joda.time.chrono.ISOChronology;
-import org.testng.Assert;
-import org.testng.annotations.BeforeMethod;
-import org.testng.annotations.DataProvider;
-import org.testng.annotations.Guice;
-import org.testng.annotations.Test;
+import org.junit.jupiter.api.Assertions;
+import org.junit.jupiter.api.BeforeEach;
+import org.junit.jupiter.api.Test;
+import org.junit.jupiter.params.ParameterizedTest;
+import org.junit.jupiter.params.provider.MethodSource;
+import org.junit.jupiter.params.provider.ValueSource;
 
 import java.io.Closeable;
-import java.io.IOException;
-import java.io.InputStream;
-import java.nio.charset.StandardCharsets;
 import java.util.ArrayList;
 import java.util.Arrays;
-import java.util.HashSet;
 import java.util.List;
-import java.util.Map;
 import java.util.Set;
-import java.util.UUID;
+import java.util.function.Supplier;
 import java.util.stream.Collectors;
 
-@Test(groups = {TestNGGroup.COMPACTION})
-@Guice(moduleFactory = DruidTestModuleFactory.class)
-public class ITAutoCompactionTest extends AbstractIndexerTest
+/**
+ * Embedded mode of integration-tests originally present in {@code 
ITAutoCompactionTest}.
+ */
+public class AutoCompactionTest extends CompactionTestBase
 {
-  private static final Logger LOG = new Logger(ITAutoCompactionTest.class);
-  private static final String INDEX_TASK = 
"/indexer/wikipedia_index_task.json";
-  private static final String INDEX_TASK_WITH_GRANULARITY_SPEC = 
"/indexer/wikipedia_index_task_with_granularity_spec.json";
-  private static final String INDEX_TASK_WITH_DIMENSION_SPEC = 
"/indexer/wikipedia_index_task_with_dimension_spec.json";
-  private static final String INDEX_ROLLUP_QUERIES_RESOURCE = 
"/indexer/wikipedia_index_rollup_queries.json";
-  private static final String INDEX_ROLLUP_SKETCH_QUERIES_RESOURCE = 
"/indexer/wikipedia_index_sketch_queries.json";
-  private static final String INDEX_QUERIES_RESOURCE = 
"/indexer/wikipedia_index_queries.json";
-  private static final String INDEX_TASK_WITH_ROLLUP_FOR_PRESERVE_METRICS = 
"/indexer/wikipedia_index_rollup_preserve_metric.json";
-  private static final String INDEX_TASK_WITHOUT_ROLLUP_FOR_PRESERVE_METRICS = 
"/indexer/wikipedia_index_no_rollup_preserve_metric.json";
+  private static final Logger LOG = new Logger(AutoCompactionTest.class);
+  private static final Supplier<TaskBuilder.Index> INDEX_TASK = 
Resources.Task.BASIC_INDEX;
+
+  private static final Supplier<TaskBuilder.Index> 
INDEX_TASK_WITH_GRANULARITY_SPEC =
+      () -> 
INDEX_TASK.get().dimensions("language").dynamicPartitionWithMaxRows(10);
+  private static final Supplier<TaskBuilder.Index> 
INDEX_TASK_WITH_DIMENSION_SPEC =
+      () -> INDEX_TASK.get().granularitySpec("DAY", "DAY", true);
+
+  private static final String SELECT_APPROX_COUNT_DISTINCT = 
Resources.Query.SELECT_APPROX_COUNT_DISTINCT;
+  private static final List<Pair<String, String>> INDEX_QUERIES_RESOURCE = 
List.of(
+      Pair.of(Resources.Query.SELECT_MIN_MAX_TIME, 
"2013-08-31T01:02:33.000Z,2013-09-01T12:41:27.000Z"),
+      Pair.of(Resources.Query.SELECT_APPROX_COUNT_DISTINCT, "5,5"),
+      Pair.of(Resources.Query.SELECT_EARLIEST_LATEST_USER, "nuclear,stringer"),
+      Pair.of(Resources.Query.SELECT_COUNT_OF_CHINESE_PAGES, "Crimson 
Typhoon,1,905.0,9050.0")
+  );
+  private static final Supplier<TaskBuilder.IndexParallel> 
INDEX_TASK_WITH_ROLLUP_FOR_PRESERVE_METRICS =
+      () -> TaskBuilder
+          .ofTypeIndexParallel()
+          .jsonInputFormat()
+          .inlineInputSourceWithData(Resources.InlineData.JSON_2_ROWS)
+          .isoTimestampColumn("timestamp")

Review Comment:
   I wonder if it would be nicer to have `.timestampColumn("timestamp", 
"iso")`. Makes it easier to sub in `"auto"` or whatever other format.



##########
embedded-tests/src/test/java/org/apache/druid/testing/embedded/indexing/Resources.java:
##########
@@ -0,0 +1,140 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.druid.testing.embedded.indexing;
+
+import org.apache.druid.indexing.common.task.TaskBuilder;
+import org.apache.druid.query.aggregation.CountAggregatorFactory;
+import org.apache.druid.query.aggregation.DoubleSumAggregatorFactory;
+import 
org.apache.druid.query.aggregation.datasketches.hll.HllSketchBuildAggregatorFactory;
+import 
org.apache.druid.query.aggregation.datasketches.quantiles.DoublesSketchAggregatorFactory;
+import 
org.apache.druid.query.aggregation.datasketches.theta.SketchMergeAggregatorFactory;
+
+import java.util.function.Supplier;
+
+/**
+ * Constants and utility methods used in embedded cluster tests.
+ */
+public class Resources

Review Comment:
   Moving this to `embedded-tests` means that it's now tougher for extensions 
to write their own tests that pull in `Resources`. Can you split this up? Such 
as having some `Resources` in `services`, and `MoreResources` in 
`embedded-tests`.



##########
embedded-tests/src/test/java/org/apache/druid/testing/embedded/compact/AutoCompactionTest.java:
##########
@@ -1989,6 +1884,7 @@ private void forceTriggerAutoCompaction(
       );
 
       // Wait for scheduler to pick up the compaction job
+      // TODO: make this latch-based

Review Comment:
   Yes, please don't commit todo comments.



##########
embedded-tests/src/test/java/org/apache/druid/testing/embedded/compact/AutoCompactionTest.java:
##########
@@ -1742,84 +1639,85 @@ public void 
testAutoCompactionDutyWithOverlappingInterval() throws Exception
       );
       // Compact the MONTH segment
       forceTriggerAutoCompaction(2);
-      verifyQuery(INDEX_ROLLUP_QUERIES_RESOURCE, queryAndResultFields);
+      verifyScanResult("added", "57.0||459.0");
+      verifyScanResult("COUNT(*)", "2");
 
       // Compact the WEEK segment
       forceTriggerAutoCompaction(2);
-      verifyQuery(INDEX_ROLLUP_QUERIES_RESOURCE, queryAndResultFields);
+      verifyScanResult("added", "57.0||459.0");
+      verifyScanResult("COUNT(*)", "2");
 
       // Verify all task succeed
-      List<TaskResponseObject> compactTasksBefore = 
indexer.getCompleteTasksForDataSource(fullDatasourceName);
-      for (TaskResponseObject taskResponseObject : compactTasksBefore) {
-        Assert.assertEquals(TaskState.SUCCESS, taskResponseObject.getStatus());
+      List<TaskStatusPlus> compactTasksBefore = 
getCompleteTasksForDataSource(fullDatasourceName);
+      for (TaskStatusPlus taskResponseObject : compactTasksBefore) {
+        Assertions.assertEquals(TaskState.SUCCESS, 
taskResponseObject.getStatusCode());
       }
 
       // Verify compacted segments does not get compacted again
       forceTriggerAutoCompaction(2);
-      List<TaskResponseObject> compactTasksAfter = 
indexer.getCompleteTasksForDataSource(fullDatasourceName);
-      Assert.assertEquals(compactTasksAfter.size(), compactTasksBefore.size());
+      List<TaskStatusPlus> compactTasksAfter = 
getCompleteTasksForDataSource(fullDatasourceName);
+      Assertions.assertEquals(compactTasksAfter.size(), 
compactTasksBefore.size());
     }
   }
 
-  private void loadData(String indexTask) throws Exception
+  private <T extends TaskBuilder.IndexCommon<?, ?, ?>> void 
loadData(Supplier<T> updatePayload)
   {
-    loadData(indexTask, ImmutableMap.of());
+    loadData(updatePayload, null);
   }
 
-  private void loadData(String indexTask, Map<String, Object> specs) throws 
Exception
+  private <T extends TaskBuilder.IndexCommon<?, ?, ?>> void loadData(
+      Supplier<T> taskPayloadSupplier,
+      GranularitySpec granularitySpec
+  )
   {
-    String taskSpec = getResourceAsString(indexTask);
-    taskSpec = StringUtils.replace(taskSpec, "%%DATASOURCE%%", 
fullDatasourceName);
-    taskSpec = StringUtils.replace(
-        taskSpec,
-        "%%SEGMENT_AVAIL_TIMEOUT_MILLIS%%",
-        jsonMapper.writeValueAsString("0")
-    );
-    for (Map.Entry<String, Object> entry : specs.entrySet()) {
-      taskSpec = StringUtils.replace(
-          taskSpec,
-          entry.getKey(),
-          jsonMapper.writeValueAsString(entry.getValue())
-      );
+    final TaskBuilder.IndexCommon<?, ?, ?> taskBuilder = 
taskPayloadSupplier.get();
+    taskBuilder.dataSource(fullDatasourceName);
+    if (granularitySpec != null) {
+      taskBuilder.granularitySpec(granularitySpec);
     }
-    final String taskId = indexer.submitTask(taskSpec);
-    LOG.info("Submitted task[%s] to load data", taskId);
-    indexer.waitUntilTaskCompletes(taskId);
 
-    ITRetryUtil.retryUntilTrue(
-        () -> coordinator.areSegmentsLoaded(fullDatasourceName),
-        "Segments are loaded"
+    runTask(taskBuilder, fullDatasourceName);
+  }
+
+  private void verifyQuery(List<Pair<String, String>> queries)
+  {
+    queries.forEach(
+        query -> verifyQuery(query.lhs, query.rhs)
     );
   }
 
-  private void verifyQuery(String queryResource) throws Exception
+  private void verifyQuery(String query, String result)
   {
-    verifyQuery(queryResource, ImmutableMap.of());
+    Assertions.assertEquals(
+        result,
+        cluster.runSql(query, dataSource),
+        StringUtils.format("Query[%s] failed", query)
+    );
   }
 
-  private void verifyQuery(String queryResource, Map<String, Object> 
keyValueToReplace) throws Exception
+  /**
+   * Verifies the result of a SELECT query
+   *
+   * @param field  Field to select
+   * @param result CSV result with special strings {@code ||} to represent

Review Comment:
   Why do this rather than accept the CSV as-is?



##########
embedded-tests/src/test/java/org/apache/druid/testing/embedded/compact/CompactionTestBase.java:
##########
@@ -0,0 +1,134 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.druid.testing.embedded.compact;
+
+import org.apache.druid.indexing.common.task.TaskBuilder;
+import org.apache.druid.indexing.overlord.Segments;
+import org.apache.druid.java.util.common.guava.Comparators;
+import org.apache.druid.testing.embedded.EmbeddedBroker;
+import org.apache.druid.testing.embedded.EmbeddedClusterApis;
+import org.apache.druid.testing.embedded.EmbeddedCoordinator;
+import org.apache.druid.testing.embedded.EmbeddedDruidCluster;
+import org.apache.druid.testing.embedded.EmbeddedHistorical;
+import org.apache.druid.testing.embedded.EmbeddedIndexer;
+import org.apache.druid.testing.embedded.EmbeddedOverlord;
+import org.apache.druid.testing.embedded.EmbeddedRouter;
+import org.apache.druid.testing.embedded.junit5.EmbeddedClusterTestBase;
+import org.apache.druid.timeline.DataSegment;
+import org.joda.time.Interval;
+import org.junit.jupiter.api.Assertions;
+
+import java.io.Closeable;
+import java.util.Comparator;
+import java.util.List;
+import java.util.Set;
+import java.util.TreeSet;
+import java.util.stream.Collectors;
+
+public abstract class CompactionTestBase extends EmbeddedClusterTestBase
+{
+  protected final EmbeddedOverlord overlord = new EmbeddedOverlord();
+  protected final EmbeddedCoordinator coordinator = new EmbeddedCoordinator();
+
+  @Override
+  protected EmbeddedDruidCluster createCluster()
+  {
+    return EmbeddedDruidCluster.withEmbeddedDerbyAndZookeeper()
+                               .useLatchableEmitter()
+                               .addServer(overlord)
+                               .addServer(coordinator)
+                               .addServer(new EmbeddedIndexer())
+                               .addServer(new EmbeddedBroker())
+                               .addServer(new EmbeddedHistorical())
+                               .addServer(new EmbeddedRouter());
+  }
+
+  /**
+   * Deletes all the data for the given datasource so that compaction tasks for
+   * this datasource do not take up task slots unnecessarily.
+   */
+  protected Closeable unloader(String dataSource)
+  {
+    return () -> {
+      
overlord.bindings().segmentsMetadataStorage().markAllSegmentsAsUnused(dataSource);
+    };
+  }
+
+  /**
+   * Creates a Task using the given builder and runs it.
+   *
+   * @return ID of the task.
+   */
+  protected String runTask(TaskBuilder<?, ?, ?> taskBuilder, String dataSource)

Review Comment:
   IMO, `protected` methods on a base class aren't the best way to do utility 
APIs. Other test cases might want some of these utility methods, but not to be 
"compaction tests". Also, some tests might want to extend multiple base 
classes, and this approach makes it impossible.
   
   With MSQ tests we have `EmbeddedMSQApis`, something that collects together 
utility APIs without using a base class approach. Something similar might work 
here?



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to