kgyrtkirk commented on code in PR #17406:
URL: https://github.com/apache/druid/pull/17406#discussion_r1818970241
##########
sql/src/test/java/org/apache/druid/quidem/DruidQuidemCommandHandler.java:
##########
@@ -245,6 +269,159 @@ static class DruidPlanCommand extends
AbstractRelPlanCommand
}
}
+ static class HintPlanCommand extends AbstractRelPlanCommand
+ {
+ HintPlanCommand(List<String> lines, List<String> content)
+ {
+ super(lines, content, DruidHook.DRUID_PLAN);
+ }
+
+ @Override
+ protected String getString(RelNode node)
+ {
+ final List<String> hintsCollect = new ArrayList<>();
Review Comment:
note: this could be created as a field in the `HintCollector`
##########
sql/src/test/java/org/apache/druid/quidem/DruidQuidemCommandHandler.java:
##########
@@ -245,6 +269,159 @@ static class DruidPlanCommand extends
AbstractRelPlanCommand
}
}
+ static class HintPlanCommand extends AbstractRelPlanCommand
+ {
+ HintPlanCommand(List<String> lines, List<String> content)
+ {
+ super(lines, content, DruidHook.DRUID_PLAN);
+ }
+
+ @Override
+ protected String getString(RelNode node)
+ {
+ final List<String> hintsCollect = new ArrayList<>();
+ final HintCollector collector = new HintCollector(hintsCollect);
+ node.accept(collector);
+ StringBuilder builder = new StringBuilder();
+ for (String hintLine : hintsCollect) {
+ builder.append(hintLine).append("\n");
+ }
+
+ return builder.toString();
+ }
+
+ private static class HintCollector extends RelShuttleImpl
+ {
+ private final List<String> hintsCollect;
+
+ HintCollector(List<String> hintsCollect)
+ {
+ this.hintsCollect = hintsCollect;
+ }
+
+ @Override
+ public RelNode visit(TableScan scan)
+ {
+ if (!scan.getHints().isEmpty()) {
+ this.hintsCollect.add("TableScan:" + scan.getHints());
+ }
+ return super.visit(scan);
+ }
+
+ @Override
+ public RelNode visit(LogicalJoin join)
+ {
+ if (!join.getHints().isEmpty()) {
+ this.hintsCollect.add("LogicalJoin:" + join.getHints());
+ }
+ return super.visit(join);
+ }
+
+ @Override
+ public RelNode visit(LogicalProject project)
+ {
+ if (!project.getHints().isEmpty()) {
+ this.hintsCollect.add("Project:" + project.getHints());
+ }
+ return super.visit(project);
+ }
+
+ @Override
+ public RelNode visit(LogicalAggregate aggregate)
+ {
+ if (!aggregate.getHints().isEmpty()) {
+ this.hintsCollect.add("Aggregate:" + aggregate.getHints());
+ }
+ return super.visit(aggregate);
+ }
+
+ @Override
+ public RelNode visit(LogicalCorrelate correlate)
+ {
+ if (!correlate.getHints().isEmpty()) {
+ this.hintsCollect.add("Correlate:" + correlate.getHints());
+ }
+ return super.visit(correlate);
+ }
+
+ @Override
+ public RelNode visit(LogicalFilter filter)
+ {
+ if (!filter.getHints().isEmpty()) {
+ this.hintsCollect.add("Filter:" + filter.getHints());
+ }
+ return super.visit(filter);
+ }
+
+ @Override
+ public RelNode visit(LogicalUnion union)
+ {
+ if (!union.getHints().isEmpty()) {
+ this.hintsCollect.add("Union:" + union.getHints());
+ }
+ return super.visit(union);
+ }
+
+ @Override
+ public RelNode visit(LogicalIntersect intersect)
+ {
+ if (!intersect.getHints().isEmpty()) {
+ this.hintsCollect.add("Intersect:" + intersect.getHints());
+ }
+ return super.visit(intersect);
+ }
+
+ @Override
+ public RelNode visit(LogicalMinus minus)
+ {
+ if (!minus.getHints().isEmpty()) {
+ this.hintsCollect.add("Minus:" + minus.getHints());
+ }
+ return super.visit(minus);
+ }
+
+ @Override
+ public RelNode visit(LogicalSort sort)
+ {
+ if (!sort.getHints().isEmpty()) {
+ this.hintsCollect.add("Sort:" + sort.getHints());
+ }
+ return super.visit(sort);
+ }
+
+ @Override
+ public RelNode visit(LogicalValues values)
+ {
+ if (!values.getHints().isEmpty()) {
+ this.hintsCollect.add("Values:" + values.getHints());
+ }
+ return super.visit(values);
+ }
+
+ @Override
+ public RelNode visit(RelNode other)
+ {
+ if (other instanceof Window) {
Review Comment:
note: shouldn't you be looking for the `Hintable` interface ?
##########
sql/src/main/java/org/apache/druid/sql/calcite/rule/DruidJoinRule.java:
##########
@@ -153,7 +154,7 @@ public void onMatch(RelOptRuleCall call)
leftFilter = null;
}
- if (!plannerContext.getJoinAlgorithm().requiresSubquery()
+ if (!QueryUtils.getJoinAlgorithm(join, plannerContext).requiresSubquery()
Review Comment:
note: I feel like both `PlannerContext` and `QueryUtils` is a bit
unfortunate place for these....
have you considered placing it in `JoinAlgorithm`?
you should probably also extract a `JoinAlgorithm` to a local variable
##########
sql/src/test/java/org/apache/druid/quidem/DruidQuidemCommandHandler.java:
##########
@@ -202,10 +220,16 @@ protected final void executeExplain(Context context)
throws IOException
if (node instanceof DruidRel<?>) {
node = ((DruidRel<?>) node).unwrapLogicalPlan();
}
- String str = RelOptUtil.dumpPlan("", node, SqlExplainFormat.TEXT,
SqlExplainLevel.EXPPLAN_ATTRIBUTES);
+ String str = getString(node);
context.echo(ImmutableList.of(str));
}
}
+
+ protected String getString(RelNode node)
Review Comment:
note: I feel like `getString` is a bit too generic - and it could have a
better name: `getStringForRel` ; `dumpRelToString` ; `convertRelToString`
...not that these are that much better...
##########
sql/src/test/java/org/apache/druid/quidem/DruidQuidemCommandHandler.java:
##########
@@ -245,6 +269,159 @@ static class DruidPlanCommand extends
AbstractRelPlanCommand
}
}
+ static class HintPlanCommand extends AbstractRelPlanCommand
+ {
+ HintPlanCommand(List<String> lines, List<String> content)
+ {
+ super(lines, content, DruidHook.DRUID_PLAN);
+ }
+
+ @Override
+ protected String getString(RelNode node)
+ {
+ final List<String> hintsCollect = new ArrayList<>();
+ final HintCollector collector = new HintCollector(hintsCollect);
+ node.accept(collector);
+ StringBuilder builder = new StringBuilder();
+ for (String hintLine : hintsCollect) {
+ builder.append(hintLine).append("\n");
+ }
+
+ return builder.toString();
+ }
+
+ private static class HintCollector extends RelShuttleImpl
Review Comment:
note: instead of implementing all these methods; you could subclass
`RelHomogeneousShuttle` and implemented it only for `RelNode`
##########
sql/src/main/java/org/apache/druid/sql/calcite/planner/CalcitePlanner.java:
##########
@@ -502,4 +505,24 @@ void from(CalcitePlanner planner)
+ " to " + this);
}
}
+
+ /** Define some tool members and methods for hints. */
+ private static class HintTools
+ {
+ static final HintStrategyTable HINT_STRATEGY_TABLE =
createHintStrategies();
+
+ /**
+ * Creates hint strategies.
+ *
+ * @return HintStrategyTable instance
+ */
+ private static HintStrategyTable createHintStrategies()
Review Comment:
note: I'm not sure if the `HintTools` class is needed; you could just have
this method alone...its already `static`
##########
sql/src/test/java/org/apache/druid/quidem/DruidQuidemCommandHandler.java:
##########
@@ -245,6 +269,159 @@ static class DruidPlanCommand extends
AbstractRelPlanCommand
}
}
+ static class HintPlanCommand extends AbstractRelPlanCommand
+ {
+ HintPlanCommand(List<String> lines, List<String> content)
+ {
+ super(lines, content, DruidHook.DRUID_PLAN);
+ }
+
+ @Override
+ protected String getString(RelNode node)
+ {
+ final List<String> hintsCollect = new ArrayList<>();
+ final HintCollector collector = new HintCollector(hintsCollect);
+ node.accept(collector);
+ StringBuilder builder = new StringBuilder();
+ for (String hintLine : hintsCollect) {
+ builder.append(hintLine).append("\n");
+ }
+
+ return builder.toString();
+ }
+
+ private static class HintCollector extends RelShuttleImpl
+ {
+ private final List<String> hintsCollect;
+
+ HintCollector(List<String> hintsCollect)
+ {
+ this.hintsCollect = hintsCollect;
+ }
+
+ @Override
+ public RelNode visit(TableScan scan)
+ {
+ if (!scan.getHints().isEmpty()) {
+ this.hintsCollect.add("TableScan:" + scan.getHints());
+ }
Review Comment:
note: you could remove all these conditionals; add a `collectHints` method
with kinda the same content; and use `relNode.getClass().getSimpleName()` to
get `TableScan` and similar names
##########
extensions-core/multi-stage-query/src/test/quidem/org.apache.druid.msq.quidem.MSQQuidemTest/msqJoinHint.iq:
##########
@@ -0,0 +1,1022 @@
+!use druidtest://?componentSupplier=DrillWindowQueryMSQComponentSupplier
+!set outputformat mysql
+
+select w1.cityName, w2.countryName
+from (select cityName, countryName from wikipedia where cityName='New York') w1
+JOIN wikipedia w2 ON w1.cityName = w2.cityName
+where w1.cityName='New York';
+
+
+!hints
+
+[ {
+ "stageNumber" : 0,
+ "definition" : {
+ "id" : "<taskId>_0",
+ "input" : [ {
+ "type" : "table",
+ "dataSource" : "wikipedia",
+ "intervals" : [
"-146136543-09-08T08:23:32.096Z/146140482-04-24T15:36:27.903Z" ],
+ "filter" : {
+ "type" : "equals",
+ "column" : "cityName",
+ "matchValueType" : "STRING",
+ "matchValue" : "New York"
+ },
+ "filterFields" : [ "cityName" ]
+ } ],
+ "processor" : {
+ "type" : "scan",
+ "query" : {
+ "queryType" : "scan",
+ "dataSource" : {
+ "type" : "inputNumber",
+ "inputNumber" : 0
+ },
+ "intervals" : {
+ "type" : "intervals",
+ "intervals" : [
"-146136543-09-08T08:23:32.096Z/146140482-04-24T15:36:27.903Z" ]
+ },
+ "resultFormat" : "compactedList",
+ "filter" : {
+ "type" : "equals",
+ "column" : "cityName",
+ "matchValueType" : "STRING",
+ "matchValue" : "New York"
+ },
+ "columns" : [ "cityName" ],
+ "context" : {
+ "scanSignature" : "[{\"name\":\"cityName\",\"type\":\"STRING\"}]",
+ "sqlInsertSegmentGranularity" : null,
+ "sqlQueryId" : __SQL_QUERY_ID__
+ "sqlStringifyArrays" : false
+ },
+ "columnTypes" : [ "STRING" ],
+ "granularity" : {
+ "type" : "all"
+ },
+ "legacy" : false
+ }
+ },
+ "signature" : [ {
+ "name" : "__boost",
+ "type" : "LONG"
+ }, {
+ "name" : "cityName",
+ "type" : "STRING"
+ } ],
+ "shuffleSpec" : {
+ "type" : "maxCount",
+ "clusterBy" : {
+ "columns" : [ {
+ "columnName" : "__boost",
+ "order" : "ASCENDING"
+ } ]
+ },
+ "partitions" : 1
+ },
+ "maxWorkerCount" : 1
+ },
+ "phase" : "FINISHED",
+ "workerCount" : 1,
+ "partitionCount" : 1,
+ "shuffle" : "globalSort",
+ "output" : "localStorage",
+ "startTime" : __TIMESTAMP__
+ "duration" : __DURATION__
+ "sort" : true
+}, {
+ "stageNumber" : 1,
+ "definition" : {
+ "id" : "<taskId>_1",
+ "input" : [ {
+ "type" : "table",
+ "dataSource" : "wikipedia",
+ "intervals" : [
"-146136543-09-08T08:23:32.096Z/146140482-04-24T15:36:27.903Z" ]
+ } ],
+ "processor" : {
+ "type" : "scan",
+ "query" : {
+ "queryType" : "scan",
+ "dataSource" : {
+ "type" : "inputNumber",
+ "inputNumber" : 0
+ },
+ "intervals" : {
+ "type" : "intervals",
+ "intervals" : [
"-146136543-09-08T08:23:32.096Z/146140482-04-24T15:36:27.903Z" ]
+ },
+ "resultFormat" : "compactedList",
+ "columns" : [ "cityName", "countryName" ],
+ "context" : {
+ "scanSignature" :
"[{\"name\":\"cityName\",\"type\":\"STRING\"},{\"name\":\"countryName\",\"type\":\"STRING\"}]",
+ "sqlInsertSegmentGranularity" : null,
+ "sqlQueryId" : __SQL_QUERY_ID__
+ "sqlStringifyArrays" : false
+ },
+ "columnTypes" : [ "STRING", "STRING" ],
+ "granularity" : {
+ "type" : "all"
+ },
+ "legacy" : false
+ }
+ },
+ "signature" : [ {
+ "name" : "__boost",
+ "type" : "LONG"
+ }, {
+ "name" : "cityName",
+ "type" : "STRING"
+ }, {
+ "name" : "countryName",
+ "type" : "STRING"
+ } ],
+ "shuffleSpec" : {
+ "type" : "maxCount",
+ "clusterBy" : {
+ "columns" : [ {
+ "columnName" : "__boost",
+ "order" : "ASCENDING"
+ } ]
+ },
+ "partitions" : 1
+ },
+ "maxWorkerCount" : 1
+ },
+ "phase" : "FINISHED",
+ "workerCount" : 1,
+ "partitionCount" : 1,
+ "shuffle" : "globalSort",
+ "output" : "localStorage",
+ "startTime" : __TIMESTAMP__
+ "duration" : __DURATION__
+ "sort" : true
+}, {
+ "stageNumber" : 2,
+ "definition" : {
+ "id" : "<taskId>_2",
+ "input" : [ {
+ "type" : "stage",
+ "stage" : 0
+ }, {
+ "type" : "stage",
+ "stage" : 1
+ } ],
+ "broadcast" : [ 1 ],
+ "processor" : {
+ "type" : "scan",
+ "query" : {
+ "queryType" : "scan",
+ "dataSource" : {
+ "type" : "join",
+ "left" : {
+ "type" : "inputNumber",
+ "inputNumber" : 0
+ },
+ "right" : {
+ "type" : "inputNumber",
+ "inputNumber" : 1
+ },
+ "rightPrefix" : "j0.",
+ "condition" : "(\"cityName\" == \"j0.cityName\")",
+ "joinType" : "INNER",
+ "preferredJoinAlgorithm" : null
+ },
+ "intervals" : {
+ "type" : "intervals",
+ "intervals" : [
"-146136543-09-08T08:23:32.096Z/146140482-04-24T15:36:27.903Z" ]
+ },
+ "virtualColumns" : [ {
+ "type" : "expression",
+ "name" : "v0",
+ "expression" : "'New York'",
+ "outputType" : "STRING"
+ } ],
+ "resultFormat" : "compactedList",
+ "columns" : [ "j0.countryName", "v0" ],
+ "context" : {
+ "__user" : null,
+ "finalize" : true,
+ "maxParseExceptions" : 0,
+ "scanSignature" :
"[{\"name\":\"j0.countryName\",\"type\":\"STRING\"},{\"name\":\"v0\",\"type\":\"STRING\"}]",
+ "sqlQueryId" : __SQL_QUERY_ID__
+ "sqlStringifyArrays" : false
+ },
+ "columnTypes" : [ "STRING", "STRING" ],
+ "granularity" : {
+ "type" : "all"
+ },
+ "legacy" : false
+ }
+ },
+ "signature" : [ {
+ "name" : "__boost",
+ "type" : "LONG"
+ }, {
+ "name" : "j0.countryName",
+ "type" : "STRING"
+ }, {
+ "name" : "v0",
+ "type" : "STRING"
+ } ],
+ "shuffleSpec" : {
+ "type" : "maxCount",
+ "clusterBy" : {
+ "columns" : [ {
+ "columnName" : "__boost",
+ "order" : "ASCENDING"
+ } ]
+ },
+ "partitions" : 1
+ },
+ "maxWorkerCount" : 1
+ },
+ "phase" : "FINISHED",
+ "workerCount" : 1,
+ "partitionCount" : 1,
+ "shuffle" : "globalSort",
+ "output" : "localStorage",
+ "startTime" : __TIMESTAMP__
+ "duration" : __DURATION__
+ "sort" : true
+} ]
+!msqPlan
Review Comment:
note: its near impossible to read these msq plans....not sure what we could
do about that
##########
sql/src/main/java/org/apache/druid/sql/calcite/planner/QueryUtils.java:
##########
@@ -55,4 +57,13 @@ public static List<ColumnMapping> buildColumnMappings(
return columnMappings;
}
+
+ public static JoinAlgorithm getJoinAlgorithm(Join join, PlannerContext
plannerContext)
+ {
+ if (join.getHints().isEmpty()) {
+ return plannerContext.getJoinAlgorithm();
+ }
+
+ return
JoinHint.fromString(join.getHints().get(0).hintName).getJoinAlgorithm();
Review Comment:
this may work right now but it assumes that there is only one hint and that
may only be a `JoinHint`
##########
sql/src/main/java/org/apache/druid/sql/calcite/planner/JoinHint.java:
##########
@@ -0,0 +1,88 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.druid.sql.calcite.planner;
+
+import com.fasterxml.jackson.annotation.JsonCreator;
+import com.fasterxml.jackson.annotation.JsonValue;
+import org.apache.druid.java.util.common.IAE;
+import org.apache.druid.query.JoinAlgorithm;
+
+import java.util.Arrays;
+
+public enum JoinHint
+{
+ USE_MERGE_JOIN("use_merge_join") {
Review Comment:
why the `use_` and `no_` prefixes?
I feel like it would be better to not overthink it for now.... we could have
`broadcast` and `sort_merge` to align with the internal logic...
I think its better to not create alternates if there is no specific need for
it.
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]