kgyrtkirk commented on code in PR #17541: URL: https://github.com/apache/druid/pull/17541#discussion_r1881502162
########## sql/src/main/java/org/apache/druid/sql/calcite/planner/JoinHint.java: ########## @@ -0,0 +1,81 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.druid.sql.calcite.planner; + +import com.fasterxml.jackson.annotation.JsonCreator; +import com.fasterxml.jackson.annotation.JsonValue; +import org.apache.druid.java.util.common.IAE; +import org.apache.druid.query.JoinAlgorithm; + +import java.util.Arrays; + +public enum JoinHint Review Comment: I think it would be better to not have this as an enum - and you could probably use `as(JoinAlgorithm.class)` or `asJoinAlgorithm()` to obtain the current value ########## sql/src/main/java/org/apache/druid/sql/calcite/planner/CalcitePlanner.java: ########## @@ -502,4 +510,41 @@ void from(CalcitePlanner planner) + " to " + this); } } + + /** Define some tool members and methods for hints. */ + private static class HintTools + { + static final HintStrategyTable HINT_STRATEGY_TABLE = createHintStrategies(); + + /** + * Creates hint strategies. 
+ * + * @return HintStrategyTable instance + */ + private static HintStrategyTable createHintStrategies() + { + return HintStrategyTable.builder() + .hintStrategy("broadcast", HintPredicates.JOIN) Review Comment: note: I think this class should be moved into `DruidHints`, or whatever class you rename/restructure the `JoinHint` class into ########## sql/src/main/java/org/apache/druid/sql/calcite/planner/JoinHint.java: ########## @@ -0,0 +1,81 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +package org.apache.druid.sql.calcite.planner; + +import com.fasterxml.jackson.annotation.JsonCreator; +import com.fasterxml.jackson.annotation.JsonValue; +import org.apache.druid.java.util.common.IAE; +import org.apache.druid.query.JoinAlgorithm; + +import java.util.Arrays; + +public enum JoinHint +{ + SORT_MERGE("sort_merge") { + @Override + public JoinAlgorithm getJoinAlgorithm() + { + return JoinAlgorithm.SORT_MERGE; + } + }, + BROADCAST("broadcast") { + @Override + public JoinAlgorithm getJoinAlgorithm() + { + return JoinAlgorithm.BROADCAST; + } + }; + + private final String id; + + JoinHint(String id) + { + this.id = id; + } + + @JsonCreator + public static JoinHint fromString(final String id) + { Review Comment: I don't think this should be serializable by jackson; if it gets on the wire it should be a `JoinAlgorithm` already ########## extensions-core/multi-stage-query/src/test/quidem/org.apache.druid.msq.quidem.MSQQuidemTest/msqNestedJoinHint.iq: ########## @@ -0,0 +1,859 @@ +!use druidtest://?componentSupplier=DrillWindowQueryMSQComponentSupplier +!set outputformat mysql + +select w1.cityName, w2.countryName +from +( + select w3.cityName AS cityName, w4.countryName AS countryName from wikipedia w3 LEFT JOIN wikipedia w4 ON w3.regionName = w4.regionName +) w1 +JOIN wikipedia w2 ON w1.cityName = w2.cityName +where w1.cityName='New York'; + +[ { + "stageNumber" : 0, + "definition" : { + "id" : "<taskId>_0", + "input" : [ { + "type" : "table", + "dataSource" : "wikipedia", + "intervals" : [ "-146136543-09-08T08:23:32.096Z/146140482-04-24T15:36:27.903Z" ] + } ], + "processor" : { + "type" : "scan", + "query" : { + "queryType" : "scan", + "dataSource" : { + "type" : "inputNumber", + "inputNumber" : 0 + }, + "intervals" : { + "type" : "intervals", + "intervals" : [ "-146136543-09-08T08:23:32.096Z/146140482-04-24T15:36:27.903Z" ] + }, + "resultFormat" : "compactedList", + "columns" : [ "regionName" ], + "context" : { + "scanSignature" : 
"[{\"name\":\"regionName\",\"type\":\"STRING\"}]", + "sqlInsertSegmentGranularity" : null, + "sqlQueryId" : __SQL_QUERY_ID__ + "sqlStringifyArrays" : false + }, + "columnTypes" : [ "STRING" ], + "granularity" : { + "type" : "all" + }, + "legacy" : false + } + }, + "signature" : [ { + "name" : "__boost", + "type" : "LONG" + }, { + "name" : "regionName", + "type" : "STRING" + } ], + "shuffleSpec" : { + "type" : "maxCount", + "clusterBy" : { + "columns" : [ { + "columnName" : "__boost", + "order" : "ASCENDING" + } ] + }, + "partitions" : 1 + }, + "maxWorkerCount" : 1 + }, + "phase" : "FINISHED", + "workerCount" : 1, + "partitionCount" : 1, + "shuffle" : "globalSort", + "output" : "localStorage", + "startTime" : __TIMESTAMP__ + "duration" : __DURATION__ + "sort" : true +}, { + "stageNumber" : 1, + "definition" : { + "id" : "<taskId>_1", + "input" : [ { + "type" : "table", + "dataSource" : "wikipedia", + "intervals" : [ "-146136543-09-08T08:23:32.096Z/146140482-04-24T15:36:27.903Z" ] + } ], + "processor" : { + "type" : "scan", + "query" : { + "queryType" : "scan", + "dataSource" : { + "type" : "inputNumber", + "inputNumber" : 0 + }, + "intervals" : { + "type" : "intervals", + "intervals" : [ "-146136543-09-08T08:23:32.096Z/146140482-04-24T15:36:27.903Z" ] + }, + "resultFormat" : "compactedList", + "columns" : [ "cityName", "countryName" ], + "context" : { + "scanSignature" : "[{\"name\":\"cityName\",\"type\":\"STRING\"},{\"name\":\"countryName\",\"type\":\"STRING\"}]", + "sqlInsertSegmentGranularity" : null, + "sqlQueryId" : __SQL_QUERY_ID__ + "sqlStringifyArrays" : false + }, + "columnTypes" : [ "STRING", "STRING" ], + "granularity" : { + "type" : "all" + }, + "legacy" : false + } + }, + "signature" : [ { + "name" : "__boost", + "type" : "LONG" + }, { + "name" : "cityName", + "type" : "STRING" + }, { + "name" : "countryName", + "type" : "STRING" + } ], + "shuffleSpec" : { + "type" : "maxCount", + "clusterBy" : { + "columns" : [ { + "columnName" : "__boost", + "order" : 
"ASCENDING" + } ] + }, + "partitions" : 1 + }, + "maxWorkerCount" : 1 + }, + "phase" : "FINISHED", + "workerCount" : 1, + "partitionCount" : 1, + "shuffle" : "globalSort", + "output" : "localStorage", + "startTime" : __TIMESTAMP__ + "duration" : __DURATION__ + "sort" : true +}, { + "stageNumber" : 2, + "definition" : { + "id" : "<taskId>_2", + "input" : [ { + "type" : "table", + "dataSource" : "wikipedia", + "intervals" : [ "-146136543-09-08T08:23:32.096Z/146140482-04-24T15:36:27.903Z" ], + "filter" : { + "type" : "equals", + "column" : "cityName", + "matchValueType" : "STRING", + "matchValue" : "New York" + }, + "filterFields" : [ "cityName" ] + }, { + "type" : "stage", + "stage" : 0 + }, { + "type" : "stage", + "stage" : 1 + } ], + "broadcast" : [ 1, 2 ], + "processor" : { + "type" : "scan", + "query" : { + "queryType" : "scan", + "dataSource" : { + "type" : "join", + "left" : { + "type" : "join", + "left" : { + "type" : "inputNumber", + "inputNumber" : 0 + }, + "right" : { + "type" : "inputNumber", + "inputNumber" : 1 + }, + "rightPrefix" : "j0.", + "condition" : "(\"regionName\" == \"j0.regionName\")", + "joinType" : "LEFT", + "preferredJoinAlgorithm" : "broadcast" + }, + "right" : { + "type" : "inputNumber", + "inputNumber" : 2 + }, + "rightPrefix" : "_j0.", + "condition" : "(\"cityName\" == \"_j0.cityName\")", + "joinType" : "INNER", + "preferredJoinAlgorithm" : "broadcast" + }, + "intervals" : { + "type" : "intervals", + "intervals" : [ "-146136543-09-08T08:23:32.096Z/146140482-04-24T15:36:27.903Z" ] + }, + "virtualColumns" : [ { + "type" : "expression", + "name" : "v0", + "expression" : "'New York'", + "outputType" : "STRING" + } ], + "resultFormat" : "compactedList", + "filter" : { + "type" : "equals", + "column" : "cityName", + "matchValueType" : "STRING", + "matchValue" : "New York" + }, + "columns" : [ "v0", "_j0.countryName" ], + "context" : { + "__user" : null, + "finalize" : true, + "maxParseExceptions" : 0, + "scanSignature" : 
"[{\"name\":\"v0\",\"type\":\"STRING\"},{\"name\":\"_j0.countryName\",\"type\":\"STRING\"}]", + "sqlQueryId" : __SQL_QUERY_ID__ + "sqlStringifyArrays" : false, + "windowFunctionOperatorTransformation" : true + }, + "columnTypes" : [ "STRING", "STRING" ], + "granularity" : { + "type" : "all" + }, + "legacy" : false + } + }, + "signature" : [ { + "name" : "__boost", + "type" : "LONG" + }, { + "name" : "v0", + "type" : "STRING" + }, { + "name" : "_j0.countryName", + "type" : "STRING" + } ], + "shuffleSpec" : { + "type" : "maxCount", + "clusterBy" : { + "columns" : [ { + "columnName" : "__boost", + "order" : "ASCENDING" + } ] + }, + "partitions" : 1 + }, + "maxWorkerCount" : 1 + }, + "phase" : "FINISHED", + "workerCount" : 1, + "partitionCount" : 1, + "shuffle" : "globalSort", + "output" : "localStorage", + "startTime" : __TIMESTAMP__ + "duration" : __DURATION__ + "sort" : true +} ] +!msqPlan + +select w1.cityName, w2.countryName +from +( + select /*+ broadcast */ w3.cityName AS cityName, w4.countryName AS countryName from wikipedia w3 LEFT JOIN wikipedia w4 ON w3.regionName = w4.regionName Review Comment: Isn't this `broadcast` a no-op compared to the original plan? Would it make sense to set the default to sort-merge at the beginning, to see the effect of this hint? ########## sql/src/main/java/org/apache/druid/sql/calcite/planner/CalcitePlanner.java: ########## @@ -502,4 +510,41 @@ void from(CalcitePlanner planner) + " to " + this); } } + + /** Define some tool members and methods for hints. */ + private static class HintTools + { + static final HintStrategyTable HINT_STRATEGY_TABLE = createHintStrategies(); + + /** + * Creates hint strategies. + * + * @return HintStrategyTable instance + */ + private static HintStrategyTable createHintStrategies() + { + return HintStrategyTable.builder() + .hintStrategy("broadcast", HintPredicates.JOIN) Review Comment: nit: is it really necessary to hard-code the `"broadcast"` literal here? 
########## extensions-core/multi-stage-query/src/test/quidem/org.apache.druid.msq.quidem.MSQQuidemTest/msqNestedJoinHint.iq: ########## @@ -0,0 +1,859 @@ +!use druidtest://?componentSupplier=DrillWindowQueryMSQComponentSupplier +!set outputformat mysql + +select w1.cityName, w2.countryName +from +( + select w3.cityName AS cityName, w4.countryName AS countryName from wikipedia w3 LEFT JOIN wikipedia w4 ON w3.regionName = w4.regionName +) w1 +JOIN wikipedia w2 ON w1.cityName = w2.cityName +where w1.cityName='New York'; + +[ { + "stageNumber" : 0, + "definition" : { + "id" : "<taskId>_0", + "input" : [ { + "type" : "table", + "dataSource" : "wikipedia", + "intervals" : [ "-146136543-09-08T08:23:32.096Z/146140482-04-24T15:36:27.903Z" ] + } ], + "processor" : { + "type" : "scan", + "query" : { + "queryType" : "scan", + "dataSource" : { + "type" : "inputNumber", + "inputNumber" : 0 + }, + "intervals" : { + "type" : "intervals", + "intervals" : [ "-146136543-09-08T08:23:32.096Z/146140482-04-24T15:36:27.903Z" ] + }, + "resultFormat" : "compactedList", + "columns" : [ "regionName" ], + "context" : { + "scanSignature" : "[{\"name\":\"regionName\",\"type\":\"STRING\"}]", + "sqlInsertSegmentGranularity" : null, + "sqlQueryId" : __SQL_QUERY_ID__ + "sqlStringifyArrays" : false + }, + "columnTypes" : [ "STRING" ], + "granularity" : { + "type" : "all" + }, + "legacy" : false + } + }, + "signature" : [ { + "name" : "__boost", + "type" : "LONG" + }, { + "name" : "regionName", + "type" : "STRING" + } ], + "shuffleSpec" : { + "type" : "maxCount", + "clusterBy" : { + "columns" : [ { + "columnName" : "__boost", + "order" : "ASCENDING" + } ] + }, + "partitions" : 1 + }, + "maxWorkerCount" : 1 + }, + "phase" : "FINISHED", + "workerCount" : 1, + "partitionCount" : 1, + "shuffle" : "globalSort", + "output" : "localStorage", + "startTime" : __TIMESTAMP__ + "duration" : __DURATION__ + "sort" : true +}, { + "stageNumber" : 1, + "definition" : { + "id" : "<taskId>_1", + "input" : [ { + "type" : 
"table", + "dataSource" : "wikipedia", + "intervals" : [ "-146136543-09-08T08:23:32.096Z/146140482-04-24T15:36:27.903Z" ] + } ], + "processor" : { + "type" : "scan", + "query" : { + "queryType" : "scan", + "dataSource" : { + "type" : "inputNumber", + "inputNumber" : 0 + }, + "intervals" : { + "type" : "intervals", + "intervals" : [ "-146136543-09-08T08:23:32.096Z/146140482-04-24T15:36:27.903Z" ] + }, + "resultFormat" : "compactedList", + "columns" : [ "cityName", "countryName" ], + "context" : { + "scanSignature" : "[{\"name\":\"cityName\",\"type\":\"STRING\"},{\"name\":\"countryName\",\"type\":\"STRING\"}]", + "sqlInsertSegmentGranularity" : null, + "sqlQueryId" : __SQL_QUERY_ID__ + "sqlStringifyArrays" : false + }, + "columnTypes" : [ "STRING", "STRING" ], + "granularity" : { + "type" : "all" + }, + "legacy" : false + } + }, + "signature" : [ { + "name" : "__boost", + "type" : "LONG" + }, { + "name" : "cityName", + "type" : "STRING" + }, { + "name" : "countryName", + "type" : "STRING" + } ], + "shuffleSpec" : { + "type" : "maxCount", + "clusterBy" : { + "columns" : [ { + "columnName" : "__boost", + "order" : "ASCENDING" + } ] + }, + "partitions" : 1 + }, + "maxWorkerCount" : 1 + }, + "phase" : "FINISHED", + "workerCount" : 1, + "partitionCount" : 1, + "shuffle" : "globalSort", + "output" : "localStorage", + "startTime" : __TIMESTAMP__ + "duration" : __DURATION__ + "sort" : true +}, { + "stageNumber" : 2, + "definition" : { + "id" : "<taskId>_2", + "input" : [ { + "type" : "table", + "dataSource" : "wikipedia", + "intervals" : [ "-146136543-09-08T08:23:32.096Z/146140482-04-24T15:36:27.903Z" ], + "filter" : { + "type" : "equals", + "column" : "cityName", + "matchValueType" : "STRING", + "matchValue" : "New York" + }, + "filterFields" : [ "cityName" ] + }, { + "type" : "stage", + "stage" : 0 + }, { + "type" : "stage", + "stage" : 1 + } ], + "broadcast" : [ 1, 2 ], + "processor" : { + "type" : "scan", + "query" : { + "queryType" : "scan", + "dataSource" : { + "type" 
: "join", + "left" : { + "type" : "join", + "left" : { + "type" : "inputNumber", + "inputNumber" : 0 + }, + "right" : { + "type" : "inputNumber", + "inputNumber" : 1 + }, + "rightPrefix" : "j0.", + "condition" : "(\"regionName\" == \"j0.regionName\")", + "joinType" : "LEFT", + "preferredJoinAlgorithm" : "broadcast" + }, + "right" : { + "type" : "inputNumber", + "inputNumber" : 2 + }, + "rightPrefix" : "_j0.", + "condition" : "(\"cityName\" == \"_j0.cityName\")", + "joinType" : "INNER", + "preferredJoinAlgorithm" : "broadcast" + }, + "intervals" : { + "type" : "intervals", + "intervals" : [ "-146136543-09-08T08:23:32.096Z/146140482-04-24T15:36:27.903Z" ] + }, + "virtualColumns" : [ { + "type" : "expression", + "name" : "v0", + "expression" : "'New York'", + "outputType" : "STRING" + } ], + "resultFormat" : "compactedList", + "filter" : { + "type" : "equals", + "column" : "cityName", + "matchValueType" : "STRING", + "matchValue" : "New York" + }, + "columns" : [ "v0", "_j0.countryName" ], + "context" : { + "__user" : null, + "finalize" : true, + "maxParseExceptions" : 0, + "scanSignature" : "[{\"name\":\"v0\",\"type\":\"STRING\"},{\"name\":\"_j0.countryName\",\"type\":\"STRING\"}]", + "sqlQueryId" : __SQL_QUERY_ID__ + "sqlStringifyArrays" : false, + "windowFunctionOperatorTransformation" : true + }, + "columnTypes" : [ "STRING", "STRING" ], + "granularity" : { + "type" : "all" + }, + "legacy" : false + } + }, + "signature" : [ { + "name" : "__boost", + "type" : "LONG" + }, { + "name" : "v0", + "type" : "STRING" + }, { + "name" : "_j0.countryName", + "type" : "STRING" + } ], + "shuffleSpec" : { + "type" : "maxCount", + "clusterBy" : { + "columns" : [ { + "columnName" : "__boost", + "order" : "ASCENDING" + } ] + }, + "partitions" : 1 + }, + "maxWorkerCount" : 1 + }, + "phase" : "FINISHED", + "workerCount" : 1, + "partitionCount" : 1, + "shuffle" : "globalSort", + "output" : "localStorage", + "startTime" : __TIMESTAMP__ + "duration" : __DURATION__ + "sort" : true +} ] 
+!msqPlan + +select w1.cityName, w2.countryName +from +( + select /*+ broadcast */ w3.cityName AS cityName, w4.countryName AS countryName from wikipedia w3 LEFT JOIN wikipedia w4 ON w3.regionName = w4.regionName +) w1 +JOIN wikipedia w2 ON w1.cityName = w2.cityName +where w1.cityName='New York'; + +LogicalJoin:[[broadcast inheritPath:[0]]] + +!hints + +[ { + "stageNumber" : 0, + "definition" : { + "id" : "<taskId>_0", + "input" : [ { + "type" : "table", + "dataSource" : "wikipedia", + "intervals" : [ "-146136543-09-08T08:23:32.096Z/146140482-04-24T15:36:27.903Z" ] + } ], + "processor" : { + "type" : "scan", + "query" : { + "queryType" : "scan", + "dataSource" : { + "type" : "inputNumber", + "inputNumber" : 0 + }, + "intervals" : { + "type" : "intervals", + "intervals" : [ "-146136543-09-08T08:23:32.096Z/146140482-04-24T15:36:27.903Z" ] + }, + "resultFormat" : "compactedList", + "columns" : [ "regionName" ], + "context" : { + "scanSignature" : "[{\"name\":\"regionName\",\"type\":\"STRING\"}]", + "sqlInsertSegmentGranularity" : null, + "sqlQueryId" : __SQL_QUERY_ID__ + "sqlStringifyArrays" : false + }, + "columnTypes" : [ "STRING" ], + "granularity" : { + "type" : "all" + }, + "legacy" : false + } + }, + "signature" : [ { + "name" : "__boost", + "type" : "LONG" + }, { + "name" : "regionName", + "type" : "STRING" + } ], + "shuffleSpec" : { + "type" : "maxCount", + "clusterBy" : { + "columns" : [ { + "columnName" : "__boost", + "order" : "ASCENDING" + } ] + }, + "partitions" : 1 + }, + "maxWorkerCount" : 1 + }, + "phase" : "FINISHED", + "workerCount" : 1, + "partitionCount" : 1, + "shuffle" : "globalSort", + "output" : "localStorage", + "startTime" : __TIMESTAMP__ + "duration" : __DURATION__ + "sort" : true +}, { + "stageNumber" : 1, + "definition" : { + "id" : "<taskId>_1", + "input" : [ { + "type" : "table", + "dataSource" : "wikipedia", + "intervals" : [ "-146136543-09-08T08:23:32.096Z/146140482-04-24T15:36:27.903Z" ] + } ], + "processor" : { + "type" : "scan", + 
"query" : { + "queryType" : "scan", + "dataSource" : { + "type" : "inputNumber", + "inputNumber" : 0 + }, + "intervals" : { + "type" : "intervals", + "intervals" : [ "-146136543-09-08T08:23:32.096Z/146140482-04-24T15:36:27.903Z" ] + }, + "resultFormat" : "compactedList", + "columns" : [ "cityName", "countryName" ], + "context" : { + "scanSignature" : "[{\"name\":\"cityName\",\"type\":\"STRING\"},{\"name\":\"countryName\",\"type\":\"STRING\"}]", + "sqlInsertSegmentGranularity" : null, + "sqlQueryId" : __SQL_QUERY_ID__ + "sqlStringifyArrays" : false + }, + "columnTypes" : [ "STRING", "STRING" ], + "granularity" : { + "type" : "all" + }, + "legacy" : false + } + }, + "signature" : [ { + "name" : "__boost", + "type" : "LONG" + }, { + "name" : "cityName", + "type" : "STRING" + }, { + "name" : "countryName", + "type" : "STRING" + } ], + "shuffleSpec" : { + "type" : "maxCount", + "clusterBy" : { + "columns" : [ { + "columnName" : "__boost", + "order" : "ASCENDING" + } ] + }, + "partitions" : 1 + }, + "maxWorkerCount" : 1 + }, + "phase" : "FINISHED", + "workerCount" : 1, + "partitionCount" : 1, + "shuffle" : "globalSort", + "output" : "localStorage", + "startTime" : __TIMESTAMP__ + "duration" : __DURATION__ + "sort" : true +}, { + "stageNumber" : 2, + "definition" : { + "id" : "<taskId>_2", + "input" : [ { + "type" : "table", + "dataSource" : "wikipedia", + "intervals" : [ "-146136543-09-08T08:23:32.096Z/146140482-04-24T15:36:27.903Z" ], + "filter" : { + "type" : "equals", + "column" : "cityName", + "matchValueType" : "STRING", + "matchValue" : "New York" + }, + "filterFields" : [ "cityName" ] + }, { + "type" : "stage", + "stage" : 0 + }, { + "type" : "stage", + "stage" : 1 + } ], + "broadcast" : [ 1, 2 ], + "processor" : { + "type" : "scan", + "query" : { + "queryType" : "scan", + "dataSource" : { + "type" : "join", + "left" : { + "type" : "join", + "left" : { + "type" : "inputNumber", + "inputNumber" : 0 + }, + "right" : { + "type" : "inputNumber", + "inputNumber" : 1 + 
}, + "rightPrefix" : "j0.", + "condition" : "(\"regionName\" == \"j0.regionName\")", + "joinType" : "LEFT", + "preferredJoinAlgorithm" : "broadcast" + }, + "right" : { + "type" : "inputNumber", + "inputNumber" : 2 + }, + "rightPrefix" : "_j0.", + "condition" : "(\"cityName\" == \"_j0.cityName\")", + "joinType" : "INNER", + "preferredJoinAlgorithm" : "broadcast" + }, + "intervals" : { + "type" : "intervals", + "intervals" : [ "-146136543-09-08T08:23:32.096Z/146140482-04-24T15:36:27.903Z" ] + }, + "virtualColumns" : [ { + "type" : "expression", + "name" : "v0", + "expression" : "'New York'", + "outputType" : "STRING" + } ], + "resultFormat" : "compactedList", + "filter" : { + "type" : "equals", + "column" : "cityName", + "matchValueType" : "STRING", + "matchValue" : "New York" + }, + "columns" : [ "v0", "_j0.countryName" ], + "context" : { + "__user" : null, + "finalize" : true, + "maxParseExceptions" : 0, + "scanSignature" : "[{\"name\":\"v0\",\"type\":\"STRING\"},{\"name\":\"_j0.countryName\",\"type\":\"STRING\"}]", + "sqlQueryId" : __SQL_QUERY_ID__ + "sqlStringifyArrays" : false, + "windowFunctionOperatorTransformation" : true + }, + "columnTypes" : [ "STRING", "STRING" ], + "granularity" : { + "type" : "all" + }, + "legacy" : false + } + }, + "signature" : [ { + "name" : "__boost", + "type" : "LONG" + }, { + "name" : "v0", + "type" : "STRING" + }, { + "name" : "_j0.countryName", + "type" : "STRING" + } ], + "shuffleSpec" : { + "type" : "maxCount", + "clusterBy" : { + "columns" : [ { + "columnName" : "__boost", + "order" : "ASCENDING" + } ] + }, + "partitions" : 1 + }, + "maxWorkerCount" : 1 + }, + "phase" : "FINISHED", + "workerCount" : 1, + "partitionCount" : 1, + "shuffle" : "globalSort", + "output" : "localStorage", + "startTime" : __TIMESTAMP__ + "duration" : __DURATION__ + "sort" : true +} ] +!msqPlan + +select /*+ sort_merge */ w1.cityName, w2.countryName +from +( + select /*+ broadcast */ w3.cityName AS cityName, w4.countryName AS countryName from 
wikipedia w3 LEFT JOIN wikipedia w4 ON w3.regionName = w4.regionName +) w1 +JOIN wikipedia w2 ON w1.cityName = w2.cityName +where w1.cityName='New York'; + +[ { + "stageNumber" : 0, + "definition" : { + "id" : "<taskId>_0", + "input" : [ { + "type" : "table", + "dataSource" : "wikipedia", + "intervals" : [ "-146136543-09-08T08:23:32.096Z/146140482-04-24T15:36:27.903Z" ] + } ], + "processor" : { + "type" : "scan", + "query" : { + "queryType" : "scan", + "dataSource" : { + "type" : "inputNumber", + "inputNumber" : 0 + }, + "intervals" : { + "type" : "intervals", + "intervals" : [ "-146136543-09-08T08:23:32.096Z/146140482-04-24T15:36:27.903Z" ] + }, + "resultFormat" : "compactedList", + "columns" : [ "regionName" ], + "context" : { + "scanSignature" : "[{\"name\":\"regionName\",\"type\":\"STRING\"}]", + "sqlInsertSegmentGranularity" : null, + "sqlQueryId" : __SQL_QUERY_ID__ + "sqlStringifyArrays" : false + }, + "columnTypes" : [ "STRING" ], + "granularity" : { + "type" : "all" + }, + "legacy" : false + } + }, + "signature" : [ { + "name" : "__boost", + "type" : "LONG" + }, { + "name" : "regionName", + "type" : "STRING" + } ], + "shuffleSpec" : { + "type" : "maxCount", + "clusterBy" : { + "columns" : [ { + "columnName" : "__boost", + "order" : "ASCENDING" + } ] + }, + "partitions" : 1 + }, + "maxWorkerCount" : 1 + }, + "phase" : "FINISHED", + "workerCount" : 1, + "partitionCount" : 1, + "shuffle" : "globalSort", + "output" : "localStorage", + "startTime" : __TIMESTAMP__ + "duration" : __DURATION__ + "sort" : true +}, { + "stageNumber" : 1, + "definition" : { + "id" : "<taskId>_1", + "input" : [ { + "type" : "table", + "dataSource" : "wikipedia", + "intervals" : [ "-146136543-09-08T08:23:32.096Z/146140482-04-24T15:36:27.903Z" ] + }, { + "type" : "stage", + "stage" : 0 + } ], + "broadcast" : [ 1 ], + "processor" : { + "type" : "scan", + "query" : { + "queryType" : "scan", + "dataSource" : { + "type" : "join", + "left" : { + "type" : "inputNumber", + "inputNumber" : 0 + 
}, + "right" : { + "type" : "inputNumber", + "inputNumber" : 1 + }, + "rightPrefix" : "j0.", + "condition" : "(\"regionName\" == \"j0.regionName\")", + "joinType" : "LEFT", + "preferredJoinAlgorithm" : "broadcast" Review Comment: this should be `joinAlgorithm` ########## processing/src/main/java/org/apache/druid/query/planning/PreJoinableClause.java: ########## @@ -38,18 +40,31 @@ public class PreJoinableClause private final DataSource dataSource; private final JoinType joinType; private final JoinConditionAnalysis condition; + private final JoinAlgorithm preferredJoinAlgorithm; public PreJoinableClause( Review Comment: inline this constructor ########## sql/src/main/java/org/apache/druid/sql/calcite/planner/QueryUtils.java: ########## @@ -55,4 +58,20 @@ public static List<ColumnMapping> buildColumnMappings( return columnMappings; } + + public static JoinAlgorithm getJoinAlgorithm(Join join, PlannerContext plannerContext) + { + RelHint closestHint = null; + for (RelHint hint : join.getHints()) { + if (closestHint == null || hint.inheritPath.size() < closestHint.inheritPath.size()) { + closestHint = hint; + } Review Comment: this assumes that all hints are joinhints - which may not be true in the future ########## sql/src/main/java/org/apache/druid/sql/calcite/planner/CalcitePlanner.java: ########## @@ -502,4 +510,41 @@ void from(CalcitePlanner planner) + " to " + this); } } + + /** Define some tool members and methods for hints. */ + private static class HintTools + { + static final HintStrategyTable HINT_STRATEGY_TABLE = createHintStrategies(); + + /** + * Creates hint strategies. 
+ * + * @return HintStrategyTable instance + */ + private static HintStrategyTable createHintStrategies() + { + return HintStrategyTable.builder() + .hintStrategy("broadcast", HintPredicates.JOIN) + .hintStrategy("sort_merge", HintPredicates.JOIN) + //.hintStrategy("sort_merge", HintPredicates.and(HintPredicates.JOIN, joinWithFixedTableName())) + .build(); + } + + /** Returns a {@link HintPredicate} for join with specified table references. */ + private static HintPredicate joinWithFixedTableName() Review Comment: unused method; see also the commented-out line a few lines above ########## processing/src/main/java/org/apache/druid/query/JoinDataSource.java: ########## @@ -96,6 +96,8 @@ public class JoinDataSource implements DataSource private final DimFilter leftFilter; @Nullable private final JoinableFactoryWrapper joinableFactoryWrapper; + @Nullable + private final JoinAlgorithm preferredJoinAlgorithm; Review Comment: rename to `joinAlgorithm`. Note: I feel that in reality this field will never be `null` -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: [email protected] For queries about this service, please contact Infrastructure at: [email protected] --------------------------------------------------------------------- To unsubscribe, e-mail: [email protected] For additional commands, e-mail: [email protected]
