This is an automated email from the ASF dual-hosted git repository. jcamacho pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/hive.git
The following commit(s) were added to refs/heads/master by this push: new 0884899 HIVE-22962: Reuse HiveRelFieldTrimmer instance across queries (Jesus Camacho Rodriguez, reviewed by Vineet Garg) 0884899 is described below commit 0884899a7ba3c6b488a78a95428188df5c611bee Author: Jesus Camacho Rodriguez <jcama...@apache.org> AuthorDate: Mon Mar 2 17:09:59 2020 -0800 HIVE-22962: Reuse HiveRelFieldTrimmer instance across queries (Jesus Camacho Rodriguez, reviewed by Vineet Garg) Close apache/hive#943 --- .../java/org/apache/hadoop/hive/cli/CliDriver.java | 4 +- .../hive/benchmark/calcite/FieldTrimmerBench.java | 242 +++++ .../calcite/HiveDefaultRelMetadataProvider.java | 57 +- .../ql/optimizer/calcite/HiveRelFactories.java | 3 +- .../calcite/rules/HiveFieldTrimmerRule.java | 5 +- .../optimizer/calcite/rules/HiveReflectUtil.java | 334 ++++++ .../calcite/rules/HiveRelFieldTrimmer.java | 132 ++- .../optimizer/calcite/rules/RelFieldTrimmer.java | 1130 ++++++++++++++++++++ .../hadoop/hive/ql/parse/CalcitePlanner.java | 69 +- .../calcite/TestCBORuleFiredOnlyOnce.java | 2 +- .../apache/hive/service/server/HiveServer2.java | 4 +- 11 files changed, 1895 insertions(+), 87 deletions(-) diff --git a/cli/src/java/org/apache/hadoop/hive/cli/CliDriver.java b/cli/src/java/org/apache/hadoop/hive/cli/CliDriver.java index cfea602..0475264 100644 --- a/cli/src/java/org/apache/hadoop/hive/cli/CliDriver.java +++ b/cli/src/java/org/apache/hadoop/hive/cli/CliDriver.java @@ -788,8 +788,8 @@ public class CliDriver { ss.updateThreadName(); - // Initialize metadata provider class - CalcitePlanner.initializeMetadataProviderClass(); + // Initialize metadata provider class and trimmer + CalcitePlanner.warmup(); // Create views registry HiveMaterializedViewsRegistry.get().init(); diff --git a/itests/hive-jmh/src/main/java/org/apache/hive/benchmark/calcite/FieldTrimmerBench.java b/itests/hive-jmh/src/main/java/org/apache/hive/benchmark/calcite/FieldTrimmerBench.java new file mode 100644 index 
0000000..d98e251 --- /dev/null +++ b/itests/hive-jmh/src/main/java/org/apache/hive/benchmark/calcite/FieldTrimmerBench.java @@ -0,0 +1,242 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hive.benchmark.calcite; + +import com.google.common.collect.Lists; +import java.util.concurrent.TimeUnit; + +import org.apache.calcite.jdbc.JavaTypeFactoryImpl; +import org.apache.calcite.plan.RelOptCluster; +import org.apache.calcite.plan.RelOptPlanner; +import org.apache.calcite.plan.RelTraitSet; +import org.apache.calcite.rel.AbstractRelNode; +import org.apache.calcite.rel.BiRel; +import org.apache.calcite.rel.RelNode; +import org.apache.calcite.rel.SingleRel; +import org.apache.calcite.rel.type.RelDataType; +import org.apache.calcite.rel.type.RelRecordType; +import org.apache.calcite.rex.RexBuilder; +import org.apache.calcite.tools.RelBuilder; +import org.apache.hadoop.hive.conf.HiveConf; +import org.apache.hadoop.hive.ql.optimizer.calcite.HiveRelFactories; +import org.apache.hadoop.hive.ql.optimizer.calcite.HiveTypeSystemImpl; +import org.apache.hadoop.hive.ql.optimizer.calcite.rules.HiveRelFieldTrimmer; +import org.apache.hadoop.hive.ql.parse.CalcitePlanner; +import 
org.openjdk.jmh.annotations.Benchmark; +import org.openjdk.jmh.annotations.BenchmarkMode; +import org.openjdk.jmh.annotations.Fork; +import org.openjdk.jmh.annotations.Level; +import org.openjdk.jmh.annotations.Measurement; +import org.openjdk.jmh.annotations.Mode; +import org.openjdk.jmh.annotations.OutputTimeUnit; +import org.openjdk.jmh.annotations.Scope; +import org.openjdk.jmh.annotations.Setup; +import org.openjdk.jmh.annotations.State; +import org.openjdk.jmh.annotations.Warmup; +import org.openjdk.jmh.runner.Runner; +import org.openjdk.jmh.runner.RunnerException; +import org.openjdk.jmh.runner.options.Options; +import org.openjdk.jmh.runner.options.OptionsBuilder; + +/** + * This test measures the performance for field trimmer. + * <p/> + * This test uses JMH framework for benchmarking. + * You may execute this benchmark tool using JMH command line in different ways: + * <p/> + * To use the settings shown in the main() function, use: + * $ java -cp target/benchmarks.jar org.apache.hive.benchmark.calcite.FieldTrimmerBench + * <p/> + * To use the default settings used by JMH, use: + * $ java -jar target/benchmarks.jar org.apache.hive.benchmark.calcite.FieldTrimmerBench + */ +@State(Scope.Thread) +@OutputTimeUnit(TimeUnit.MICROSECONDS) +@Fork(1) +public class FieldTrimmerBench { + + RelOptCluster relOptCluster; + RelBuilder relBuilder; + RelNode root; + org.apache.calcite.sql2rel.RelFieldTrimmer cft; + HiveRelFieldTrimmer ft; + HiveRelFieldTrimmer hft; + + @Setup(Level.Trial) + public void initTrial() { + // Init cluster and builder + final RelOptPlanner planner = CalcitePlanner.createPlanner(new HiveConf()); + final RexBuilder rexBuilder = new RexBuilder( + new JavaTypeFactoryImpl(new HiveTypeSystemImpl())); + relOptCluster = RelOptCluster.create(planner, rexBuilder); + relBuilder = HiveRelFactories.HIVE_BUILDER.create(relOptCluster, null); + // Create operator tree + DummyNode0 d0 = new DummyNode0(relOptCluster, relOptCluster.traitSet()); + DummyNode1 d1 = 
new DummyNode1(relOptCluster, relOptCluster.traitSet()); + DummyNode2 d2 = new DummyNode2(relOptCluster, relOptCluster.traitSet()); + DummyNode3 d3 = new DummyNode3(relOptCluster, relOptCluster.traitSet()); + DummyNode4 d4 = new DummyNode4(relOptCluster, relOptCluster.traitSet(), d0); + DummyNode5 d5 = new DummyNode5(relOptCluster, relOptCluster.traitSet(), d1); + DummyNode6 d6 = new DummyNode6(relOptCluster, relOptCluster.traitSet(), d2); + DummyNode7 d7 = new DummyNode7(relOptCluster, relOptCluster.traitSet(), d3); + DummyNode8 d8 = new DummyNode8(relOptCluster, relOptCluster.traitSet(), d4, d5); + DummyNode9 d9 = new DummyNode9(relOptCluster, relOptCluster.traitSet(), d6, d7); + root = new DummyNode9(relOptCluster, relOptCluster.traitSet(), d8, d9); + } + + @Benchmark + @BenchmarkMode({Mode.Throughput, Mode.AverageTime}) + @Warmup(iterations = 5, time = 2, timeUnit = TimeUnit.SECONDS) + @Measurement(iterations = 10, time = 2, timeUnit = TimeUnit.SECONDS) + public void baseRelFieldTrimmer() { + // We initialize the field trimmer for every execution of the benchmark + cft = new org.apache.calcite.sql2rel.RelFieldTrimmer(null, relBuilder); + cft.trim(root); + cft = null; + } + + @Benchmark + @BenchmarkMode({Mode.Throughput, Mode.AverageTime}) + @Warmup(iterations = 5, time = 2, timeUnit = TimeUnit.SECONDS) + @Measurement(iterations = 10, time = 2, timeUnit = TimeUnit.SECONDS) + public void modBaseRelFieldTrimmer() { + // We initialize the field trimmer for every execution of the benchmark + ft = HiveRelFieldTrimmer.get(false, false); + ft.trim(relBuilder, root); + ft = null; + } + + @Benchmark + @BenchmarkMode({Mode.Throughput, Mode.AverageTime}) + @Warmup(iterations = 5, time = 2, timeUnit = TimeUnit.SECONDS) + @Measurement(iterations = 10, time = 2, timeUnit = TimeUnit.SECONDS) + public void hiveRelFieldTrimmer() { + // We initialize the field trimmer for every execution of the benchmark + hft = HiveRelFieldTrimmer.get(false); + hft.trim(relBuilder, root); + hft 
= null; + } + + public static void main(String[] args) throws RunnerException { + Options opt = new OptionsBuilder().include(".*" + FieldTrimmerBench.class.getSimpleName() + + ".*").build(); + new Runner(opt).run(); + } + + // ~ 10 rel node classes to use in the benchmark. + + private class DummyNode0 extends AbstractRelNode { + protected DummyNode0(RelOptCluster cluster, RelTraitSet traits) { + super(cluster, cluster.traitSet()); + } + + protected RelDataType deriveRowType() { + return new RelRecordType(Lists.newArrayList()); + } + } + + private class DummyNode1 extends AbstractRelNode { + protected DummyNode1(RelOptCluster cluster, RelTraitSet traits) { + super(cluster, cluster.traitSet()); + } + + protected RelDataType deriveRowType() { + return new RelRecordType(Lists.newArrayList()); + } + } + + private class DummyNode2 extends AbstractRelNode { + protected DummyNode2(RelOptCluster cluster, RelTraitSet traits) { + super(cluster, cluster.traitSet()); + } + + protected RelDataType deriveRowType() { + return new RelRecordType(Lists.newArrayList()); + } + } + + private class DummyNode3 extends AbstractRelNode { + protected DummyNode3(RelOptCluster cluster, RelTraitSet traits) { + super(cluster, cluster.traitSet()); + } + + protected RelDataType deriveRowType() { + return new RelRecordType(Lists.newArrayList()); + } + } + + private class DummyNode4 extends SingleRel { + protected DummyNode4(RelOptCluster cluster, RelTraitSet traits, RelNode input) { + super(cluster, cluster.traitSet(), input); + } + + protected RelDataType deriveRowType() { + return new RelRecordType(Lists.newArrayList()); + } + } + + private class DummyNode5 extends SingleRel { + protected DummyNode5(RelOptCluster cluster, RelTraitSet traits, RelNode input) { + super(cluster, cluster.traitSet(), input); + } + + protected RelDataType deriveRowType() { + return new RelRecordType(Lists.newArrayList()); + } + } + + private class DummyNode6 extends SingleRel { + protected DummyNode6(RelOptCluster 
cluster, RelTraitSet traits, RelNode input) { + super(cluster, cluster.traitSet(), input); + } + + protected RelDataType deriveRowType() { + return new RelRecordType(Lists.newArrayList()); + } + } + + private class DummyNode7 extends SingleRel { + protected DummyNode7(RelOptCluster cluster, RelTraitSet traits, RelNode input) { + super(cluster, cluster.traitSet(), input); + } + + protected RelDataType deriveRowType() { + return new RelRecordType(Lists.newArrayList()); + } + } + + private class DummyNode8 extends BiRel { + protected DummyNode8(RelOptCluster cluster, RelTraitSet traits, RelNode left, RelNode right) { + super(cluster, cluster.traitSet(), left, right); + } + + protected RelDataType deriveRowType() { + return new RelRecordType(Lists.newArrayList()); + } + } + + private class DummyNode9 extends BiRel { + protected DummyNode9(RelOptCluster cluster, RelTraitSet traits, RelNode left, RelNode right) { + super(cluster, cluster.traitSet(), left, right); + } + + protected RelDataType deriveRowType() { + return new RelRecordType(Lists.newArrayList()); + } + } +} diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/HiveDefaultRelMetadataProvider.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/HiveDefaultRelMetadataProvider.java index 7d55f64..7ad3214 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/HiveDefaultRelMetadataProvider.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/HiveDefaultRelMetadataProvider.java @@ -94,53 +94,14 @@ public class HiveDefaultRelMetadataProvider { HiveRelMdPredicates.SOURCE, JaninoRelMetadataProvider.DEFAULT))); - /** - * This is the list of operators that are specifically used in Hive and - * should be loaded by the metadata providers. - */ - private static final List<Class<? 
extends RelNode>> HIVE_REL_NODE_CLASSES = - ImmutableList.of( - RelNode.class, - AbstractRelNode.class, - RelSubset.class, - HepRelVertex.class, - ConverterImpl.class, - AbstractConverter.class, - - HiveTableScan.class, - HiveAggregate.class, - HiveExcept.class, - HiveFilter.class, - HiveIntersect.class, - HiveJoin.class, - HiveMultiJoin.class, - HiveProject.class, - HiveRelNode.class, - HiveSemiJoin.class, - HiveSortExchange.class, - HiveSortLimit.class, - HiveTableFunctionScan.class, - HiveUnion.class, - - DruidQuery.class, - - HiveJdbcConverter.class, - JdbcHiveTableScan.class, - JdbcAggregate.class, - JdbcFilter.class, - JdbcJoin.class, - JdbcProject.class, - JdbcSort.class, - JdbcUnion.class); - private final RelMetadataProvider metadataProvider; - public HiveDefaultRelMetadataProvider(HiveConf hiveConf) { - this.metadataProvider = init(hiveConf); + public HiveDefaultRelMetadataProvider(HiveConf hiveConf, List<Class<? extends RelNode>> nodeClasses) { + this.metadataProvider = init(hiveConf, nodeClasses); } - private RelMetadataProvider init(HiveConf hiveConf) { + private RelMetadataProvider init(HiveConf hiveConf, List<Class<? extends RelNode>> nodeClasses) { // Create cost metadata provider if (HiveConf.getVar(hiveConf, HiveConf.ConfVars.HIVE_EXECUTION_ENGINE).equals("tez") && HiveConf.getBoolVar(hiveConf, HiveConf.ConfVars.HIVE_CBO_EXTENDED_COST_MODEL)) { @@ -167,7 +128,10 @@ public class HiveDefaultRelMetadataProvider { HiveRelMdPredicates.SOURCE, JaninoRelMetadataProvider.DEFAULT))); - metadataProvider.register(HIVE_REL_NODE_CLASSES); + if (nodeClasses != null) { + // If classes were passed, pre-register them + metadataProvider.register(nodeClasses); + } return metadataProvider; } @@ -184,11 +148,10 @@ public class HiveDefaultRelMetadataProvider { * additional Hive classes (compared to Calcite core classes) that may * be visited during the planning phase. 
*/ - public static void initializeMetadataProviderClass() { + public static void initializeMetadataProviderClass(List<Class<? extends RelNode>> nodeClasses) { // This will register the classes in the default Janino implementation - JaninoRelMetadataProvider.DEFAULT.register( - HiveDefaultRelMetadataProvider.HIVE_REL_NODE_CLASSES); + JaninoRelMetadataProvider.DEFAULT.register(nodeClasses); // This will register the classes in the default Hive implementation - DEFAULT.register(HiveDefaultRelMetadataProvider.HIVE_REL_NODE_CLASSES); + DEFAULT.register(nodeClasses); } } diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/HiveRelFactories.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/HiveRelFactories.java index f71d3f0..04b3888 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/HiveRelFactories.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/HiveRelFactories.java @@ -79,7 +79,8 @@ public class HiveRelFactories { public static final RelBuilderFactory HIVE_BUILDER = HiveRelBuilder.proto( - Contexts.of(HIVE_PROJECT_FACTORY, + Contexts.of( + HIVE_PROJECT_FACTORY, HIVE_FILTER_FACTORY, HIVE_JOIN_FACTORY, HIVE_SEMI_JOIN_FACTORY, diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveFieldTrimmerRule.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveFieldTrimmerRule.java index ac050df..73ff1bc 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveFieldTrimmerRule.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveFieldTrimmerRule.java @@ -63,9 +63,8 @@ public class HiveFieldTrimmerRule extends RelOptRule { final HepPlanner tmpPlanner = new HepPlanner(PROGRAM); tmpPlanner.setRoot(node); node = tmpPlanner.findBestExp(); - final HiveRelFieldTrimmer fieldTrimmer = new HiveRelFieldTrimmer(null, - relBuilderFactory.create(node.getCluster(), null), fetchStats); - call.transformTo(fieldTrimmer.trim(node)); + 
call.transformTo( + HiveRelFieldTrimmer.get(fetchStats).trim(call.builder(), node)); triggered = true; } diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveReflectUtil.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveReflectUtil.java new file mode 100644 index 0000000..5e327da --- /dev/null +++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveReflectUtil.java @@ -0,0 +1,334 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to you under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.hive.ql.optimizer.calcite.rules; + +import com.google.common.collect.ImmutableList; +import java.lang.invoke.CallSite; +import java.lang.invoke.LambdaMetafactory; +import java.lang.invoke.MethodHandle; +import java.lang.invoke.MethodHandles; +import java.lang.invoke.MethodType; +import java.lang.reflect.Method; +import java.util.ArrayList; +import java.util.List; +import java.util.Map; +import java.util.concurrent.ConcurrentHashMap; +import org.apache.calcite.util.ReflectUtil; +import org.apache.calcite.util.ReflectUtil.MethodDispatcher; +import org.apache.calcite.util.ReflectiveVisitor; + +/** + * Static utilities for Java reflection. This is based on Calcite + * {@link ReflectUtil}. 
It contains methods to wrap a Calcite dispatcher + * (based on reflection) into a Hive dispatcher as well as a Hive + * dispatcher implementation based on {@link LambdaMetafactory}. + */ +public class HiveReflectUtil { + + /** + * Creates a Hive dispatcher that wraps a Calcite one. + */ + protected static <T, E> MethodDispatcherWrapper<T, E> createCalciteMethodDispatcherWrapper( + final MethodDispatcher<T> methodDispatcher) { + return new MethodDispatcherWrapper<>(methodDispatcher); + } + + /** + * Creates a dispatcher for calls to a single multi-method on a particular + * object. + * + * <p>Calls to that multi-method are resolved by looking for a method on + * the runtime type of that object, with the required name, and with + * the correct type or a subclass for the first argument, and precisely the + * same types for other arguments. + * + * <p>For instance, a dispatcher created for the method + * + * <blockquote>String foo(Vehicle, int, List)</blockquote> + * + * <p>could be used to call the methods + * + * <blockquote>String foo(Car, int, List)<br> + * String foo(Bus, int, List)</blockquote> + * + * <p>(because Car and Bus are subclasses of Vehicle, and they occur in the + * polymorphic first argument) but not the method + * + * <blockquote>String foo(Car, int, ArrayList)</blockquote> + * + * <p>(only the first argument is polymorphic). + * + * <p>You must create an implementation of the method for the base class. + * Otherwise throws {@link IllegalArgumentException}. + * + * @param returnClazz Return type of method + * @param visitor Object on which to invoke the method + * @param methodName Name of method + * @param arg0Clazz Base type of argument zero + * @param otherArgClasses Types of remaining arguments + */ + protected static <E, T> HiveMethodDispatcher<T, E> createMethodDispatcher( + final Class<T> returnClazz, + final ReflectiveVisitor visitor, + final String methodName, + final Class<E> arg0Clazz, + final Class... 
otherArgClasses) { + final List<Class> otherArgClassList = + ImmutableList.copyOf(otherArgClasses); + final VisitDispatcher<ReflectiveVisitor, E> dispatcher = + createDispatcher((Class<ReflectiveVisitor>) visitor.getClass(), arg0Clazz); + return new HiveMethodDispatcher<>(dispatcher, returnClazz, visitor, methodName, + arg0Clazz, otherArgClassList); + } + + /** + * Creates a dispatcher for calls to {@link VisitDispatcher#lookupVisitFunc}. The + * dispatcher caches methods between invocations and it is thread-safe. + * + * @param visitorBaseClazz Visitor base class + * @param visiteeBaseClazz Visitee base class + * @return cache of methods + */ + private static <R extends ReflectiveVisitor, E> VisitDispatcher<R, E> createDispatcher( + final Class<R> visitorBaseClazz, + final Class<E> visiteeBaseClazz) { + assert ReflectiveVisitor.class.isAssignableFrom(visitorBaseClazz); + assert Object.class.isAssignableFrom(visiteeBaseClazz); + return new VisitDispatcher<>(); + } + + private static Class<? 
extends VarArgsFunc> getVarArgsFuncClass(int length) { + switch (length) { + case 1: + return VarArgsFunc1.class; + case 2: + return VarArgsFunc2.class; + case 3: + return VarArgsFunc3.class; + case 4: + return VarArgsFunc4.class; + default: + throw new RuntimeException("Unsupported function with length " + length); + } + } + + private static VarArgsFunc getVarArgsFunc(int length, CallSite site) throws Throwable { + switch (length) { + case 1: + return (VarArgsFunc1) site.getTarget().invokeExact(); + case 2: + return (VarArgsFunc2) site.getTarget().invokeExact(); + case 3: + return (VarArgsFunc3) site.getTarget().invokeExact(); + case 4: + return (VarArgsFunc4) site.getTarget().invokeExact(); + default: + throw new RuntimeException("Unsupported function with length " + length); + } + } + + protected static class VisitDispatcher<R extends ReflectiveVisitor, E> { + final Map<List<Object>, VarArgsFunc> map = new ConcurrentHashMap<>(); + + public VarArgsFunc lookupVisitFunc( + Class<? extends R> visitorClass, + Class<? extends E> visiteeClass, + String visitMethodName, + List<Class> additionalParameterTypes) + throws Throwable { + final List<Object> key = + ImmutableList.of( + visitorClass, + visiteeClass, + visitMethodName, + additionalParameterTypes); + VarArgsFunc method = map.get(key); + if (method == null) { + if (map.containsKey(key)) { + // We already looked for the method and found nothing. 
+ } else { + Method method1 = + ReflectUtil.lookupVisitMethod( + visitorClass, + visiteeClass, + visitMethodName, + additionalParameterTypes); + MethodHandles.Lookup lookup = MethodHandles.lookup(); + MethodHandle methodHandle = lookup.unreflect(method1); + int argsLength = 1 + method1.getParameterTypes().length; + MethodType invokedType = MethodType.methodType( + getVarArgsFuncClass(argsLength)); + MethodType functionMethodType = MethodType.methodType( + method1.getReturnType(), visitorClass, method1.getParameterTypes()); + CallSite site = LambdaMetafactory.metafactory( + lookup, + "apply", + invokedType, + functionMethodType.generic(), + methodHandle, + methodHandle.type()); + method = getVarArgsFunc(argsLength, site); + map.put(key, method); + } + } + return method; + } + } + + protected static class HiveMethodDispatcher<T, E> implements ClassMethodDispatcher<T, E> { + + private final VisitDispatcher<ReflectiveVisitor, E> dispatcher; + private final Class<T> returnClazz; + private final ReflectiveVisitor visitor; + private final String methodName; + private final Class<E> arg0Clazz; + private final List<Class> otherArgClassList; + + public HiveMethodDispatcher ( + final VisitDispatcher<ReflectiveVisitor, E> dispatcher, + final Class<T> returnClazz, + final ReflectiveVisitor visitor, + final String methodName, + final Class<E> arg0Clazz, + final List<Class> otherArgClassList) { + this.dispatcher = dispatcher; + this.returnClazz = returnClazz; + this.visitor = visitor; + this.methodName = methodName; + this.arg0Clazz = arg0Clazz; + this.otherArgClassList = otherArgClassList; + } + + @Override + public T invoke(Object... args) { + VarArgsFunc method = null; + try { + method = lookupVisitFunc(args[0]); + final Object o = method.apply(visitor, args[0], args[1], args[2]); + return returnClazz.cast(o); + } catch (Throwable e) { + throw new RuntimeException("While invoking method " + + (method != null ? 
"'" + method + "'" : ""), + e); + } + } + + private VarArgsFunc lookupVisitFunc(final Object arg0) throws Throwable { + if (!arg0Clazz.isInstance(arg0)) { + throw new IllegalArgumentException(); + } + VarArgsFunc method = + dispatcher.lookupVisitFunc( + visitor.getClass(), + (Class<? extends E>) arg0.getClass(), + methodName, + otherArgClassList); + if (method == null) { + List<Class> classList = new ArrayList<>(); + classList.add(arg0Clazz); + classList.addAll(otherArgClassList); + throw new IllegalArgumentException("Method not found: " + methodName + + "(" + classList + ")"); + } + return method; + } + + @Override + public void register(Iterable<Class<? extends E>> classes) + throws Throwable { + for (Class<? extends E> c : classes) { + VarArgsFunc method = + dispatcher.lookupVisitFunc( + visitor.getClass(), + c, + methodName, + otherArgClassList); + if (method == null) { + List<Class> classList = new ArrayList<>(); + classList.add(arg0Clazz); + classList.addAll(otherArgClassList); + throw new IllegalArgumentException("Method not found: " + methodName + + "(" + classList + ")"); + } + } + } + } + + private static class MethodDispatcherWrapper<T, E> implements ClassMethodDispatcher<T, E> { + + private final MethodDispatcher<T> methodDispatcher; + + public MethodDispatcherWrapper ( + final MethodDispatcher<T> methodDispatcher) { + this.methodDispatcher = methodDispatcher; + } + + @Override + public T invoke(Object... args) { + return this.methodDispatcher.invoke(args); + } + } + + public interface ClassMethodDispatcher<T, E> extends MethodDispatcher<T> { + default void register(Iterable<Class<? extends E>> classes) throws Throwable { + // Do nothing by default + } + } + + @FunctionalInterface + private interface VarArgsFunc1<T, R> extends VarArgsFunc<R> { + default R apply(Object... args) { + return apply((T) args[0]); + } + + R apply(T t); + } + + @FunctionalInterface + private interface VarArgsFunc2<T, U, R> extends VarArgsFunc<R> { + default R apply(Object... 
args) { + return apply((T) args[0], (U) args[1]); + } + + R apply(T t, U u); + } + + @FunctionalInterface + private interface VarArgsFunc3<T, U, V, R> extends VarArgsFunc<R> { + default R apply(Object... args) { + return apply((T) args[0], (U) args[1], (V) args[2]); + } + + R apply(T t, U u, V v); + } + + @FunctionalInterface + private interface VarArgsFunc4<T, U, V, W, R> extends VarArgsFunc<R> { + default R apply(Object... args) { + return apply((T) args[0], (U) args[1], (V) args[2], (W) args[3]); + } + + R apply(T t, U u, V v, W w); + } + + private interface VarArgsFunc<R> { + default R apply(Object... args) { + throw new UnsupportedOperationException(); + } + } + +} diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveRelFieldTrimmer.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveRelFieldTrimmer.java index d218fac..53d68e8 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveRelFieldTrimmer.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveRelFieldTrimmer.java @@ -52,9 +52,7 @@ import org.apache.calcite.rex.RexTableInputRef; import org.apache.calcite.rex.RexUtil; import org.apache.calcite.rex.RexVisitor; import org.apache.calcite.sql.SqlKind; -import org.apache.calcite.sql.validate.SqlValidator; import org.apache.calcite.sql2rel.CorrelationReferenceFinder; -import org.apache.calcite.sql2rel.RelFieldTrimmer; import org.apache.calcite.tools.RelBuilder; import org.apache.calcite.util.ImmutableBitSet; import org.apache.calcite.util.Pair; @@ -81,29 +79,92 @@ public class HiveRelFieldTrimmer extends RelFieldTrimmer { protected static final Logger LOG = LoggerFactory.getLogger(HiveRelFieldTrimmer.class); - private ColumnAccessInfo columnAccessInfo; - private Map<HiveProject, Table> viewProjectToTableSchema; - private final RelBuilder relBuilder; + // We initialize the field trimmer statically here and we will reuse it across + // queries. 
The reason is that otherwise we will create a new dispatcher with + // each instantiation, thus effectively removing the caching mechanism that is + // built within the dispatcher. + private static final HiveRelFieldTrimmer FIELD_TRIMMER_STATS = + new HiveRelFieldTrimmer(true); + private static final HiveRelFieldTrimmer FIELD_TRIMMER_NO_STATS = + new HiveRelFieldTrimmer(false); + // For testing + private static final HiveRelFieldTrimmer FIELD_TRIMMER_STATS_METHOD_DISPATCHER = + new HiveRelFieldTrimmer(true, false); + private static final HiveRelFieldTrimmer FIELD_TRIMMER_NO_STATS_METHOD_DISPATCHER = + new HiveRelFieldTrimmer(false, false); + private final boolean fetchStats; - public HiveRelFieldTrimmer(SqlValidator validator, RelBuilder relBuilder) { - this(validator, relBuilder, false); - } + private static final ThreadLocal<ColumnAccessInfo> COLUMN_ACCESS_INFO = + new ThreadLocal<>(); + private static final ThreadLocal<Map<HiveProject, Table>> VIEW_PROJECT_TO_TABLE_SCHEMA = + new ThreadLocal<>(); - public HiveRelFieldTrimmer(SqlValidator validator, RelBuilder relBuilder, - ColumnAccessInfo columnAccessInfo, Map<HiveProject, Table> viewToTableSchema) { - this(validator, relBuilder, false); - this.columnAccessInfo = columnAccessInfo; - this.viewProjectToTableSchema = viewToTableSchema; + + private HiveRelFieldTrimmer(boolean fetchStats) { + this(fetchStats, true); } - public HiveRelFieldTrimmer(SqlValidator validator, RelBuilder relBuilder, boolean fetchStats) { - super(validator, relBuilder); - this.relBuilder = relBuilder; + private HiveRelFieldTrimmer(boolean fetchStats, boolean useLMFBasedDispatcher) { + super(useLMFBasedDispatcher); this.fetchStats = fetchStats; } /** + * Returns a HiveRelFieldTrimmer instance that does not retrieve + * stats. + */ + public static HiveRelFieldTrimmer get() { + return get(false); + } + + /** + * Returns a HiveRelFieldTrimmer instance that can retrieve stats. 
+ */ + public static HiveRelFieldTrimmer get(boolean fetchStats) { + return get(fetchStats, true); + } + + /** + * Returns a HiveRelFieldTrimmer instance that can retrieve stats and use + * a custom dispatcher. + */ + public static HiveRelFieldTrimmer get(boolean fetchStats, boolean useLMFBasedDispatcher) { + return fetchStats ? + (useLMFBasedDispatcher ? FIELD_TRIMMER_STATS : FIELD_TRIMMER_STATS_METHOD_DISPATCHER) : + (useLMFBasedDispatcher ? FIELD_TRIMMER_NO_STATS : FIELD_TRIMMER_NO_STATS_METHOD_DISPATCHER); + } + + /** + * Trims unused fields from a relational expression. + * + * <p>We presume that all fields of the relational expression are wanted by + * its consumer, so only trim fields that are not used within the tree. + * + * @param root Root node of relational expression + * @return Trimmed relational expression + */ + @Override + public RelNode trim(RelBuilder relBuilder, RelNode root) { + return trim(relBuilder, root, null, null); + } + + public RelNode trim(RelBuilder relBuilder, RelNode root, + ColumnAccessInfo columnAccessInfo, Map<HiveProject, Table> viewToTableSchema) { + try { + // Set local thread variables + COLUMN_ACCESS_INFO.set(columnAccessInfo); + VIEW_PROJECT_TO_TABLE_SCHEMA.set(viewToTableSchema); + // Execute pruning + return super.trim(relBuilder, root); + } finally { + // Always remove the local thread variables to avoid leaks + COLUMN_ACCESS_INFO.remove(); + VIEW_PROJECT_TO_TABLE_SCHEMA.remove(); + } + } + + /** * Trims the fields of an input relational expression. * * @param rel Relational expression @@ -251,7 +312,7 @@ public class HiveRelFieldTrimmer extends RelFieldTrimmer { return trimFields( (RelNode) dq, fieldsUsed, extraFields); } - final RelNode newTableAccessRel = project(dq, fieldsUsed, extraFields, relBuilder); + final RelNode newTableAccessRel = project(dq, fieldsUsed, extraFields, REL_BUILDER.get()); // Some parts of the system can't handle rows with zero fields, so // pretend that one field is used. 
@@ -512,6 +573,7 @@ public class HiveRelFieldTrimmer extends RelFieldTrimmer { newProjects.add(rexBuilder.makeInputRef(input, i)); } } + final RelBuilder relBuilder = REL_BUILDER.get(); relBuilder.push(input); relBuilder.project(newProjects); Aggregate newAggregate = new HiveAggregate(aggregate.getCluster(), aggregate.getTraitSet(), relBuilder.build(), @@ -641,6 +703,7 @@ public class HiveRelFieldTrimmer extends RelFieldTrimmer { } // Now create new agg calls, and populate mapping for them. + final RelBuilder relBuilder = REL_BUILDER.get(); relBuilder.push(newInput); final List<RelBuilder.AggCall> newAggCallList = new ArrayList<>(); j = originalGroupCount; // because lookup in fieldsUsed is done using original group count @@ -675,12 +738,14 @@ public class HiveRelFieldTrimmer extends RelFieldTrimmer { public TrimResult trimFields(Project project, ImmutableBitSet fieldsUsed, Set<RelDataTypeField> extraFields) { // set columnAccessInfo for ViewColumnAuthorization - if (this.columnAccessInfo != null && this.viewProjectToTableSchema != null - && this.viewProjectToTableSchema.containsKey(project)) { + final ColumnAccessInfo columnAccessInfo = COLUMN_ACCESS_INFO.get(); + final Map<HiveProject, Table> viewProjectToTableSchema = VIEW_PROJECT_TO_TABLE_SCHEMA.get(); + if (columnAccessInfo != null && viewProjectToTableSchema != null + && viewProjectToTableSchema.containsKey(project)) { for (Ord<RexNode> ord : Ord.zip(project.getProjects())) { if (fieldsUsed.get(ord.i)) { - Table tab = this.viewProjectToTableSchema.get(project); - this.columnAccessInfo.add(tab.getCompleteName(), tab.getAllCols().get(ord.i).getName()); + Table tab = viewProjectToTableSchema.get(project); + columnAccessInfo.add(tab.getCompleteName(), tab.getAllCols().get(ord.i).getName()); } } } @@ -690,7 +755,8 @@ public class HiveRelFieldTrimmer extends RelFieldTrimmer { public TrimResult trimFields(HiveTableScan tableAccessRel, ImmutableBitSet fieldsUsed, Set<RelDataTypeField> extraFields) { final TrimResult 
result = super.trimFields(tableAccessRel, fieldsUsed, extraFields); - if (this.columnAccessInfo != null) { + final ColumnAccessInfo columnAccessInfo = COLUMN_ACCESS_INFO.get(); + if (columnAccessInfo != null) { // Store information about column accessed by the table so it can be used // to send only this information for column masking final RelOptHiveTable tab = (RelOptHiveTable) tableAccessRel.getTable(); @@ -789,4 +855,26 @@ public class HiveRelFieldTrimmer extends RelFieldTrimmer { Mapping mapping = Mappings.createIdentity(fieldCount); return result(newTabFun, mapping); } + + /** + * This method can be called to pre-register all the classes that may be + * visited during the planning phase. + */ + protected void register(List<Class<? extends RelNode>> nodeClasses) throws Throwable { + this.trimFieldsDispatcher.register(nodeClasses); + } + + /** + * This method can be called at startup time to pre-register all the + * Hive classes that may be visited during the planning phase. + */ + public static void initializeFieldTrimmerClass(List<Class<? extends RelNode>> nodeClasses) { + try { + FIELD_TRIMMER_STATS.register(nodeClasses); + FIELD_TRIMMER_NO_STATS.register(nodeClasses); + } catch (Throwable t) { + // LOG it but do not fail + LOG.warn("Error initializing field trimmer instance", t); + } + } } diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/RelFieldTrimmer.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/RelFieldTrimmer.java new file mode 100644 index 0000000..77cf75b --- /dev/null +++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/RelFieldTrimmer.java @@ -0,0 +1,1130 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. 
+ * The ASF licenses this file to you under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.hive.ql.optimizer.calcite.rules; + +import org.apache.calcite.linq4j.Ord; +import org.apache.calcite.plan.RelOptCluster; +import org.apache.calcite.plan.RelOptUtil; +import org.apache.calcite.rel.RelCollation; +import org.apache.calcite.rel.RelCollations; +import org.apache.calcite.rel.RelFieldCollation; +import org.apache.calcite.rel.RelNode; +import org.apache.calcite.rel.core.Aggregate; +import org.apache.calcite.rel.core.AggregateCall; +import org.apache.calcite.rel.core.CorrelationId; +import org.apache.calcite.rel.core.Filter; +import org.apache.calcite.rel.core.Join; +import org.apache.calcite.rel.core.JoinRelType; +import org.apache.calcite.rel.core.Project; +import org.apache.calcite.rel.core.SetOp; +import org.apache.calcite.rel.core.Sort; +import org.apache.calcite.rel.core.TableScan; +import org.apache.calcite.rel.logical.LogicalTableFunctionScan; +import org.apache.calcite.rel.logical.LogicalTableModify; +import org.apache.calcite.rel.logical.LogicalValues; +import org.apache.calcite.rel.metadata.RelMetadataQuery; +import org.apache.calcite.rel.type.RelDataType; +import org.apache.calcite.rel.type.RelDataTypeFactory; +import org.apache.calcite.rel.type.RelDataTypeField; +import org.apache.calcite.rel.type.RelDataTypeImpl; +import org.apache.calcite.rex.RexBuilder; +import org.apache.calcite.rex.RexCorrelVariable; +import 
org.apache.calcite.rex.RexDynamicParam; +import org.apache.calcite.rex.RexFieldAccess; +import org.apache.calcite.rex.RexLiteral; +import org.apache.calcite.rex.RexNode; +import org.apache.calcite.rex.RexPermuteInputsShuttle; +import org.apache.calcite.rex.RexUtil; +import org.apache.calcite.rex.RexVisitor; +import org.apache.calcite.sql.SqlExplainFormat; +import org.apache.calcite.sql.SqlExplainLevel; +import org.apache.calcite.sql2rel.CorrelationReferenceFinder; +import org.apache.calcite.tools.RelBuilder; +import org.apache.calcite.util.Bug; +import org.apache.calcite.util.ImmutableBitSet; +import org.apache.calcite.util.Pair; +import org.apache.calcite.util.ReflectUtil; +import org.apache.calcite.util.ReflectiveVisitor; +import org.apache.calcite.util.Util; +import org.apache.calcite.util.mapping.IntPair; +import org.apache.calcite.util.mapping.Mapping; +import org.apache.calcite.util.mapping.MappingType; +import org.apache.calcite.util.mapping.Mappings; + +import com.google.common.collect.ImmutableList; +import com.google.common.collect.Iterables; + +import java.math.BigDecimal; +import java.util.ArrayList; +import java.util.Collections; +import java.util.LinkedHashSet; +import java.util.List; +import java.util.Set; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +/** + * This class comes from Calcite almost as-is. The only change concerns + * the dispatcher and the builder, so the trimmer is thread-safe and can + * be reused across different queries. Definition follows. + * + * <p>Transformer that walks over a tree of relational expressions, replacing each + * {@link RelNode} with a 'slimmed down' relational expression that projects + * only the columns required by its consumer. + * + * <p>Uses multi-methods to fire the right rule for each type of relational + * expression. 
This allows the transformer to be extended without having to + * add a new method to RelNode, and without requiring a collection of rule + * classes scattered to the four winds. + */ +public class RelFieldTrimmer implements ReflectiveVisitor { + //~ Static fields/initializers --------------------------------------------- + + private static final Logger LOG = LoggerFactory.getLogger(RelFieldTrimmer.class); + + protected static final ThreadLocal<RelBuilder> REL_BUILDER = + new ThreadLocal<>(); + + //~ Instance fields -------------------------------------------------------- + + protected final HiveReflectUtil.ClassMethodDispatcher<TrimResult, RelNode> trimFieldsDispatcher; + + //~ Constructors ----------------------------------------------------------- + + /** + * Creates a RelFieldTrimmer. + * @param useLMFBasedDispatcher True if we want to create a dispatcher based on + * {@link java.lang.invoke.LambdaMetafactory} that is + * thread-safe, or false if we want to create a + * Calcite dispatcher based on reflection that is not + * thread-safe. False should only be used for + * testing/benchmarking purposes + */ + protected RelFieldTrimmer(boolean useLMFBasedDispatcher) { + if (useLMFBasedDispatcher) { + this.trimFieldsDispatcher = + HiveReflectUtil.createMethodDispatcher( + TrimResult.class, + this, + "trimFields", + RelNode.class, + ImmutableBitSet.class, + Set.class); + } else { + this.trimFieldsDispatcher = + HiveReflectUtil.createCalciteMethodDispatcherWrapper( + ReflectUtil.createMethodDispatcher( + TrimResult.class, + this, + "trimFields", + RelNode.class, + ImmutableBitSet.class, + Set.class)); + } + } + + //~ Methods ---------------------------------------------------------------- + + /** + * Trims unused fields from a relational expression. + * + * <p>We presume that all fields of the relational expression are wanted by + * its consumer, so only trim fields that are not used within the tree. 
+ * + * @param relBuilder Rel builder + * @param root Root node of relational expression + * @return Trimmed relational expression + */ + public RelNode trim(RelBuilder relBuilder, RelNode root) { + try { + REL_BUILDER.set(relBuilder); + final int fieldCount = root.getRowType().getFieldCount(); + final ImmutableBitSet fieldsUsed = ImmutableBitSet.range(fieldCount); + final Set<RelDataTypeField> extraFields = Collections.emptySet(); + final TrimResult trimResult = + dispatchTrimFields(root, fieldsUsed, extraFields); + if (!trimResult.right.isIdentity()) { + throw new IllegalArgumentException(); + } + if (LOG.isDebugEnabled()) { + LOG.debug( + RelOptUtil.dumpPlan("Plan after trimming unused fields", + trimResult.left, SqlExplainFormat.TEXT, + SqlExplainLevel.EXPPLAN_ATTRIBUTES)); + } + return trimResult.left; + } finally { + REL_BUILDER.remove(); + } + } + + /** + * Trims the fields of an input relational expression. + * + * @param rel Relational expression + * @param input Input relational expression, whose fields to trim + * @param fieldsUsed Bitmap of fields needed by the consumer + * @return New relational expression and its field mapping + */ + protected TrimResult trimChild( + RelNode rel, + RelNode input, + final ImmutableBitSet fieldsUsed, + Set<RelDataTypeField> extraFields) { + final ImmutableBitSet.Builder fieldsUsedBuilder = fieldsUsed.rebuild(); + + // Fields that define the collation cannot be discarded. + final RelMetadataQuery mq = rel.getCluster().getMetadataQuery(); + final ImmutableList<RelCollation> collations = mq.collations(input); + for (RelCollation collation : collations) { + for (RelFieldCollation fieldCollation : collation.getFieldCollations()) { + fieldsUsedBuilder.set(fieldCollation.getFieldIndex()); + } + } + + // Correlating variables are a means for other relational expressions to use + // fields. 
+ for (final CorrelationId correlation : rel.getVariablesSet()) { + rel.accept( + new CorrelationReferenceFinder() { + protected RexNode handle(RexFieldAccess fieldAccess) { + final RexCorrelVariable v = + (RexCorrelVariable) fieldAccess.getReferenceExpr(); + if (v.id.equals(correlation)) { + fieldsUsedBuilder.set(fieldAccess.getField().getIndex()); + } + return fieldAccess; + } + }); + } + + return dispatchTrimFields(input, fieldsUsedBuilder.build(), extraFields); + } + + /** + * Trims a child relational expression, then adds back a dummy project to + * restore the fields that were removed. + * + * <p>Sounds pointless? It causes unused fields to be removed + * further down the tree (towards the leaves), but it ensures that the + * consuming relational expression continues to see the same fields. + * + * @param rel Relational expression + * @param input Input relational expression, whose fields to trim + * @param fieldsUsed Bitmap of fields needed by the consumer + * @return New relational expression and its field mapping + */ + protected TrimResult trimChildRestore( + RelNode rel, + RelNode input, + ImmutableBitSet fieldsUsed, + Set<RelDataTypeField> extraFields) { + TrimResult trimResult = trimChild(rel, input, fieldsUsed, extraFields); + if (trimResult.right.isIdentity()) { + return trimResult; + } + final RelDataType rowType = input.getRowType(); + List<RelDataTypeField> fieldList = rowType.getFieldList(); + final List<RexNode> exprList = new ArrayList<>(); + final List<String> nameList = rowType.getFieldNames(); + RexBuilder rexBuilder = rel.getCluster().getRexBuilder(); + assert trimResult.right.getSourceCount() == fieldList.size(); + for (int i = 0; i < fieldList.size(); i++) { + int source = trimResult.right.getTargetOpt(i); + RelDataTypeField field = fieldList.get(i); + exprList.add( + source < 0 + ?
rexBuilder.makeZeroLiteral(field.getType()) + : rexBuilder.makeInputRef(field.getType(), source)); + } + final RelBuilder relBuilder = REL_BUILDER.get(); + relBuilder.push(trimResult.left) + .project(exprList, nameList); + return result(relBuilder.build(), + Mappings.createIdentity(fieldList.size())); + } + + /** + * Invokes {@link #trimFields}, or the appropriate method for the type + * of the rel parameter, using multi-method dispatch. + * + * @param rel Relational expression + * @param fieldsUsed Bitmap of fields needed by the consumer + * @return New relational expression and its field mapping + */ + protected final TrimResult dispatchTrimFields( + RelNode rel, + ImmutableBitSet fieldsUsed, + Set<RelDataTypeField> extraFields) { + final TrimResult trimResult = + trimFieldsDispatcher.invoke(rel, fieldsUsed, extraFields); + final RelNode newRel = trimResult.left; + final Mapping mapping = trimResult.right; + final int fieldCount = rel.getRowType().getFieldCount(); + assert mapping.getSourceCount() == fieldCount + : "source: " + mapping.getSourceCount() + " != " + fieldCount; + final int newFieldCount = newRel.getRowType().getFieldCount(); + assert mapping.getTargetCount() + extraFields.size() == newFieldCount + || Bug.TODO_FIXED + : "target: " + mapping.getTargetCount() + + " + " + extraFields.size() + + " != " + newFieldCount; + if (Bug.TODO_FIXED) { + assert newFieldCount > 0 : "rel has no fields after trim: " + rel; + } + if (newRel.equals(rel)) { + return result(rel, mapping); + } + return trimResult; + } + + protected TrimResult result(RelNode r, final Mapping mapping) { + final RelBuilder relBuilder = REL_BUILDER.get(); + final RexBuilder rexBuilder = relBuilder.getRexBuilder(); + for (final CorrelationId correlation : r.getVariablesSet()) { + r = r.accept( + new CorrelationReferenceFinder() { + protected RexNode handle(RexFieldAccess fieldAccess) { + final RexCorrelVariable v = + (RexCorrelVariable) fieldAccess.getReferenceExpr(); + if 
(v.id.equals(correlation) + && v.getType().getFieldCount() == mapping.getSourceCount()) { + final int old = fieldAccess.getField().getIndex(); + final int new_ = mapping.getTarget(old); + final RelDataTypeFactory.Builder typeBuilder = + relBuilder.getTypeFactory().builder(); + for (int target : Util.range(mapping.getTargetCount())) { + typeBuilder.add( + v.getType().getFieldList().get(mapping.getSource(target))); + } + final RexNode newV = + rexBuilder.makeCorrel(typeBuilder.build(), v.id); + if (old != new_) { + return rexBuilder.makeFieldAccess(newV, new_); + } + } + return fieldAccess; + } + }); + } + return new TrimResult(r, mapping); + } + + /** + * Visit method, per {@link org.apache.calcite.util.ReflectiveVisitor}. + * + * <p>This method is invoked reflectively, so there may not be any apparent + * calls to it. The class (or derived classes) may contain overloads of + * this method with more specific types for the {@code rel} parameter. + * + * <p>Returns a pair: the relational expression created, and the mapping + * between the original fields and the fields of the newly created + * relational expression. + * + * @param rel Relational expression + * @param fieldsUsed Fields needed by the consumer + * @return relational expression and mapping + */ + public TrimResult trimFields( + RelNode rel, + ImmutableBitSet fieldsUsed, + Set<RelDataTypeField> extraFields) { + // We don't know how to trim this kind of relational expression, so give + // it back intact. + Util.discard(fieldsUsed); + return result(rel, + Mappings.createIdentity(rel.getRowType().getFieldCount())); + } + + /** + * Variant of {@link #trimFields(RelNode, ImmutableBitSet, Set)} for + * {@link org.apache.calcite.rel.logical.LogicalProject}. 
+ */ + public TrimResult trimFields( + Project project, + ImmutableBitSet fieldsUsed, + Set<RelDataTypeField> extraFields) { + final RelDataType rowType = project.getRowType(); + final int fieldCount = rowType.getFieldCount(); + final RelNode input = project.getInput(); + + // Which fields are required from the input? + final Set<RelDataTypeField> inputExtraFields = + new LinkedHashSet<>(extraFields); + RelOptUtil.InputFinder inputFinder = + new RelOptUtil.InputFinder(inputExtraFields); + for (Ord<RexNode> ord : Ord.zip(project.getProjects())) { + if (fieldsUsed.get(ord.i)) { + ord.e.accept(inputFinder); + } + } + ImmutableBitSet inputFieldsUsed = inputFinder.inputBitSet.build(); + + // Create input with trimmed columns. + TrimResult trimResult = + trimChild(project, input, inputFieldsUsed, inputExtraFields); + RelNode newInput = trimResult.left; + final Mapping inputMapping = trimResult.right; + + // If the input is unchanged, and we need to project all columns, + // there's nothing we can do. + if (newInput == input + && fieldsUsed.cardinality() == fieldCount) { + return result(project, Mappings.createIdentity(fieldCount)); + } + + // Some parts of the system can't handle rows with zero fields, so + // pretend that one field is used. + if (fieldsUsed.cardinality() == 0) { + return dummyProject(fieldCount, newInput); + } + + // Build new project expressions, and populate the mapping. 
+ final List<RexNode> newProjects = new ArrayList<>(); + final RexVisitor<RexNode> shuttle = + new RexPermuteInputsShuttle( + inputMapping, newInput); + final Mapping mapping = + Mappings.create( + MappingType.INVERSE_SURJECTION, + fieldCount, + fieldsUsed.cardinality()); + for (Ord<RexNode> ord : Ord.zip(project.getProjects())) { + if (fieldsUsed.get(ord.i)) { + mapping.set(ord.i, newProjects.size()); + RexNode newProjectExpr = ord.e.accept(shuttle); + newProjects.add(newProjectExpr); + } + } + + final RelDataType newRowType = + RelOptUtil.permute(project.getCluster().getTypeFactory(), rowType, + mapping); + + final RelBuilder relBuilder = REL_BUILDER.get(); + relBuilder.push(newInput); + relBuilder.project(newProjects, newRowType.getFieldNames()); + return result(relBuilder.build(), mapping); + } + + /** Creates a project with a dummy column, to protect the parts of the system + * that cannot handle a relational expression with no columns. + * + * @param fieldCount Number of fields in the original relational expression + * @param input Trimmed input + * @return Dummy project, or the input itself if it already has a single field + */ + protected TrimResult dummyProject(int fieldCount, RelNode input) { + final RelOptCluster cluster = input.getCluster(); + final Mapping mapping = + Mappings.create(MappingType.INVERSE_SURJECTION, fieldCount, 1); + if (input.getRowType().getFieldCount() == 1) { + // Input already has one field (and may in fact be a dummy project we + // created for the child). We can't do better. + return result(input, mapping); + } + final RexLiteral expr = + cluster.getRexBuilder().makeExactLiteral(BigDecimal.ZERO); + final RelBuilder relBuilder = REL_BUILDER.get(); + relBuilder.push(input); + relBuilder.project(ImmutableList.<RexNode>of(expr), ImmutableList.of("DUMMY")); + return result(relBuilder.build(), mapping); + } + + /** + * Variant of {@link #trimFields(RelNode, ImmutableBitSet, Set)} for + * {@link org.apache.calcite.rel.logical.LogicalFilter}.
+ */ + public TrimResult trimFields( + Filter filter, + ImmutableBitSet fieldsUsed, + Set<RelDataTypeField> extraFields) { + final RelDataType rowType = filter.getRowType(); + final int fieldCount = rowType.getFieldCount(); + final RexNode conditionExpr = filter.getCondition(); + final RelNode input = filter.getInput(); + + // We use the fields used by the consumer, plus any fields used in the + // filter. + final Set<RelDataTypeField> inputExtraFields = + new LinkedHashSet<>(extraFields); + RelOptUtil.InputFinder inputFinder = + new RelOptUtil.InputFinder(inputExtraFields); + inputFinder.inputBitSet.addAll(fieldsUsed); + conditionExpr.accept(inputFinder); + final ImmutableBitSet inputFieldsUsed = inputFinder.inputBitSet.build(); + + // Create input with trimmed columns. + TrimResult trimResult = + trimChild(filter, input, inputFieldsUsed, inputExtraFields); + RelNode newInput = trimResult.left; + final Mapping inputMapping = trimResult.right; + + // If the input is unchanged, and we need to project all columns, + // there's nothing we can do. + if (newInput == input + && fieldsUsed.cardinality() == fieldCount) { + return result(filter, Mappings.createIdentity(fieldCount)); + } + + // Build new project expressions, and populate the mapping. + final RexVisitor<RexNode> shuttle = + new RexPermuteInputsShuttle(inputMapping, newInput); + RexNode newConditionExpr = + conditionExpr.accept(shuttle); + + // Build new filter with trimmed input and condition. + final RelBuilder relBuilder = REL_BUILDER.get(); + relBuilder.push(newInput) + .filter(filter.getVariablesSet(), newConditionExpr); + + // The result has the same mapping as the input gave us. Sometimes we + // return fields that the consumer didn't ask for, because the filter + // needs them for its condition. + return result(relBuilder.build(), inputMapping); + } + + /** + * Variant of {@link #trimFields(RelNode, ImmutableBitSet, Set)} for + * {@link org.apache.calcite.rel.core.Sort}. 
+ */ + public TrimResult trimFields( + Sort sort, + ImmutableBitSet fieldsUsed, + Set<RelDataTypeField> extraFields) { + final RelDataType rowType = sort.getRowType(); + final int fieldCount = rowType.getFieldCount(); + final RelCollation collation = sort.getCollation(); + final RelNode input = sort.getInput(); + + // We use the fields used by the consumer, plus any fields used as sort + // keys. + final ImmutableBitSet.Builder inputFieldsUsed = fieldsUsed.rebuild(); + for (RelFieldCollation field : collation.getFieldCollations()) { + inputFieldsUsed.set(field.getFieldIndex()); + } + + // Create input with trimmed columns. + final Set<RelDataTypeField> inputExtraFields = Collections.emptySet(); + TrimResult trimResult = + trimChild(sort, input, inputFieldsUsed.build(), inputExtraFields); + RelNode newInput = trimResult.left; + final Mapping inputMapping = trimResult.right; + + // If the input is unchanged, and we need to project all columns, + // there's nothing we can do. + if (newInput == input + && inputMapping.isIdentity() + && fieldsUsed.cardinality() == fieldCount) { + return result(sort, Mappings.createIdentity(fieldCount)); + } + + // leave the Sort unchanged in case we have dynamic limits + if (sort.offset instanceof RexDynamicParam + || sort.fetch instanceof RexDynamicParam) { + return result(sort, inputMapping); + } + + final RelBuilder relBuilder = REL_BUILDER.get(); + relBuilder.push(newInput); + final int offset = + sort.offset == null ? 0 : RexLiteral.intValue(sort.offset); + final int fetch = + sort.fetch == null ? -1 : RexLiteral.intValue(sort.fetch); + final ImmutableList<RexNode> fields = + relBuilder.fields(RexUtil.apply(inputMapping, collation)); + relBuilder.sortLimit(offset, fetch, fields); + + // The result has the same mapping as the input gave us. Sometimes we + // return fields that the consumer didn't ask for, because the sort + // needs them as its sort keys.
+ return result(relBuilder.build(), inputMapping); + } + + /** + * Variant of {@link #trimFields(RelNode, ImmutableBitSet, Set)} for + * {@link org.apache.calcite.rel.logical.LogicalJoin}. + */ + public TrimResult trimFields( + Join join, + ImmutableBitSet fieldsUsed, + Set<RelDataTypeField> extraFields) { + final int fieldCount = join.getSystemFieldList().size() + + join.getLeft().getRowType().getFieldCount() + + join.getRight().getRowType().getFieldCount(); + final RexNode conditionExpr = join.getCondition(); + final int systemFieldCount = join.getSystemFieldList().size(); + + // Add in fields used in the condition. + final Set<RelDataTypeField> combinedInputExtraFields = + new LinkedHashSet<>(extraFields); + RelOptUtil.InputFinder inputFinder = + new RelOptUtil.InputFinder(combinedInputExtraFields); + inputFinder.inputBitSet.addAll(fieldsUsed); + conditionExpr.accept(inputFinder); + final ImmutableBitSet fieldsUsedPlus = inputFinder.inputBitSet.build(); + + // If no system fields are used, we can remove them. + int systemFieldUsedCount = 0; + for (int i = 0; i < systemFieldCount; ++i) { + if (fieldsUsed.get(i)) { + ++systemFieldUsedCount; + } + } + final int newSystemFieldCount; + if (systemFieldUsedCount == 0) { + newSystemFieldCount = 0; + } else { + newSystemFieldCount = systemFieldCount; + } + + int offset = systemFieldCount; + int changeCount = 0; + int newFieldCount = newSystemFieldCount; + final List<RelNode> newInputs = new ArrayList<>(2); + final List<Mapping> inputMappings = new ArrayList<>(); + final List<Integer> inputExtraFieldCounts = new ArrayList<>(); + for (RelNode input : join.getInputs()) { + final RelDataType inputRowType = input.getRowType(); + final int inputFieldCount = inputRowType.getFieldCount(); + + // Compute required mapping. 
+ ImmutableBitSet.Builder inputFieldsUsed = ImmutableBitSet.builder(); + for (int bit : fieldsUsedPlus) { + if (bit >= offset && bit < offset + inputFieldCount) { + inputFieldsUsed.set(bit - offset); + } + } + + // If there are system fields, we automatically use the + // corresponding field in each input. + inputFieldsUsed.set(0, newSystemFieldCount); + + // FIXME: We ought to collect extra fields for each input + // individually. For now, we assume that just one input has + // on-demand fields. + Set<RelDataTypeField> inputExtraFields = + RelDataTypeImpl.extra(inputRowType) == null + ? Collections.emptySet() + : combinedInputExtraFields; + inputExtraFieldCounts.add(inputExtraFields.size()); + TrimResult trimResult = + trimChild(join, input, inputFieldsUsed.build(), inputExtraFields); + newInputs.add(trimResult.left); + if (trimResult.left != input) { + ++changeCount; + } + + final Mapping inputMapping = trimResult.right; + inputMappings.add(inputMapping); + + // Move offset to point to start of next input. + offset += inputFieldCount; + newFieldCount += + inputMapping.getTargetCount() + inputExtraFields.size(); + } + + Mapping mapping = + Mappings.create( + MappingType.INVERSE_SURJECTION, + fieldCount, + newFieldCount); + for (int i = 0; i < newSystemFieldCount; ++i) { + mapping.set(i, i); + } + offset = systemFieldCount; + int newOffset = newSystemFieldCount; + for (int i = 0; i < inputMappings.size(); i++) { + Mapping inputMapping = inputMappings.get(i); + for (IntPair pair : inputMapping) { + mapping.set(pair.source + offset, pair.target + newOffset); + } + offset += inputMapping.getSourceCount(); + newOffset += inputMapping.getTargetCount() + + inputExtraFieldCounts.get(i); + } + + if (changeCount == 0 + && mapping.isIdentity()) { + return result(join, Mappings.createIdentity(fieldCount)); + } + + // Build new join. 
+ final RexVisitor<RexNode> shuttle = + new RexPermuteInputsShuttle( + mapping, newInputs.get(0), newInputs.get(1)); + RexNode newConditionExpr = + conditionExpr.accept(shuttle); + + final RelBuilder relBuilder = REL_BUILDER.get(); + relBuilder.push(newInputs.get(0)); + relBuilder.push(newInputs.get(1)); + + switch (join.getJoinType()) { + case SEMI: + case ANTI: + // For SemiJoins and AntiJoins only map fields from the left-side + if (join.getJoinType() == JoinRelType.SEMI) { + relBuilder.semiJoin(newConditionExpr); + } else { + relBuilder.antiJoin(newConditionExpr); + } + Mapping inputMapping = inputMappings.get(0); + mapping = Mappings.create(MappingType.INVERSE_SURJECTION, + join.getRowType().getFieldCount(), + newSystemFieldCount + inputMapping.getTargetCount()); + for (int i = 0; i < newSystemFieldCount; ++i) { + mapping.set(i, i); + } + offset = systemFieldCount; + newOffset = newSystemFieldCount; + for (IntPair pair : inputMapping) { + mapping.set(pair.source + offset, pair.target + newOffset); + } + break; + default: + relBuilder.join(join.getJoinType(), newConditionExpr); + } + + return result(relBuilder.build(), mapping); + } + + /** + * Variant of {@link #trimFields(RelNode, ImmutableBitSet, Set)} for + * {@link org.apache.calcite.rel.core.SetOp} (including UNION and UNION ALL). + */ + public TrimResult trimFields( + SetOp setOp, + ImmutableBitSet fieldsUsed, + Set<RelDataTypeField> extraFields) { + final RelDataType rowType = setOp.getRowType(); + final int fieldCount = rowType.getFieldCount(); + int changeCount = 0; + + // Fennel abhors an empty row type, so pretend that the parent rel + // wants the last field. (The last field is the least likely to be a + // system field.) + if (fieldsUsed.isEmpty()) { + fieldsUsed = ImmutableBitSet.of(rowType.getFieldCount() - 1); + } + + // Compute the desired field mapping. Give the consumer the fields they + // want, in the order that they appear in the bitset. 
+ final Mapping mapping = createMapping(fieldsUsed, fieldCount); + + // Create input with trimmed columns. + for (RelNode input : setOp.getInputs()) { + TrimResult trimResult = + trimChild(setOp, input, fieldsUsed, extraFields); + + // We want "mapping", the input gave us "inputMapping", compute + // "remaining" mapping. + //    |                   |                | + //    |---------------- mapping ---------->| + //    |-- inputMapping -->|                | + //    |                   |-- remaining -->| + // + // For instance, suppose we have columns [a, b, c, d], + // the consumer asked for mapping = [b, d], + // and the transformed input has columns inputMapping = [d, a, b]. + // remaining will permute [b, d] to [d, a, b]. + Mapping remaining = Mappings.divide(mapping, trimResult.right); + + // Create a projection; does nothing if remaining is identity. + final RelBuilder relBuilder = REL_BUILDER.get(); + relBuilder.push(trimResult.left); + relBuilder.permute(remaining); + + if (input != relBuilder.peek()) { + ++changeCount; + } + } + + final RelBuilder relBuilder = REL_BUILDER.get(); + // If the input is unchanged, and we need to project all columns, + // there's nothing to do. + if (changeCount == 0 + && mapping.isIdentity()) { + for (RelNode input : setOp.getInputs()) { + relBuilder.build(); + } + return result(setOp, mapping); + } + + switch (setOp.kind) { + case UNION: + relBuilder.union(setOp.all, setOp.getInputs().size()); + break; + case INTERSECT: + relBuilder.intersect(setOp.all, setOp.getInputs().size()); + break; + case EXCEPT: + assert setOp.getInputs().size() == 2; + relBuilder.minus(setOp.all); + break; + default: + throw new AssertionError("unknown setOp " + setOp); + } + return result(relBuilder.build(), mapping); + } + + /** + * Variant of {@link #trimFields(RelNode, ImmutableBitSet, Set)} for + * {@link org.apache.calcite.rel.logical.LogicalAggregate}.
+ */ + public TrimResult trimFields( + Aggregate aggregate, + ImmutableBitSet fieldsUsed, + Set<RelDataTypeField> extraFields) { + // Fields: + // + // | sys fields | group fields | indicator fields | agg functions | + // + // Two kinds of trimming: + // + // 1. If agg rel has system fields but none of these are used, create an + // agg rel with no system fields. + // + // 2. If aggregate functions are not used, remove them. + // + // But group and indicator fields stay, even if they are not used. + + final RelDataType rowType = aggregate.getRowType(); + + // Compute which input fields are used. + // 1. group fields are always used + final ImmutableBitSet.Builder inputFieldsUsed = + aggregate.getGroupSet().rebuild(); + // 2. agg functions + for (AggregateCall aggCall : aggregate.getAggCallList()) { + inputFieldsUsed.addAll(aggCall.getArgList()); + if (aggCall.filterArg >= 0) { + inputFieldsUsed.set(aggCall.filterArg); + } + inputFieldsUsed.addAll(RelCollations.ordinals(aggCall.collation)); + } + + // Create input with trimmed columns. + final RelNode input = aggregate.getInput(); + final Set<RelDataTypeField> inputExtraFields = Collections.emptySet(); + final TrimResult trimResult = + trimChild(aggregate, input, inputFieldsUsed.build(), inputExtraFields); + final RelNode newInput = trimResult.left; + final Mapping inputMapping = trimResult.right; + + // We have to return group keys and (if present) indicators. + // So, pretend that the consumer asked for them. + final int groupCount = aggregate.getGroupSet().cardinality(); + fieldsUsed = + fieldsUsed.union(ImmutableBitSet.range(groupCount)); + + // If the input is unchanged, and we need to project all columns, + // there's nothing to do. + if (input == newInput + && fieldsUsed.equals(ImmutableBitSet.range(rowType.getFieldCount()))) { + return result(aggregate, + Mappings.createIdentity(rowType.getFieldCount())); + } + + // Which agg calls are used by our consumer? 
+ int j = groupCount; + int usedAggCallCount = 0; + for (int i = 0; i < aggregate.getAggCallList().size(); i++) { + if (fieldsUsed.get(j++)) { + ++usedAggCallCount; + } + } + + // Offset due to the number of system fields having changed. + Mapping mapping = + Mappings.create( + MappingType.INVERSE_SURJECTION, + rowType.getFieldCount(), + groupCount + usedAggCallCount); + + final ImmutableBitSet newGroupSet = + Mappings.apply(inputMapping, aggregate.getGroupSet()); + + final ImmutableList<ImmutableBitSet> newGroupSets = + ImmutableList.copyOf( + Iterables.transform(aggregate.getGroupSets(), + input1 -> Mappings.apply(inputMapping, input1))); + + // Populate mapping of where to find the fields. System, group key and + // indicator fields first. + for (j = 0; j < groupCount; j++) { + mapping.set(j, j); + } + + // Now create new agg calls, and populate mapping for them. + final RelBuilder relBuilder = REL_BUILDER.get(); + relBuilder.push(newInput); + final List<RelBuilder.AggCall> newAggCallList = new ArrayList<>(); + j = groupCount; + for (AggregateCall aggCall : aggregate.getAggCallList()) { + if (fieldsUsed.get(j)) { + final ImmutableList<RexNode> args = + relBuilder.fields( + Mappings.apply2(inputMapping, aggCall.getArgList())); + final RexNode filterArg = aggCall.filterArg < 0 ? 
null + : relBuilder.field(Mappings.apply(inputMapping, aggCall.filterArg)); + RelBuilder.AggCall newAggCall = + relBuilder.aggregateCall(aggCall.getAggregation(), args) + .distinct(aggCall.isDistinct()) + .filter(filterArg) + .approximate(aggCall.isApproximate()) + .sort(relBuilder.fields(aggCall.collation)) + .as(aggCall.name); + mapping.set(j, groupCount + newAggCallList.size()); + newAggCallList.add(newAggCall); + } + ++j; + } + + final RelBuilder.GroupKey groupKey = + relBuilder.groupKey(newGroupSet, newGroupSets); + relBuilder.aggregate(groupKey, newAggCallList); + + return result(relBuilder.build(), mapping); + } + + /** + * Variant of {@link #trimFields(RelNode, ImmutableBitSet, Set)} for + * {@link org.apache.calcite.rel.logical.LogicalTableModify}. + */ + public TrimResult trimFields( + LogicalTableModify modifier, + ImmutableBitSet fieldsUsed, + Set<RelDataTypeField> extraFields) { + // Ignore what consumer wants. We always project all columns. + Util.discard(fieldsUsed); + + final RelDataType rowType = modifier.getRowType(); + final int fieldCount = rowType.getFieldCount(); + RelNode input = modifier.getInput(); + + // We want all fields from the child. + final int inputFieldCount = input.getRowType().getFieldCount(); + final ImmutableBitSet inputFieldsUsed = + ImmutableBitSet.range(inputFieldCount); + + // Create input with trimmed columns. + final Set<RelDataTypeField> inputExtraFields = Collections.emptySet(); + TrimResult trimResult = + trimChild(modifier, input, inputFieldsUsed, inputExtraFields); + RelNode newInput = trimResult.left; + final Mapping inputMapping = trimResult.right; + if (!inputMapping.isIdentity()) { + // We asked for all fields. Can't believe that the child decided + // to permute them! 
+ throw new AssertionError( + "Expected identity mapping, got " + inputMapping); + } + + LogicalTableModify newModifier = modifier; + if (newInput != input) { + newModifier = + modifier.copy( + modifier.getTraitSet(), + Collections.singletonList(newInput)); + } + assert newModifier.getClass() == modifier.getClass(); + + // Always project all fields. + Mapping mapping = Mappings.createIdentity(fieldCount); + return result(newModifier, mapping); + } + + /** + * Variant of {@link #trimFields(RelNode, ImmutableBitSet, Set)} for + * {@link org.apache.calcite.rel.logical.LogicalTableFunctionScan}. + */ + public TrimResult trimFields( + LogicalTableFunctionScan tabFun, + ImmutableBitSet fieldsUsed, + Set<RelDataTypeField> extraFields) { + final RelDataType rowType = tabFun.getRowType(); + final int fieldCount = rowType.getFieldCount(); + final List<RelNode> newInputs = new ArrayList<>(); + + for (RelNode input : tabFun.getInputs()) { + final int inputFieldCount = input.getRowType().getFieldCount(); + ImmutableBitSet inputFieldsUsed = ImmutableBitSet.range(inputFieldCount); + + // Create input with trimmed columns. + final Set<RelDataTypeField> inputExtraFields = + Collections.emptySet(); + TrimResult trimResult = + trimChildRestore( + tabFun, input, inputFieldsUsed, inputExtraFields); + assert trimResult.right.isIdentity(); + newInputs.add(trimResult.left); + } + + LogicalTableFunctionScan newTabFun = tabFun; + if (!tabFun.getInputs().equals(newInputs)) { + newTabFun = tabFun.copy(tabFun.getTraitSet(), newInputs, + tabFun.getCall(), tabFun.getElementType(), tabFun.getRowType(), + tabFun.getColumnMappings()); + } + assert newTabFun.getClass() == tabFun.getClass(); + + // Always project all fields. + Mapping mapping = Mappings.createIdentity(fieldCount); + return result(newTabFun, mapping); + } + + /** + * Variant of {@link #trimFields(RelNode, ImmutableBitSet, Set)} for + * {@link org.apache.calcite.rel.logical.LogicalValues}. 
+ */ + public TrimResult trimFields( + LogicalValues values, + ImmutableBitSet fieldsUsed, + Set<RelDataTypeField> extraFields) { + final RelDataType rowType = values.getRowType(); + final int fieldCount = rowType.getFieldCount(); + + // If they are asking for no fields, we can't give them what they want, + // because zero-column records are illegal. Give them the last field, + // which is unlikely to be a system field. + if (fieldsUsed.isEmpty()) { + fieldsUsed = ImmutableBitSet.range(fieldCount - 1, fieldCount); + } + + // If all fields are used, return unchanged. + if (fieldsUsed.equals(ImmutableBitSet.range(fieldCount))) { + Mapping mapping = Mappings.createIdentity(fieldCount); + return result(values, mapping); + } + + final ImmutableList.Builder<ImmutableList<RexLiteral>> newTuples = + ImmutableList.builder(); + for (ImmutableList<RexLiteral> tuple : values.getTuples()) { + ImmutableList.Builder<RexLiteral> newTuple = ImmutableList.builder(); + for (int field : fieldsUsed) { + newTuple.add(tuple.get(field)); + } + newTuples.add(newTuple.build()); + } + + final Mapping mapping = createMapping(fieldsUsed, fieldCount); + final RelDataType newRowType = + RelOptUtil.permute(values.getCluster().getTypeFactory(), rowType, + mapping); + final LogicalValues newValues = + LogicalValues.create(values.getCluster(), newRowType, + newTuples.build()); + return result(newValues, mapping); + } + + protected Mapping createMapping(ImmutableBitSet fieldsUsed, int fieldCount) { + final Mapping mapping = + Mappings.create( + MappingType.INVERSE_SURJECTION, + fieldCount, + fieldsUsed.cardinality()); + int i = 0; + for (int field : fieldsUsed) { + mapping.set(field, i++); + } + return mapping; + } + + /** + * Variant of {@link #trimFields(RelNode, ImmutableBitSet, Set)} for + * {@link org.apache.calcite.rel.logical.LogicalTableScan}. 
+ */ + public TrimResult trimFields( + final TableScan tableAccessRel, + ImmutableBitSet fieldsUsed, + Set<RelDataTypeField> extraFields) { + final int fieldCount = tableAccessRel.getRowType().getFieldCount(); + if (fieldsUsed.equals(ImmutableBitSet.range(fieldCount)) + && extraFields.isEmpty()) { + // if there is nothing to project or if we are projecting everything + // then no need to introduce another RelNode + return trimFields( + (RelNode) tableAccessRel, fieldsUsed, extraFields); + } + final RelNode newTableAccessRel = + tableAccessRel.project(fieldsUsed, extraFields, REL_BUILDER.get()); + + // Some parts of the system can't handle rows with zero fields, so + // pretend that one field is used. + if (fieldsUsed.cardinality() == 0) { + RelNode input = newTableAccessRel; + if (input instanceof Project) { + // The table has implemented the project in the obvious way - by + // creating project with 0 fields. Strip it away, and create our own + // project with one field. + Project project = (Project) input; + if (project.getRowType().getFieldCount() == 0) { + input = project.getInput(); + } + } + return dummyProject(fieldCount, input); + } + + final Mapping mapping = createMapping(fieldsUsed, fieldCount); + return result(newTableAccessRel, mapping); + } + + //~ Inner Classes ---------------------------------------------------------- + + /** + * Result of an attempt to trim columns from a relational expression. + * + * <p>The mapping describes where to find the columns wanted by the parent + * of the current relational expression. + * + * <p>The mapping is a + * {@link org.apache.calcite.util.mapping.Mappings.SourceMapping}, which means + * that no column can be used more than once, and some columns are not used. + * {@code columnsUsed.getSource(i)} returns the source of the i'th output + * field. + * + * <p>For example, consider the mapping for a relational expression that + * has 4 output columns but only two are being used. 
The mapping + * {2 → 1, 3 → 0} would give the following behavior: + * + * <ul> + * <li>columnsUsed.getSourceCount() returns 4 + * <li>columnsUsed.getTargetCount() returns 2 + * <li>columnsUsed.getSource(0) returns 3 + * <li>columnsUsed.getSource(1) returns 2 + * <li>columnsUsed.getSource(2) throws IndexOutOfBounds + * <li>columnsUsed.getTargetOpt(3) returns 0 + * <li>columnsUsed.getTargetOpt(0) returns -1 + * </ul> + */ + protected static class TrimResult extends Pair<RelNode, Mapping> { + /** + * Creates a TrimResult. + * + * @param left New relational expression + * @param right Mapping of fields onto original fields + */ + public TrimResult(RelNode left, Mapping right) { + super(left, right); + assert right.getTargetCount() == left.getRowType().getFieldCount() + : "rowType: " + left.getRowType() + ", mapping: " + right; + } + } +} + +// End RelFieldTrimmer.java diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java index 537355f..6589eeb 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java @@ -38,6 +38,12 @@ import org.apache.calcite.adapter.druid.DruidTable; import org.apache.calcite.adapter.java.JavaTypeFactory; import org.apache.calcite.adapter.jdbc.JdbcConvention; import org.apache.calcite.adapter.jdbc.JdbcImplementor; +import org.apache.calcite.adapter.jdbc.JdbcRules.JdbcAggregate; +import org.apache.calcite.adapter.jdbc.JdbcRules.JdbcFilter; +import org.apache.calcite.adapter.jdbc.JdbcRules.JdbcJoin; +import org.apache.calcite.adapter.jdbc.JdbcRules.JdbcProject; +import org.apache.calcite.adapter.jdbc.JdbcRules.JdbcSort; +import org.apache.calcite.adapter.jdbc.JdbcRules.JdbcUnion; import org.apache.calcite.adapter.jdbc.JdbcSchema; import org.apache.calcite.adapter.jdbc.JdbcTable; import org.apache.calcite.config.CalciteConnectionConfig; @@ -57,12 +63,17 @@ import 
org.apache.calcite.plan.hep.HepMatchOrder; import org.apache.calcite.plan.hep.HepPlanner; import org.apache.calcite.plan.hep.HepProgram; import org.apache.calcite.plan.hep.HepProgramBuilder; +import org.apache.calcite.plan.hep.HepRelVertex; +import org.apache.calcite.plan.volcano.AbstractConverter; +import org.apache.calcite.plan.volcano.RelSubset; +import org.apache.calcite.rel.AbstractRelNode; import org.apache.calcite.rel.RelCollation; import org.apache.calcite.rel.RelCollationImpl; import org.apache.calcite.rel.RelCollations; import org.apache.calcite.rel.RelFieldCollation; import org.apache.calcite.rel.RelNode; import org.apache.calcite.rel.RelVisitor; +import org.apache.calcite.rel.convert.ConverterImpl; import org.apache.calcite.rel.core.Aggregate; import org.apache.calcite.rel.core.AggregateCall; import org.apache.calcite.rel.core.Filter; @@ -148,9 +159,11 @@ import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveFilter; import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveGroupingID; import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveIntersect; import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveJoin; +import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveMultiJoin; import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveProject; import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveRelNode; import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveSemiJoin; +import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveSortExchange; import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveSortLimit; import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveTableFunctionScan; import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveTableScan; @@ -338,6 +351,44 @@ public class CalcitePlanner extends SemanticAnalyzer { private static final Pattern PATTERN_TIMESTAMP = Pattern.compile("TIMESTAMP\\(9\\)"); + /** + 
* This is the list of operators that are specifically used in Hive. + */ + private static final List<Class<? extends RelNode>> HIVE_REL_NODE_CLASSES = + ImmutableList.of( + RelNode.class, + AbstractRelNode.class, + RelSubset.class, + HepRelVertex.class, + ConverterImpl.class, + AbstractConverter.class, + + HiveTableScan.class, + HiveAggregate.class, + HiveExcept.class, + HiveFilter.class, + HiveIntersect.class, + HiveJoin.class, + HiveMultiJoin.class, + HiveProject.class, + HiveRelNode.class, + HiveSemiJoin.class, + HiveSortExchange.class, + HiveSortLimit.class, + HiveTableFunctionScan.class, + HiveUnion.class, + + DruidQuery.class, + + HiveJdbcConverter.class, + JdbcHiveTableScan.class, + JdbcAggregate.class, + JdbcFilter.class, + JdbcJoin.class, + JdbcProject.class, + JdbcSort.class, + JdbcUnion.class); + public CalcitePlanner(QueryState queryState) throws SemanticException { super(queryState); @@ -1794,14 +1845,12 @@ public class CalcitePlanner extends SemanticAnalyzer { calciteGenPlan.getCluster().getPlanner().setExecutor(executorProvider); // We need to get the ColumnAccessInfo and viewToTableSchema for views. 
- HiveRelFieldTrimmer fieldTrimmer = new HiveRelFieldTrimmer(null, - HiveRelFactories.HIVE_BUILDER.create(optCluster, null), - this.columnAccessInfo, this.viewProjectToTableSchema); - - fieldTrimmer.trim(calciteGenPlan); + HiveRelFieldTrimmer.get() + .trim(HiveRelFactories.HIVE_BUILDER.create(optCluster, null), + calciteGenPlan, this.columnAccessInfo, this.viewProjectToTableSchema); // Create and set MD provider - HiveDefaultRelMetadataProvider mdProvider = new HiveDefaultRelMetadataProvider(conf); + HiveDefaultRelMetadataProvider mdProvider = new HiveDefaultRelMetadataProvider(conf, HIVE_REL_NODE_CLASSES); RelMetadataQuery.THREAD_PROVIDERS.set(JaninoRelMetadataProvider.of(mdProvider.getMetadataProvider())); //Remove subquery @@ -5214,10 +5263,12 @@ public class CalcitePlanner extends SemanticAnalyzer { /** * This method can be called at startup time to pre-register all the * additional Hive classes (compared to Calcite core classes) that may - * be visited during the planning phase. + * be visited during the planning phase in the metadata providers + * and the field trimmer. 
*/ - public static void initializeMetadataProviderClass() { - HiveDefaultRelMetadataProvider.initializeMetadataProviderClass(); + public static void warmup() { + HiveDefaultRelMetadataProvider.initializeMetadataProviderClass(HIVE_REL_NODE_CLASSES); + HiveRelFieldTrimmer.initializeFieldTrimmerClass(HIVE_REL_NODE_CLASSES); } private enum TableType { diff --git a/ql/src/test/org/apache/hadoop/hive/ql/optimizer/calcite/TestCBORuleFiredOnlyOnce.java b/ql/src/test/org/apache/hadoop/hive/ql/optimizer/calcite/TestCBORuleFiredOnlyOnce.java index e8dd572..081d5f8 100644 --- a/ql/src/test/org/apache/hadoop/hive/ql/optimizer/calcite/TestCBORuleFiredOnlyOnce.java +++ b/ql/src/test/org/apache/hadoop/hive/ql/optimizer/calcite/TestCBORuleFiredOnlyOnce.java @@ -70,7 +70,7 @@ public class TestCBORuleFiredOnlyOnce { RelOptCluster cluster = RelOptCluster.create(planner, rexBuilder); // Create MD provider - HiveDefaultRelMetadataProvider mdProvider = new HiveDefaultRelMetadataProvider(conf); + HiveDefaultRelMetadataProvider mdProvider = new HiveDefaultRelMetadataProvider(conf, null); List<RelMetadataProvider> list = Lists.newArrayList(); list.add(mdProvider.getMetadataProvider()); planner.registerMetadataProviders(list); diff --git a/service/src/java/org/apache/hive/service/server/HiveServer2.java b/service/src/java/org/apache/hive/service/server/HiveServer2.java index 8f73c60..fece82e 100644 --- a/service/src/java/org/apache/hive/service/server/HiveServer2.java +++ b/service/src/java/org/apache/hive/service/server/HiveServer2.java @@ -250,8 +250,8 @@ public class HiveServer2 extends CompositeService { LlapRegistryService.getClient(hiveConf); } - // Initialize metadata provider class - CalcitePlanner.initializeMetadataProviderClass(); + // Initialize metadata provider class and trimmer + CalcitePlanner.warmup(); try { sessionHive = Hive.get(hiveConf);