This is an automated email from the ASF dual-hosted git repository. zabetak pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/calcite.git
The following commit(s) were added to refs/heads/master by this push: new e14cef2 [CALCITE-2703] Reduce code generation and class loading overhead when executing queries in the EnumerableConvention e14cef2 is described below commit e14cef28af2b4f0aac5c3876aa7023053e294ee1 Author: Stamatis Zampetakis <zabe...@gmail.com> AuthorDate: Fri Nov 30 17:26:26 2018 +0100 [CALCITE-2703] Reduce code generation and class loading overhead when executing queries in the EnumerableConvention 1. Add a configurable Guava cache in EnumerableInterpretable#getBindable to re-use Bindable instances. 2. Avoid caching instances of classes with static fields. 3. Add JMH benchmark for the expensive part of EnumerableInterpretable#getBindable method. 4. Add utility method for obtaining integer runtime properties. 5. NPE in VisitorImpl when visiting a FieldDeclaration without an initializer. --- .../enumerable/EnumerableInterpretable.java | 49 +++- .../calcite/config/CalciteSystemProperty.java | 59 ++++- .../apache/calcite/linq4j/tree/VisitorImpl.java | 3 +- ubenchmark/pom.xml | 19 +- .../enumerable/CodeGenerationBenchmark.java | 287 +++++++++++++++++++++ .../calcite/adapter/enumerable/package-info.java | 26 ++ 6 files changed, 428 insertions(+), 15 deletions(-) diff --git a/core/src/main/java/org/apache/calcite/adapter/enumerable/EnumerableInterpretable.java b/core/src/main/java/org/apache/calcite/adapter/enumerable/EnumerableInterpretable.java index beafd4c..86ed35c 100644 --- a/core/src/main/java/org/apache/calcite/adapter/enumerable/EnumerableInterpretable.java +++ b/core/src/main/java/org/apache/calcite/adapter/enumerable/EnumerableInterpretable.java @@ -31,6 +31,8 @@ import org.apache.calcite.linq4j.Enumerable; import org.apache.calcite.linq4j.Enumerator; import org.apache.calcite.linq4j.tree.ClassDeclaration; import org.apache.calcite.linq4j.tree.Expressions; +import org.apache.calcite.linq4j.tree.FieldDeclaration; +import org.apache.calcite.linq4j.tree.VisitorImpl; import 
org.apache.calcite.plan.ConventionTraitDef; import org.apache.calcite.plan.RelOptCluster; import org.apache.calcite.plan.RelTraitSet; @@ -43,6 +45,9 @@ import org.apache.calcite.runtime.Typed; import org.apache.calcite.runtime.Utilities; import org.apache.calcite.util.Util; +import com.google.common.cache.Cache; +import com.google.common.cache.CacheBuilder; + import org.codehaus.commons.compiler.CompileException; import org.codehaus.commons.compiler.CompilerFactoryFactory; import org.codehaus.commons.compiler.IClassBodyEvaluator; @@ -50,8 +55,10 @@ import org.codehaus.commons.compiler.ICompilerFactory; import java.io.IOException; import java.io.StringReader; +import java.lang.reflect.Modifier; import java.util.List; import java.util.Map; +import java.util.concurrent.ExecutionException; /** * Relational expression that converts an enumerable input to interpretable @@ -82,6 +89,18 @@ public class EnumerableInterpretable extends ConverterImpl return new EnumerableNode(enumerable, implementor.compiler, this); } + /** + * The cache storing Bindable objects, instantiated via dynamically generated Java classes. + * + * It allows to re-use Bindable objects for queries appearing relatively often. It is used + * to avoid the cost of compiling and generating a new class and also instantiating the object. 
+ */ + private static final Cache<String, Bindable> BINDABLE_CACHE = + CacheBuilder.newBuilder() + .concurrencyLevel(CalciteSystemProperty.BINDABLE_CACHE_CONCURRENCY_LEVEL.value()) + .maximumSize(CalciteSystemProperty.BINDABLE_CACHE_MAX_SIZE.value()) + .build(); + public static Bindable toBindable(Map<String, Object> parameters, CalcitePrepare.SparkHandler spark, EnumerableRel rel, EnumerableRel.Prefer prefer) { @@ -110,14 +129,8 @@ public class EnumerableInterpretable extends ConverterImpl } } - static ArrayBindable getArrayBindable(ClassDeclaration expr, String s, - int fieldCount) throws CompileException, IOException { - Bindable bindable = getBindable(expr, s, fieldCount); - return box(bindable); - } - static Bindable getBindable(ClassDeclaration expr, String s, int fieldCount) - throws CompileException, IOException { + throws CompileException, IOException, ExecutionException { ICompilerFactory compilerFactory; try { compilerFactory = CompilerFactoryFactory.getDefaultCompilerFactory(); @@ -125,7 +138,7 @@ public class EnumerableInterpretable extends ConverterImpl throw new IllegalStateException( "Unable to instantiate java compiler", e); } - IClassBodyEvaluator cbe = compilerFactory.newClassBodyEvaluator(); + final IClassBodyEvaluator cbe = compilerFactory.newClassBodyEvaluator(); cbe.setClassName(expr.name); cbe.setExtendedClass(Utilities.class); cbe.setImplementedInterfaces( @@ -137,9 +150,29 @@ public class EnumerableInterpretable extends ConverterImpl // Add line numbers to the generated janino class cbe.setDebuggingInformation(true, true, true); } + + if (CalciteSystemProperty.BINDABLE_CACHE_MAX_SIZE.value() != 0) { + StaticFieldDetector detector = new StaticFieldDetector(); + expr.accept(detector); + if (!detector.containsStaticField) { + return BINDABLE_CACHE.get(s, () -> (Bindable) cbe.createInstance(new StringReader(s))); + } + } return (Bindable) cbe.createInstance(new StringReader(s)); } + /** + * A visitor detecting if the Java AST contains static 
fields. + */ + static class StaticFieldDetector extends VisitorImpl<Void> { + boolean containsStaticField = false; + + @Override public Void visit(final FieldDeclaration fieldDeclaration) { + containsStaticField = (fieldDeclaration.modifier & Modifier.STATIC) != 0; + return containsStaticField ? null : super.visit(fieldDeclaration); + } + } + /** Converts a bindable over scalar values into an array bindable, with each * row as an array of 1 element. */ static ArrayBindable box(final Bindable bindable) { diff --git a/core/src/main/java/org/apache/calcite/config/CalciteSystemProperty.java b/core/src/main/java/org/apache/calcite/config/CalciteSystemProperty.java index bdda3e6..0660b37 100644 --- a/core/src/main/java/org/apache/calcite/config/CalciteSystemProperty.java +++ b/core/src/main/java/org/apache/calcite/config/CalciteSystemProperty.java @@ -26,6 +26,7 @@ import java.util.Locale; import java.util.Properties; import java.util.Set; import java.util.function.Function; +import java.util.function.IntPredicate; import java.util.stream.Stream; /** @@ -245,14 +246,68 @@ public final class CalciteSystemProperty<T> { public static final CalciteSystemProperty<Integer> METADATA_HANDLER_CACHE_MAXIMUM_SIZE = intProperty("calcite.metadata.handler.cache.maximum.size", 1000); + /** + * The maximum size of the cache used for storing Bindable objects, instantiated via + * dynamically generated Java classes. + * + * <p>The default value is 0.</p> + * + * <p>The property can take any value between [0, {@link Integer#MAX_VALUE}] inclusive. If the + * value is not valid (or not specified) then the default value is used.</p> + * + * <p>The cached objects may be quite big so it is suggested to use a rather small cache size + * (e.g., 1000). 
For the most common use cases a number close to 1000 should be enough to + * alleviate the performance penalty of compiling and loading classes.</p> + * + * <p>Setting this property to 0 disables the cache.</p> + */ + public static final CalciteSystemProperty<Integer> BINDABLE_CACHE_MAX_SIZE = + intProperty("calcite.bindable.cache.maxSize", 0, v -> v >= 0 && v <= Integer.MAX_VALUE); + /** + * The concurrency level of the cache used for storing Bindable objects, instantiated via + * dynamically generated Java classes. + * + * <p>The default value is 1.</p> + * + * <p>The property can take any value between [1, {@link Integer#MAX_VALUE}] inclusive. If the + * value is not valid (or not specified) then the default value is used.</p> + * + * <p>This property has no effect if the cache is disabled (i.e., {@link #BINDABLE_CACHE_MAX_SIZE} + * set to 0.</p> + */ + public static final CalciteSystemProperty<Integer> BINDABLE_CACHE_CONCURRENCY_LEVEL = + intProperty("calcite.bindable.cache.concurrencyLevel", 1, + v -> v >= 1 && v <= Integer.MAX_VALUE); + private static CalciteSystemProperty<Boolean> booleanProperty(String key, boolean defaultValue) { return new CalciteSystemProperty<>(key, v -> v == null ? defaultValue : Boolean.parseBoolean(v)); } private static CalciteSystemProperty<Integer> intProperty(String key, int defaultValue) { - return new CalciteSystemProperty<>(key, - v -> v == null ? defaultValue : Integer.parseInt(v)); + return intProperty(key, defaultValue, v -> true); + } + + /** + * Returns the value of the system property with the specified name as int, or + * the <code>defaultValue</code> if any of the conditions below hold: + * (i) the property is not defined; + * (ii) the property value cannot be transformed to an int; + * (iii) the property value does not satisfy the checker. 
+ */ + private static CalciteSystemProperty<Integer> intProperty(String key, int defaultValue, + IntPredicate valueChecker) { + return new CalciteSystemProperty<>(key, v -> { + if (v == null) { + return defaultValue; + } + try { + int intVal = Integer.parseInt(v); + return valueChecker.test(intVal) ? intVal : defaultValue; + } catch (NumberFormatException nfe) { + return defaultValue; + } + }); } private static CalciteSystemProperty<String> stringProperty(String key, String defaultValue) { diff --git a/linq4j/src/main/java/org/apache/calcite/linq4j/tree/VisitorImpl.java b/linq4j/src/main/java/org/apache/calcite/linq4j/tree/VisitorImpl.java index 2507db9..8a4bf58 100644 --- a/linq4j/src/main/java/org/apache/calcite/linq4j/tree/VisitorImpl.java +++ b/linq4j/src/main/java/org/apache/calcite/linq4j/tree/VisitorImpl.java @@ -78,7 +78,8 @@ public class VisitorImpl<R> implements Visitor<R> { public R visit(FieldDeclaration fieldDeclaration) { R r0 = fieldDeclaration.parameter.accept(this); - return fieldDeclaration.initializer.accept(this); + return fieldDeclaration.initializer == null ? null + : fieldDeclaration.initializer.accept(this); } public R visit(ForStatement forStatement) { diff --git a/ubenchmark/pom.xml b/ubenchmark/pom.xml index 9fe3150..63028bd 100644 --- a/ubenchmark/pom.xml +++ b/ubenchmark/pom.xml @@ -40,6 +40,21 @@ limitations under the License. </dependency> <dependency> + <groupId>org.apache.calcite</groupId> + <artifactId>calcite-linq4j</artifactId> + </dependency> + + <dependency> + <groupId>com.google.guava</groupId> + <artifactId>guava</artifactId> + </dependency> + + <dependency> + <groupId>org.codehaus.janino</groupId> + <artifactId>commons-compiler</artifactId> + </dependency> + + <dependency> <groupId>org.openjdk.jmh</groupId> <artifactId>jmh-core</artifactId> </dependency> @@ -48,10 +63,6 @@ limitations under the License. 
<artifactId>jmh-generator-annprocess</artifactId> <scope>provided</scope> </dependency> - <dependency> - <groupId>com.google.guava</groupId> - <artifactId>guava</artifactId> - </dependency> </dependencies> <build> diff --git a/ubenchmark/src/main/java/org/apache/calcite/adapter/enumerable/CodeGenerationBenchmark.java b/ubenchmark/src/main/java/org/apache/calcite/adapter/enumerable/CodeGenerationBenchmark.java new file mode 100644 index 0000000..4c32cde --- /dev/null +++ b/ubenchmark/src/main/java/org/apache/calcite/adapter/enumerable/CodeGenerationBenchmark.java @@ -0,0 +1,287 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to you under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.calcite.adapter.enumerable; + +import org.apache.calcite.jdbc.JavaTypeFactoryImpl; +import org.apache.calcite.linq4j.tree.ClassDeclaration; +import org.apache.calcite.linq4j.tree.Expressions; +import org.apache.calcite.plan.ConventionTraitDef; +import org.apache.calcite.plan.RelOptCluster; +import org.apache.calcite.plan.RelTraitSet; +import org.apache.calcite.plan.volcano.VolcanoPlanner; +import org.apache.calcite.rel.RelNode; +import org.apache.calcite.rel.core.JoinRelType; +import org.apache.calcite.rel.core.RelFactories; +import org.apache.calcite.rel.rules.FilterToCalcRule; +import org.apache.calcite.rel.rules.ProjectToCalcRule; +import org.apache.calcite.rel.type.RelDataTypeFactory; +import org.apache.calcite.rex.RexBuilder; +import org.apache.calcite.rex.RexNode; +import org.apache.calcite.runtime.ArrayBindable; +import org.apache.calcite.runtime.Bindable; +import org.apache.calcite.runtime.Typed; +import org.apache.calcite.runtime.Utilities; +import org.apache.calcite.tools.RelBuilder; + + +import com.google.common.cache.Cache; +import com.google.common.cache.CacheBuilder; + +import org.codehaus.commons.compiler.CompilerFactoryFactory; +import org.codehaus.commons.compiler.IClassBodyEvaluator; +import org.codehaus.commons.compiler.ICompilerFactory; +import org.openjdk.jmh.annotations.Benchmark; +import org.openjdk.jmh.annotations.BenchmarkMode; +import org.openjdk.jmh.annotations.Fork; +import org.openjdk.jmh.annotations.Level; +import org.openjdk.jmh.annotations.Measurement; +import org.openjdk.jmh.annotations.Mode; +import org.openjdk.jmh.annotations.OutputTimeUnit; +import org.openjdk.jmh.annotations.Param; +import org.openjdk.jmh.annotations.Scope; +import org.openjdk.jmh.annotations.Setup; +import org.openjdk.jmh.annotations.State; +import org.openjdk.jmh.annotations.Threads; +import org.openjdk.jmh.annotations.Warmup; +import org.openjdk.jmh.profile.GCProfiler; +import org.openjdk.jmh.runner.Runner; +import 
org.openjdk.jmh.runner.RunnerException; +import org.openjdk.jmh.runner.options.Options; +import org.openjdk.jmh.runner.options.OptionsBuilder; + +import java.io.StringReader; +import java.util.ArrayList; +import java.util.HashMap; +import java.util.List; +import java.util.concurrent.TimeUnit; + +/** + * A benchmark of the main methods that are dynamically + * generating and compiling Java code at runtime. + * + * The benchmark examines the behavior of existing methods + * and evaluates the potential of adding a caching layer on top. + */ +@Fork(value = 1, jvmArgsPrepend = "-Xmx1024m") +@Measurement(iterations = 10, time = 1) +@Warmup(iterations = 0) +@Threads(1) +@OutputTimeUnit(TimeUnit.SECONDS) +@BenchmarkMode(Mode.Throughput) +public class CodeGenerationBenchmark { + + /** + * State holding the generated queries/plans and additional information + * exploited by the embedded compiler in order to dynamically build a Java class. + */ + @State(Scope.Thread) + public static class QueryState { + /** + * The number of distinct queries to be generated. + */ + @Param({"1", "10", "100", "1000"}) + int queries; + + /** + * The number of joins for each generated query. + */ + @Param({"1", "10", "20"}) + int joins; + + /** + * The number of disjunctions for each generated query. + */ + @Param({"1", "10", "100"}) + int whereClauseDisjunctions; + + /** + * The necessary plan information for every generated query. 
+ */ + PlanInfo[] planInfos; + + private int currentPlan = 0; + + @Setup(Level.Trial) + public void setup() { + planInfos = new PlanInfo[queries]; + VolcanoPlanner planner = new VolcanoPlanner(); + planner.addRelTraitDef(ConventionTraitDef.INSTANCE); + planner.addRule(FilterToCalcRule.INSTANCE); + planner.addRule(ProjectToCalcRule.INSTANCE); + planner.addRule(EnumerableRules.ENUMERABLE_CALC_RULE); + planner.addRule(EnumerableRules.ENUMERABLE_JOIN_RULE); + planner.addRule(EnumerableRules.ENUMERABLE_VALUES_RULE); + + RelDataTypeFactory typeFactory = + new JavaTypeFactoryImpl(org.apache.calcite.rel.type.RelDataTypeSystem.DEFAULT); + RelOptCluster cluster = RelOptCluster.create(planner, new RexBuilder(typeFactory)); + RelTraitSet desiredTraits = + cluster.traitSet().replace(EnumerableConvention.INSTANCE); + + RelBuilder relBuilder = RelFactories.LOGICAL_BUILDER.create(cluster, null); + // Generates queries of the following form depending on the configuration parameters. + // SELECT `t`.`name` + // FROM (VALUES (1, 'Value0')) AS `t` (`id`, `name`) + // INNER JOIN (VALUES (1, 'Value1')) AS `t` (`id`, `name`) AS `t0` ON `t`.`id` = `t0`.`id` + // INNER JOIN (VALUES (2, 'Value2')) AS `t` (`id`, `name`) AS `t1` ON `t`.`id` = `t1`.`id` + // INNER JOIN (VALUES (3, 'Value3')) AS `t` (`id`, `name`) AS `t2` ON `t`.`id` = `t2`.`id` + // INNER JOIN ... + // WHERE + // `t`.`name` = 'name0' OR + // `t`.`name` = 'name1' OR + // `t`.`name` = 'name2' OR + // ... + // OR `t`.`id` = 0 + // The last disjunction (i.e, t.id = $i) is what makes the queries different from one another + // by assigning a different constant literal. 
+ for (int i = 0; i < queries; i++) { + relBuilder.values(new String[]{"id", "name"}, 1, "Value" + 0); + for (int j = 1; j <= joins; j++) { + relBuilder + .values(new String[]{"id", "name"}, j, "Value" + j) + .join(JoinRelType.INNER, "id"); + } + + List<RexNode> disjunctions = new ArrayList<>(); + for (int j = 0; j < whereClauseDisjunctions; j++) { + disjunctions.add( + relBuilder.equals( + relBuilder.field("name"), + relBuilder.literal("name" + j))); + } + disjunctions.add( + relBuilder.equals( + relBuilder.field("id"), + relBuilder.literal(i))); + RelNode query = + relBuilder + .filter(relBuilder.or(disjunctions)) + .project(relBuilder.field("name")) + .build(); + + RelNode query0 = planner.changeTraits(query, desiredTraits); + planner.setRoot(query0); + + PlanInfo info = new PlanInfo(); + EnumerableRel plan = (EnumerableRel) planner.findBestExp(); + + EnumerableRelImplementor relImplementor = + new EnumerableRelImplementor(plan.getCluster().getRexBuilder(), new HashMap<>()); + info.classExpr = relImplementor.implementRoot(plan, EnumerableRel.Prefer.ARRAY); + info.javaCode = + Expressions.toString(info.classExpr.memberDeclarations, "\n", false); + + ICompilerFactory compilerFactory; + try { + compilerFactory = CompilerFactoryFactory.getDefaultCompilerFactory(); + } catch (Exception e) { + throw new IllegalStateException( + "Unable to instantiate java compiler", e); + } + IClassBodyEvaluator cbe = compilerFactory.newClassBodyEvaluator(); + cbe.setClassName(info.classExpr.name); + cbe.setExtendedClass(Utilities.class); + cbe.setImplementedInterfaces( + plan.getRowType().getFieldCount() == 1 + ? 
new Class[]{Bindable.class, Typed.class} + : new Class[]{ArrayBindable.class}); + cbe.setParentClassLoader(EnumerableInterpretable.class.getClassLoader()); + info.cbe = cbe; + planInfos[i] = info; + } + + } + + int nextPlan() { + int ret = currentPlan; + currentPlan = (currentPlan + 1) % queries; + return ret; + } + } + + /***/ + private static class PlanInfo { + ClassDeclaration classExpr; + IClassBodyEvaluator cbe; + String javaCode; + } + + /** + * State holding a cache that is initialized + * once at the beginning of each iteration. + */ + @State(Scope.Thread) + public static class CacheState { + @Param({"10", "100", "1000"}) + int cacheSize; + + Cache<String, Bindable> cache; + + @Setup(Level.Iteration) + public void setup() { + cache = CacheBuilder.newBuilder().maximumSize(cacheSize).concurrencyLevel(1).build(); + } + + } + + + /** + * Benchmarks the part creating Bindable instances from + * {@link EnumerableInterpretable#getBindable(ClassDeclaration, String, int)} + * method without any additional caching layer. + */ + @Benchmark + public Bindable<?> getBindableNoCache(QueryState state) throws Exception { + PlanInfo info = state.planInfos[state.nextPlan()]; + return (Bindable) info.cbe.createInstance(new StringReader(info.javaCode)); + } + + /** + * Benchmarks the part of creating Bindable instances from + * {@link EnumerableInterpretable#getBindable(ClassDeclaration, String, int)} + * method with an additional cache layer. 
+ */ + @Benchmark + public Bindable<?> getBindableWithCache( + QueryState jState, + CacheState chState) throws Exception { + PlanInfo info = jState.planInfos[jState.nextPlan()]; + Cache<String, Bindable> cache = chState.cache; + + EnumerableInterpretable.StaticFieldDetector detector + = new EnumerableInterpretable.StaticFieldDetector(); + info.classExpr.accept(detector); + if (!detector.containsStaticField) { + return cache.get( + info.javaCode, + () -> (Bindable) info.cbe.createInstance(new StringReader(info.javaCode))); + } + throw new IllegalStateException("Benchmark queries should not arrive here"); + } + + public static void main(String[] args) throws RunnerException { + Options opt = new OptionsBuilder() + .include(CodeGenerationBenchmark.class.getName()) + .addProfiler(GCProfiler.class) + .detectJvmArgs() + .build(); + + new Runner(opt).run(); + } +} + +// End CodeGenerationBenchmark.java diff --git a/ubenchmark/src/main/java/org/apache/calcite/adapter/enumerable/package-info.java b/ubenchmark/src/main/java/org/apache/calcite/adapter/enumerable/package-info.java new file mode 100644 index 0000000..061c3f4 --- /dev/null +++ b/ubenchmark/src/main/java/org/apache/calcite/adapter/enumerable/package-info.java @@ -0,0 +1,26 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to you under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/** + * JMH benchmarks for the Calcite Enumerable adapter. + */ +@PackageMarker +package org.apache.calcite.adapter.enumerable; + +import org.apache.calcite.avatica.util.PackageMarker; + +// End package-info.java