This is an automated email from the ASF dual-hosted git repository. rubenql pushed a commit to branch main in repository https://gitbox.apache.org/repos/asf/calcite.git
The following commit(s) were added to refs/heads/main by this push: new 1e424a0d4e [CALCITE-3094] Code of method grows beyond 64 KB when joining two tables with many fields 1e424a0d4e is described below commit 1e424a0d4eeec69e4da0a504cf080cf7a847704d Author: James Duong <james.du...@improving.com> AuthorDate: Fri Jun 14 16:25:16 2024 -0700 [CALCITE-3094] Code of method grows beyond 64 KB when joining two tables with many fields Change code generation such that when implementing joins across many fields up to a certain threshold, populate the output array using System.arraycopy() instead of explicitly instantiating an array with a large number of elements. --- .../calcite/adapter/enumerable/EnumUtils.java | 72 ++++++++ .../calcite/adapter/enumerable/JavaRowFormat.java | 55 +++++++ .../java/org/apache/calcite/interpreter/Row.java | 2 + .../org/apache/calcite/util/BuiltInMethod.java | 3 + .../calcite/test/LargeGeneratedJoinTest.java | 182 +++++++++++++++++++++ 5 files changed, 314 insertions(+) diff --git a/core/src/main/java/org/apache/calcite/adapter/enumerable/EnumUtils.java b/core/src/main/java/org/apache/calcite/adapter/enumerable/EnumUtils.java index 7382beeeec..06043a567c 100644 --- a/core/src/main/java/org/apache/calcite/adapter/enumerable/EnumUtils.java +++ b/core/src/main/java/org/apache/calcite/adapter/enumerable/EnumUtils.java @@ -22,6 +22,7 @@ import org.apache.calcite.linq4j.AbstractEnumerable; import org.apache.calcite.linq4j.Enumerable; import org.apache.calcite.linq4j.Enumerator; import org.apache.calcite.linq4j.JoinType; +import org.apache.calcite.linq4j.Nullness; import org.apache.calcite.linq4j.Ord; import org.apache.calcite.linq4j.function.Function1; import org.apache.calcite.linq4j.function.Function2; @@ -30,12 +31,14 @@ import org.apache.calcite.linq4j.tree.BlockBuilder; import org.apache.calcite.linq4j.tree.BlockStatement; import org.apache.calcite.linq4j.tree.ConstantExpression; import org.apache.calcite.linq4j.tree.ConstantUntypedNull; +import org.apache.calcite.linq4j.tree.DeclarationStatement; import org.apache.calcite.linq4j.tree.Expression; import org.apache.calcite.linq4j.tree.ExpressionType; import org.apache.calcite.linq4j.tree.Expressions; import org.apache.calcite.linq4j.tree.FunctionExpression; import org.apache.calcite.linq4j.tree.MethodCallExpression; import org.apache.calcite.linq4j.tree.MethodDeclaration; +import org.apache.calcite.linq4j.tree.NewArrayExpression; import org.apache.calcite.linq4j.tree.ParameterExpression; import org.apache.calcite.linq4j.tree.Primitive; import org.apache.calcite.linq4j.tree.Types; @@ -61,6 +64,7 @@ import com.google.common.collect.ImmutableMap; import org.checkerframework.checker.nullness.qual.Nullable; +import java.lang.reflect.Array; import java.lang.reflect.Method; import java.lang.reflect.Modifier; import java.lang.reflect.Type; @@ -160,6 +164,34 @@ public class EnumUtils { // Generate all fields. final List<Expression> expressions = new ArrayList<>(); final int outputFieldCount = physType.getRowType().getFieldCount(); + + // If there are many output fields, create the output dynamically so that the code size stays + // below the limit. See CALCITE-3094. + final boolean generateCompactCode = outputFieldCount >= 100; + final ParameterExpression compactOutputVar; + final BlockBuilder compactCode = new BlockBuilder(); + if (generateCompactCode) { + Class<?> fieldClass = physType.fieldClass(0); + // If all fields have the same type, use the specific type. Otherwise just use Object. + for (int fieldIndex = 1; fieldIndex < outputFieldCount; ++fieldIndex) { + if (fieldClass != physType.fieldClass(fieldIndex)) { + fieldClass = Object.class; + break; + } + } + + final Class<?> arrayClass = Array.newInstance(fieldClass, 0).getClass(); + compactOutputVar = Expressions.variable(arrayClass, "outputArray"); + final DeclarationStatement exp = + Expressions.declare( + 0, compactOutputVar, new NewArrayExpression(fieldClass, 1, + Expressions.constant(outputFieldCount), null)); + compactCode.add(exp); + } else { + compactOutputVar = null; + } + + int outputField = 0; for (Ord<PhysType> ord : Ord.zip(inputPhysTypes)) { final PhysType inputPhysType = ord.e.makeNullable(joinType.generatesNullsOn(ord.i)); @@ -175,6 +207,18 @@ public class EnumUtils { break; } final int fieldCount = inputPhysType.getRowType().getFieldCount(); + if (generateCompactCode) { + // use an array copy if possible + final Expression copyExpr = + Nullness.castNonNull( + inputPhysType.getFormat().copy(parameter, Nullness.castNonNull(compactOutputVar), + outputField, fieldCount)); + compactCode.add(Expressions.statement(copyExpr)); + outputField += fieldCount; + continue; + } + + // otherwise access the fields individually for (int i = 0; i < fieldCount; i++) { Expression expression = inputPhysType.fieldReference(parameter, i, @@ -189,6 +233,34 @@ public class EnumUtils { expressions.add(expression); } } + + if (generateCompactCode) { + compactCode.add(Nullness.castNonNull(compactOutputVar)); + + // This expression generates code of the form: + // new org.apache.calcite.linq4j.function.Function2() { + // public String[] apply(org.apache.calcite.interpreter.Row left, + // org.apache.calcite.interpreter.Row right) { + // String[] outputArray = new String[left.length + right.length]; + // System.arraycopy(left.copyValues(), 0, outputArray, 0, left.length); + // System.arraycopy(right.copyValues(), 0, outputArray, left.length, right.length); + // return outputArray; + // } + // public String[] apply(Object left, Object right) { + // return apply( + // (org.apache.calcite.interpreter.Row) left, + // (org.apache.calcite.interpreter.Row) right); + // } + // } + // That is, it converts the left and right Row objects to Object[] using Row#copyValues() + // then writes each to an output Object[] using System.arraycopy() + + return Expressions.lambda( + Function2.class, + compactCode.toBlock(), + parameters); + } + return Expressions.lambda( Function2.class, physType.record(expressions), diff --git a/core/src/main/java/org/apache/calcite/adapter/enumerable/JavaRowFormat.java b/core/src/main/java/org/apache/calcite/adapter/enumerable/JavaRowFormat.java index 4810b04dea..40c09cbc86 100644 --- a/core/src/main/java/org/apache/calcite/adapter/enumerable/JavaRowFormat.java +++ b/core/src/main/java/org/apache/calcite/adapter/enumerable/JavaRowFormat.java @@ -23,6 +23,7 @@ import org.apache.calcite.linq4j.tree.Expressions; import org.apache.calcite.linq4j.tree.IndexExpression; import org.apache.calcite.linq4j.tree.MemberExpression; import org.apache.calcite.linq4j.tree.MethodCallExpression; +import org.apache.calcite.linq4j.tree.ParameterExpression; import org.apache.calcite.linq4j.tree.Types; import org.apache.calcite.rel.type.RelDataType; import org.apache.calcite.runtime.FlatLists; @@ -35,6 +36,9 @@ import org.checkerframework.checker.nullness.qual.Nullable; import java.lang.reflect.Type; import java.util.List; +import static org.apache.calcite.util.BuiltInMethod.ARRAY_COPY; +import static org.apache.calcite.util.BuiltInMethod.ROW_COPY_VALUES; + /** * How a row is represented as a Java value. */ @@ -225,6 +229,11 @@ public enum JavaRowFormat { } return EnumUtils.convert(e, fromType, fieldType); } + + @Override public Expression fieldDynamic(Expression expression, Expression field) { + return Expressions.call(expression, + BuiltInMethod.ROW_VALUE.method, Expressions.constant(field)); + } }, ARRAY { @@ -256,6 +265,23 @@ public enum JavaRowFormat { } return EnumUtils.convert(e, fromType, fieldType); } + + @Override public Expression fieldDynamic(Expression expression, Expression field) { + return Expressions.arrayIndex(expression, field); + } + + @Override public Expression setFieldDynamic(Expression expression, Expression field, + Expression value) { + final IndexExpression e = + Expressions.arrayIndex(expression, Expressions.constant(field)); + return Expressions.assign(e, value); + } + + @Override public @Nullable Expression copy(ParameterExpression parameter, + ParameterExpression outputArray, int outputStartIndex, int length) { + return Expressions.call(ARRAY_COPY.method, parameter, Expressions.constant(0), + outputArray, Expressions.constant(outputStartIndex), Expressions.constant(length)); + } }; public JavaRowFormat optimize(RelDataType rowType) { @@ -301,4 +327,33 @@ public enum JavaRowFormat { */ public abstract Expression field(Expression expression, int field, @Nullable Type fromType, Type fieldType); + + /** + * Similar to {@link #field(Expression, int, Type, Type)}, where the field index is determined + * dynamically at runtime. + */ + public Expression fieldDynamic(Expression expression, Expression field) { + throw new UnsupportedOperationException(this.toString()); + } + + public Expression setFieldDynamic(Expression expression, Expression field, Expression value) { + throw new UnsupportedOperationException(this.toString()); + } + + /** + * Returns an expression that copies the fields of a row of this type to the array. + */ + public @Nullable Expression copy(ParameterExpression parameter, + ParameterExpression outputArray, int outputStartIndex, int length) { + // Note: parameter holds an expression representing a org.apache.calcite.interpreter.Row. + + // Copy the Row as an Object[]. + final Expression rowParameterAsArrayExpression = + Expressions.call(Object[].class, parameter, ROW_COPY_VALUES.method); + + // Use System.arraycopy() with the contents of the Row as the source. + return Expressions.call(ARRAY_COPY.method, rowParameterAsArrayExpression, + Expressions.constant(0), outputArray, Expressions.constant(outputStartIndex), + Expressions.constant(length)); + } } diff --git a/core/src/main/java/org/apache/calcite/interpreter/Row.java b/core/src/main/java/org/apache/calcite/interpreter/Row.java index 1d35308c7b..073851d3b7 100644 --- a/core/src/main/java/org/apache/calcite/interpreter/Row.java +++ b/core/src/main/java/org/apache/calcite/interpreter/Row.java @@ -86,6 +86,8 @@ public class Row { } /** Returns a copy of the values. */ + // Note: This implements BuiltInMethod.ROW_COPY_VALUES. + @SuppressWarnings("unused") public @Nullable Object[] copyValues() { return values.clone(); } diff --git a/core/src/main/java/org/apache/calcite/util/BuiltInMethod.java b/core/src/main/java/org/apache/calcite/util/BuiltInMethod.java index 7b144167da..0e0a3e5390 100644 --- a/core/src/main/java/org/apache/calcite/util/BuiltInMethod.java +++ b/core/src/main/java/org/apache/calcite/util/BuiltInMethod.java @@ -183,6 +183,7 @@ public enum BuiltInMethod { JDBC_SCHEMA_DATA_SOURCE(JdbcSchema.class, "getDataSource"), ROW_VALUE(Row.class, "getObject", int.class), ROW_AS_COPY(Row.class, "asCopy", Object[].class), + ROW_COPY_VALUES(Row.class, "copyValues"), // This is an instance method that returns an Object[]. RESULT_SET_ENUMERABLE_SET_TIMEOUT(ResultSetEnumerable.class, "setTimeout", DataContext.class), RESULT_SET_ENUMERABLE_OF(ResultSetEnumerable.class, "of", DataSource.class, @@ -272,6 +273,8 @@ public enum BuiltInMethod { FUNCTION1_APPLY(Function1.class, "apply", Object.class), ARRAYS_AS_LIST(Arrays.class, "asList", Object[].class), ARRAY(SqlFunctions.class, "array", Object[].class), + ARRAY_COPY(System.class, "arraycopy", Object.class, int.class, Object.class, int.class, + int.class), // class PairList.Helper is deprecated to discourage code from calling its // methods directly, but use via Janino code generation is just fine. @SuppressWarnings("deprecation") diff --git a/core/src/test/java/org/apache/calcite/test/LargeGeneratedJoinTest.java b/core/src/test/java/org/apache/calcite/test/LargeGeneratedJoinTest.java new file mode 100644 index 0000000000..b0365d1aff --- /dev/null +++ b/core/src/test/java/org/apache/calcite/test/LargeGeneratedJoinTest.java @@ -0,0 +1,182 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to you under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.calcite.test; + +import org.apache.calcite.adapter.enumerable.EnumerableRules; +import org.apache.calcite.adapter.java.AbstractQueryableTable; +import org.apache.calcite.config.CalciteConnectionProperty; +import org.apache.calcite.config.Lex; +import org.apache.calcite.interpreter.Row; +import org.apache.calcite.jdbc.CalciteSchema; +import org.apache.calcite.linq4j.Enumerable; +import org.apache.calcite.linq4j.Linq4j; +import org.apache.calcite.linq4j.QueryProvider; +import org.apache.calcite.linq4j.Queryable; +import org.apache.calcite.plan.RelOptPlanner; +import org.apache.calcite.rel.type.RelDataType; +import org.apache.calcite.rel.type.RelDataTypeFactory; +import org.apache.calcite.runtime.Hook; +import org.apache.calcite.schema.QueryableTable; +import org.apache.calcite.schema.Schema; +import org.apache.calcite.schema.SchemaPlus; +import org.apache.calcite.schema.Table; +import org.apache.calcite.schema.impl.AbstractSchema; +import org.apache.calcite.sql.parser.SqlParseException; +import org.apache.calcite.sql.type.SqlTypeName; +import org.apache.calcite.tools.RelConversionException; +import org.apache.calcite.tools.ValidationException; + +import com.google.common.collect.ImmutableMap; + +import org.junit.jupiter.api.Test; + +import java.sql.SQLException; +import java.util.ArrayList; +import java.util.List; +import java.util.Map; +import java.util.function.BiConsumer; +import java.util.function.Consumer; +import java.util.function.Function; + +import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.junit.jupiter.api.Assertions.assertTrue; + +/** + * Test case for + * <a href="https://issues.apache.org/jira/browse/CALCITE-3094">[CALCITE-3094] + * Code of method grows beyond 64 KB when joining two tables with many fields</a>. + */ +public class LargeGeneratedJoinTest { + + /** + * Marker interface for Field. + */ + interface FieldT extends BiConsumer<RelDataTypeFactory, RelDataTypeFactory.Builder> { + } + + /** + * Marker interface for Row. + */ + interface RowT extends Function<RelDataTypeFactory, RelDataType> { + } + + static FieldT field(String name) { + return (tf, b) -> b.add(name, SqlTypeName.VARCHAR); + } + + static RowT row(FieldT... fields) { + return tf -> { + RelDataTypeFactory.Builder builder = tf.builder(); + for (FieldT f : fields) { + f.accept(tf, builder); + } + return builder.build(); + }; + } + + private static QueryableTable tab(int fieldCount) { + List<Row> lRow = new ArrayList<>(); + for (int r = 0; r < 2; r++) { + Object[] current = new Object[fieldCount]; + for (int i = 0; i < fieldCount; i++) { + current[i] = "v" + i; + } + lRow.add(Row.of(current)); + } + + List<FieldT> fields = new ArrayList<>(); + for (int i = 0; i < fieldCount; i++) { + fields.add(field("F_" + i)); + } + + final Enumerable<?> enumerable = Linq4j.asEnumerable(lRow); + return new AbstractQueryableTable(Row.class) { + + @Override public RelDataType getRowType(RelDataTypeFactory typeFactory) { + return row(fields.toArray(new FieldT[fieldCount])).apply(typeFactory); + } + + @Override public <T> Queryable<T> asQueryable(QueryProvider queryProvider, SchemaPlus schema, + String tableName) { + return (Queryable<T>) enumerable.asQueryable(); + } + }; + } + + @Test public void test() throws SqlParseException, RelConversionException, ValidationException, + SQLException { + Schema rootSchema = new AbstractSchema() { + @Override protected Map<String, Table> getTableMap() { + return ImmutableMap.of("T0", tab(100), + "T1", tab(101)); + } + }; + + final CalciteSchema sp = CalciteSchema.createRootSchema(false, true); + sp.add("ROOT", rootSchema); + + String sql = "SELECT * \n" + + "FROM ROOT.T0 \n" + + "JOIN ROOT.T1 \n" + + "ON TRUE"; + + sql = "select F_0||F_1, * from (" + sql + ")"; + + + final CalciteAssert.AssertThat ca = CalciteAssert.that() + .with(CalciteConnectionProperty.LEX, Lex.JAVA) + .withSchema("ROOT", rootSchema) + .withDefaultSchema("ROOT"); + + final CalciteAssert.AssertQuery query = ca.query(sql); + query.withHook(Hook.PLANNER, (Consumer<RelOptPlanner>) pl -> { + pl.removeRule(EnumerableRules.ENUMERABLE_CORRELATE_RULE); + pl.addRule(EnumerableRules.ENUMERABLE_BATCH_NESTED_LOOP_JOIN_RULE); + }); + + try { + query.returns(rs -> { + try { + assertTrue(rs.next()); + assertEquals(101 + 100 + 1, rs.getMetaData().getColumnCount()); + long row = 0; + do { + ++row; + for (int i = 1; i <= rs.getMetaData().getColumnCount(); ++i) { + // Rows have the format: v0v1, v0, v1, v2, ..., v99, v0, v1, v2, ..., v99, v100 + if (i == 1) { + assertEquals("v0v1", rs.getString(i), + "Error at row: " + row + ", column: " + i); + } else if (i == rs.getMetaData().getColumnCount()) { + assertEquals("v100", rs.getString(i), + "Error at row: " + row + ", column: " + i); + } else { + assertEquals("v" + ((i - 2) % 100), rs.getString(i), + "Error at row: " + row + ", column: " + i); + } + } + } while (rs.next()); + assertEquals(4, row); + } catch (SQLException e) { + throw new RuntimeException(e); + } + }); + } catch (RuntimeException ex) { + throw (SQLException) ex.getCause(); + } + } +}