This is an automated email from the ASF dual-hosted git repository.

rubenql pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/calcite.git


The following commit(s) were added to refs/heads/main by this push:
     new 1e424a0d4e [CALCITE-3094] Code of method grows beyond 64 KB when 
joining two tables with many fields
1e424a0d4e is described below

commit 1e424a0d4eeec69e4da0a504cf080cf7a847704d
Author: James Duong <james.du...@improving.com>
AuthorDate: Fri Jun 14 16:25:16 2024 -0700

    [CALCITE-3094] Code of method grows beyond 64 KB when joining two tables 
with many fields
    
    Change code generation such that when implementing joins across many fields 
up to a certain
    threshold, populate the output array using System.arraycopy() instead of 
explicitly
    instantiating an array with a large number of elements.
---
 .../calcite/adapter/enumerable/EnumUtils.java      |  72 ++++++++
 .../calcite/adapter/enumerable/JavaRowFormat.java  |  55 +++++++
 .../java/org/apache/calcite/interpreter/Row.java   |   2 +
 .../org/apache/calcite/util/BuiltInMethod.java     |   3 +
 .../calcite/test/LargeGeneratedJoinTest.java       | 182 +++++++++++++++++++++
 5 files changed, 314 insertions(+)

diff --git 
a/core/src/main/java/org/apache/calcite/adapter/enumerable/EnumUtils.java 
b/core/src/main/java/org/apache/calcite/adapter/enumerable/EnumUtils.java
index 7382beeeec..06043a567c 100644
--- a/core/src/main/java/org/apache/calcite/adapter/enumerable/EnumUtils.java
+++ b/core/src/main/java/org/apache/calcite/adapter/enumerable/EnumUtils.java
@@ -22,6 +22,7 @@ import org.apache.calcite.linq4j.AbstractEnumerable;
 import org.apache.calcite.linq4j.Enumerable;
 import org.apache.calcite.linq4j.Enumerator;
 import org.apache.calcite.linq4j.JoinType;
+import org.apache.calcite.linq4j.Nullness;
 import org.apache.calcite.linq4j.Ord;
 import org.apache.calcite.linq4j.function.Function1;
 import org.apache.calcite.linq4j.function.Function2;
@@ -30,12 +31,14 @@ import org.apache.calcite.linq4j.tree.BlockBuilder;
 import org.apache.calcite.linq4j.tree.BlockStatement;
 import org.apache.calcite.linq4j.tree.ConstantExpression;
 import org.apache.calcite.linq4j.tree.ConstantUntypedNull;
+import org.apache.calcite.linq4j.tree.DeclarationStatement;
 import org.apache.calcite.linq4j.tree.Expression;
 import org.apache.calcite.linq4j.tree.ExpressionType;
 import org.apache.calcite.linq4j.tree.Expressions;
 import org.apache.calcite.linq4j.tree.FunctionExpression;
 import org.apache.calcite.linq4j.tree.MethodCallExpression;
 import org.apache.calcite.linq4j.tree.MethodDeclaration;
+import org.apache.calcite.linq4j.tree.NewArrayExpression;
 import org.apache.calcite.linq4j.tree.ParameterExpression;
 import org.apache.calcite.linq4j.tree.Primitive;
 import org.apache.calcite.linq4j.tree.Types;
@@ -61,6 +64,7 @@ import com.google.common.collect.ImmutableMap;
 
 import org.checkerframework.checker.nullness.qual.Nullable;
 
+import java.lang.reflect.Array;
 import java.lang.reflect.Method;
 import java.lang.reflect.Modifier;
 import java.lang.reflect.Type;
@@ -160,6 +164,34 @@ public class EnumUtils {
     // Generate all fields.
     final List<Expression> expressions = new ArrayList<>();
     final int outputFieldCount = physType.getRowType().getFieldCount();
+
+    // If there are many output fields, create the output dynamically so that 
the code size stays
+    // below the limit. See CALCITE-3094.
+    final boolean generateCompactCode = outputFieldCount >= 100;
+    final ParameterExpression compactOutputVar;
+    final BlockBuilder compactCode = new BlockBuilder();
+    if (generateCompactCode) {
+      Class<?> fieldClass = physType.fieldClass(0);
+      // If all fields have the same type, use the specific type. Otherwise 
just use Object.
+      for (int fieldIndex = 1; fieldIndex < outputFieldCount; ++fieldIndex) {
+        if (fieldClass != physType.fieldClass(fieldIndex)) {
+          fieldClass = Object.class;
+          break;
+        }
+      }
+
+      final Class<?> arrayClass = Array.newInstance(fieldClass, 0).getClass();
+      compactOutputVar = Expressions.variable(arrayClass, "outputArray");
+      final DeclarationStatement exp =
+          Expressions.declare(
+              0, compactOutputVar, new NewArrayExpression(fieldClass, 1,
+              Expressions.constant(outputFieldCount), null));
+      compactCode.add(exp);
+    } else {
+      compactOutputVar = null;
+    }
+
+    int outputField = 0;
     for (Ord<PhysType> ord : Ord.zip(inputPhysTypes)) {
       final PhysType inputPhysType =
           ord.e.makeNullable(joinType.generatesNullsOn(ord.i));
@@ -175,6 +207,18 @@ public class EnumUtils {
         break;
       }
       final int fieldCount = inputPhysType.getRowType().getFieldCount();
+      if (generateCompactCode) {
+        // use an array copy if possible
+        final Expression copyExpr =
+            Nullness.castNonNull(
+                inputPhysType.getFormat().copy(parameter, 
Nullness.castNonNull(compactOutputVar),
+                outputField, fieldCount));
+        compactCode.add(Expressions.statement(copyExpr));
+        outputField += fieldCount;
+        continue;
+      }
+
+      // otherwise access the fields individually
       for (int i = 0; i < fieldCount; i++) {
         Expression expression =
             inputPhysType.fieldReference(parameter, i,
@@ -189,6 +233,34 @@ public class EnumUtils {
         expressions.add(expression);
       }
     }
+
+    if (generateCompactCode) {
+      compactCode.add(Nullness.castNonNull(compactOutputVar));
+
+      // This expression generates code of the form:
+      // new org.apache.calcite.linq4j.function.Function2() {
+      //   public String[] apply(org.apache.calcite.interpreter.Row left,
+      //       org.apache.calcite.interpreter.Row right) {
+      //     String[] outputArray = new String[left.length + right.length];
+      //     System.arraycopy(left.copyValues(), 0, outputArray, 0, 
left.length);
+      //     System.arraycopy(right.copyValues(), 0, outputArray, left.length, 
right.length);
+      //     return outputArray;
+      //   }
+      //   public String[] apply(Object left, Object right) {
+      //     return apply(
+      //         (org.apache.calcite.interpreter.Row) left,
+      //         (org.apache.calcite.interpreter.Row) right);
+      //   }
+      // }
+      // That is, it converts the left and right Row objects to Object[] using 
Row#copyValues()
+      // then writes each to an output Object[] using System.arraycopy()
+
+      return Expressions.lambda(
+          Function2.class,
+          compactCode.toBlock(),
+          parameters);
+    }
+
     return Expressions.lambda(
         Function2.class,
         physType.record(expressions),
diff --git 
a/core/src/main/java/org/apache/calcite/adapter/enumerable/JavaRowFormat.java 
b/core/src/main/java/org/apache/calcite/adapter/enumerable/JavaRowFormat.java
index 4810b04dea..40c09cbc86 100644
--- 
a/core/src/main/java/org/apache/calcite/adapter/enumerable/JavaRowFormat.java
+++ 
b/core/src/main/java/org/apache/calcite/adapter/enumerable/JavaRowFormat.java
@@ -23,6 +23,7 @@ import org.apache.calcite.linq4j.tree.Expressions;
 import org.apache.calcite.linq4j.tree.IndexExpression;
 import org.apache.calcite.linq4j.tree.MemberExpression;
 import org.apache.calcite.linq4j.tree.MethodCallExpression;
+import org.apache.calcite.linq4j.tree.ParameterExpression;
 import org.apache.calcite.linq4j.tree.Types;
 import org.apache.calcite.rel.type.RelDataType;
 import org.apache.calcite.runtime.FlatLists;
@@ -35,6 +36,9 @@ import org.checkerframework.checker.nullness.qual.Nullable;
 import java.lang.reflect.Type;
 import java.util.List;
 
+import static org.apache.calcite.util.BuiltInMethod.ARRAY_COPY;
+import static org.apache.calcite.util.BuiltInMethod.ROW_COPY_VALUES;
+
 /**
  * How a row is represented as a Java value.
  */
@@ -225,6 +229,11 @@ public enum JavaRowFormat {
       }
       return EnumUtils.convert(e, fromType, fieldType);
     }
+
+    @Override public Expression fieldDynamic(Expression expression, Expression 
field) {
+      return Expressions.call(expression,
+          BuiltInMethod.ROW_VALUE.method, Expressions.constant(field));
+    }
   },
 
   ARRAY {
@@ -256,6 +265,23 @@ public enum JavaRowFormat {
       }
       return EnumUtils.convert(e, fromType, fieldType);
     }
+
+    @Override public Expression fieldDynamic(Expression expression, Expression 
field) {
+      return Expressions.arrayIndex(expression, field);
+    }
+
+    @Override public Expression setFieldDynamic(Expression expression, 
Expression field,
+        Expression value) {
+      final IndexExpression e =
+          Expressions.arrayIndex(expression, Expressions.constant(field));
+      return Expressions.assign(e, value);
+    }
+
+    @Override public @Nullable Expression copy(ParameterExpression parameter,
+        ParameterExpression outputArray, int outputStartIndex, int length) {
+      return Expressions.call(ARRAY_COPY.method, parameter, 
Expressions.constant(0),
+          outputArray, Expressions.constant(outputStartIndex), 
Expressions.constant(length));
+    }
   };
 
   public JavaRowFormat optimize(RelDataType rowType) {
@@ -301,4 +327,33 @@ public enum JavaRowFormat {
    */
   public abstract Expression field(Expression expression, int field,
       @Nullable Type fromType, Type fieldType);
+
+  /**
+   * Similar to {@link #field(Expression, int, Type, Type)}, where the field 
index is determined
+   * dynamically at runtime.
+   */
+  public Expression fieldDynamic(Expression expression, Expression field) {
+    throw new UnsupportedOperationException(this.toString());
+  }
+
+  public Expression setFieldDynamic(Expression expression, Expression field, 
Expression value) {
+    throw new UnsupportedOperationException(this.toString());
+  }
+
+  /**
+   * Returns an expression that copies the fields of a row of this type to the 
array.
+   */
+  public @Nullable Expression copy(ParameterExpression parameter,
+      ParameterExpression outputArray, int outputStartIndex, int length) {
+    // Note: parameter holds an expression representing a 
org.apache.calcite.interpreter.Row.
+
+    // Copy the Row as an Object[].
+    final Expression rowParameterAsArrayExpression =
+        Expressions.call(Object[].class, parameter, ROW_COPY_VALUES.method);
+
+    // Use System.arraycopy() with the contents of the Row as the source.
+    return Expressions.call(ARRAY_COPY.method, rowParameterAsArrayExpression,
+        Expressions.constant(0), outputArray, 
Expressions.constant(outputStartIndex),
+        Expressions.constant(length));
+  }
 }
diff --git a/core/src/main/java/org/apache/calcite/interpreter/Row.java 
b/core/src/main/java/org/apache/calcite/interpreter/Row.java
index 1d35308c7b..073851d3b7 100644
--- a/core/src/main/java/org/apache/calcite/interpreter/Row.java
+++ b/core/src/main/java/org/apache/calcite/interpreter/Row.java
@@ -86,6 +86,8 @@ public class Row {
   }
 
   /** Returns a copy of the values. */
+  // Note: This implements BuiltInMethod.ROW_COPY_VALUES.
+  @SuppressWarnings("unused")
   public @Nullable Object[] copyValues() {
     return values.clone();
   }
diff --git a/core/src/main/java/org/apache/calcite/util/BuiltInMethod.java 
b/core/src/main/java/org/apache/calcite/util/BuiltInMethod.java
index 7b144167da..0e0a3e5390 100644
--- a/core/src/main/java/org/apache/calcite/util/BuiltInMethod.java
+++ b/core/src/main/java/org/apache/calcite/util/BuiltInMethod.java
@@ -183,6 +183,7 @@ public enum BuiltInMethod {
   JDBC_SCHEMA_DATA_SOURCE(JdbcSchema.class, "getDataSource"),
   ROW_VALUE(Row.class, "getObject", int.class),
   ROW_AS_COPY(Row.class, "asCopy", Object[].class),
+  ROW_COPY_VALUES(Row.class, "copyValues"), // This is an instance method that 
returns an Object[].
   RESULT_SET_ENUMERABLE_SET_TIMEOUT(ResultSetEnumerable.class, "setTimeout",
       DataContext.class),
   RESULT_SET_ENUMERABLE_OF(ResultSetEnumerable.class, "of", DataSource.class,
@@ -272,6 +273,8 @@ public enum BuiltInMethod {
   FUNCTION1_APPLY(Function1.class, "apply", Object.class),
   ARRAYS_AS_LIST(Arrays.class, "asList", Object[].class),
   ARRAY(SqlFunctions.class, "array", Object[].class),
+  ARRAY_COPY(System.class, "arraycopy", Object.class, int.class, Object.class, 
int.class,
+      int.class),
   // class PairList.Helper is deprecated to discourage code from calling its
   // methods directly, but use via Janino code generation is just fine.
   @SuppressWarnings("deprecation")
diff --git 
a/core/src/test/java/org/apache/calcite/test/LargeGeneratedJoinTest.java 
b/core/src/test/java/org/apache/calcite/test/LargeGeneratedJoinTest.java
new file mode 100644
index 0000000000..b0365d1aff
--- /dev/null
+++ b/core/src/test/java/org/apache/calcite/test/LargeGeneratedJoinTest.java
@@ -0,0 +1,182 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to you under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.calcite.test;
+
+import org.apache.calcite.adapter.enumerable.EnumerableRules;
+import org.apache.calcite.adapter.java.AbstractQueryableTable;
+import org.apache.calcite.config.CalciteConnectionProperty;
+import org.apache.calcite.config.Lex;
+import org.apache.calcite.interpreter.Row;
+import org.apache.calcite.jdbc.CalciteSchema;
+import org.apache.calcite.linq4j.Enumerable;
+import org.apache.calcite.linq4j.Linq4j;
+import org.apache.calcite.linq4j.QueryProvider;
+import org.apache.calcite.linq4j.Queryable;
+import org.apache.calcite.plan.RelOptPlanner;
+import org.apache.calcite.rel.type.RelDataType;
+import org.apache.calcite.rel.type.RelDataTypeFactory;
+import org.apache.calcite.runtime.Hook;
+import org.apache.calcite.schema.QueryableTable;
+import org.apache.calcite.schema.Schema;
+import org.apache.calcite.schema.SchemaPlus;
+import org.apache.calcite.schema.Table;
+import org.apache.calcite.schema.impl.AbstractSchema;
+import org.apache.calcite.sql.parser.SqlParseException;
+import org.apache.calcite.sql.type.SqlTypeName;
+import org.apache.calcite.tools.RelConversionException;
+import org.apache.calcite.tools.ValidationException;
+
+import com.google.common.collect.ImmutableMap;
+
+import org.junit.jupiter.api.Test;
+
+import java.sql.SQLException;
+import java.util.ArrayList;
+import java.util.List;
+import java.util.Map;
+import java.util.function.BiConsumer;
+import java.util.function.Consumer;
+import java.util.function.Function;
+
+import static org.junit.jupiter.api.Assertions.assertEquals;
+import static org.junit.jupiter.api.Assertions.assertTrue;
+
+/**
+ * Test case for
+ * <a href="https://issues.apache.org/jira/browse/CALCITE-3094";>[CALCITE-3094]
+ * Code of method grows beyond 64 KB when joining two tables with many 
fields</a>.
+ */
+public class LargeGeneratedJoinTest {
+
+  /**
+   * Marker interface for Field.
+   */
+  interface FieldT extends BiConsumer<RelDataTypeFactory, 
RelDataTypeFactory.Builder> {
+  }
+
+  /**
+   * Marker interface for Row.
+   */
+  interface RowT extends Function<RelDataTypeFactory, RelDataType> {
+  }
+
+  static FieldT field(String name) {
+    return (tf, b) -> b.add(name, SqlTypeName.VARCHAR);
+  }
+
+  static RowT row(FieldT... fields) {
+    return tf -> {
+      RelDataTypeFactory.Builder builder = tf.builder();
+      for (FieldT f : fields) {
+        f.accept(tf, builder);
+      }
+      return builder.build();
+    };
+  }
+
+  private static QueryableTable tab(int fieldCount) {
+    List<Row> lRow = new ArrayList<>();
+    for (int r = 0; r < 2; r++) {
+      Object[] current = new Object[fieldCount];
+      for (int i = 0; i < fieldCount; i++) {
+        current[i] = "v" + i;
+      }
+      lRow.add(Row.of(current));
+    }
+
+    List<FieldT> fields = new ArrayList<>();
+    for (int i = 0; i < fieldCount; i++) {
+      fields.add(field("F_" + i));
+    }
+
+    final Enumerable<?> enumerable = Linq4j.asEnumerable(lRow);
+    return new AbstractQueryableTable(Row.class) {
+
+      @Override public RelDataType getRowType(RelDataTypeFactory typeFactory) {
+        return row(fields.toArray(new FieldT[fieldCount])).apply(typeFactory);
+      }
+
+      @Override public <T> Queryable<T> asQueryable(QueryProvider 
queryProvider, SchemaPlus schema,
+          String tableName) {
+        return (Queryable<T>) enumerable.asQueryable();
+      }
+    };
+  }
+
+  @Test public void test() throws SqlParseException, RelConversionException, 
ValidationException,
+      SQLException {
+    Schema rootSchema = new AbstractSchema() {
+      @Override protected Map<String, Table> getTableMap() {
+        return ImmutableMap.of("T0", tab(100),
+            "T1", tab(101));
+      }
+    };
+
+    final CalciteSchema sp = CalciteSchema.createRootSchema(false, true);
+    sp.add("ROOT", rootSchema);
+
+    String sql = "SELECT * \n"
+        + "FROM ROOT.T0 \n"
+        + "JOIN ROOT.T1 \n"
+        + "ON TRUE";
+
+    sql = "select F_0||F_1, * from (" + sql + ")";
+
+
+    final CalciteAssert.AssertThat ca = CalciteAssert.that()
+        .with(CalciteConnectionProperty.LEX, Lex.JAVA)
+        .withSchema("ROOT", rootSchema)
+        .withDefaultSchema("ROOT");
+
+    final CalciteAssert.AssertQuery query = ca.query(sql);
+    query.withHook(Hook.PLANNER, (Consumer<RelOptPlanner>) pl -> {
+      pl.removeRule(EnumerableRules.ENUMERABLE_CORRELATE_RULE);
+      pl.addRule(EnumerableRules.ENUMERABLE_BATCH_NESTED_LOOP_JOIN_RULE);
+    });
+
+    try {
+      query.returns(rs -> {
+        try {
+          assertTrue(rs.next());
+          assertEquals(101 + 100 + 1, rs.getMetaData().getColumnCount());
+          long row = 0;
+          do {
+            ++row;
+            for (int i = 1; i <= rs.getMetaData().getColumnCount(); ++i) {
+              // Rows have the format: v0v1, v0, v1, v2, ..., v99, v0, v1, v2, 
..., v99, v100
+              if (i == 1) {
+                assertEquals("v0v1", rs.getString(i),
+                    "Error at row: " + row + ", column: " + i);
+              } else if (i == rs.getMetaData().getColumnCount()) {
+                assertEquals("v100", rs.getString(i),
+                    "Error at row: " + row + ", column: " + i);
+              } else {
+                assertEquals("v" + ((i - 2) % 100), rs.getString(i),
+                    "Error at row: " + row + ", column: " + i);
+              }
+            }
+          } while (rs.next());
+          assertEquals(4, row);
+        } catch (SQLException e) {
+          throw new RuntimeException(e);
+        }
+      });
+    } catch (RuntimeException ex) {
+      throw (SQLException) ex.getCause();
+    }
+  }
+}

Reply via email to