This is an automated email from the ASF dual-hosted git repository.

asolimando pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/calcite.git


The following commit(s) were added to refs/heads/main by this push:
     new f1f9a2b1e3 [CALCITE-7422] Support large plan optimization mode for 
HepPlanner
f1f9a2b1e3 is described below

commit f1f9a2b1e3c25188c3493068bd768aadfad29fd8
Author: wenzhuang.zwz <[email protected]>
AuthorDate: Tue Feb 24 18:46:25 2026 +0800

    [CALCITE-7422] Support large plan optimization mode for HepPlanner
    
    Key optimizations of large plan mode:
    
    1. Reusable graph: avoids re-initialization between program executions.
    2. Efficient traversal: skips stable subtrees.
    3. Fine-grained garbage collection (GC).
    
    Usage: see comments of HepPlanner()
    
    Perf result of LargePlanBenchmark:
    Match Order     Union Num  Node Count      Rule Transforms Time (ms)
    --------------------------------------------------------------------
    ARBITRARY       1000       4000            6006            1043
    ARBITRARY       3000       12000           18006           1306
    ARBITRARY       10000      40000           60006           3655
    ARBITRARY       30000      120000          180006          13040
    DEPTH_FIRST     1000       4000            6006            347
    DEPTH_FIRST     3000       12000           18006           1068
    DEPTH_FIRST     10000      40000           60006           4165
    DEPTH_FIRST     30000      120000          180006          12898
    BOTTOM_UP       1000       4000            6006            1145
    BOTTOM_UP       3000       12000           18006           10152
    TOP_DOWN        1000       4000            6006            1193
    TOP_DOWN        3000       12000           18006           8074
---
 build.gradle.kts                                   |  42 ++
 .../calcite/config/CalciteSystemProperty.java      |   8 +
 .../apache/calcite/plan/AbstractRelOptPlanner.java |  41 +-
 .../org/apache/calcite/plan/hep/HepPlanner.java    | 284 +++++++++++--
 .../apache/calcite/plan/hep/HepVertexIterator.java |  92 ++++
 .../org/apache/calcite/test/HepPlannerTest.java    |  96 ++++-
 .../calcite/benchmarks/LargePlanBenchmark.java     | 465 +++++++++++++++++++--
 7 files changed, 964 insertions(+), 64 deletions(-)

diff --git a/build.gradle.kts b/build.gradle.kts
index 33ff8905c8..63ac833db6 100644
--- a/build.gradle.kts
+++ b/build.gradle.kts
@@ -90,6 +90,28 @@ fun reportsForHumans() = 
!(System.getenv()["CI"]?.toBoolean() ?: false)
 // Inherited from stage-vote-release-plugin: skipSign, useGpgCmd
 // Inherited from gradle-extensions-plugin: slowSuiteLogThreshold=0L, 
slowTestLogThreshold=2000L
 
+val hepLargePlanModeTestIncludes = mapOf(
+    ":core" to listOf(
+        "**/org/apache/calcite/test/HepPlannerTest.class",
+        "**/org/apache/calcite/test/RelOptRulesTest.class",
+        "**/org/apache/calcite/test/RelMetadataTest.class",
+        "**/org/apache/calcite/sql2rel/RelFieldTrimmerTest.class",
+        "**/org/apache/calcite/rel/rel2sql/RelToSqlConverterTest.class",
+        "**/org/apache/calcite/test/SqlToRelConverterTest.class",
+        "**/org/apache/calcite/test/SqlHintsConverterTest.class",
+        "**/org/apache/calcite/test/InterpreterTest.class",
+        
"**/org/apache/calcite/test/MaterializedViewSubstitutionVisitorTest.class",
+        "**/org/apache/calcite/test/RuleMatchVisualizerTest.class",
+        "**/org/apache/calcite/test/enumerable/EnumerableJoinTest.class",
+        "**/org/apache/calcite/test/enumerable/EnumerableHashJoinTest.class",
+        "**/org/apache/calcite/test/enumerable/EnumerableCorrelateTest.class"
+    ),
+    ":plus" to listOf(
+        "**/org/apache/calcite/adapter/tpch/TpchTest.class",
+        "**/org/apache/calcite/sql2rel/TpcdsSqlToRelTest.class"
+    )
+)
+
 // Java versions prior to 1.8.0u202 have known issues that cause invalid 
bytecode in certain patterns
 // of annotation usage.
 // So we require at least 1.8.0u202
@@ -137,6 +159,11 @@ fun reportsForHumans() = 
!(System.getenv()["CI"]?.toBoolean() ?: false)
 
 println("Building Apache Calcite $buildVersion")
 
+val testHepLargePlanMode by tasks.registering() {
+    group = LifecycleBasePlugin.VERIFICATION_GROUP
+    description = "Runs HepPlanner regression tests with large-plan mode 
enabled by default."
+}
+
 releaseArtifacts {
     fromProject(":release")
 }
@@ -906,6 +933,21 @@ fun passProperty(name: String, default: String? = null) {
                 }
                 jvmArgs("-Xmx6g")
             }
+            hepLargePlanModeTestIncludes[project.path]?.let { includes ->
+                val hepLargePlanModeTask = 
register<Test>("testHepLargePlanMode") {
+                    group = LifecycleBasePlugin.VERIFICATION_GROUP
+                    description =
+                        "Runs HepPlanner-heavy tests with 
calcite.hep.large.plan.mode=true."
+                    testClassesDirs = sourceSets["test"].output.classesDirs
+                    classpath = sourceSets["test"].runtimeClasspath
+                    include(includes)
+                    systemProperty("calcite.hep.large.plan.mode", "true")
+                    shouldRunAfter("test")
+                }
+                rootProject.tasks.named("testHepLargePlanMode") {
+                    dependsOn(hepLargePlanModeTask)
+                }
+            }
             configureEach<SpotBugsTask> {
                 group = LifecycleBasePlugin.VERIFICATION_GROUP
                 if (enableSpotBugs) {
diff --git 
a/core/src/main/java/org/apache/calcite/config/CalciteSystemProperty.java 
b/core/src/main/java/org/apache/calcite/config/CalciteSystemProperty.java
index 70db4505d6..f38d405e34 100644
--- a/core/src/main/java/org/apache/calcite/config/CalciteSystemProperty.java
+++ b/core/src/main/java/org/apache/calcite/config/CalciteSystemProperty.java
@@ -138,6 +138,14 @@ public final class CalciteSystemProperty<T> {
   public static final CalciteSystemProperty<Boolean> TOPDOWN_OPT =
       booleanProperty("calcite.planner.topdown.opt", false);
 
+  /** Whether {@link org.apache.calcite.plan.hep.HepPlanner} should enable
+   * large-plan mode by default.
+   *
+   * <p>This property only affects planners that do not call
+   * {@code setLargePlanMode} explicitly. */
+  public static final CalciteSystemProperty<Boolean> 
HEP_PLANNER_LARGE_PLAN_MODE =
+      booleanProperty("calcite.hep.large.plan.mode", false);
+
 
   /** Whether to disable generate rel data type digest string.
    *
diff --git 
a/core/src/main/java/org/apache/calcite/plan/AbstractRelOptPlanner.java 
b/core/src/main/java/org/apache/calcite/plan/AbstractRelOptPlanner.java
index 6cb7c4fed4..654f256893 100644
--- a/core/src/main/java/org/apache/calcite/plan/AbstractRelOptPlanner.java
+++ b/core/src/main/java/org/apache/calcite/plan/AbstractRelOptPlanner.java
@@ -28,6 +28,7 @@
 import org.apache.calcite.util.trace.CalciteTrace;
 
 import com.google.common.collect.ImmutableList;
+import com.google.common.collect.ImmutableMap;
 
 import org.checkerframework.checker.initialization.qual.UnknownInitialization;
 import org.checkerframework.checker.nullness.qual.MonotonicNonNull;
@@ -119,6 +120,17 @@ protected AbstractRelOptPlanner(RelOptCostFactory 
costFactory,
     addListener(new RuleEventLogger());
   }
 
+  /**
+   * Explicitly enables rule attempts tracking regardless of log level.
+   * This is useful for benchmarks and testing.
+   */
+  public void enableRuleAttemptsTracking() {
+    if (this.ruleAttemptsListener == null) {
+      this.ruleAttemptsListener = new RuleAttemptsListener();
+      addListener(this.ruleAttemptsListener);
+    }
+  }
+
   //~ Methods ----------------------------------------------------------------
 
   @Override public void clear() {}
@@ -308,13 +320,31 @@ protected void onNewClass(RelNode node) {
     // do nothing
   }
 
-  protected void dumpRuleAttemptsInfo() {
+  public void dumpRuleAttemptsInfo() {
     if (this.ruleAttemptsListener != null) {
       RULE_ATTEMPTS_LOGGER.debug("Rule Attempts Info for " + 
this.getClass().getSimpleName());
       RULE_ATTEMPTS_LOGGER.debug(this.ruleAttemptsListener.dump());
     }
   }
 
+  /**
+   * Returns the rule attempts information as a map.
+   * The map key is the rule string representation,
+   * and the value is a Pair of (attemptCount, totalTimeMicros).
+   *
+   * <p>This is useful for programmatic access to rule execution statistics,
+   * e.g., in benchmarks to verify that rules have been applied.
+   *
+   * @return Map of rule to the Pair of (attempt count, total time in 
microseconds),
+   *         or empty map if rule attempts tracking is not enabled
+   */
+  public Map<String, Pair<Long, Long>> getRuleAttemptsInfo() {
+    if (this.ruleAttemptsListener == null) {
+      return ImmutableMap.of();
+    }
+    return this.ruleAttemptsListener.getRuleAttempts();
+  }
+
   /**
    * Fires a rule, taking care of tracing and listener notification.
    *
@@ -488,6 +518,15 @@ private static class RuleAttemptsListener implements 
RelOptListener {
     @Override public void relChosen(RelChosenEvent event) {
     }
 
+    /**
+     * Returns a copy of the rule attempts map.
+     * The map key is the rule string representation,
+     * and the value is a Pair of (attemptCount, totalTimeMicros).
+     */
+    public Map<String, Pair<Long, Long>> getRuleAttempts() {
+      return ImmutableMap.copyOf(this.ruleAttempts);
+    }
+
     public String dump() {
       // Sort rules by number of attempts descending, then by rule elapsed 
time descending,
       // then by rule name ascending.
diff --git a/core/src/main/java/org/apache/calcite/plan/hep/HepPlanner.java 
b/core/src/main/java/org/apache/calcite/plan/hep/HepPlanner.java
index c5f8b5610f..d2a39d4a43 100644
--- a/core/src/main/java/org/apache/calcite/plan/hep/HepPlanner.java
+++ b/core/src/main/java/org/apache/calcite/plan/hep/HepPlanner.java
@@ -16,6 +16,7 @@
  */
 package org.apache.calcite.plan.hep;
 
+import org.apache.calcite.config.CalciteSystemProperty;
 import org.apache.calcite.linq4j.function.Function2;
 import org.apache.calcite.linq4j.function.Functions;
 import org.apache.calcite.plan.AbstractRelOptPlanner;
@@ -40,6 +41,7 @@
 import org.apache.calcite.rel.metadata.RelMetadataProvider;
 import org.apache.calcite.rel.metadata.RelMetadataQuery;
 import org.apache.calcite.rel.type.RelDataType;
+import org.apache.calcite.util.ImmutableIntList;
 import org.apache.calcite.util.Pair;
 import org.apache.calcite.util.Util;
 import org.apache.calcite.util.graph.BreadthFirstIterator;
@@ -54,21 +56,23 @@
 import com.google.common.collect.HashMultimap;
 import com.google.common.collect.ImmutableList;
 import com.google.common.collect.Multimap;
+import com.google.common.collect.Sets;
 
 import org.checkerframework.checker.nullness.qual.Nullable;
 
 import java.util.ArrayDeque;
 import java.util.ArrayList;
 import java.util.Collection;
+import java.util.Deque;
 import java.util.HashMap;
 import java.util.HashSet;
+import java.util.IdentityHashMap;
 import java.util.Iterator;
 import java.util.LinkedHashSet;
 import java.util.List;
 import java.util.Map;
 import java.util.Queue;
 import java.util.Set;
-import java.util.stream.Collectors;
 
 import static com.google.common.base.Preconditions.checkArgument;
 
@@ -125,7 +129,7 @@ public class HepPlanner extends AbstractRelOptPlanner {
    *
    * <p>Value: the set of {@link RelOptRule}s already fired for that exact ID 
list.
    */
-  private final Multimap<List<Integer>, RelOptRule> firedRulesCache = 
HashMultimap.create();
+  private final Multimap<ImmutableIntList, RelOptRule> firedRulesCache = 
HashMultimap.create();
 
   /**
    * Reverse index for {@link #firedRulesCache}, used for cleanup/GC:
@@ -136,11 +140,17 @@ public class HepPlanner extends AbstractRelOptPlanner {
    *
    * <p>Value: match-key ID lists in {@link #firedRulesCache} that contain the 
key ID.
    */
-  private final Multimap<Integer, List<Integer>> firedRulesCacheIndex = 
HashMultimap.create();
-
+  private final Multimap<Integer, ImmutableIntList> firedRulesCacheIndex = 
HashMultimap.create();
 
   private boolean enableFiredRulesCache = false;
 
+  /** Enables optimizations for large plans.
+   * This optimization improves performance for plans of any scale.
+   * To be removed in the future; the optimization will become the default 
behavior.
+   */
+  private boolean largePlanMode = false;
+
+
   //~ Constructors -----------------------------------------------------------
 
   /**
@@ -181,12 +191,55 @@ public HepPlanner(
     this.mainProgram = requireNonNull(program, "program");
     this.onCopyHook = Util.first(onCopyHook, Functions.ignore2());
     this.noDag = noDag;
+    this.largePlanMode = 
CalciteSystemProperty.HEP_PLANNER_LARGE_PLAN_MODE.value();
+  }
+
+  /**
+   * Creates a new {@code HepPlanner} capable of executing multiple HepPrograms
+   * with (noDag = false, isLargePlanMode = true, enableFiredRulesCache = 
true).
+   *
+   * <p>Unlike planners that require setRoot for every optimization pass,
+   * this planner preserves the internal graph structure and optimized plan 
across
+   * successive executions. This allows for multiphase optimization where the
+   * output of one {@link HepProgram} serves as the immediate starting point 
for the next.
+   *
+   * <p><b>Usage Example:</b>
+   * <pre>{@code
+   *   HepPlanner planner = new HepPlanner();
+   *   // or use other constructor and set 
isLargePlanMode/enableFiredRulesCache = true
+   *   // HepPlanner planner = new HepPlanner(new HepProgramBuilder().build(), 
...);
+   *   // planner.setEnableFiredRulesCache(true);
+   *   // planner.setLargePlanMode(true);
+   *   planner.setRoot(initPlanRoot);
+   *   planner.executeProgram(phase1Program);
+   *   planner.dumpRuleAttemptsInfo(); // optional
+   *   planner.clearRules(); // clear the rules and rule match caches, the 
graph is preserved
+   *   // other logic ...
+   *   planner.executeProgram(phase2Program);
+   *   planner.clearRules();
+   *   ...
+   *   RelNode optimized = planner.buildFinalPlan();
+   * }</pre>
+   *
+   * @see #setRoot(RelNode)
+   * @see #executeProgram(HepProgram)
+   * @see #dumpRuleAttemptsInfo()
+   * @see #clearRules()
+   * @see #buildFinalPlan()
+   */
+  public HepPlanner() {
+    this(HepProgram.builder().build(), null, false, null, 
RelOptCostImpl.FACTORY);
+    this.largePlanMode = true;
+    this.enableFiredRulesCache = true;
   }
 
   //~ Methods ----------------------------------------------------------------
 
   @Override public void setRoot(RelNode rel) {
-    root = addRelToGraph(rel);
+    // initRelToVertexCache is used to quickly skip common nodes before traversing their inputs
+    IdentityHashMap<RelNode, HepRelVertex> initRelToVertexCache = 
(isLargePlanMode() && !noDag)
+        ? new IdentityHashMap<>() : null;
+    root = addRelToGraph(rel, initRelToVertexCache);
     dumpGraph();
   }
 
@@ -196,14 +249,30 @@ public HepPlanner(
 
   @Override public void clear() {
     super.clear();
+    this.materializations.clear();
+    clearRules();
+  }
+
+  /** Clears the rules and rule match caches while preserving the internal 
graph
+   * structure. This is useful for multiphase optimization where the graph 
should
+   * be reused across successive {@link HepProgram} executions.
+   */
+  public void clearRules() {
     for (RelOptRule rule : getRules()) {
       removeRule(rule);
     }
-    this.materializations.clear();
     this.firedRulesCache.clear();
     this.firedRulesCacheIndex.clear();
   }
 
+  public boolean isLargePlanMode() {
+    return largePlanMode;
+  }
+
+  public void setLargePlanMode(final boolean largePlanMode) {
+    this.largePlanMode = largePlanMode;
+  }
+
   @Override public RelNode changeTraits(RelNode rel, RelTraitSet toTraits) {
     // Ignore traits, except for the root, where we remember
     // what the final conversion should be.
@@ -224,6 +293,10 @@ public HepPlanner(
     return buildFinalPlan(requireNonNull(root, "'root' must not be null"));
   }
 
+  public RelNode buildFinalPlan() {
+    return buildFinalPlan(requireNonNull(root, "'root' must not be null"));
+  }
+
   /**
    * Enables or disables the fire-rule cache.
    *
@@ -237,7 +310,7 @@ public void setEnableFiredRulesCache(boolean enable) {
 
   /** Top-level entry point for a program. Initializes state and then invokes
    * the program. */
-  private void executeProgram(HepProgram program) {
+  public void executeProgram(HepProgram program) {
     final HepInstruction.PrepareContext px =
         HepInstruction.PrepareContext.create(this);
     final HepState state = program.prepare(px);
@@ -249,7 +322,7 @@ void executeProgram(HepProgram instruction, 
HepProgram.State state) {
     state.instructionStates.forEach(instructionState -> {
       instructionState.execute();
       int delta = nTransformations - nTransformationsLastGC;
-      if (delta > graphSizeLastGC) {
+      if (!isLargePlanMode() && delta > graphSizeLastGC) {
         // The number of transformations performed since the last
         // garbage collection is greater than the number of vertices in
         // the graph at that time.  That means there should be a
@@ -444,13 +517,20 @@ private void applyRules(HepProgram.State programState,
     final boolean fullRestartAfterTransformation =
         programState.matchOrder != HepMatchOrder.ARBITRARY
             && programState.matchOrder != HepMatchOrder.DEPTH_FIRST;
+    final boolean useHepVertexIterator = (programState.matchOrder == 
HepMatchOrder.ARBITRARY
+        || programState.matchOrder == HepMatchOrder.DEPTH_FIRST) && 
isLargePlanMode();
 
     int nMatches = 0;
 
     boolean fixedPoint;
     do {
-      Iterator<HepRelVertex> iter =
-          getGraphIterator(programState, requireNonNull(root, "root"));
+      Iterator<HepRelVertex> iter;
+      if (!useHepVertexIterator) {
+        iter = getGraphIterator(programState, requireNonNull(root, "root"));
+      } else {
+        iter = HepVertexIterator.of(requireNonNull(root, "root"), new 
HashSet<>()).iterator();
+      }
+
       fixedPoint = true;
       while (iter.hasNext()) {
         HepRelVertex vertex = iter.next();
@@ -470,7 +550,21 @@ private void applyRules(HepProgram.State programState,
             // To the extent possible, pick up where we left
             // off; have to create a new iterator because old
             // one was invalidated by transformation.
-            iter = getGraphIterator(programState, newVertex);
+            if (!useHepVertexIterator) {
+              iter = getGraphIterator(programState, newVertex);
+            } else {
+              // Continue from newVertex and keep previous iterator status.
+              // It prevents revisiting the large plan's stable subgraph from 
root.
+              // A stable subgraph is a part of the DAG to which no rules will 
be applied.
+              // For a plan like this, every node replacement in subgraph2 may 
reset the iterator
+              // to the root, so subgraph1, although stable, will be visited 
repeatedly.
+              // root
+              //   <- subgraph1_root (stable)
+              //     <- ... other nodes in subgraph1 (stable)
+              //   <- subgraph2_root
+              //     <- ... other nodes in subgraph2 (with many node 
replacements)
+              iter = ((HepVertexIterator<HepRelVertex>) 
iter).continueFrom(newVertex);
+            }
             if (programState.matchOrder == HepMatchOrder.DEPTH_FIRST) {
               nMatches =
                   depthFirstApply(programState, iter, rules, forceConversions, 
nMatches);
@@ -493,11 +587,18 @@ private Iterator<HepRelVertex> getGraphIterator(
     switch (requireNonNull(programState.matchOrder, 
"programState.matchOrder")) {
     case ARBITRARY:
     case DEPTH_FIRST:
+      if (isLargePlanMode()) {
+        return HepVertexIterator.of(start, new HashSet<>()).iterator();
+      }
       return DepthFirstIterator.of(graph, start).iterator();
     case TOP_DOWN:
     case BOTTOM_UP:
       assert start == root;
-      collectGarbage();
+      if (!isLargePlanMode()) {
+        // NOTE: We do not need to run garbage collection for the whole graph 
here.
+        // tryCleanVertices already cleans up potentially removed vertices.
+        collectGarbage();
+      }
       return TopologicalOrderIterator.of(graph, 
programState.matchOrder).iterator();
     default:
       throw new
@@ -551,6 +652,20 @@ private Iterator<HepRelVertex> getGraphIterator(
       return null;
     }
 
+    // Cache the fired rule before constructing a HepRuleCall.
+    ImmutableIntList relIds = null;
+    if (enableFiredRulesCache) {
+      int[] ids = new int[bindings.size()];
+      for (int i = 0; i < bindings.size(); i++) {
+        ids[i] = bindings.get(i).getId();
+      }
+      relIds = ImmutableIntList.of(ids);
+      Collection<RelOptRule> rules = firedRulesCache.get(relIds);
+      if (rules.contains(rule)) {
+        return null;
+      }
+    }
+
     HepRuleCall call =
         new HepRuleCall(
             this,
@@ -559,14 +674,6 @@ private Iterator<HepRelVertex> getGraphIterator(
             nodeChildren,
             parents);
 
-    List<Integer> relIds = null;
-    if (enableFiredRulesCache) {
-      relIds = 
call.getRelList().stream().map(RelNode::getId).collect(Collectors.toList());
-      if (firedRulesCache.get(relIds).contains(rule)) {
-        return null;
-      }
-    }
-
     // Allow the rule to apply its own side-conditions.
     if (!rule.matches(call)) {
       return null;
@@ -576,8 +683,8 @@ private Iterator<HepRelVertex> getGraphIterator(
 
     if (relIds != null) {
       firedRulesCache.put(relIds, rule);
-      for (Integer relId : relIds) {
-        firedRulesCacheIndex.put(relId, relIds);
+      for (int i = 0; i < relIds.size(); i++) {
+        firedRulesCacheIndex.put(relIds.getInt(i), relIds);
       }
     }
 
@@ -774,7 +881,9 @@ private HepRelVertex applyTransformationResults(
       parents.add(parent);
     }
 
-    HepRelVertex newVertex = addRelToGraph(bestRel);
+    HepRelVertex newVertex = addRelToGraph(bestRel, null);
+    // LinkedHashSet preserves insertion order during iteration. It is debugging-friendly.
+    Set<HepRelVertex> garbageVertexSet = new LinkedHashSet<>();
 
     // There's a chance that newVertex is the same as one
     // of the parents due to common subexpression recognition
@@ -785,10 +894,12 @@ private HepRelVertex applyTransformationResults(
     if (iParentMatch != -1) {
       newVertex = parents.get(iParentMatch);
     } else {
-      contractVertices(newVertex, vertex, parents);
+      contractVertices(newVertex, vertex, parents, garbageVertexSet);
     }
 
-    if (getListener() != null) {
+    if (isLargePlanMode()) {
+      collectGarbage(garbageVertexSet);
+    } else if (getListener() != null) {
       // Assume listener doesn't want to see garbage.
       collectGarbage();
     }
@@ -824,19 +935,26 @@ private HepRelVertex applyTransformationResults(
   }
 
   private HepRelVertex addRelToGraph(
-      RelNode rel) {
+      RelNode rel, @Nullable IdentityHashMap<RelNode, HepRelVertex> 
initRelToVertexCache) {
     // Check if a transformation already produced a reference
     // to an existing vertex.
     if (graph.vertexSet().contains(rel)) {
       return (HepRelVertex) rel;
     }
 
+    // Fast lookup of an equivalent vertex during setRoot, before adding children.
+    if (initRelToVertexCache != null && initRelToVertexCache.containsKey(rel)) 
{
+      HepRelVertex vertex = initRelToVertexCache.get(rel);
+      assert vertex != null;
+      return vertex;
+    }
+
     // Recursively add children, replacing this rel's inputs
     // with corresponding child vertices.
     final List<RelNode> inputs = rel.getInputs();
     final List<RelNode> newInputs = new ArrayList<>();
     for (RelNode input1 : inputs) {
-      HepRelVertex childVertex = addRelToGraph(input1);
+      HepRelVertex childVertex = addRelToGraph(input1, initRelToVertexCache);
       newInputs.add(childVertex);
     }
 
@@ -868,6 +986,10 @@ private HepRelVertex addRelToGraph(
       graph.addEdge(newVertex, (HepRelVertex) input);
     }
 
+    if (initRelToVertexCache != null) {
+      initRelToVertexCache.put(rel, newVertex);
+    }
+
     nTransformations++;
     return newVertex;
   }
@@ -875,7 +997,8 @@ private HepRelVertex addRelToGraph(
   private void contractVertices(
       HepRelVertex preservedVertex,
       HepRelVertex discardedVertex,
-      List<HepRelVertex> parents) {
+      List<HepRelVertex> parents,
+      Set<HepRelVertex> garbageVertexSet) {
     if (preservedVertex == discardedVertex) {
       // Nop.
       return;
@@ -897,6 +1020,18 @@ private void contractVertices(
       }
       clearCache(parent);
       graph.removeEdge(parent, discardedVertex);
+
+      if (!noDag && isLargePlanMode()) {
+        // Recursive merge parent path
+        HepRelVertex addedVertex = 
mapDigestToVertex.get(parentRel.getRelDigest());
+        if (addedVertex != null && addedVertex != parent) {
+          List<HepRelVertex> parentCopy = // contractVertices will change 
predecessorList
+              new ArrayList<>(Graphs.predecessorListOf(graph, parent));
+          contractVertices(addedVertex, parent, parentCopy, garbageVertexSet);
+          continue;
+        }
+      }
+
       graph.addEdge(parent, preservedVertex);
       updateVertex(parent, parentRel);
     }
@@ -904,10 +1039,13 @@ private void contractVertices(
     // NOTE:  we don't actually do graph.removeVertex(discardedVertex),
     // because it might still be reachable from preservedVertex.
     // Leave that job for garbage collection.
+    // If isLargePlanMode is true, we will do fine-grained GC in 
tryCleanVertices
+    // by tracking discarded vertex subtree's inward references.
 
     if (discardedVertex == root) {
       root = preservedVertex;
     }
+    garbageVertexSet.add(discardedVertex);
   }
 
   /**
@@ -992,6 +1130,58 @@ private RelNode buildFinalPlan(HepRelVertex vertex) {
     return rel;
   }
 
+  /** Try to remove discarded vertices recursively. */
+  private void tryCleanVertices(HepRelVertex vertex) {
+    if (vertex == root || !graph.vertexSet().contains(vertex)
+        || !graph.getInwardEdges(vertex).isEmpty()) {
+      return;
+    }
+
+    // rel is the root of a subtree with no inward edges.
+    RelNode rel = vertex.getCurrentRel();
+    notifyDiscard(rel);
+
+    Set<HepRelVertex> outVertices = new LinkedHashSet<>();
+    List<DefaultEdge> outEdges = graph.getOutwardEdges(vertex);
+    for (DefaultEdge outEdge : outEdges) {
+      outVertices.add((HepRelVertex) outEdge.target);
+    }
+
+    for (HepRelVertex child : outVertices) {
+      graph.removeEdge(vertex, child);
+    }
+    assert graph.getInwardEdges(vertex).isEmpty();
+    assert graph.getOutwardEdges(vertex).isEmpty();
+    graph.vertexSet().remove(vertex);
+    mapDigestToVertex.remove(rel.getRelDigest());
+
+    for (HepRelVertex child : outVertices) {
+      tryCleanVertices(child);
+    }
+    clearCache(vertex);
+
+    if (enableFiredRulesCache) {
+      for (ImmutableIntList relIds : firedRulesCacheIndex.get(rel.getId())) {
+        firedRulesCache.removeAll(relIds);
+      }
+    }
+  }
+
+  private void collectGarbage(final Set<HepRelVertex> garbageVertexSet) {
+    for (HepRelVertex vertex : garbageVertexSet) {
+      tryCleanVertices(vertex);
+    }
+
+    if (LOGGER.isTraceEnabled()) {
+      int currentGraphSize = graph.vertexSet().size();
+      collectGarbage();
+      int currentGraphSize2 = graph.vertexSet().size();
+      if (currentGraphSize != currentGraphSize2) {
+        throw new AssertionError("Graph size changed after garbage 
collection");
+      }
+    }
+  }
+
   private void collectGarbage() {
     if (nTransformations == nTransformationsLastGC) {
       // No modifications have taken place since the last gc,
@@ -1040,7 +1230,7 @@ private void collectGarbage() {
 
     if (enableFiredRulesCache) {
       sweepSet.forEach(rel -> {
-        for (List<Integer> relIds : 
firedRulesCacheIndex.get(rel.getCurrentRel().getId())) {
+        for (ImmutableIntList relIds : 
firedRulesCacheIndex.get(rel.getCurrentRel().getId())) {
           firedRulesCache.removeAll(relIds);
         }
         firedRulesCacheIndex.removeAll(rel.getCurrentRel().getId());
@@ -1061,12 +1251,48 @@ private void assertNoCycles() {
         + cyclicVertices);
   }
 
+  private void assertGraphConsistent() {
+    int liveNum = 0;
+    for (HepRelVertex vertex : BreadthFirstIterator.of(graph, 
requireNonNull(root, "root"))) {
+      if (graph.getOutwardEdges(vertex).size()
+          != Sets.newHashSet(requireNonNull(vertex, 
"vertex").getCurrentRel().getInputs()).size()) {
+        throw new AssertionError("HepPlanner:outward edge num is different "
+            + "from input node num, " + vertex);
+      }
+      for (DefaultEdge edge : graph.getInwardEdges(vertex)) {
+        if (!((HepRelVertex) 
edge.source).getCurrentRel().getInputs().contains(vertex)) {
+          throw new AssertionError("HepPlanner:inward edge target is not in 
input node list, "
+              + vertex);
+        }
+      }
+      liveNum++;
+    }
+
+    Set<RelNode> validSet = new HashSet<>();
+    Deque<RelNode> nodes = new ArrayDeque<>();
+    nodes.push(requireNonNull(requireNonNull(root, "root").getCurrentRel()));
+    while (!nodes.isEmpty()) {
+      RelNode node = nodes.pop();
+      validSet.add(node);
+      for (RelNode input : node.getInputs()) {
+        nodes.push(((HepRelVertex) input).getCurrentRel());
+      }
+    }
+
+    if (liveNum == validSet.size()) {
+      return;
+    }
+    throw new AssertionError("HepPlanner:Query graph live node num is 
different from root"
+        + " input valid node num, liveNodeNum: " + liveNum + ", validNodeNum: 
" + validSet.size());
+  }
+
   private void dumpGraph() {
     if (!LOGGER.isTraceEnabled()) {
       return;
     }
 
     assertNoCycles();
+    assertGraphConsistent();
 
     HepRelVertex root = this.root;
     if (root == null) {
diff --git 
a/core/src/main/java/org/apache/calcite/plan/hep/HepVertexIterator.java 
b/core/src/main/java/org/apache/calcite/plan/hep/HepVertexIterator.java
new file mode 100644
index 0000000000..7831d845d6
--- /dev/null
+++ b/core/src/main/java/org/apache/calcite/plan/hep/HepVertexIterator.java
@@ -0,0 +1,92 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to you under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.calcite.plan.hep;
+
+import org.apache.calcite.rel.RelNode;
+import org.apache.calcite.rel.SingleRel;
+
+import java.util.ArrayDeque;
+import java.util.Deque;
+import java.util.Iterator;
+import java.util.NoSuchElementException;
+import java.util.Set;
+
+/**
+ * Iterates over the vertices in a HepVertex graph in depth-first order.
+ * In a HepVertex graph, every HepVertex.getCurrentRel().getInputs() is a
+ * List&lt;HepRelVertex&gt;.
+ *
+ * @param <V> Vertex type
+ */
+public class HepVertexIterator<V extends HepRelVertex>
+    implements Iterator<V> {
+  private final Deque<V> deque = new ArrayDeque<>();
+  private final Set<Integer> visitedSet;
+
+  private HepVertexIterator(V root, Set<Integer> visitedSet) {
+    this.deque.push(root);
+    this.visitedSet = visitedSet;
+  }
+
+  /**
+   * Creates a HepVertexIterator for a given HepVertex root.
+   *
+   * @param root Root of iteration.
+   * @param visitedSet Set of HepVertex IDs to exclude from iteration; next() 
will add more
+   *                   items to it.
+   */
+  protected static <V extends HepRelVertex> Iterable<V> of(
+      final V root, final Set<Integer> visitedSet) {
+    return () -> new HepVertexIterator<>(root, visitedSet);
+  }
+
+  public Iterator<V> continueFrom(V newVertex) {
+    this.deque.push(newVertex);
+    return this;
+  }
+
+  @Override public boolean hasNext() {
+    return !deque.isEmpty();
+  }
+
+  @Override public V next() {
+    if (!hasNext()) {
+      throw new NoSuchElementException();
+    }
+    V v = deque.pop();
+
+    RelNode current = v.getCurrentRel();
+    if (current instanceof SingleRel) {
+      @SuppressWarnings("unchecked") V target = (V) ((SingleRel) 
current).getInput();
+      if (visitedSet.add(target.getId())) {
+        deque.push(target);
+      }
+    } else {
+      for (RelNode input : current.getInputs()) {
+        @SuppressWarnings("unchecked") V target = (V) input;
+        if (visitedSet.add(target.getId())) {
+          deque.push(target);
+        }
+      }
+    }
+    return v;
+  }
+
+  @Override public void remove() {
+    throw new UnsupportedOperationException();
+  }
+}
diff --git a/core/src/test/java/org/apache/calcite/test/HepPlannerTest.java 
b/core/src/test/java/org/apache/calcite/test/HepPlannerTest.java
index 9af5e7fe83..1729817e36 100644
--- a/core/src/test/java/org/apache/calcite/test/HepPlannerTest.java
+++ b/core/src/test/java/org/apache/calcite/test/HepPlannerTest.java
@@ -16,8 +16,10 @@
  */
 package org.apache.calcite.test;
 
+import org.apache.calcite.config.CalciteSystemProperty;
 import org.apache.calcite.plan.RelOptListener;
 import org.apache.calcite.plan.RelOptMaterialization;
+import org.apache.calcite.plan.RelOptUtil;
 import org.apache.calcite.plan.hep.HepMatchOrder;
 import org.apache.calcite.plan.hep.HepPlanner;
 import org.apache.calcite.plan.hep.HepProgram;
@@ -30,6 +32,7 @@
 import org.apache.calcite.rel.rules.CoerceInputsRule;
 import org.apache.calcite.rel.rules.CoreRules;
 import org.apache.calcite.sql.SqlExplainLevel;
+import org.apache.calcite.sql.fun.SqlStdOperatorTable;
 import org.apache.calcite.tools.RelBuilder;
 
 import com.google.common.collect.ImmutableList;
@@ -366,8 +369,11 @@ private void assertIncludesExactlyOnce(String message, 
String digest,
   }
 
   @Test void testRuleApplyCount() {
+    final boolean largePlanMode =
+        CalciteSystemProperty.HEP_PLANNER_LARGE_PLAN_MODE.value();
+
     long applyTimes = checkRuleApplyCount(HepMatchOrder.ARBITRARY, false);
-    assertThat(applyTimes, is(316L));
+    assertThat(applyTimes, is(largePlanMode ? 87L : 316L));
 
     applyTimes = checkRuleApplyCount(HepMatchOrder.DEPTH_FIRST, false);
     assertThat(applyTimes, is(87L));
@@ -391,6 +397,22 @@ private void assertIncludesExactlyOnce(String message, 
String digest,
     assertThat(applyTimes, is(65L));
   }
 
+  @Test void testOrderSensitivePrograms() {
+    diffRepos = DiffRepository.lookup(HepPlannerTest.class);
+
+    final String topDownPlan =
+        runUnion(HepMatchOrder.TOP_DOWN, 1, false);
+    final String bottomUpPlan =
+        runUnion(HepMatchOrder.BOTTOM_UP, 1, false);
+
+    assertThat(topDownPlan.equals(bottomUpPlan), is(false));
+
+    final String legacyPlan = runToCalc(false);
+    final String largePlanModePlan = runToCalc(true);
+
+    assertThat(largePlanModePlan.equals(legacyPlan), is(false));
+  }
+
   @Test void testMaterialization() {
     HepPlanner planner = new HepPlanner(HepProgram.builder().build());
     RelNode tableRel = sql("select * from dept").toRel();
@@ -420,6 +442,78 @@ private long checkRuleApplyCount(HepMatchOrder matchOrder, 
boolean enableFiredRu
     return listener.getApplyTimes();
   }
 
+  private String runUnion(HepMatchOrder matchOrder, int matchLimit,
+      boolean largePlanMode) {
+    HepProgram program = HepProgram.builder()
+        .addMatchOrder(matchOrder)
+        .addMatchLimit(matchLimit)
+        .addRuleInstance(CoreRules.UNION_TO_DISTINCT)
+        .build();
+    HepPlanner planner = new HepPlanner(program);
+    planner.setLargePlanMode(largePlanMode);
+    planner.setRoot(unionPlan());
+    return RelOptUtil.toString(planner.findBestExp());
+  }
+
+  private String runToCalc(boolean largePlanMode) {
+    HepProgram program = HepProgram.builder()
+        .addMatchLimit(1)
+        .addRuleInstance(CoreRules.PROJECT_TO_CALC)
+        .build();
+    HepPlanner planner = new HepPlanner(program);
+    planner.setLargePlanMode(largePlanMode);
+    planner.setRoot(toCalcPlan());
+    return RelOptUtil.toString(planner.findBestExp());
+  }
+
+  private RelNode unionPlan() {
+    final RelBuilder builder = RelBuilderTest.createBuilder();
+    final RelNode dept =
+        builder.scan("DEPT")
+            .project(builder.field("DNAME"))
+            .build();
+    final RelNode emp =
+        builder.scan("EMP")
+            .project(builder.field("ENAME"))
+            .build();
+    final RelNode bonus =
+        builder.scan("BONUS")
+            .project(builder.field("ENAME"))
+            .build();
+    final RelNode left =
+        builder.push(dept)
+            .push(emp)
+            .union(false)
+            .build();
+    return builder.push(left)
+        .push(bonus)
+        .union(false)
+        .build();
+  }
+
+  private RelNode toCalcPlan() {
+    final RelBuilder builder = RelBuilderTest.createBuilder();
+    final RelNode scan = builder.scan("EMP").build();
+    final RelNode upper =
+        builder.push(scan)
+            .project(
+                builder.alias(
+                    builder.call(SqlStdOperatorTable.UPPER, 
builder.field("ENAME")),
+                    "EXPR$0"))
+            .build();
+    final RelNode lower =
+        builder.push(scan)
+            .project(
+                builder.alias(
+                    builder.call(SqlStdOperatorTable.LOWER, 
builder.field("ENAME")),
+                    "EXPR$0"))
+            .build();
+    return builder.push(upper)
+        .push(lower)
+        .union(true)
+        .build();
+  }
+
   /** Listener for HepPlannerTest; counts how many times rules fire. */
   private static class HepTestListener implements RelOptListener {
     private long applyTimes;
diff --git 
a/ubenchmark/src/jmh/java/org/apache/calcite/benchmarks/LargePlanBenchmark.java 
b/ubenchmark/src/jmh/java/org/apache/calcite/benchmarks/LargePlanBenchmark.java
index 6d1cab3413..91455da172 100644
--- 
a/ubenchmark/src/jmh/java/org/apache/calcite/benchmarks/LargePlanBenchmark.java
+++ 
b/ubenchmark/src/jmh/java/org/apache/calcite/benchmarks/LargePlanBenchmark.java
@@ -26,10 +26,10 @@
 import org.apache.calcite.rel.type.RelDataTypeFactory;
 import org.apache.calcite.schema.SchemaPlus;
 import org.apache.calcite.schema.impl.AbstractTable;
-import org.apache.calcite.sql.fun.SqlStdOperatorTable;
 import org.apache.calcite.sql.type.SqlTypeName;
 import org.apache.calcite.tools.Frameworks;
 import org.apache.calcite.tools.RelBuilder;
+import org.apache.calcite.util.Pair;
 
 import com.google.common.collect.ImmutableList;
 
@@ -46,11 +46,13 @@
 import org.openjdk.jmh.annotations.State;
 import org.openjdk.jmh.annotations.Threads;
 import org.openjdk.jmh.annotations.Warmup;
-import org.openjdk.jmh.runner.Runner;
 import org.openjdk.jmh.runner.RunnerException;
-import org.openjdk.jmh.runner.options.Options;
-import org.openjdk.jmh.runner.options.OptionsBuilder;
 
+import java.util.ArrayList;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Locale;
+import java.util.Map;
 import java.util.concurrent.TimeUnit;
 
 /**
@@ -65,19 +67,37 @@
  *  are repeatedly applicable.
  */
 
-@Fork(value = 1, jvmArgsPrepend = {"-Xss200m"})
-@Measurement(iterations = 5, time = 1, timeUnit = TimeUnit.SECONDS)
-@Warmup(iterations = 3, time = 1, timeUnit = TimeUnit.SECONDS)
+@Fork(value = 1, jvmArgsPrepend = {"-Xss200m",
+    "-Dcalcite.disable.generate.type.digest.string=true"})
+@Measurement(iterations = 1, time = 1, timeUnit = TimeUnit.SECONDS)
+@Warmup(iterations = 1, time = 1, timeUnit = TimeUnit.SECONDS)
 @BenchmarkMode(Mode.AverageTime)
 @OutputTimeUnit(TimeUnit.MILLISECONDS)
 @State(Scope.Thread)
 @Threads(1)
 public class LargePlanBenchmark {
 
-  @Param({"100", "1000", "5000", "10000"})
+  @Param({"100", "1000", "10000", "100000"})
   int unionNum;
 
-  private RelBuilder builder;
+  // For large plans, "DEPTH_FIRST", "BOTTOM_UP", and "TOP_DOWN" are slower 
than ARBITRARY
+  @Param({"ARBITRARY"})
+  String matchOrder;
+
+  // Enable validation mode to verify rule application counts across different 
orders.
+  boolean enableValidation = false;
+
+  boolean isLargePlanMode = true; // false is very slow in 10000 unions
+  boolean isEnableFiredRulesCache = true;
+  private static RelBuilder builder;
+
+  // All available match orders for validation
+  private static final String[] ALL_MATCH_ORDERS = {
+      "ARBITRARY", "DEPTH_FIRST", "BOTTOM_UP", "TOP_DOWN"
+  };
+
+  // Validation sizes: 1, 10, 100, 1000
+  private static final int[] VALIDATION_SIZES = {1, 10, 100, 1000};
 
   @Setup(Level.Trial)
   public void setup() {
@@ -112,9 +132,9 @@ private RelNode makeSelectBranch(int i) {
         .filter(
             builder.and(
                 builder.equals(builder.field("EMPNO"), builder.literal(i)),
-                builder.call(SqlStdOperatorTable.GREATER_THAN_OR_EQUAL,
+                builder.greaterThanOrEqual(
                     builder.field("MGR"), builder.literal(0)),
-                builder.call(SqlStdOperatorTable.LESS_THAN_OR_EQUAL,
+                builder.lessThanOrEqual(
                     builder.field("MGR"), builder.literal(0)),
                 builder.equals(builder.field("ENAME"), builder.literal("Y")),
                 builder.equals(builder.field("SAL"), builder.literal(i))
@@ -144,41 +164,420 @@ private RelNode makeUnionTree(int unionNum) {
 
  // JMH entry point: delegates to the parameterized overload with the
  // @Param-injected size and match order, without statistics collection.
  @Benchmark
  public void testLargeUnionPlan() {
+    testLargeUnionPlan(unionNum, matchOrder, false);
  }
+
+  /**
+   * Executes the optimization with the given parameters.
+   *
+   * @param unionNum number of union branches
+   * @param matchOrder the match order to use
+   * @param collectStats whether to collect and return rule statistics (for 
validation)
+   * @return rule statistics map if collectStats is true, otherwise empty map
+   */
+  public Map<String, Map<String, Pair<Long, Long>>> testLargeUnionPlan(
+      int unionNum, String matchOrder, boolean collectStats) {
+
     RelNode root = makeUnionTree(unionNum);
+    HepMatchOrder hepMatchOrder = HepMatchOrder.valueOf(matchOrder);
 
     HepProgram filterReduce = HepProgram.builder()
-        .addMatchOrder(HepMatchOrder.DEPTH_FIRST)
+        .addMatchOrder(hepMatchOrder)
         .addRuleInstance(CoreRules.FILTER_REDUCE_EXPRESSIONS)
         .build();
 
     HepProgram projectReduce = HepProgram.builder()
-        .addMatchOrder(HepMatchOrder.DEPTH_FIRST)
+        .addMatchOrder(hepMatchOrder)
         .addRuleInstance(CoreRules.PROJECT_REDUCE_EXPRESSIONS)
         .build();
 
-    // Phrase 1
-    HepPlanner planner = new HepPlanner(filterReduce);
-    planner.setRoot(root);
-    root = planner.findBestExp();
-    planner.clear();
-
-    // ... do some things cannot be done in planner.findBestExp() ...
-    // Phrase 2
-    planner = new HepPlanner(projectReduce);
-    planner.setRoot(root);
-    root = planner.findBestExp();
-    planner.clear();
-
-    // TODO LATER large plan optimization
-    // TODO LATER set "-Dcalcite.disable.generate.type.digest.string=true"
+    Map<String, Map<String, Pair<Long, Long>>> stats = new HashMap<>();
+
+    if (!isLargePlanMode) {
+      // Phase 1
+      HepPlanner planner = new HepPlanner(filterReduce);
+      if (collectStats) {
+        planner.enableRuleAttemptsTracking();
+      }
+      planner.setRoot(root);
+      planner.setEnableFiredRulesCache(isEnableFiredRulesCache);
+      root = planner.findBestExp();
+      if (collectStats) {
+        stats.put("FILTER", snapshotRuleAttempts(planner));
+      }
+
+      // Phase 2
+      planner = new HepPlanner(projectReduce);
+      if (collectStats) {
+        planner.enableRuleAttemptsTracking();
+      }
+      planner.setEnableFiredRulesCache(isEnableFiredRulesCache);
+      planner.setRoot(root);
+      root = planner.findBestExp();
+      if (collectStats) {
+        stats.put("PROJECT", snapshotRuleAttempts(planner));
+      }
+    } else {
+      HepPlanner planner = new HepPlanner();
+      planner.setEnableFiredRulesCache(isEnableFiredRulesCache);
+      planner.setLargePlanMode(isLargePlanMode);
+      if (collectStats) {
+        planner.enableRuleAttemptsTracking();
+      }
+      planner.setRoot(root);
+
+      // Phase 1: Execute FILTER_REDUCE_EXPRESSIONS
+      Map<String, Pair<Long, Long>> beforeFilter =
+          collectStats ? snapshotRuleAttempts(planner) : null;
+      planner.executeProgram(filterReduce);
+      if (collectStats) {
+        stats.put("FILTER",
+            subtractRuleAttempts(snapshotRuleAttempts(planner), beforeFilter));
+      }
+      planner.clearRules();
+
+      // Phase 2: Execute PROJECT_REDUCE_EXPRESSIONS
+      Map<String, Pair<Long, Long>> beforeProject =
+          collectStats ? snapshotRuleAttempts(planner) : null;
+      planner.executeProgram(projectReduce);
+      if (collectStats) {
+        stats.put("PROJECT",
+            subtractRuleAttempts(snapshotRuleAttempts(planner), 
beforeProject));
+      }
+      planner.clearRules();
+
+      root = planner.buildFinalPlan();
+    }
+
+    return stats;
+  }
+
+  /**
+   * Returns a string repeated n times (Java 8 compatible).
+   */
+  private static String repeat(String str, int count) {
+    StringBuilder sb = new StringBuilder();
+    for (int i = 0; i < count; i++) {
+      sb.append(str);
+    }
+    return sb.toString();
+  }
+
+  /**
+   * Runs validation mode to verify that different match orders produce
+   * the same rule application counts.
+   */
+  public void runValidation() {
+    this.enableValidation = true;
+    System.out.println("\n"
+        + repeat("=", 80));
+    System.out.println("VALIDATION MODE: Verifying rule application counts 
across match orders");
+    System.out.println(repeat("=", 80) + "\n");
+
+    Map<String, Map<Integer, Map<String, Map<String, Pair<Long, Long>>>>> 
allStats =
+        new HashMap<>();
+
+    for (String order : ALL_MATCH_ORDERS) {
+      System.out.println("Testing match order: " + order);
+      Map<Integer, Map<String, Map<String, Pair<Long, Long>>>> orderStats = 
new HashMap<>();
+
+      for (int size : VALIDATION_SIZES) {
+        System.out.println("  Size: " + size);
+        Map<String, Map<String, Pair<Long, Long>>> stats =
+            testLargeUnionPlan(size, order, true);
+        orderStats.put(size, stats);
+      }
+
+      allStats.put(order, orderStats);
+      System.out.println();
+    }
+
+    System.out.println("\n"
+        + repeat("-", 80));
+    System.out.println("VALIDATION RESULTS");
+    System.out.println(repeat("-", 80) + "\n");
+
+    boolean allPassed = true;
+    Map<Integer, Map<String, Map<String, Pair<Long, Long>>>> baselineStats =
+        allStats.get("ARBITRARY");
+
+    for (String order : ALL_MATCH_ORDERS) {
+      if (order.equals("ARBITRARY")) {
+        continue;
+      }
+
+      System.out.println("Comparing " + order + " against ARBITRARY:");
+      Map<Integer, Map<String, Map<String, Pair<Long, Long>>>> orderStats =
+          allStats.get(order);
+
+      for (int size : VALIDATION_SIZES) {
+        boolean sizePassed =
+            validateSizeStats(order, size, baselineStats.get(size), 
orderStats.get(size));
+        if (!sizePassed) {
+          allPassed = false;
+        }
+      }
+      System.out.println();
+    }
+
+    System.out.println("\n"
+        + repeat("=", 80));
+    if (allPassed) {
+      System.out.println("VALIDATION PASSED: All match orders produce "
+          + "consistent rule application counts");
+    } else {
+      System.out.println("VALIDATION FAILED: Some match orders have "
+          + "inconsistent rule application counts");
+    }
+    System.out.println(repeat("=", 80) + "\n");
+  }
+
+  /**
+   * Validates that the rule statistics for a specific size match between
+   * the baseline (ARBITRARY) and the test order.
+   */
+  private boolean validateSizeStats(String order, int size,
+      Map<String, Map<String, Pair<Long, Long>>> baseline,
+      Map<String, Map<String, Pair<Long, Long>>> test) {
+
+    boolean passed = true;
+    StringBuilder sb = new StringBuilder();
+    sb.append(String.format(Locale.ROOT, "  Size %4d: ", size));
+
+    Map<String, Pair<Long, Long>> baselineFilter = baseline.get("FILTER");
+    Map<String, Pair<Long, Long>> testFilter = test.get("FILTER");
+    if (!comparePhaseStats("FILTER", baselineFilter, testFilter, sb)) {
+      passed = false;
+    }
+
+    Map<String, Pair<Long, Long>> baselineProject = baseline.get("PROJECT");
+    Map<String, Pair<Long, Long>> testProject = test.get("PROJECT");
+    if (!comparePhaseStats("PROJECT", baselineProject, testProject, sb)) {
+      passed = false;
+    }
+    if (passed) {
+      sb.append("PASSED");
+    } else {
+      sb.append("FAILED");
+    }
+
+    System.out.println(sb.toString());
+    return passed;
+  }
+
+  private boolean comparePhaseStats(String phase,
+      Map<String, Pair<Long, Long>> baseline,
+      Map<String, Pair<Long, Long>> test,
+      StringBuilder sb) {
+
+    if (baseline == null && test == null) {
+      return true;
+    }
+    if (baseline == null || test == null) {
+      sb.append(phase).append("(null mismatch) ");
+      return false;
+    }
+
+    boolean passed = true;
+    for (String rule : baseline.keySet()) {
+      Pair<Long, Long> baselineCount = baseline.get(rule);
+      Pair<Long, Long> testCount = test.get(rule);
+
+      if (testCount == null) {
+        sb.append(phase).append("/").append(rule).append("(missing) ");
+        passed = false;
+        continue;
+      }
+
+      if (!baselineCount.left.equals(testCount.left)) {
+        sb.append(
+            String.format(Locale.ROOT, "%s/%s(%d vs %d) ",
+                phase, rule, baselineCount.left, testCount.left));
+        passed = false;
+      }
+    }
+
+    return passed;
+  }
+
+  /** Returns a defensive copy of the planner's per-rule counters
+   * (left = attempts, right = time; see {@code subtractRuleAttempts}),
+   * so later planner activity does not mutate the snapshot. */
+  private static Map<String, Pair<Long, Long>> snapshotRuleAttempts(HepPlanner 
planner) {
+    return new HashMap<>(planner.getRuleAttemptsInfo());
+  }
+
+  private static Map<String, Pair<Long, Long>> subtractRuleAttempts(
+      Map<String, Pair<Long, Long>> current,
+      Map<String, Pair<Long, Long>> previous) {
+    Map<String, Pair<Long, Long>> delta = new HashMap<>();
+    for (Map.Entry<String, Pair<Long, Long>> entry : current.entrySet()) {
+      Pair<Long, Long> oldValue = previous.get(entry.getKey());
+      long oldAttempts = oldValue == null ? 0L : oldValue.left;
+      long oldTime = oldValue == null ? 0L : oldValue.right;
+      long deltaAttempts = entry.getValue().left - oldAttempts;
+      long deltaTime = entry.getValue().right - oldTime;
+      if (deltaAttempts != 0L || deltaTime != 0L) {
+        delta.put(entry.getKey(), Pair.of(deltaAttempts, deltaTime));
+      }
+    }
+    return delta;
+  }
+
+  /**
+   * Runs benchmark mode for performance testing.
+   * Tests different union sizes based on the match order's scalability.
+   */
+  public void runBenchmark() {
+    System.out.println("\n"
+        + repeat("=", 80));
+    System.out.println("BENCHMARK MODE: Performance testing");
+    System.out.println(repeat("=", 80) + "\n");
+
+    // Define size ranges for each match order
+    // ARBITRARY: 1K, 3K, 10K, 30K, 100K, 300K (best scalability)
+    // DEPTH_FIRST: 1K, 3K, 10K, 30K (good scalability)
+    // BOTTOM_UP/TOP_DOWN: 1K, 3K (limited scalability)
+    Map<String, int[]> orderSizes = new HashMap<>();
+    orderSizes.put("ARBITRARY", new int[]{1000, 3000, 10000, 30000});
+    orderSizes.put("DEPTH_FIRST", new int[]{1000, 3000, 10000, 30000});
+    orderSizes.put("BOTTOM_UP", new int[]{1000, 3000});
+    orderSizes.put("TOP_DOWN", new int[]{1000, 3000});
+
+    List<String> results = new ArrayList<>();
+    results.add(
+        String.format(Locale.ROOT,
+            "%-15s %-10s %-15s %-15s %-15s",
+            "Match Order", "Union Num", "Node Count", "Rule Attempts", "Time 
(ms)"));
+    results.add(repeat("-", 85));
+
+    for (String order : ALL_MATCH_ORDERS) {
+      System.out.println("Testing match order: " + order);
+      int[] sizes = orderSizes.get(order);
+
+      for (int size : sizes) {
+        int nodeCount = 4 * size + 3;
+
+        // Warmup
+        testLargeUnionPlan(size, order, false);
+
+        // Actual measurement with rule stats collection
+        long startTime = System.currentTimeMillis();
+        try {
+          Map<String, Map<String, Pair<Long, Long>>> stats =
+              testLargeUnionPlan(size, order, true);
+          long endTime = System.currentTimeMillis();
+          long elapsed = endTime - startTime;
+
+          long totalRuleAttempts = 0;
+          for (Map<String, Pair<Long, Long>> phaseStats : stats.values()) {
+            for (Pair<Long, Long> ruleStat : phaseStats.values()) {
+              totalRuleAttempts += ruleStat.left;
+            }
+          }
+
+          results.add(
+              String.format(Locale.ROOT,
+                  "%-15s %-10d %-15d %-15d %-15d",
+                  order, size, nodeCount, totalRuleAttempts, elapsed));
+          System.out.println("  Size " + size + ": " + elapsed + " ms, "
+              + "nodes=" + nodeCount + ", ruleAttempts=" + totalRuleAttempts);
+        } catch (Exception e) {
+          results.add(
+              String.format(Locale.ROOT,
+                  "%-15s %-10d %-15s %-15s %-15s",
+                  order, size, "N/A", "N/A", "N/A"));
+          System.out.println("  Size " + size + ": FAILED - " + 
e.getMessage());
+        }
+      }
+      System.out.println();
+    }
+
+    // Print summary report
+    System.out.println("\n"
+        + repeat("=", 85));
+    System.out.println("BENCHMARK REPORT");
+    System.out.println(repeat("=", 85));
+    for (String line : results) {
+      System.out.println(line);
+    }
+    System.out.println(repeat("=", 85) + "\n");
   }
 
   public static void main(String[] args) throws RunnerException {
-    Options opt = new OptionsBuilder()
-        .include(LargePlanBenchmark.class.getSimpleName())
-        .detectJvmArgs()
-        .build();
+    LargePlanBenchmark benchmark = new LargePlanBenchmark();
+    benchmark.setup();
+    benchmark.isLargePlanMode = true;
+    benchmark.isEnableFiredRulesCache = true;
+
+    // Check command line arguments for mode selection
+    boolean runValidation = false;
+    boolean runBenchmark = false;
+    boolean runProfile = false;
+
+    for (String arg : args) {
+      if ("--validate".equals(arg) || "-v".equals(arg)) {
+        runValidation = true;
+      } else if ("--benchmark".equals(arg) || "-b".equals(arg)) {
+        runBenchmark = true;
+      } else if ("--both".equals(arg)) {
+        runValidation = true;
+        runBenchmark = true;
+      } else if ("--profile".equals(arg) || "-p".equals(arg)) {
+        runProfile = true;
+      }
+    }
 
-    new Runner(opt).run();
+    // Profile mode takes precedence (specialized mode)
+    if (runProfile) {
+      benchmark.runProfileMode();
+      return;
+    }
+
+    // Default: run benchmark mode if no arguments specified
+    if (!runValidation && !runBenchmark) {
+      runBenchmark = true;
+    }
+
+    // Run validation first (if requested)
+    if (runValidation) {
+      benchmark.runValidation();
+    }
+
+    // Then run benchmark (if requested)
+    if (runBenchmark) {
+      benchmark.runBenchmark();
+    }
+  }
+
+  /**
+   * Runs profile mode for performance tuning with ARBITRARY order at 100K 
unions.
+   * This mode is optimized for generating perf/flame graph records.
+   */
+  public void runProfileMode() {
+    System.out.println("\n"
+        + repeat("=", 80));
+    System.out.println("PROFILE MODE: ARBITRARY order with 100000 unions");
+    System.out.println("Optimized for perf/flame graph recording");
+    System.out.println(repeat("=", 80) + "\n");
+
+    String order = "ARBITRARY";
+    int size = 100000;
+
+    long startTime = System.currentTimeMillis();
+    try {
+      testLargeUnionPlan(size, order, false);
+      long endTime = System.currentTimeMillis();
+      long elapsed = endTime - startTime;
+
+      System.out.println("\n"
+          + repeat("=", 80));
+      System.out.println("PROFILE RUN COMPLETED");
+      System.out.println(repeat("=", 80));
+      System.out.println("Match Order: " + order);
+      System.out.println("Union Size:  " + size);
+      System.out.println("Time:        " + elapsed + " ms (" + (elapsed / 
1000.0) + " s)");
+      System.out.println(repeat("=", 80) + "\n");
+    } catch (Exception e) {
+      System.err.println("Profile run failed: " + e.getMessage());
+      e.printStackTrace();
+    }
   }
 }

Reply via email to