nsivarajan commented on code in PR #63067:
URL: https://github.com/apache/doris/pull/63067#discussion_r3290420879


##########
fe/fe-core/src/main/java/org/apache/doris/statistics/query/QueryStatsRecorder.java:
##########
@@ -0,0 +1,208 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+package org.apache.doris.statistics.query;
+
+import org.apache.doris.analysis.StatementBase;
+import org.apache.doris.catalog.DatabaseIf;
+import org.apache.doris.catalog.Env;
+import org.apache.doris.catalog.OlapTable;
+import org.apache.doris.common.Config;
+import org.apache.doris.nereids.StatementContext;
+import org.apache.doris.nereids.glue.LogicalPlanAdapter;
+import org.apache.doris.nereids.trees.expressions.ExprId;
+import org.apache.doris.nereids.trees.expressions.Expression;
+import org.apache.doris.nereids.trees.expressions.Slot;
+import org.apache.doris.nereids.trees.expressions.SlotReference;
+import org.apache.doris.nereids.trees.plans.Plan;
+import org.apache.doris.nereids.trees.plans.commands.Command;
+import org.apache.doris.nereids.trees.plans.physical.PhysicalFilter;
+import 
org.apache.doris.nereids.trees.plans.physical.PhysicalLazyMaterializeOlapScan;
+import org.apache.doris.nereids.trees.plans.physical.PhysicalOlapScan;
+import org.apache.doris.nereids.trees.plans.physical.PhysicalPlan;
+import org.apache.doris.qe.ConnectContext;
+
+import org.apache.logging.log4j.LogManager;
+import org.apache.logging.log4j.Logger;
+
+import java.util.HashMap;
+import java.util.Map;
+
+/**
+ * Records column-level query-hit and filter-hit statistics from the Nereids 
physical plan.
+ * Called once per query in NereidsPlanner after plan translation.
+ *
+ * <p>Scope (Part 1):
+ * <ul>
+ *   <li>queryHit: base SELECT columns whose ExprId flows straight through to 
the root
+ *       plan's output without rewriting. Columns hidden by an alias, an 
expression,
+ *       or an aggregate function are NOT recorded yet (Part 2).</li>
+ *   <li>filterHit: columns referenced in WHERE predicate conjuncts.</li>
+ *   <li>Only OlapTable scans are recorded; external tables (Hive, Iceberg, 
JDBC, …) are not.</li>
+ *   <li>DML, EXPLAIN, and internal queries (e.g. auto-analyze) are 
skipped.</li>
+ *   <li>Per query, each table's count is incremented at most once regardless 
of scan count.</li>
+ * </ul>
+ * GROUP BY, ORDER BY, window, JOIN, and aliased/projected columns are 
deferred to Part 2.
+ */
+public class QueryStatsRecorder {
+    private static final Logger LOG = 
LogManager.getLogger(QueryStatsRecorder.class);
+
+    private QueryStatsRecorder() {}
+
+    public static void record(PhysicalPlan plan, StatementContext stmtContext) 
{
+        if (!shouldRecord(stmtContext)) {
+            return;
+        }
+        if (stmtContext.isQueryStatsRecorded()) {
+            return;
+        }
+        // Set the latch before the work so a partial-failure retry does not 
double-count.
+        stmtContext.markQueryStatsRecorded();
+        try {
+            Map<String, StatsDelta> deltas = collectDeltas(plan);
+            for (StatsDelta delta : deltas.values()) {
+                if (!delta.empty()) {
+                    try {
+                        Env.getCurrentEnv().getQueryStats().addStats(delta);
+                    } catch (Exception e) {
+                        ConnectContext cc = stmtContext.getConnectContext();
+                        String queryId = (cc != null && cc.queryId() != null)
+                                ? cc.queryId().toString() : "unknown";
+                        LOG.warn("Failed to record query stats for query={}", 
queryId, e);
+                    }
+                }
+            }
+        } catch (Exception e) {
+            ConnectContext cc = stmtContext.getConnectContext();
+            String queryId = (cc != null && cc.queryId() != null)
+                    ? cc.queryId().toString() : "unknown";
+            LOG.warn("Failed to build query stats deltas for query={}", 
queryId, e);
+        }
+    }
+
+    /**
+     * Builds the per-table StatsDelta map from the physical plan.
+     * Package-private so unit tests can verify recording logic without 
touching Env.
+     */
+    static Map<String, StatsDelta> collectDeltas(PhysicalPlan plan) {
+        Map<ExprId, PhysicalOlapScan> exprIdToScan = new HashMap<>();
+        Map<String, StatsDelta> deltas = new HashMap<>();
+        walkPlan(plan, exprIdToScan, deltas);
+        if (exprIdToScan.isEmpty()) {
+            return deltas;
+        }
+        for (Slot slot : plan.getOutput()) {
+            if (!(slot instanceof SlotReference)) {
+                continue;
+            }
+            SlotReference sr = (SlotReference) slot;
+            PhysicalOlapScan sourceScan = exprIdToScan.get(sr.getExprId());
+            if (sourceScan == null) {
+                continue;
+            }
+            StatsDelta delta = getOrCreateDelta(deltas, sourceScan);
+            if (delta != null) {
+                sr.getOriginalColumn().ifPresent(col -> 
delta.addQueryStats(col.getName()));
+            }
+        }
+        return deltas;
+    }
+
+    // Package-private for testing.
+    static boolean shouldRecord(StatementContext ctx) {
+        if (!Config.enable_query_hit_stats) {
+            return false;
+        }
+        // Skip internal queries such as auto-analyze.
+        ConnectContext connectContext = ctx.getConnectContext();
+        if (connectContext != null && connectContext.getState().isInternal()) {
+            return false;
+        }
+        StatementBase stmt = ctx.getParsedStatement();
+        if (stmt == null || stmt.isExplain()) {
+            return false;
+        }
+        if (stmt instanceof LogicalPlanAdapter

Review Comment:
   Thanks @morrySnow for review, this is because I have a failure in P0 
regression of test suite, which is successful in Cloud_P0 tried moving record 
from statement to Plan. I've fixed it again in new commit. 
   
   but still failing on P0 regression , as it return query stats in double 
(expected 1, returned 2).



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to