github-actions[bot] commented on code in PR #61566:
URL: https://github.com/apache/doris/pull/61566#discussion_r3008577505


##########
be/src/exprs/aggregate/aggregate_function_window_funnel_v2.h:
##########
@@ -0,0 +1,585 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#pragma once
+
+#include <algorithm>
+#include <iterator>
+#include <utility>
+#include <vector>
+
+#include "common/cast_set.h"
+#include "common/exception.h"
+#include "common/status.h"
+#include "core/assert_cast.h"
+#include "core/column/column_string.h"
+#include "core/data_type/data_type_number.h"
+#include "core/types.h"
+#include "core/value/vdatetime_value.h"
+#include "exprs/aggregate/aggregate_function.h"
+#include "exprs/aggregate/aggregate_function_window_funnel.h" // for 
WindowFunnelMode, string_to_window_funnel_mode
+#include "util/var_int.h"
+
+namespace doris {
+#include "common/compile_check_begin.h"
+class Arena;
+class BufferReadable;
+class BufferWritable;
+class IColumn;
+} // namespace doris
+
+namespace doris {
+
+/// Merge two event lists, utilizing sorted flags to optimize.
+/// After merge, all events are in `events_list` and it is sorted.
+template <typename T>
+void merge_events_list(T& events_list, size_t prefix_size, bool prefix_sorted, 
bool suffix_sorted) {
+    if (!prefix_sorted && !suffix_sorted) {
+        std::stable_sort(std::begin(events_list), std::end(events_list));
+    } else {
+        const auto begin = std::begin(events_list);
+        const auto middle = std::next(begin, prefix_size);
+        const auto end = std::end(events_list);
+
+        if (!prefix_sorted) {
+            std::stable_sort(begin, middle);
+        }
+        if (!suffix_sorted) {
+            std::stable_sort(middle, end);
+        }
+        std::inplace_merge(begin, middle, end);
+    }
+}
+
+/// V2 state: stores only matched events as (timestamp, event_index) pairs.
+/// Compared to V1 which stores all rows with N boolean columns, V2 only stores
+/// events that actually match at least one condition, dramatically reducing 
memory.
+///
+/// To prevent same-row multi-condition chain advancement (where a single row
+/// matching multiple conditions could incorrectly advance the funnel through
+/// multiple levels), we use the high bit of event_idx as a "same-row 
continuation"
+/// flag. When a row matches multiple conditions, the first event stored for 
that
+/// row has bit 7 = 0, and subsequent events from the same row have bit 7 = 1.
+/// The algorithm uses this to ensure each funnel step comes from a different 
row.
+///
+/// This approach adds ZERO storage overhead — each event remains 9 bytes 
(UInt64 + UInt8).
+struct WindowFunnelStateV2 {
+    /// (timestamp_int_val, 1-based event_index with continuation flag in bit 
7)
+    ///
+    /// Bit layout of event_idx:
+    ///   bit 7 (0x80): continuation flag — 1 means this event is from the 
same row
+    ///                  as the preceding event in events_list
+    ///   bits 0-6:     actual 1-based event index (supports up to 127 
conditions)
+    ///
+    /// Sorted by timestamp only (via operator<). stable_sort preserves 
insertion order
+    /// for equal timestamps, so same-row events remain consecutive after 
sorting.
+    struct TimestampEvent {
+        UInt64 timestamp;
+        UInt8 event_idx; // includes continuation flag in bit 7
+
+        /// Sort by timestamp only. For same timestamp, stable_sort preserves 
insertion
+        /// order, keeping same-row events consecutive.
+        bool operator<(const TimestampEvent& other) const { return timestamp < 
other.timestamp; }
+        bool operator<=(const TimestampEvent& other) const { return timestamp 
<= other.timestamp; }
+    };
+
+    static constexpr UInt8 CONTINUATION_FLAG = 0x80;
+    static constexpr UInt8 EVENT_IDX_MASK = 0x7F;
+
+    /// Extract the actual 1-based event index (stripping continuation flag).
+    static int get_event_idx(UInt8 raw) { return (raw & EVENT_IDX_MASK); }
+    /// Check if this event is a continuation of the same row as the previous 
event.
+    static bool is_continuation(UInt8 raw) { return (raw & CONTINUATION_FLAG) 
!= 0; }
+
+    static constexpr int64_t WINDOW_UNSET = -1;
+
+    int event_count = 0;
+    int64_t window = WINDOW_UNSET;
+    WindowFunnelMode window_funnel_mode = WindowFunnelMode::INVALID;
+    bool sorted = true;
+    std::vector<TimestampEvent> events_list;
+
+    WindowFunnelStateV2() = default;
+    WindowFunnelStateV2(int arg_event_count) : event_count(arg_event_count) {}
+
+    void reset() {
+        events_list.clear();
+        sorted = true;
+    }
+
+    void add(const IColumn** arg_columns, ssize_t row_num, int64_t win, 
WindowFunnelMode mode) {
+        window = win;
+        window_funnel_mode = mode;
+
+        // get_data() returns DateV2Value<DateTimeV2ValueType>; convert to 
packed UInt64
+        auto timestamp = assert_cast<const 
ColumnVector<TYPE_DATETIMEV2>&>(*arg_columns[2])
+                                 .get_data()[row_num]
+                                 .to_date_int_val();
+
+        // Iterate from last event to first (reverse order).
+        // This ensures that after stable_sort, events with the same timestamp
+        // appear in descending event_index order, which is important for 
correct
+        // matching when one row satisfies multiple conditions.
+        //
+        // The first event stored for this row has continuation=0;
+        // subsequent events from the same row have continuation=1 (bit 7 set).
+        bool first_match = true;
+        for (int i = event_count - 1; i >= 0; --i) {
+            auto event_val =
+                    assert_cast<const ColumnUInt8&>(*arg_columns[3 + 
i]).get_data()[row_num];
+            if (event_val) {
+                UInt8 packed_idx = cast_set<UInt8>(i + 1);
+                if (!first_match) {
+                    packed_idx |= CONTINUATION_FLAG;
+                }
+                first_match = false;
+                TimestampEvent new_event {timestamp, packed_idx};
+                if (sorted && !events_list.empty()) {
+                    sorted = events_list.back() <= new_event;
+                }
+                events_list.emplace_back(new_event);
+            }
+        }
+    }
+
+    void sort() {
+        if (!sorted) {
+            std::stable_sort(std::begin(events_list), std::end(events_list));
+            sorted = true;
+        }
+    }
+
+    void merge(const WindowFunnelStateV2& other) {
+        if (other.events_list.empty()) {
+            return;
+        }
+
+        if (events_list.empty()) {
+            events_list = other.events_list;
+            sorted = other.sorted;
+        } else {
+            const auto prefix_size = events_list.size();
+            events_list.insert(std::end(events_list), 
std::begin(other.events_list),
+                               std::end(other.events_list));
+            // Both stable_sort and inplace_merge preserve relative order of 
equal elements.
+            // Since same-row events have the same timestamp (and thus compare 
equal in
+            // the primary sort key), they remain consecutive after merge — 
preserving
+            // the validity of continuation flags.
+            merge_events_list(events_list, prefix_size, sorted, other.sorted);
+            sorted = true;
+        }
+
+        event_count = event_count > 0 ? event_count : other.event_count;
+        window = window != WINDOW_UNSET ? window : other.window;
+        window_funnel_mode = window_funnel_mode == WindowFunnelMode::INVALID
+                                     ? other.window_funnel_mode
+                                     : window_funnel_mode;
+    }
+
+    void write(BufferWritable& out) const {
+        write_var_int(event_count, out);
+        write_var_int(window, out);
+        
write_var_int(static_cast<std::underlying_type_t<WindowFunnelMode>>(window_funnel_mode),
+                      out);
+        write_var_int(sorted ? 1 : 0, out);
+        write_var_int(cast_set<Int64>(events_list.size()), out);
+        for (const auto& evt : events_list) {
+            // Use fixed-size binary write for timestamp (8 bytes) and 
event_idx (1 byte).
+            // The event_idx byte includes the continuation flag in bit 7.
+            out.write(reinterpret_cast<const char*>(&evt.timestamp), 
sizeof(evt.timestamp));
+            out.write(reinterpret_cast<const char*>(&evt.event_idx), 
sizeof(evt.event_idx));
+        }
+    }
+
+    void read(BufferReadable& in) {
+        Int64 tmp = 0;
+        read_var_int(tmp, in);
+        event_count = cast_set<int>(tmp);
+
+        read_var_int(window, in);
+
+        read_var_int(tmp, in);
+        window_funnel_mode = static_cast<WindowFunnelMode>(tmp);
+
+        read_var_int(tmp, in);
+        sorted = (tmp != 0);
+
+        Int64 size = 0;
+        read_var_int(size, in);
+        events_list.clear();
+        events_list.resize(size);
+        for (Int64 i = 0; i < size; ++i) {
+            in.read(reinterpret_cast<char*>(&events_list[i].timestamp),
+                    sizeof(events_list[i].timestamp));
+            in.read(reinterpret_cast<char*>(&events_list[i].event_idx),
+                    sizeof(events_list[i].event_idx));
+        }
+    }
+
+    using DateValueType = DateV2Value<DateTimeV2ValueType>;
+
+    /// Reconstruct DateV2Value from packed UInt64.
+    static DateValueType _ts_from_int(UInt64 packed) { return 
DateValueType(packed); }
+
+    /// Check if `current_ts` is within `window` seconds of `base_ts`.
+    /// Both are packed UInt64 from DateV2Value::to_date_int_val().
+    bool _within_window(UInt64 base_ts, UInt64 current_ts) const {
+        DateValueType end_ts = _ts_from_int(base_ts);
+        TimeInterval interval(SECOND, window, false);
+        end_ts.template date_add_interval<SECOND>(interval);
+        return current_ts <= end_ts.to_date_int_val();
+    }
+
+    /// Track (first_timestamp, last_timestamp, last_event_list_idx) for each 
event level.
+    /// Uses packed UInt64 values; 0 means unset for first_ts.
+    /// last_list_idx tracks the position in events_list of the event that set 
this level,
+    /// used to check continuation flag on subsequent events to detect 
same-row advancement.
+    struct TimestampPair {
+        UInt64 first_ts = 0;
+        UInt64 last_ts = 0;
+        size_t last_list_idx = 0;
+        bool has_value() const { return first_ts != 0; }
+        void reset() {
+            first_ts = 0;
+            last_ts = 0;
+            last_list_idx = 0;
+        }
+    };
+
+    int get() const {
+        if (event_count == 0 || events_list.empty()) {
+            return 0;
+        }
+        if (window < 0) {
+            throw Exception(ErrorCode::INVALID_ARGUMENT,
+                            "the sliding time window must be a positive 
integer, but got: {}",
+                            window);
+        }
+
+        switch (window_funnel_mode) {
+        case WindowFunnelMode::DEFAULT:
+            return _get_default();
+        case WindowFunnelMode::INCREASE:
+            return _get_increase();
+        case WindowFunnelMode::DEDUPLICATION:
+            return _get_deduplication();
+        case WindowFunnelMode::FIXED:
+            return _get_fixed();
+        default:
+            throw Exception(ErrorCode::INTERNAL_ERROR, "Invalid window_funnel 
mode");
+        }
+    }
+
+private:
+    /// DEFAULT mode: O(N) single-pass algorithm.
+    /// Uses events_timestamp array to track the (first, last) timestamps for 
each level.
+    /// For each event in sorted order:
+    ///   - If it's event 0, start a new potential chain
+    ///   - If its predecessor level has been matched, within time window, AND 
from a
+    ///     different row (checked via continuation flag), extend the chain
+    ///
+    /// In DEFAULT mode, unconditionally overwriting events_timestamp[0] when 
a new event-0
+    /// appears is safe: timestamps are monotonically non-decreasing, higher 
levels retain
+    /// the old chain's first_ts, and the <= window check still succeeds.
+    int _get_default() const {
+        std::vector<TimestampPair> events_timestamp(event_count);
+
+        for (size_t i = 0; i < events_list.size(); ++i) {
+            const auto& evt = events_list[i];
+            int event_idx = get_event_idx(evt.event_idx) - 1;
+
+            if (event_idx == 0) {
+                events_timestamp[0] = {evt.timestamp, evt.timestamp, i};
+            } else if (events_timestamp[event_idx - 1].has_value() &&
+                       !_is_same_row(events_timestamp[event_idx - 
1].last_list_idx, i)) {
+                // Must be from a DIFFERENT row than the predecessor level
+                if (_within_window(events_timestamp[event_idx - 1].first_ts, 
evt.timestamp)) {
+                    events_timestamp[event_idx] = {events_timestamp[event_idx 
- 1].first_ts,
+                                                   evt.timestamp, i};
+                    if (event_idx + 1 == event_count) {
+                        return event_count;
+                    }
+                }
+            }
+        }
+
+        for (int event = event_count; event > 0; --event) {
+            if (events_timestamp[event - 1].has_value()) {
+                return event;
+            }
+        }
+        return 0;
+    }
+
+    /// INCREASE mode: multi-pass algorithm matching V1 semantics.
+    ///
+    /// A single-pass approach cannot correctly handle INCREASE mode because 
when a new
+    /// event-0 appears, the old chain and the new chain have different 
trade-offs:
+    /// - The old chain has an earlier last_ts (better for the strict-increase 
check)
+    /// - The new chain has a later first_ts (better for the time-window check)
+    /// Neither dominates the other, so both must be tried independently.
+    ///
+    /// This method iterates over each event-0 occurrence as a potential chain 
start,
+    /// then scans forward to build the longest matching chain from that start.
+    /// The maximum chain length across all starts is returned.
+    int _get_increase() const {

Review Comment:
   **[Performance]** `_get_increase()` has O(N_matched × M_event0) worst-case 
complexity, where N_matched is total matched events and M_event0 is the count 
of event-0 occurrences. For a user with many event-0 matches (e.g., thousands 
of "view" events but few "click"/"purchase"), this degrades to O(N²).
   
   By comparison, V1's INCREASE mode is O(N × E) single-pass (where E = 
event_count, typically small), because it advances `start_row` past each 
event-0 match rather than re-scanning.
   
   In practice, V2 wins overall due to only storing matched events 
(dramatically fewer entries). But in pathological cases with many event-0 
matches per user group, this could be a bottleneck.
   
   Consider adding an optimization: if `max_level == event_count`, we can 
return early (already handled at line 374). Additionally, you could skip 
starting points where the remaining events can't possibly improve `max_level` 
(e.g., if only `event_count - max_level - 1` events remain). A more fundamental 
fix would maintain active chains in a single pass.



##########
regression-test/suites/nereids_p0/aggregate/window_funnel_v2.groovy:
##########
@@ -0,0 +1,492 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+suite("window_funnel_v2") {
+    sql "SET enable_nereids_planner=true"
+    sql "SET enable_fallback_to_original_planner=false"
+
+    // ==================== Basic DEFAULT mode tests ====================
+    sql """ DROP TABLE IF EXISTS windowfunnel_v2_test """
+    sql """
+        CREATE TABLE IF NOT EXISTS windowfunnel_v2_test (
+            xwho varchar(50) NULL COMMENT 'xwho',
+            xwhen datetime COMMENT 'xwhen',
+            xwhat int NULL COMMENT 'xwhat'
+        )
+        DUPLICATE KEY(xwho)
+        DISTRIBUTED BY HASH(xwho) BUCKETS 3
+        PROPERTIES (
+        "replication_num" = "1"
+        );
+    """
+    sql "INSERT into windowfunnel_v2_test (xwho, xwhen, xwhat) VALUES('1', 
'2022-03-12 10:41:00', 1)"
+    sql "INSERT INTO windowfunnel_v2_test (xwho, xwhen, xwhat) VALUES('1', 
'2022-03-12 13:28:02', 2)"
+    sql "INSERT INTO windowfunnel_v2_test (xwho, xwhen, xwhat) VALUES('1', 
'2022-03-12 16:15:01', 3)"
+    sql "INSERT INTO windowfunnel_v2_test (xwho, xwhen, xwhat) VALUES('1', 
'2022-03-12 19:05:04', 4)"
+
+    // window=1 second, only event1 matches (events too far apart)
+    order_qt_v2_default_small_window """
+        select
+            window_funnel(
+                1,
+                'default',
+                t.xwhen,
+                t.xwhat = 1,
+                t.xwhat = 2
+            ) AS level
+        from windowfunnel_v2_test t;
+    """
+    // window=20000 seconds, both events match
+    order_qt_v2_default_large_window """
+        select
+            window_funnel(
+                20000,
+                'default',
+                t.xwhen,
+                t.xwhat = 1,
+                t.xwhat = 2
+            ) AS level
+        from windowfunnel_v2_test t;
+    """
+
+    // ==================== DateTimeV2 precision test ====================
+    sql """ DROP TABLE IF EXISTS windowfunnel_v2_test """
+    sql """
+        CREATE TABLE IF NOT EXISTS windowfunnel_v2_test (
+            xwho varchar(50) NULL COMMENT 'xwho',
+            xwhen datetimev2(3) COMMENT 'xwhen',
+            xwhat int NULL COMMENT 'xwhat'
+        )
+        DUPLICATE KEY(xwho)
+        DISTRIBUTED BY HASH(xwho) BUCKETS 3
+        PROPERTIES (
+        "replication_num" = "1"
+        );
+    """
+    sql "INSERT into windowfunnel_v2_test (xwho, xwhen, xwhat) VALUES('1', 
'2022-03-12 10:41:00.111111', 1)"
+    sql "INSERT INTO windowfunnel_v2_test (xwho, xwhen, xwhat) VALUES('1', 
'2022-03-12 13:28:02.111111', 2)"
+    sql "INSERT INTO windowfunnel_v2_test (xwho, xwhen, xwhat) VALUES('1', 
'2022-03-12 16:15:01.111111', 3)"
+    sql "INSERT INTO windowfunnel_v2_test (xwho, xwhen, xwhat) VALUES('1', 
'2022-03-12 19:05:04.111111', 4)"
+
+    order_qt_v2_datetimev2_small """
+        select
+            window_funnel(
+                1,
+                'default',
+                t.xwhen,
+                t.xwhat = 1,
+                t.xwhat = 2
+            ) AS level
+        from windowfunnel_v2_test t;
+    """
+    order_qt_v2_datetimev2_large """
+        select
+            window_funnel(
+                20000,
+                'default',
+                t.xwhen,
+                t.xwhat = 1,
+                t.xwhat = 2
+            ) AS level
+        from windowfunnel_v2_test t;
+    """
+
+    // ==================== Multi-user default mode tests ====================
+    sql """ DROP TABLE IF EXISTS windowfunnel_v2_test """
+    sql """
+        CREATE TABLE windowfunnel_v2_test(
+            user_id BIGINT,
+            event_name VARCHAR(64),
+            event_timestamp datetime,
+            phone_brand varchar(64),
+            tab_num int
+        ) distributed by hash(user_id) buckets 3 
properties("replication_num"="1");
+    """
+    sql """
+        INSERT INTO windowfunnel_v2_test VALUES
+            (100123, '登录', '2022-05-14 10:01:00', 'HONOR', 1),
+            (100123, '访问', '2022-05-14 10:02:00', 'HONOR', 2),
+            (100123, '下单', '2022-05-14 10:04:00', "HONOR", 3),
+            (100123, '付款', '2022-05-14 10:10:00', 'HONOR', 4),
+            (100125, '登录', '2022-05-15 11:00:00', 'XIAOMI', 1),
+            (100125, '访问', '2022-05-15 11:01:00', 'XIAOMI', 2),
+            (100125, '下单', '2022-05-15 11:02:00', 'XIAOMI', 6),
+            (100126, '登录', '2022-05-15 12:00:00', 'IPHONE', 1),
+            (100126, '访问', '2022-05-15 12:01:00', 'HONOR', 2),
+            (100127, '登录', '2022-05-15 11:30:00', 'VIVO', 1),
+            (100127, '访问', '2022-05-15 11:31:00', 'VIVO', 5);
+    """
+    // 3 hour window
+    order_qt_v2_multi_user_default0 """
+        SELECT
+            user_id,
+            window_funnel(3600 * 3, "default", event_timestamp, event_name = 
'登录', event_name = '访问', event_name = '下单', event_name = '付款') AS level
+        FROM windowfunnel_v2_test
+        GROUP BY user_id
+        order BY user_id
+    """
+    // 5 minute window
+    order_qt_v2_multi_user_default1 """
+        SELECT
+            user_id,
+            window_funnel(300, "default", event_timestamp, event_name = '登录', 
event_name = '访问', event_name = '下单', event_name = '付款') AS level
+        FROM windowfunnel_v2_test
+        GROUP BY user_id
+        order BY user_id
+    """
+    // 30 second window
+    order_qt_v2_multi_user_default2 """
+        SELECT
+            user_id,
+            window_funnel(30, "default", event_timestamp, event_name = '登录', 
event_name = '访问', event_name = '下单', event_name = '付款') AS level
+        FROM windowfunnel_v2_test
+        GROUP BY user_id
+        order BY user_id
+    """
+    // complicate expressions with != condition
+    order_qt_v2_default_neq """
+        SELECT
+            user_id,
+            window_funnel(3600000000, "default", event_timestamp, event_name = 
'登录', event_name != '登陆', event_name = '下单', event_name = '付款') AS level
+        FROM windowfunnel_v2_test
+        GROUP BY user_id
+        order BY user_id;
+    """
+    // Complex filter conditions
+    order_qt_v2_default_complex """
+        SELECT
+            user_id,
+            window_funnel(3600000000, "default", event_timestamp,
+                          event_name = '登录' AND phone_brand in ('HONOR', 
'XIAOMI', 'VIVO') AND tab_num not in (4, 5),
+                          event_name = '访问' AND tab_num not in (4, 5),
+                          event_name = '下单' AND tab_num not in (6, 7),
+                          event_name = '付款') AS level
+        FROM windowfunnel_v2_test
+        GROUP BY user_id
+        order BY user_id;
+    """
+
+    // ==================== DEDUPLICATION mode tests ====================
+    sql """ DROP TABLE IF EXISTS windowfunnel_v2_test """
+    sql """
+        CREATE TABLE windowfunnel_v2_test(
+            user_id BIGINT,
+            event_name VARCHAR(64),
+            event_timestamp datetime,
+            phone_brand varchar(64),
+            tab_num int
+        ) distributed by hash(user_id) buckets 3 
properties("replication_num"="1");
+    """
+    sql """
+        INSERT INTO windowfunnel_v2_test VALUES
+            (100123, '登录', '2022-05-14 10:01:00', 'HONOR', 1),
+            (100123, '访问', '2022-05-14 10:02:00', 'HONOR', 2),
+            (100123, '下单', '2022-05-14 10:04:00', "HONOR", 4),
+            (100123, '登录', '2022-05-14 10:04:00', 'HONOR', 3),
+            (100123, '登录1', '2022-05-14 10:04:00', 'HONOR', 3),
+            (100123, '登录2', '2022-05-14 10:04:00', 'HONOR', 3),
+            (100123, '登录3', '2022-05-14 10:04:00', 'HONOR', 3),
+            (100123, '登录4', '2022-05-14 10:04:00', 'HONOR', 3),
+            (100123, '登录5', '2022-05-14 10:04:00', 'HONOR', 3),
+            (100123, '付款', '2022-05-14 10:10:00', 'HONOR', 4),
+            (100125, '登录', '2022-05-15 11:00:00', 'XIAOMI', 1),
+            (100125, '访问', '2022-05-15 11:01:00', 'XIAOMI', 2),
+            (100125, '下单', '2022-05-15 11:02:00', 'XIAOMI', 6),
+            (100126, '登录', '2022-05-15 12:00:00', 'IPHONE', 1),
+            (100126, '访问', '2022-05-15 12:01:00', 'HONOR', 2),
+            (100127, '登录', '2022-05-15 11:30:00', 'VIVO', 1),
+            (100127, '访问', '2022-05-15 11:31:00', 'VIVO', 5);
+    """
+    order_qt_v2_deduplication0 """
+        SELECT
+            user_id,
+            window_funnel(3600, "deduplication", event_timestamp, event_name = 
'登录', event_name = '访问', event_name = '下单', event_name = '付款') AS level
+        FROM windowfunnel_v2_test
+        GROUP BY user_id
+        order BY user_id
+    """
+
+    // Test dedup with duplicate event2 (访问)
+    sql """ truncate table windowfunnel_v2_test; """
+    sql """
+        INSERT INTO windowfunnel_v2_test VALUES
+            (100123, '登录', '2022-05-14 10:01:00', 'HONOR', 1),
+            (100123, '访问', '2022-05-14 10:02:00', 'HONOR', 2),
+            (100123, '下单', '2022-05-14 10:04:00', "HONOR", 4),
+            (100123, '登录1', '2022-05-14 10:04:00', 'HONOR', 3),
+            (100123, '访问', '2022-05-14 10:04:00', 'HONOR', 3),
+            (100123, '付款', '2022-05-14 10:10:00', 'HONOR', 4),
+            (100125, '登录', '2022-05-15 11:00:00', 'XIAOMI', 1),
+            (100125, '访问', '2022-05-15 11:01:00', 'XIAOMI', 2),
+            (100125, '下单', '2022-05-15 11:02:00', 'XIAOMI', 6),
+            (100126, '登录', '2022-05-15 12:00:00', 'IPHONE', 1),
+            (100126, '访问', '2022-05-15 12:01:00', 'HONOR', 2),
+            (100127, '登录', '2022-05-15 11:30:00', 'VIVO', 1),
+            (100127, '访问', '2022-05-15 11:31:00', 'VIVO', 5);
+    """
+    order_qt_v2_deduplication1 """
+        SELECT
+            user_id,
+            window_funnel(3600, "deduplication", event_timestamp, event_name = 
'登录', event_name = '访问', event_name = '下单', event_name = '付款') AS level
+        FROM windowfunnel_v2_test
+        GROUP BY user_id
+        order BY user_id
+    """
+
+    // ==================== FIXED mode tests (StarRocks-style semantics) 
====================
+    sql """ DROP TABLE IF EXISTS windowfunnel_v2_test """
+    sql """
+        CREATE TABLE windowfunnel_v2_test(
+            user_id BIGINT,
+            event_name VARCHAR(64),
+            event_timestamp datetime,
+            phone_brand varchar(64),
+            tab_num int
+        ) distributed by hash(user_id) buckets 3 
properties("replication_num"="1");
+    """
+    sql """
+        INSERT INTO windowfunnel_v2_test VALUES
+            (100123, '登录', '2022-05-14 10:01:00', 'HONOR', 1),
+            (100123, '访问', '2022-05-14 10:02:00', 'HONOR', 2),
+            (100123, '下单', '2022-05-14 10:04:00', "HONOR", 4),
+            (100123, '付款', '2022-05-14 10:10:00', 'HONOR', 4),
+            (100125, '登录', '2022-05-15 11:00:00', 'XIAOMI', 1),
+            (100125, '访问', '2022-05-15 11:01:00', 'XIAOMI', 2),
+            (100125, '下单', '2022-05-15 11:02:00', 'XIAOMI', 6),
+            (100126, '登录', '2022-05-15 12:00:00', 'IPHONE', 1),
+            (100126, '访问', '2022-05-15 12:01:00', 'HONOR', 2),
+            (100127, '登录', '2022-05-15 11:30:00', 'VIVO', 1),
+            (100127, '访问', '2022-05-15 11:31:00', 'VIVO', 5);
+    """
+    // Note: In V2 fixed mode (StarRocks-style), unmatched rows don't break 
the chain.
+    // The chain only breaks when a matched event's predecessor level hasn't 
been matched.
+    order_qt_v2_fixed0 """
+        SELECT
+            user_id,
+            window_funnel(3600, "fixed", event_timestamp, event_name = '登录', 
event_name = '访问', event_name = '下单', event_name = '付款') AS level
+        FROM windowfunnel_v2_test
+        GROUP BY user_id
+        order BY user_id
+    """
+
+    // Test fixed mode where event order in conditions doesn't match data order
+    sql """ truncate table windowfunnel_v2_test; """
+    sql """
+        INSERT INTO windowfunnel_v2_test VALUES
+            (100123, '登录', '2022-05-14 10:01:00', 'HONOR', 1),
+            (100123, '访问', '2022-05-14 10:02:00', 'HONOR', 2),
+            (100123, '下单', '2022-05-14 10:04:00', "HONOR", 4),
+            (100123, '付款', '2022-05-14 10:10:00', 'HONOR', 4);
+    """
+    order_qt_v2_fixed_reorder """
+        select
+            window_funnel(
+                20000,
+                'fixed',
+                t.event_timestamp,
+                t.event_name = '登录',
+                t.event_name = '访问',
+                t.event_name = '付款',
+                t.event_name = '下单'
+            ) AS level
+        from windowfunnel_v2_test t;
+    """
+
+    // ==================== INCREASE mode tests ====================
+    sql """ DROP TABLE IF EXISTS windowfunnel_v2_test """
+    sql """
+        CREATE TABLE windowfunnel_v2_test(
+            user_id BIGINT,
+            event_name VARCHAR(64),
+            event_timestamp datetime,
+            phone_brand varchar(64),
+            tab_num int
+        ) distributed by hash(user_id) buckets 3 
properties("replication_num"="1");
+    """
+    sql """
+        INSERT INTO windowfunnel_v2_test VALUES
+            (100123, '登录', '2022-05-14 10:01:00', 'HONOR', 1),
+            (100123, '访问', '2022-05-14 10:02:00', 'HONOR', 2),
+            (100123, '下单', '2022-05-14 10:04:00', "HONOR", 4),
+            (100123, '付款', '2022-05-14 10:04:00', 'HONOR', 4),
+            (100125, '登录', '2022-05-15 11:00:00', 'XIAOMI', 1),
+            (100125, '访问', '2022-05-15 11:01:00', 'XIAOMI', 2),
+            (100125, '下单', '2022-05-15 11:02:00', 'XIAOMI', 6),
+            (100126, '登录', '2022-05-15 12:00:00', 'IPHONE', 1),
+            (100126, '访问', '2022-05-15 12:01:00', 'HONOR', 2),
+            (100127, '登录', '2022-05-15 11:30:00', 'VIVO', 1),
+            (100127, '访问', '2022-05-15 11:31:00', 'VIVO', 5);
+    """
+    order_qt_v2_increase0 """
+        SELECT
+            user_id,
+            window_funnel(3600, "increase", event_timestamp, event_name = 
'登录', event_name = '访问', event_name = '下单', event_name = '付款') AS level
+        FROM windowfunnel_v2_test
+        GROUP BY user_id
+        order BY user_id
+    """
+
+    // Test increase mode with same-timestamp events
+    sql """ DROP TABLE IF EXISTS windowfunnel_v2_test """
+    sql """
+        CREATE TABLE IF NOT EXISTS windowfunnel_v2_test (
+            xwho varchar(50) NULL COMMENT 'xwho',
+            xwhen datetimev2(3) COMMENT 'xwhen',
+            xwhat int NULL COMMENT 'xwhat'
+        )
+        DUPLICATE KEY(xwho)
+        DISTRIBUTED BY HASH(xwho) BUCKETS 3
+        PROPERTIES (
+        "replication_num" = "1"
+        );
+    """
+    sql "INSERT into windowfunnel_v2_test (xwho, xwhen, xwhat) VALUES('1', 
'2022-03-12 10:41:00.111111', 1)"
+    sql "INSERT INTO windowfunnel_v2_test (xwho, xwhen, xwhat) VALUES('1', 
'2022-03-12 13:28:02.111111', 2)"
+    sql "INSERT INTO windowfunnel_v2_test (xwho, xwhen, xwhat) VALUES('1', 
'2022-03-12 13:28:02.111111', 3)"
+    sql "INSERT INTO windowfunnel_v2_test (xwho, xwhen, xwhat) VALUES('1', 
'2022-03-12 15:05:04.111111', 4)"
+    order_qt_v2_increase_same_ts """
+        select
+            window_funnel(
+                20000,
+                'increase',
+                t.xwhen,
+                t.xwhat = 1,
+                t.xwhat = 2,
+                t.xwhat = 3,
+                t.xwhat = 4
+            ) AS level
+        from windowfunnel_v2_test t;
+    """
+
+    // ==================== V2 FIXED mode key difference from V1 
====================
+    // In V1, unmatched rows (rows that match no event condition) break the 
chain in FIXED mode.
+    // In V2, unmatched rows are not stored, so only matched events with level 
jumps break the chain.
+    // This test shows the behavioral difference.
+    sql """ DROP TABLE IF EXISTS windowfunnel_v2_test """
+    sql """
+        CREATE TABLE windowfunnel_v2_test(
+            user_id BIGINT,
+            event_name VARCHAR(64),
+            event_timestamp datetime,
+            phone_brand varchar(64),
+            tab_num int
+        ) distributed by hash(user_id) buckets 3 
properties("replication_num"="1");
+    """
+    sql """
+        INSERT INTO windowfunnel_v2_test VALUES
+            (100123, '登录', '2022-05-14 10:01:00', 'HONOR', 1),
+            (100123, '访问', '2022-05-14 10:02:00', 'HONOR', 2),
+            (100123, '登录2', '2022-05-14 10:03:00', 'HONOR', 3),
+            (100123, '下单', '2022-05-14 10:04:00', "HONOR", 4),
+            (100123, '付款', '2022-05-14 10:10:00', 'HONOR', 4);
+    """
+    // V2 fixed mode: 登录2 doesn't match any condition, so it's not stored.
+    // The chain 登录->访问->下单->付款 is unbroken because there are no level jumps.
+    // V1 would return 2 here (登录2 physically breaks adjacency), V2 returns 4.
+    order_qt_v2_fixed_vs_v1 """
+        SELECT
+            user_id,
+            window_funnel(3600, "fixed", event_timestamp, event_name = '登录', 
event_name = '访问', event_name = '下单', event_name = '付款') AS level
+        FROM windowfunnel_v2_test
+        GROUP BY user_id
+        order BY user_id
+    """
+
+    // ==================== Test using window_funnel_v2 explicit name 
====================
+    order_qt_v2_explicit_name """
+        SELECT
+            user_id,
+            window_funnel_v2(3600, "fixed", event_timestamp, event_name = 
'登录', event_name = '访问', event_name = '下单', event_name = '付款') AS level
+        FROM windowfunnel_v2_test
+        GROUP BY user_id
+        order BY user_id
+    """
+
+    // ==================== INCREASE mode: event-0 re-occurrence bug fix 
====================
+    // Regression test for the bug where a later event-0 overwrites 
events_timestamp[0]
+    // and breaks the INCREASE mode strict-increase check for an already-valid 
chain.
+    sql """ DROP TABLE IF EXISTS windowfunnel_v2_test """
+    sql """
+        CREATE TABLE IF NOT EXISTS windowfunnel_v2_test (
+            xwho varchar(50) NULL COMMENT 'xwho',
+            xwhen datetimev2(3) COMMENT 'xwhen',
+            xwhat int NULL COMMENT 'xwhat'
+        )
+        DUPLICATE KEY(xwho)
+        DISTRIBUTED BY HASH(xwho) BUCKETS 3
+        PROPERTIES (
+        "replication_num" = "1"
+        );
+    """
+    // Case 1: Old chain (from t=0) is valid and completes all 3 levels.
+    // The duplicate event-0 at t=50 should not destroy it.
+    sql "INSERT into windowfunnel_v2_test (xwho, xwhen, xwhat) VALUES('1', 
'2022-03-12 10:00:00.000', 1)"
+    sql "INSERT INTO windowfunnel_v2_test (xwho, xwhen, xwhat) VALUES('1', 
'2022-03-12 10:00:50.000', 1)"
+    sql "INSERT INTO windowfunnel_v2_test (xwho, xwhen, xwhat) VALUES('1', 
'2022-03-12 10:00:50.000', 2)"
+    sql "INSERT INTO windowfunnel_v2_test (xwho, xwhen, xwhat) VALUES('1', 
'2022-03-12 10:01:00.000', 3)"
+    order_qt_v2_increase_event0_overwrite """
+        select
+            window_funnel(
+                100,
+                'increase',
+                t.xwhen,
+                t.xwhat = 1,
+                t.xwhat = 2,
+                t.xwhat = 3
+            ) AS level
+        from windowfunnel_v2_test t;
+    """
+
+    // Case 2: Old chain can't complete (window too small), new chain is 
better.
+    sql """ truncate table windowfunnel_v2_test; """
+    sql "INSERT into windowfunnel_v2_test (xwho, xwhen, xwhat) VALUES('1', 
'2022-03-12 10:00:00.000', 1)"
+    sql "INSERT INTO windowfunnel_v2_test (xwho, xwhen, xwhat) VALUES('1', 
'2022-03-12 10:00:50.000', 1)"
+    sql "INSERT INTO windowfunnel_v2_test (xwho, xwhen, xwhat) VALUES('1', 
'2022-03-12 10:00:51.000', 2)"
+    sql "INSERT INTO windowfunnel_v2_test (xwho, xwhen, xwhat) VALUES('1', 
'2022-03-12 10:00:52.000', 3)"
+    order_qt_v2_increase_new_chain_better """
+        select
+            window_funnel(
+                5,
+                'increase',
+                t.xwhen,
+                t.xwhat = 1,
+                t.xwhat = 2,
+                t.xwhat = 3
+            ) AS level
+        from windowfunnel_v2_test t;
+    """
+
+    // Case 3: Old chain is better (reached level 2), new chain only reaches 
level 1.
+    sql """ truncate table windowfunnel_v2_test; """
+    sql "INSERT into windowfunnel_v2_test (xwho, xwhen, xwhat) VALUES('1', 
'2022-03-12 10:00:00.000', 1)"
+    sql "INSERT INTO windowfunnel_v2_test (xwho, xwhen, xwhat) VALUES('1', 
'2022-03-12 10:00:10.000', 2)"
+    sql "INSERT INTO windowfunnel_v2_test (xwho, xwhen, xwhat) VALUES('1', 
'2022-03-12 10:00:50.000', 1)"
+    order_qt_v2_increase_old_chain_better """
+        select
+            window_funnel(
+                100,
+                'increase',
+                t.xwhen,
+                t.xwhat = 1,
+                t.xwhat = 2,
+                t.xwhat = 3
+            ) AS level
+        from windowfunnel_v2_test t;
+    """
+
+    sql """ DROP TABLE IF EXISTS windowfunnel_v2_test """

Review Comment:
   **[Test Standard]** Per Doris regression test standards: "After completing 
tests, do not drop tables; instead drop tables before using them in tests, to 
preserve the environment for debugging." This final `DROP TABLE` should be 
removed. Each test section already drops before creating, which is correct.



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]


Reply via email to