Copilot commented on code in PR #3229:
URL: https://github.com/apache/brpc/pull/3229#discussion_r2868938106


##########
src/brpc/backup_request_policy.cpp:
##########
@@ -0,0 +1,150 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include "brpc/backup_request_policy.h"
+
+#include "butil/logging.h"
+#include "bvar/reducer.h"
+#include "bvar/window.h"
+#include "butil/atomicops.h"
+#include "butil/time.h"
+
+namespace brpc {
+
+// Standalone statistics module for tracking backup/total request ratio
+// within a sliding time window. Each instance schedules two bvar::Window
+// sampler tasks; keep this in mind for high channel-count deployments.
+class BackupRateLimiter {
+public:
+    BackupRateLimiter(double max_backup_ratio,
+                      int window_size_seconds,
+                      int update_interval_seconds)
+        : _max_backup_ratio(max_backup_ratio)
+        , _update_interval_us(update_interval_seconds * 1000000LL)
+        , _total_count()
+        , _backup_count()
+        , _total_window(&_total_count, window_size_seconds)
+        , _backup_window(&_backup_count, window_size_seconds)
+        , _cached_ratio(0.0)
+        , _last_update_us(0) {
+    }
+
+    // All atomic operations use relaxed ordering intentionally.
+    // This is best-effort rate limiting: a slightly stale ratio is
+    // acceptable for approximate throttling.
+    bool ShouldAllow() const {
+        const int64_t now_us = butil::cpuwide_time_us();
+        int64_t last_us = _last_update_us.load(butil::memory_order_relaxed);
+        double ratio = _cached_ratio.load(butil::memory_order_relaxed);
+
+        if (now_us - last_us >= _update_interval_us) {
+            if (_last_update_us.compare_exchange_strong(
+                    last_us, now_us, butil::memory_order_relaxed)) {
+                int64_t total = _total_window.get_value();
+                int64_t backup = _backup_window.get_value();
+                ratio = (total > 0) ? static_cast<double>(backup) / total : 
0.0;
+                _cached_ratio.store(ratio, butil::memory_order_relaxed);
+            }
+        }
+
+        // max_backup_ratio >= 1.0 means no limit (ratio cannot exceed 1.0).
+        bool allow = _max_backup_ratio >= 1.0 || ratio < _max_backup_ratio;
+        if (allow) {
+            // Count backup decisions immediately for faster feedback
+            // during latency spikes (before RPCs complete).
+            _backup_count << 1;
+        }
+        return allow;
+    }
+
+    void OnRPCEnd(const Controller* /*controller*/) {
+        // Count all completed RPCs. Backup decisions are counted
+        // in ShouldAllow() at decision time for faster feedback.
+        _total_count << 1;
+    }
+
+private:
+    double  _max_backup_ratio;
+    int64_t _update_interval_us;
+
+    bvar::Adder<int64_t>            _total_count;
+    mutable bvar::Adder<int64_t>    _backup_count;
+    bvar::Window<bvar::Adder<int64_t>> _total_window;
+    bvar::Window<bvar::Adder<int64_t>> _backup_window;
+
+    mutable butil::atomic<double>  _cached_ratio;
+    mutable butil::atomic<int64_t> _last_update_us;
+};
+
+// Internal BackupRequestPolicy that composes a BackupRateLimiter
+// for ratio-based suppression.
+class RateLimitedBackupPolicy : public BackupRequestPolicy {
+public:
+    RateLimitedBackupPolicy(int32_t backup_request_ms,
+                            double max_backup_ratio,
+                            int window_size_seconds,
+                            int update_interval_seconds)
+        : _backup_request_ms(backup_request_ms)
+        , _rate_limiter(max_backup_ratio, window_size_seconds,
+                        update_interval_seconds) {
+    }
+
+    int32_t GetBackupRequestMs(const Controller* /*controller*/) const 
override {
+        return _backup_request_ms;
+    }
+
+    bool DoBackup(const Controller* /*controller*/) const override {
+        return _rate_limiter.ShouldAllow();
+    }
+
+    void OnRPCEnd(const Controller* controller) override {
+        _rate_limiter.OnRPCEnd(controller);
+    }
+
+private:
+    int32_t _backup_request_ms;
+    BackupRateLimiter _rate_limiter;
+};
+
+BackupRequestPolicy* CreateRateLimitedBackupPolicy(
+    const RateLimitedBackupPolicyOptions& options) {
+    if (options.backup_request_ms < 0) {
+        LOG(ERROR) << "Invalid backup_request_ms=" << options.backup_request_ms
+                   << ", must be >= 0";
+        return NULL;
+    }
+    if (options.max_backup_ratio <= 0 || options.max_backup_ratio > 1.0) {
+        LOG(ERROR) << "Invalid max_backup_ratio=" << options.max_backup_ratio
+                   << ", must be in (0, 1]";
+        return NULL;
+    }

Review Comment:
   PR description states `max_ratio > 1.0` is clamped to 1.0 with LOG(WARNING), 
but CreateRateLimitedBackupPolicy currently treats `max_backup_ratio > 1.0` as 
an error and returns NULL. Either update the PR description/docs to match this 
behavior, or implement the documented clamping behavior here.



##########
src/brpc/backup_request_policy.cpp:
##########
@@ -0,0 +1,150 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include "brpc/backup_request_policy.h"
+
+#include "butil/logging.h"
+#include "bvar/reducer.h"
+#include "bvar/window.h"
+#include "butil/atomicops.h"
+#include "butil/time.h"
+
+namespace brpc {
+
+// Standalone statistics module for tracking backup/total request ratio
+// within a sliding time window. Each instance schedules two bvar::Window
+// sampler tasks; keep this in mind for high channel-count deployments.
+class BackupRateLimiter {
+public:
+    BackupRateLimiter(double max_backup_ratio,
+                      int window_size_seconds,
+                      int update_interval_seconds)
+        : _max_backup_ratio(max_backup_ratio)
+        , _update_interval_us(update_interval_seconds * 1000000LL)
+        , _total_count()
+        , _backup_count()
+        , _total_window(&_total_count, window_size_seconds)
+        , _backup_window(&_backup_count, window_size_seconds)
+        , _cached_ratio(0.0)
+        , _last_update_us(0) {
+    }
+
+    // All atomic operations use relaxed ordering intentionally.
+    // This is best-effort rate limiting: a slightly stale ratio is
+    // acceptable for approximate throttling.
+    bool ShouldAllow() const {
+        const int64_t now_us = butil::cpuwide_time_us();
+        int64_t last_us = _last_update_us.load(butil::memory_order_relaxed);
+        double ratio = _cached_ratio.load(butil::memory_order_relaxed);
+
+        if (now_us - last_us >= _update_interval_us) {
+            if (_last_update_us.compare_exchange_strong(
+                    last_us, now_us, butil::memory_order_relaxed)) {
+                int64_t total = _total_window.get_value();
+                int64_t backup = _backup_window.get_value();
+                ratio = (total > 0) ? static_cast<double>(backup) / total : 
0.0;
+                _cached_ratio.store(ratio, butil::memory_order_relaxed);
+            }
+        }
+
+        // max_backup_ratio >= 1.0 means no limit (ratio cannot exceed 1.0).
+        bool allow = _max_backup_ratio >= 1.0 || ratio < _max_backup_ratio;
+        if (allow) {
+            // Count backup decisions immediately for faster feedback
+            // during latency spikes (before RPCs complete).
+            _backup_count << 1;
+        }
+        return allow;
+    }
+
+    void OnRPCEnd(const Controller* /*controller*/) {
+        // Count all completed RPCs. Backup decisions are counted
+        // in ShouldAllow() at decision time for faster feedback.
+        _total_count << 1;
+    }
+
+private:
+    double  _max_backup_ratio;
+    int64_t _update_interval_us;
+
+    bvar::Adder<int64_t>            _total_count;
+    mutable bvar::Adder<int64_t>    _backup_count;
+    bvar::Window<bvar::Adder<int64_t>> _total_window;
+    bvar::Window<bvar::Adder<int64_t>> _backup_window;
+
+    mutable butil::atomic<double>  _cached_ratio;
+    mutable butil::atomic<int64_t> _last_update_us;
+};
+
+// Internal BackupRequestPolicy that composes a BackupRateLimiter
+// for ratio-based suppression.
+class RateLimitedBackupPolicy : public BackupRequestPolicy {
+public:
+    RateLimitedBackupPolicy(int32_t backup_request_ms,
+                            double max_backup_ratio,
+                            int window_size_seconds,
+                            int update_interval_seconds)
+        : _backup_request_ms(backup_request_ms)
+        , _rate_limiter(max_backup_ratio, window_size_seconds,
+                        update_interval_seconds) {
+    }
+
+    int32_t GetBackupRequestMs(const Controller* /*controller*/) const 
override {
+        return _backup_request_ms;
+    }
+
+    bool DoBackup(const Controller* /*controller*/) const override {
+        return _rate_limiter.ShouldAllow();
+    }
+
+    void OnRPCEnd(const Controller* controller) override {
+        _rate_limiter.OnRPCEnd(controller);
+    }
+
+private:
+    int32_t _backup_request_ms;
+    BackupRateLimiter _rate_limiter;
+};
+
+BackupRequestPolicy* CreateRateLimitedBackupPolicy(
+    const RateLimitedBackupPolicyOptions& options) {
+    if (options.backup_request_ms < 0) {
+        LOG(ERROR) << "Invalid backup_request_ms=" << options.backup_request_ms
+                   << ", must be >= 0";
+        return NULL;
+    }
+    if (options.max_backup_ratio <= 0 || options.max_backup_ratio > 1.0) {
+        LOG(ERROR) << "Invalid max_backup_ratio=" << options.max_backup_ratio
+                   << ", must be in (0, 1]";
+        return NULL;
+    }
+    if (options.window_size_seconds < 1 || options.window_size_seconds > 3600) 
{
+        LOG(ERROR) << "Invalid window_size_seconds=" << 
options.window_size_seconds
+                   << ", must be in [1, 3600]";
+        return NULL;
+    }
+    if (options.update_interval_seconds < 1) {
+        LOG(ERROR) << "Invalid update_interval_seconds="
+                   << options.update_interval_seconds << ", must be >= 1";
+        return NULL;
+    }
+    return new RateLimitedBackupPolicy(
+        options.backup_request_ms, options.max_backup_ratio,
+        options.window_size_seconds, options.update_interval_seconds);
+}

Review Comment:
   This PR introduces a new public factory + rate-limiting behavior, but there 
are no unit tests in this change set to validate (1) ratio enforcement over 
time windows, (2) the `total==0`/cold-start behavior, and (3) invalid-parameter 
handling returning NULL. The repository already has backup-request-policy tests 
(e.g., in brpc_channel_unittest.cpp); adding targeted tests for 
CreateRateLimitedBackupPolicy/RateLimitedBackupPolicy would help prevent 
regressions.



##########
src/brpc/backup_request_policy.cpp:
##########
@@ -0,0 +1,150 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include "brpc/backup_request_policy.h"
+
+#include "butil/logging.h"
+#include "bvar/reducer.h"
+#include "bvar/window.h"
+#include "butil/atomicops.h"
+#include "butil/time.h"
+
+namespace brpc {
+
+// Standalone statistics module for tracking backup/total request ratio
+// within a sliding time window. Each instance schedules two bvar::Window
+// sampler tasks; keep this in mind for high channel-count deployments.
+class BackupRateLimiter {
+public:
+    BackupRateLimiter(double max_backup_ratio,
+                      int window_size_seconds,
+                      int update_interval_seconds)
+        : _max_backup_ratio(max_backup_ratio)
+        , _update_interval_us(update_interval_seconds * 1000000LL)
+        , _total_count()
+        , _backup_count()
+        , _total_window(&_total_count, window_size_seconds)
+        , _backup_window(&_backup_count, window_size_seconds)
+        , _cached_ratio(0.0)
+        , _last_update_us(0) {
+    }
+
+    // All atomic operations use relaxed ordering intentionally.
+    // This is best-effort rate limiting: a slightly stale ratio is
+    // acceptable for approximate throttling.
+    bool ShouldAllow() const {
+        const int64_t now_us = butil::cpuwide_time_us();
+        int64_t last_us = _last_update_us.load(butil::memory_order_relaxed);
+        double ratio = _cached_ratio.load(butil::memory_order_relaxed);
+
+        if (now_us - last_us >= _update_interval_us) {
+            if (_last_update_us.compare_exchange_strong(
+                    last_us, now_us, butil::memory_order_relaxed)) {
+                int64_t total = _total_window.get_value();
+                int64_t backup = _backup_window.get_value();
+                ratio = (total > 0) ? static_cast<double>(backup) / total : 
0.0;

Review Comment:
   BackupRateLimiter::ShouldAllow() computes `ratio` as 0 when `total == 0`, 
even if `backup > 0`. During long latency spikes where no RPCs complete within 
the window, this makes the limiter treat the ratio as 0 and effectively allow 
unlimited backups—exactly the storm scenario this policy is meant to prevent. 
Consider making the `total == 0` branch conservative when backups have occurred 
in-window (e.g., treat as 1.0 / deny further backups until totals appear), 
while still allowing the true cold-start case (total==0 && backup==0).
   ```suggestion
                   if (total > 0) {
                       ratio = static_cast<double>(backup) / total;
                   } else if (backup > 0) {
                       // Be conservative when backups have been issued but
                       // no RPC completions are observed in-window. Treat the
                       // ratio as saturated to prevent unlimited backup storms
                       // during latency spikes.
                       ratio = 1.0;
                   } else {
                       // True cold-start: no traffic observed yet.
                       ratio = 0.0;
                   }
   ```



##########
src/brpc/backup_request_policy.h:
##########
@@ -38,6 +38,39 @@ class BackupRequestPolicy {
     virtual void OnRPCEnd(const Controller* controller) = 0;
 };
 
+// Options for CreateRateLimitedBackupPolicy().
+// All fields have defaults matching the recommended starting values.
+struct RateLimitedBackupPolicyOptions {
+    // Time in milliseconds after which a backup request is sent if the RPC
+    // has not completed. Must be >= 0.
+    // Default: 0
+    int32_t backup_request_ms = 0;

Review Comment:
   RateLimitedBackupPolicyOptions defaults `backup_request_ms` to 0, which 
would cause immediate backup requests if a caller constructs options and only 
changes `max_backup_ratio` (a plausible mistake, since the struct provides 
defaults). This is risky because it can double traffic unexpectedly. Consider 
using a safer default (e.g., match ChannelOptions default of -1/disabled, or 
require callers to set it explicitly) and update validation accordingly.
   ```suggestion
       // has not completed. Must be >= -1. A value of -1 disables backup
       // requests.
       // Default: -1 (disabled)
       int32_t backup_request_ms = -1;
   ```



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to