Copilot commented on code in PR #3229:
URL: https://github.com/apache/brpc/pull/3229#discussion_r2852485840


##########
src/brpc/backup_request_policy.cpp:
##########
@@ -0,0 +1,175 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include "brpc/backup_request_policy.h"
+
+#include <gflags/gflags.h>
+#include "brpc/reloadable_flags.h"
+#include "bvar/reducer.h"
+#include "bvar/window.h"
+#include "butil/atomicops.h"
+#include "butil/time.h"
+
+namespace brpc {
+
+DEFINE_double(backup_request_max_ratio, -1,
+    "Maximum ratio of backup requests to total requests. "
+    "Value in (0, 1]. -1 means no limit (default). Can be overridden "
+    "per-channel via ChannelOptions.backup_request_max_ratio. "
+    "Note: takes effect at Channel::Init() time; changing this flag "
+    "at runtime does not affect already-created channels.");
+
+static bool validate_backup_request_max_ratio(const char*, double v) {
+    if (v <= 0) return true;  // non-positive means disabled
+    if (v <= 1.0) return true;
+    LOG(ERROR) << "Invalid backup_request_max_ratio=" << v
+               << ", must be <= 0 (disabled) or in (0, 1]";
+    return false;
+}
+BRPC_VALIDATE_GFLAG(backup_request_max_ratio,
+                     validate_backup_request_max_ratio);
+
+DEFINE_int32(backup_request_ratio_window_size_s, 10,
+    "Window size in seconds for computing the backup request ratio. "
+    "Must be >= 1.");
+
+static bool validate_backup_request_ratio_window_size_s(
+        const char*, int32_t v) {
+    if (v >= 1) return true;
+    LOG(ERROR) << "Invalid backup_request_ratio_window_size_s=" << v
+               << ", must be >= 1";
+    return false;
+}
+BRPC_VALIDATE_GFLAG(backup_request_ratio_window_size_s,
+                     validate_backup_request_ratio_window_size_s);
+
+DEFINE_int32(backup_request_ratio_update_interval_s, 5,
+    "Interval in seconds between ratio cache updates. Must be >= 1.");
+
+static bool validate_backup_request_ratio_update_interval_s(
+        const char*, int32_t v) {
+    if (v >= 1) return true;
+    LOG(ERROR) << "Invalid backup_request_ratio_update_interval_s=" << v
+               << ", must be >= 1";
+    return false;
+}
+BRPC_VALIDATE_GFLAG(backup_request_ratio_update_interval_s,
+                     validate_backup_request_ratio_update_interval_s);
+
+// Standalone statistics module for tracking backup/total request ratio
+// within a sliding time window.
+class BackupRateLimiter {
+public:
+    BackupRateLimiter(double max_backup_ratio,
+                      int window_size_seconds,
+                      int update_interval_seconds)
+        : _max_backup_ratio(max_backup_ratio)
+        , _update_interval_us(update_interval_seconds * 1000000LL)
+        , _total_window(&_total_count, window_size_seconds)
+        , _backup_window(&_backup_count, window_size_seconds)
+        , _cached_ratio(0.0)
+        , _last_update_us(0) {
+    }
+
+    // All atomic operations use relaxed ordering intentionally.
+    // This is best-effort rate limiting: a slightly stale ratio is
+    // acceptable for approximate throttling.
+    bool ShouldAllow() const {
+        const int64_t now_us = butil::cpuwide_time_us();
+        int64_t last_us = _last_update_us.load(butil::memory_order_relaxed);
+        double ratio = _cached_ratio.load(butil::memory_order_relaxed);
+
+        if (now_us - last_us >= _update_interval_us) {
+            if (_last_update_us.compare_exchange_strong(
+                    last_us, now_us, butil::memory_order_relaxed)) {
+                int64_t total = _total_window.get_value();
+                int64_t backup = _backup_window.get_value();
+                // Fall back to cumulative counts when the window has no
+                // sampled data yet (cold-start within the first few seconds).
+                if (total <= 0) {
+                    total = _total_count.get_value();
+                    backup = _backup_count.get_value();
+                }
+                ratio = (total > 0) ? static_cast<double>(backup) / total : 
0.0;
+                _cached_ratio.store(ratio, butil::memory_order_relaxed);
+            }
+        }
+
+        return ratio < _max_backup_ratio;
+    }

Review Comment:
   The policy uses `return ratio < _max_backup_ratio;`. With `max_backup_ratio 
== 1.0` (explicitly allowed by the docs), this will still suppress backups once 
the observed ratio reaches 1.0, even though the ratio cannot exceed 1.0 in 
normal operation (backup <= total). That makes `1.0` unexpectedly act like a 
limiter.
   
   Consider special-casing `max_backup_ratio >= 1.0` to always allow, or 
adjusting the comparison/semantics (and comments) so that `1.0` truly means “no 
limit”.



##########
src/brpc/channel.h:
##########
@@ -116,11 +116,22 @@ struct ChannelOptions {
     // Default: NULL
     const Authenticator* auth;
 
+    // Maximum ratio of backup requests to total requests within a sliding
+    // time window. When the ratio reaches or exceeds this value, backup
+    // requests are suppressed. Value in (0, 1]. -1 means no limit.
+    // Only effective when backup_request_ms >= 0 and backup_request_policy
+    // is NULL (i.e. no custom policy). When effective, an internal
+    // rate-limited BackupRequestPolicy is created and used automatically.
+    // Default: -1 (no limit, same as FLAGS_backup_request_max_ratio)
+    double backup_request_max_ratio;
+
     // Customize the backup request time and whether to send backup request.
-    // Priority: `backup_request_policy' > `backup_request_ms'.
+    // Priority: `backup_request_policy' > `backup_request_max_ratio' > 
`backup_request_ms'.
     // Overridable by Controller.set_backup_request_ms() or
     // Controller.set_backup_request_policy().
-    // This object is NOT owned by channel and should remain valid when 
channel is used.
+    // When user-supplied, this object is NOT owned by channel and should
+    // remain valid during channel's lifetime. When backup_request_max_ratio
+    // creates an internal policy, that policy IS owned by channel.

Review Comment:
   This comment says backup behavior is overridable by 
`Controller.set_backup_request_ms()`, but once an internal 
`BackupRequestPolicy` is auto-created (via `backup_request_max_ratio`), 
`Controller::backup_request_ms()` will take the value from the policy and 
ignore `_backup_request_ms`. In practice, per-RPC `set_backup_request_ms()` 
will no longer have any effect unless the caller also overrides the policy.
   
   Either update the documentation to reflect this, or consider making the 
internal policy’s `GetBackupRequestMs()` respect a controller-level override if 
that behavior is required.



##########
test/brpc_channel_unittest.cpp:
##########
@@ -2803,6 +2806,104 @@ TEST_F(ChannelTest, backup_request_policy) {
     }
 }
 
+TEST_F(ChannelTest, backup_request_rate_limited) {
+    ASSERT_EQ(0, StartAccept(_ep));
+
+    // Test 1: Policy is created when backup_request_max_ratio > 0
+    {
+        brpc::ChannelOptions opt;
+        opt.backup_request_ms = 10;
+        opt.backup_request_max_ratio = 0.3;
+        brpc::Channel channel;
+        ASSERT_EQ(0, channel.Init(_ep, &opt));
+        ASSERT_TRUE(channel.options().backup_request_policy != NULL);
+    }
+
+    // Test 2: No policy when ratio is disabled (-1)
+    {
+        brpc::ChannelOptions opt;
+        opt.backup_request_ms = 10;
+        opt.backup_request_max_ratio = -1;
+        brpc::Channel channel;
+        ASSERT_EQ(0, channel.Init(_ep, &opt));
+        ASSERT_TRUE(channel.options().backup_request_policy == NULL);
+    }
+
+    // Test 3: Custom policy is preserved, not replaced
+    {
+        brpc::ChannelOptions opt;
+        opt.backup_request_ms = 10;
+        opt.backup_request_max_ratio = 0.3;
+        opt.backup_request_policy = &_backup_request_policy;
+        brpc::Channel channel;
+        ASSERT_EQ(0, channel.Init(_ep, &opt));
+        ASSERT_EQ(&_backup_request_policy,
+                  channel.options().backup_request_policy);
+    }
+
+    // Test 4: No policy when backup_request_ms < 0 (backup disabled)
+    {
+        brpc::ChannelOptions opt;
+        opt.backup_request_ms = -1;
+        opt.backup_request_max_ratio = 0.3;
+        brpc::Channel channel;
+        ASSERT_EQ(0, channel.Init(_ep, &opt));
+        ASSERT_TRUE(channel.options().backup_request_policy == NULL);
+    }
+
+    // Test 5: Functional — rate limiting reduces backup requests.
+    // Use short update interval so ratio refreshes on every DoBackup call.
+    // Save/restore gflags outside the inner scope so cleanup always runs.
+    const int32_t saved_interval =
+        brpc::FLAGS_backup_request_ratio_update_interval_s;
+    const int32_t saved_window =
+        brpc::FLAGS_backup_request_ratio_window_size_s;
+    brpc::FLAGS_backup_request_ratio_update_interval_s = 1;
+    brpc::FLAGS_backup_request_ratio_window_size_s = 2;

Review Comment:
   This test mutates process-wide gflags 
(`FLAGS_backup_request_ratio_update_interval_s` / `_window_size_s`) and 
restores them manually at the end. If an `ASSERT_*` fails inside the test, the 
restore code won’t run and later tests may be affected.
   
   Consider using `BRPC_SCOPE_EXIT` / `butil::MakeScopeGuard` to restore flags 
unconditionally, and also setting/saving/restoring 
`FLAGS_backup_request_max_ratio` so the test is deterministic even when the 
binary is launched with `--backup_request_max_ratio` set.



##########
src/brpc/backup_request_policy.cpp:
##########
@@ -0,0 +1,175 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include "brpc/backup_request_policy.h"
+
+#include <gflags/gflags.h>
+#include "brpc/reloadable_flags.h"
+#include "bvar/reducer.h"
+#include "bvar/window.h"
+#include "butil/atomicops.h"
+#include "butil/time.h"
+
+namespace brpc {
+
+DEFINE_double(backup_request_max_ratio, -1,
+    "Maximum ratio of backup requests to total requests. "
+    "Value in (0, 1]. -1 means no limit (default). Can be overridden "
+    "per-channel via ChannelOptions.backup_request_max_ratio. "
+    "Note: takes effect at Channel::Init() time; changing this flag "
+    "at runtime does not affect already-created channels.");
+
+static bool validate_backup_request_max_ratio(const char*, double v) {
+    if (v <= 0) return true;  // non-positive means disabled
+    if (v <= 1.0) return true;
+    LOG(ERROR) << "Invalid backup_request_max_ratio=" << v
+               << ", must be <= 0 (disabled) or in (0, 1]";
+    return false;
+}
+BRPC_VALIDATE_GFLAG(backup_request_max_ratio,
+                     validate_backup_request_max_ratio);
+
+DEFINE_int32(backup_request_ratio_window_size_s, 10,
+    "Window size in seconds for computing the backup request ratio. "
+    "Must be >= 1.");
+
+static bool validate_backup_request_ratio_window_size_s(
+        const char*, int32_t v) {
+    if (v >= 1) return true;
+    LOG(ERROR) << "Invalid backup_request_ratio_window_size_s=" << v
+               << ", must be >= 1";
+    return false;
+}
+BRPC_VALIDATE_GFLAG(backup_request_ratio_window_size_s,
+                     validate_backup_request_ratio_window_size_s);
+
+DEFINE_int32(backup_request_ratio_update_interval_s, 5,
+    "Interval in seconds between ratio cache updates. Must be >= 1.");
+
+static bool validate_backup_request_ratio_update_interval_s(
+        const char*, int32_t v) {
+    if (v >= 1) return true;
+    LOG(ERROR) << "Invalid backup_request_ratio_update_interval_s=" << v
+               << ", must be >= 1";
+    return false;
+}
+BRPC_VALIDATE_GFLAG(backup_request_ratio_update_interval_s,
+                     validate_backup_request_ratio_update_interval_s);
+
+// Standalone statistics module for tracking backup/total request ratio
+// within a sliding time window.
+class BackupRateLimiter {
+public:
+    BackupRateLimiter(double max_backup_ratio,
+                      int window_size_seconds,
+                      int update_interval_seconds)
+        : _max_backup_ratio(max_backup_ratio)
+        , _update_interval_us(update_interval_seconds * 1000000LL)
+        , _total_window(&_total_count, window_size_seconds)
+        , _backup_window(&_backup_count, window_size_seconds)
+        , _cached_ratio(0.0)
+        , _last_update_us(0) {
+    }
+
+    // All atomic operations use relaxed ordering intentionally.
+    // This is best-effort rate limiting: a slightly stale ratio is
+    // acceptable for approximate throttling.
+    bool ShouldAllow() const {
+        const int64_t now_us = butil::cpuwide_time_us();
+        int64_t last_us = _last_update_us.load(butil::memory_order_relaxed);
+        double ratio = _cached_ratio.load(butil::memory_order_relaxed);
+
+        if (now_us - last_us >= _update_interval_us) {
+            if (_last_update_us.compare_exchange_strong(
+                    last_us, now_us, butil::memory_order_relaxed)) {
+                int64_t total = _total_window.get_value();
+                int64_t backup = _backup_window.get_value();
+                // Fall back to cumulative counts when the window has no
+                // sampled data yet (cold-start within the first few seconds).
+                if (total <= 0) {
+                    total = _total_count.get_value();
+                    backup = _backup_count.get_value();
+                }
+                ratio = (total > 0) ? static_cast<double>(backup) / total : 
0.0;
+                _cached_ratio.store(ratio, butil::memory_order_relaxed);
+            }
+        }
+
+        return ratio < _max_backup_ratio;
+    }
+
+    void OnRPCEnd(const Controller* controller) {
+        _total_count << 1;
+        if (controller->has_backup_request()) {
+            _backup_count << 1;
+        }
+    }
+
+private:
+    double  _max_backup_ratio;
+    int64_t _update_interval_us;
+
+    bvar::Adder<int64_t>   _total_count;
+    bvar::Adder<int64_t>   _backup_count;
+    bvar::Window<bvar::Adder<int64_t>> _total_window;
+    bvar::Window<bvar::Adder<int64_t>> _backup_window;
+
+    mutable butil::atomic<double>  _cached_ratio;
+    mutable butil::atomic<int64_t> _last_update_us;
+};
+
+// Internal BackupRequestPolicy that composes a BackupRateLimiter
+// for ratio-based suppression.
+class RateLimitedBackupPolicy : public BackupRequestPolicy {
+public:
+    RateLimitedBackupPolicy(int32_t backup_request_ms,
+                            double max_backup_ratio,
+                            int window_size_seconds,
+                            int update_interval_seconds)
+        : _backup_request_ms(backup_request_ms)
+        , _rate_limiter(max_backup_ratio, window_size_seconds,
+                        update_interval_seconds) {
+    }
+
+    int32_t GetBackupRequestMs(const Controller* /*controller*/) const 
override {
+        return _backup_request_ms;
+    }
+
+    bool DoBackup(const Controller* /*controller*/) const override {
+        return _rate_limiter.ShouldAllow();
+    }
+
+    void OnRPCEnd(const Controller* controller) override {
+        _rate_limiter.OnRPCEnd(controller);
+    }
+
+private:
+    int32_t _backup_request_ms;
+    BackupRateLimiter _rate_limiter;
+};
+
+BackupRequestPolicy* CreateRateLimitedBackupPolicy(
+    int32_t backup_request_ms,
+    double max_backup_ratio,
+    int window_size_seconds,
+    int update_interval_seconds) {
+    return new RateLimitedBackupPolicy(
+        backup_request_ms, max_backup_ratio,
+        window_size_seconds, update_interval_seconds);

Review Comment:
   `CreateRateLimitedBackupPolicy()` is a public factory, but it does not 
validate or normalize `max_backup_ratio`, `window_size_seconds`, or 
`update_interval_seconds`. Since the gflags have validators but this factory 
can be called directly by users, invalid inputs (e.g. `max_backup_ratio <= 0`, 
`update_interval_seconds <= 0`, negative window size) can lead to surprising 
behavior (or very hot update loops).
   
   Consider adding input checks/clamping (and documenting the accepted ranges) 
so the factory is safe and predictable when used directly.
   ```suggestion
       // Normalize inputs to be consistent with the corresponding gflags:
       // - max_backup_ratio: <= 0 means disabled; (0, 1] is valid; > 1 is 
clamped to 1
       // - window_size_seconds: must be >= 1
       // - update_interval_seconds: must be >= 1
       double normalized_max_backup_ratio = max_backup_ratio;
       if (normalized_max_backup_ratio > 1.0) {
           LOG(ERROR) << "Invalid max_backup_ratio=" << max_backup_ratio
                      << ", clamping to 1.0";
           normalized_max_backup_ratio = 1.0;
       }
       // Non-positive ratio is allowed and interpreted as `no limit`, 
consistent with
       // validate_backup_request_max_ratio.
   
       int normalized_window_size_seconds = window_size_seconds;
       if (normalized_window_size_seconds < 1) {
           LOG(ERROR) << "Invalid window_size_seconds=" << window_size_seconds
                      << ", clamping to 1";
           normalized_window_size_seconds = 1;
       }
   
       int normalized_update_interval_seconds = update_interval_seconds;
       if (normalized_update_interval_seconds < 1) {
           LOG(ERROR) << "Invalid update_interval_seconds=" << 
update_interval_seconds
                      << ", clamping to 1";
           normalized_update_interval_seconds = 1;
       }
   
       return new RateLimitedBackupPolicy(
           backup_request_ms, normalized_max_backup_ratio,
           normalized_window_size_seconds, normalized_update_interval_seconds);
   ```



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to