Copilot commented on code in PR #3229:
URL: https://github.com/apache/brpc/pull/3229#discussion_r2852824049


##########
src/brpc/backup_request_policy.cpp:
##########
@@ -0,0 +1,186 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include "brpc/backup_request_policy.h"
+
+#include <gflags/gflags.h>
+#include "brpc/reloadable_flags.h"
+#include "bvar/reducer.h"
+#include "bvar/window.h"
+#include "butil/atomicops.h"
+#include "butil/time.h"
+
+namespace brpc {
+
+DEFINE_double(backup_request_max_ratio, -1,
+    "Maximum ratio of backup requests to total requests. "
+    "Value in (0, 1] enables rate limiting. Values <= 0 disable it "
+    "(-1 is default). Can be overridden per-channel via "
+    "ChannelOptions.backup_request_max_ratio. "
+    "Note: takes effect at Channel::Init() time; changing this flag "
+    "at runtime does not affect already-created channels.");
+
+static bool validate_backup_request_max_ratio(const char*, double v) {
+    if (v <= 0) return true;  // non-positive means disabled
+    if (v <= 1.0) return true;
+    LOG(ERROR) << "Invalid backup_request_max_ratio=" << v
+               << ", must be <= 0 (disabled) or in (0, 1]";

Review Comment:
   `LOG(...)` is used in this file (validators and factory), but 
`butil/logging.h` is not included. This will fail to compile unless another 
header happens to pull logging in indirectly; add the proper include in this 
translation unit.



##########
src/brpc/backup_request_policy.cpp:
##########
@@ -0,0 +1,186 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include "brpc/backup_request_policy.h"
+
+#include <gflags/gflags.h>
+#include "brpc/reloadable_flags.h"
+#include "bvar/reducer.h"
+#include "bvar/window.h"
+#include "butil/atomicops.h"
+#include "butil/time.h"
+
+namespace brpc {
+
+DEFINE_double(backup_request_max_ratio, -1,
+    "Maximum ratio of backup requests to total requests. "
+    "Value in (0, 1] enables rate limiting. Values <= 0 disable it "
+    "(-1 is default). Can be overridden per-channel via "
+    "ChannelOptions.backup_request_max_ratio. "
+    "Note: takes effect at Channel::Init() time; changing this flag "
+    "at runtime does not affect already-created channels.");
+
+static bool validate_backup_request_max_ratio(const char*, double v) {
+    if (v <= 0) return true;  // non-positive means disabled
+    if (v <= 1.0) return true;
+    LOG(ERROR) << "Invalid backup_request_max_ratio=" << v
+               << ", must be <= 0 (disabled) or in (0, 1]";
+    return false;
+}
+BRPC_VALIDATE_GFLAG(backup_request_max_ratio,
+                     validate_backup_request_max_ratio);
+
+DEFINE_int32(backup_request_ratio_window_size_s, 10,
+    "Window size in seconds for computing the backup request ratio. "
+    "Must be >= 1.");
+
+static bool validate_backup_request_ratio_window_size_s(
+        const char*, int32_t v) {
+    if (v >= 1) return true;
+    LOG(ERROR) << "Invalid backup_request_ratio_window_size_s=" << v
+               << ", must be >= 1";
+    return false;
+}
+BRPC_VALIDATE_GFLAG(backup_request_ratio_window_size_s,
+                     validate_backup_request_ratio_window_size_s);
+
+DEFINE_int32(backup_request_ratio_update_interval_s, 5,
+    "Interval in seconds between ratio cache updates. Must be >= 1.");
+
+static bool validate_backup_request_ratio_update_interval_s(
+        const char*, int32_t v) {
+    if (v >= 1) return true;
+    LOG(ERROR) << "Invalid backup_request_ratio_update_interval_s=" << v
+               << ", must be >= 1";
+    return false;
+}
+BRPC_VALIDATE_GFLAG(backup_request_ratio_update_interval_s,
+                     validate_backup_request_ratio_update_interval_s);
+
+// Standalone statistics module for tracking backup/total request ratio
+// within a sliding time window.
+class BackupRateLimiter {
+public:
+    BackupRateLimiter(double max_backup_ratio,
+                      int window_size_seconds,
+                      int update_interval_seconds)
+        : _max_backup_ratio(max_backup_ratio)
+        , _update_interval_us(update_interval_seconds * 1000000LL)
+        , _total_window(&_total_count, window_size_seconds)
+        , _backup_window(&_backup_count, window_size_seconds)
+        , _cached_ratio(0.0)
+        , _last_update_us(0) {
+    }
+
+    // All atomic operations use relaxed ordering intentionally.
+    // This is best-effort rate limiting: a slightly stale ratio is
+    // acceptable for approximate throttling.
+    bool ShouldAllow() const {
+        const int64_t now_us = butil::cpuwide_time_us();
+        int64_t last_us = _last_update_us.load(butil::memory_order_relaxed);
+        double ratio = _cached_ratio.load(butil::memory_order_relaxed);
+
+        if (now_us - last_us >= _update_interval_us) {
+            if (_last_update_us.compare_exchange_strong(
+                    last_us, now_us, butil::memory_order_relaxed)) {
+                int64_t total = _total_window.get_value();
+                int64_t backup = _backup_window.get_value();
+                ratio = (total > 0) ? static_cast<double>(backup) / total : 
0.0;
+                _cached_ratio.store(ratio, butil::memory_order_relaxed);
+            }
+        }
+
+        // max_backup_ratio >= 1.0 means no limit (ratio cannot exceed 1.0).
+        return _max_backup_ratio >= 1.0 || ratio < _max_backup_ratio;
+    }
+
+    void OnRPCEnd(const Controller* controller) {
+        _total_count << 1;
+        if (controller->has_backup_request()) {
+            _backup_count << 1;
+        }
+    }
+
+private:
+    double  _max_backup_ratio;
+    int64_t _update_interval_us;
+
+    bvar::Adder<int64_t>   _total_count;
+    bvar::Adder<int64_t>   _backup_count;
+    bvar::Window<bvar::Adder<int64_t>> _total_window;
+    bvar::Window<bvar::Adder<int64_t>> _backup_window;
+
+    mutable butil::atomic<double>  _cached_ratio;
+    mutable butil::atomic<int64_t> _last_update_us;
+};
+
+// Internal BackupRequestPolicy that composes a BackupRateLimiter
+// for ratio-based suppression.
+class RateLimitedBackupPolicy : public BackupRequestPolicy {
+public:
+    RateLimitedBackupPolicy(int32_t backup_request_ms,
+                            double max_backup_ratio,
+                            int window_size_seconds,
+                            int update_interval_seconds)
+        : _backup_request_ms(backup_request_ms)
+        , _rate_limiter(max_backup_ratio, window_size_seconds,
+                        update_interval_seconds) {
+    }
+
+    int32_t GetBackupRequestMs(const Controller* /*controller*/) const 
override {
+        return _backup_request_ms;
+    }
+
+    bool DoBackup(const Controller* /*controller*/) const override {
+        return _rate_limiter.ShouldAllow();
+    }
+
+    void OnRPCEnd(const Controller* controller) override {
+        _rate_limiter.OnRPCEnd(controller);
+    }
+
+private:
+    int32_t _backup_request_ms;
+    BackupRateLimiter _rate_limiter;
+};
+
+BackupRequestPolicy* CreateRateLimitedBackupPolicy(
+    int32_t backup_request_ms,
+    double max_backup_ratio,
+    int window_size_seconds,
+    int update_interval_seconds) {
+    if (max_backup_ratio <= 0 || max_backup_ratio > 1.0) {
+        LOG(ERROR) << "Invalid max_backup_ratio=" << max_backup_ratio
+                   << ", must be in (0, 1]";
+        return NULL;

Review Comment:
   `CreateRateLimitedBackupPolicy()` does not validate `backup_request_ms` at 
all. Since the channel-level integration treats `backup_request_ms < 0` as 
“backup disabled”, consider rejecting negative values in the factory as invalid 
(return NULL) to match the API comment (“Returns NULL on invalid parameters”) 
and avoid creating policies that can never trigger backups.



##########
src/brpc/backup_request_policy.cpp:
##########
@@ -0,0 +1,186 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include "brpc/backup_request_policy.h"
+
+#include <gflags/gflags.h>
+#include "brpc/reloadable_flags.h"
+#include "bvar/reducer.h"
+#include "bvar/window.h"
+#include "butil/atomicops.h"
+#include "butil/time.h"
+
+namespace brpc {
+
+DEFINE_double(backup_request_max_ratio, -1,
+    "Maximum ratio of backup requests to total requests. "
+    "Value in (0, 1] enables rate limiting. Values <= 0 disable it "
+    "(-1 is default). Can be overridden per-channel via "
+    "ChannelOptions.backup_request_max_ratio. "
+    "Note: takes effect at Channel::Init() time; changing this flag "
+    "at runtime does not affect already-created channels.");
+
+static bool validate_backup_request_max_ratio(const char*, double v) {
+    if (v <= 0) return true;  // non-positive means disabled
+    if (v <= 1.0) return true;
+    LOG(ERROR) << "Invalid backup_request_max_ratio=" << v
+               << ", must be <= 0 (disabled) or in (0, 1]";
+    return false;
+}
+BRPC_VALIDATE_GFLAG(backup_request_max_ratio,
+                     validate_backup_request_max_ratio);
+
+DEFINE_int32(backup_request_ratio_window_size_s, 10,
+    "Window size in seconds for computing the backup request ratio. "
+    "Must be >= 1.");
+
+static bool validate_backup_request_ratio_window_size_s(
+        const char*, int32_t v) {
+    if (v >= 1) return true;
+    LOG(ERROR) << "Invalid backup_request_ratio_window_size_s=" << v
+               << ", must be >= 1";
+    return false;
+}
+BRPC_VALIDATE_GFLAG(backup_request_ratio_window_size_s,
+                     validate_backup_request_ratio_window_size_s);
+
+DEFINE_int32(backup_request_ratio_update_interval_s, 5,
+    "Interval in seconds between ratio cache updates. Must be >= 1.");
+
+static bool validate_backup_request_ratio_update_interval_s(
+        const char*, int32_t v) {
+    if (v >= 1) return true;
+    LOG(ERROR) << "Invalid backup_request_ratio_update_interval_s=" << v
+               << ", must be >= 1";
+    return false;
+}
+BRPC_VALIDATE_GFLAG(backup_request_ratio_update_interval_s,
+                     validate_backup_request_ratio_update_interval_s);
+
+// Standalone statistics module for tracking backup/total request ratio
+// within a sliding time window.
+class BackupRateLimiter {
+public:
+    BackupRateLimiter(double max_backup_ratio,
+                      int window_size_seconds,
+                      int update_interval_seconds)
+        : _max_backup_ratio(max_backup_ratio)
+        , _update_interval_us(update_interval_seconds * 1000000LL)
+        , _total_window(&_total_count, window_size_seconds)
+        , _backup_window(&_backup_count, window_size_seconds)
+        , _cached_ratio(0.0)
+        , _last_update_us(0) {
+    }
+
+    // All atomic operations use relaxed ordering intentionally.
+    // This is best-effort rate limiting: a slightly stale ratio is
+    // acceptable for approximate throttling.
+    bool ShouldAllow() const {
+        const int64_t now_us = butil::cpuwide_time_us();
+        int64_t last_us = _last_update_us.load(butil::memory_order_relaxed);
+        double ratio = _cached_ratio.load(butil::memory_order_relaxed);
+
+        if (now_us - last_us >= _update_interval_us) {
+            if (_last_update_us.compare_exchange_strong(
+                    last_us, now_us, butil::memory_order_relaxed)) {
+                int64_t total = _total_window.get_value();
+                int64_t backup = _backup_window.get_value();
+                ratio = (total > 0) ? static_cast<double>(backup) / total : 
0.0;
+                _cached_ratio.store(ratio, butil::memory_order_relaxed);
+            }
+        }
+
+        // max_backup_ratio >= 1.0 means no limit (ratio cannot exceed 1.0).
+        return _max_backup_ratio >= 1.0 || ratio < _max_backup_ratio;
+    }
+
+    void OnRPCEnd(const Controller* controller) {
+        _total_count << 1;
+        if (controller->has_backup_request()) {
+            _backup_count << 1;
+        }
+    }
+
+private:
+    double  _max_backup_ratio;
+    int64_t _update_interval_us;
+
+    bvar::Adder<int64_t>   _total_count;
+    bvar::Adder<int64_t>   _backup_count;
+    bvar::Window<bvar::Adder<int64_t>> _total_window;
+    bvar::Window<bvar::Adder<int64_t>> _backup_window;
+
+    mutable butil::atomic<double>  _cached_ratio;
+    mutable butil::atomic<int64_t> _last_update_us;
+};
+
+// Internal BackupRequestPolicy that composes a BackupRateLimiter
+// for ratio-based suppression.
+class RateLimitedBackupPolicy : public BackupRequestPolicy {
+public:
+    RateLimitedBackupPolicy(int32_t backup_request_ms,
+                            double max_backup_ratio,
+                            int window_size_seconds,
+                            int update_interval_seconds)
+        : _backup_request_ms(backup_request_ms)
+        , _rate_limiter(max_backup_ratio, window_size_seconds,
+                        update_interval_seconds) {
+    }
+
+    int32_t GetBackupRequestMs(const Controller* /*controller*/) const 
override {
+        return _backup_request_ms;
+    }
+
+    bool DoBackup(const Controller* /*controller*/) const override {
+        return _rate_limiter.ShouldAllow();
+    }
+
+    void OnRPCEnd(const Controller* controller) override {
+        _rate_limiter.OnRPCEnd(controller);
+    }
+
+private:
+    int32_t _backup_request_ms;
+    BackupRateLimiter _rate_limiter;
+};
+
+BackupRequestPolicy* CreateRateLimitedBackupPolicy(
+    int32_t backup_request_ms,
+    double max_backup_ratio,
+    int window_size_seconds,
+    int update_interval_seconds) {
+    if (max_backup_ratio <= 0 || max_backup_ratio > 1.0) {
+        LOG(ERROR) << "Invalid max_backup_ratio=" << max_backup_ratio
+                   << ", must be in (0, 1]";
+        return NULL;
+    }
+    if (window_size_seconds < 1) {
+        LOG(ERROR) << "Invalid window_size_seconds=" << window_size_seconds
+                   << ", must be >= 1";
+        return NULL;
+    }
+    if (update_interval_seconds < 1) {
+        LOG(ERROR) << "Invalid update_interval_seconds="
+                   << update_interval_seconds << ", must be >= 1";
+        return NULL;
+    }
+    return new RateLimitedBackupPolicy(
+        backup_request_ms, max_backup_ratio,

Review Comment:
   `CreateRateLimitedBackupPolicy()` uses plain `new` which may throw/abort on 
OOM depending on build flags. The brpc codebase commonly uses `new 
(std::nothrow)` and returns NULL on allocation failure; consider following that 
pattern here to keep the “returns NULL on error” contract.



##########
src/brpc/channel.cpp:
##########
@@ -172,10 +177,20 @@ Channel::~Channel() {
         const ChannelSignature sig = ComputeChannelSignature(_options);
         SocketMapRemove(SocketMapKey(_server_address, sig));
     }
+    if (_owns_backup_policy) {
+        delete _options.backup_request_policy;
+    }
 }
 
 
 int Channel::InitChannelOptions(const ChannelOptions* options) {
+    // Clean up any previously created internal backup policy (re-Init case).
+    if (_owns_backup_policy) {
+        delete _options.backup_request_policy;
+        _options.backup_request_policy = NULL;
+        _owns_backup_policy = false;
+    }

Review Comment:
   Channel controllers keep a raw `_backup_request_policy` pointer for the 
duration of the RPC. Deleting an internally-created policy in 
`Channel::~Channel()` can cause use-after-free if the channel is destroyed 
while async RPCs are still in-flight (existing tests in this repo explicitly 
delete channels during async calls). The internal policy needs a lifetime at 
least as long as any controller using it (e.g., ref-count/shared ownership 
similar to `_lb`, or otherwise avoiding deletion until it’s guaranteed no 
controllers reference it).
   ```suggestion
       // NOTE: Do not delete _options.backup_request_policy here even when
       // _owns_backup_policy is true. Controllers may still hold a raw pointer
       // to this policy while async RPCs are in flight, and deleting it here
       // could cause use-after-free. The small leak is preferable to UAF.
   }
   
   
   int Channel::InitChannelOptions(const ChannelOptions* options) {
       // NOTE: We intentionally no longer delete any previously created 
internal
       // backup_request_policy here. Controllers may still be using the policy
       // via a raw pointer while async RPCs are in flight, and deleting it in 
this
       // re-init path could cause use-after-free.
   ```



##########
test/brpc_channel_unittest.cpp:
##########
@@ -2803,6 +2807,141 @@ TEST_F(ChannelTest, backup_request_policy) {
     }
 }
 
+TEST_F(ChannelTest, backup_request_rate_limited) {
+    ASSERT_EQ(0, StartAccept(_ep));
+
+    // Test 1: Policy is created when backup_request_max_ratio > 0
+    {
+        brpc::ChannelOptions opt;
+        opt.backup_request_ms = 10;
+        opt.backup_request_max_ratio = 0.3;
+        brpc::Channel channel;
+        ASSERT_EQ(0, channel.Init(_ep, &opt));
+        ASSERT_TRUE(channel.options().backup_request_policy != NULL);
+    }
+
+    // Test 2: No policy when ratio is disabled (-1)
+    {
+        brpc::ChannelOptions opt;
+        opt.backup_request_ms = 10;
+        opt.backup_request_max_ratio = -1;
+        brpc::Channel channel;
+        ASSERT_EQ(0, channel.Init(_ep, &opt));
+        ASSERT_TRUE(channel.options().backup_request_policy == NULL);
+    }

Review Comment:
   This test asserts that `backup_request_max_ratio = -1` results in no policy, 
but `-1` means “use global gflag”. If the test binary is run with 
`--backup_request_max_ratio>0`, this assertion will fail. Consider 
saving/restoring `FLAGS_backup_request_max_ratio` and forcing it to `-1` for 
this block (or adjust the test to avoid depending on the process-wide default) 
so the test is hermetic.



##########
src/brpc/channel.cpp:
##########
@@ -172,10 +177,20 @@ Channel::~Channel() {
         const ChannelSignature sig = ComputeChannelSignature(_options);
         SocketMapRemove(SocketMapKey(_server_address, sig));
     }
+    if (_owns_backup_policy) {
+        delete _options.backup_request_policy;
+    }
 }
 
 
 int Channel::InitChannelOptions(const ChannelOptions* options) {
+    // Clean up any previously created internal backup policy (re-Init case).
+    if (_owns_backup_policy) {
+        delete _options.backup_request_policy;
+        _options.backup_request_policy = NULL;
+        _owns_backup_policy = false;
+    }
+
     if (options) {  // Override default options if user provided one.
         _options = *options;
     }

Review Comment:
   In the re-`Init` path, the code deletes the previously-owned internal policy 
before copying `*options` into `_options`. If a caller reuses 
`channel.options()` (which may contain an internally-created policy pointer) 
and passes that back into `Init()`, this will copy a dangling 
`backup_request_policy` pointer and can lead to UAF. Consider copying options 
first (saving old owned pointer), then if the incoming `backup_request_policy` 
equals the old owned pointer treat it as NULL and recreate, or otherwise avoid 
exposing/copying internal policy pointers across Init cycles.
   ```suggestion
       // Save any previously created internal backup policy (re-Init case).
       auto* const old_backup_policy = _options.backup_request_policy;
       const bool old_owns_backup_policy = _owns_backup_policy;
   
       if (options) {  // Override default options if user provided one.
           _options = *options;
           // If caller reused channel.options() (which may contain an 
internally-created
           // backup_request_policy), and that pointer was owned by this 
Channel and is
           // about to be deleted, avoid copying a dangling pointer into 
_options.
           if (old_owns_backup_policy &&
               _options.backup_request_policy == old_backup_policy) {
               _options.backup_request_policy = NULL;
           }
       } else {
           // No new options provided; preserve existing _options except that 
any previous
           // internally-owned backup policy is being discarded.
           if (old_owns_backup_policy &&
               _options.backup_request_policy == old_backup_policy) {
               _options.backup_request_policy = NULL;
           }
       }
   
       // Now it is safe to delete the old internal backup policy, after we've 
checked
       // whether the incoming options reused the pointer.
       if (old_owns_backup_policy) {
           delete old_backup_policy;
           _owns_backup_policy = false;
       }
   ```



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to