Copilot commented on code in PR #3350:
URL: https://github.com/apache/brpc/pull/3350#discussion_r3437763493


##########
src/butil/raw_pack.h:
##########
@@ -62,6 +69,13 @@ class RawPacker {
         return *this;
     }

Review Comment:
   RawPacker writes via uint16_t*/uint32_t* casts into a `char*` stream. This 
can violate strict-aliasing rules and may perform unaligned stores when 
`stream` isn't suitably aligned (e.g. packing into a byte buffer), which is 
undefined behavior on some architectures. Prefer memcpy-based stores using 
HostToNet{16,32,64} to be safe for any alignment.



##########
src/brpc/policy/rdma_handshake_fallback_protocol.cpp:
##########
@@ -0,0 +1,214 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include "brpc/policy/rdma_handshake_fallback_protocol.h"
+
+#include <limits>
+#include <string>
+#include <string.h>
+#include "butil/object_pool.h"
+#include "butil/logging.h"
+#include "butil/raw_pack.h"
+#include "brpc/destroyable.h"
+#include "brpc/rdma/rdma_handshake.pb.h"
+#include "brpc/rdma/rdma_handshake_constants.h"
+
+namespace brpc {
+namespace policy {
+
+// Both handshake versions are handled:
+//   - v2 ("RDMA"): [ "RDMA" 4B ][ msg_len 2B ][ body ], total >= 40B.
+//   - v3 ("RDM3"): [ "RDM3" 4B ][ pb_size 4B (big-endian) ][ RdmaHello bytes 
].
+
+// v2 header: 4B magic + 2B msg_len.
+static constexpr size_t RDMA_V2_HEADER_LEN = rdma::HELLO_MAGIC_LEN + 2;
+// v3 header: 4B magic + 4B pb_size.
+static constexpr size_t RDMA_V3_HEADER_LEN =
+    rdma::HELLO_MAGIC_LEN + rdma::HELLO_V3_PB_SIZE_LEN;
+// An intentionally-invalid v2 hello_ver written into the reply: the client's
+// ValidHelloMessage() requires hello_ver==2, so it rejects this and falls 
back.
+static constexpr uint16_t RDMA_HELLO_V2_VERSION_INVALID =
+    std::numeric_limits<uint16_t>::max();
+
+// Length of the gid field (== sizeof(ibv_gid)). Spelled as a literal so this
+// header stays free of <infiniband/verbs.h>.
+constexpr size_t RDMA_GID_LEN = 16;
+
+namespace {
+struct RdmaFallbackContext : public Destroyable {
+    static RdmaFallbackContext* create() { return 
butil::get_object<RdmaFallbackContext>(); }
+    void Destroy() override { butil::return_object(this); }
+};
+
+ParseResult HandleRdmaV2Hello(butil::IOBuf* source, Socket* socket) {
+    char header_buf[RDMA_V2_HEADER_LEN];
+    if (source->copy_to(header_buf, sizeof(header_buf)) < sizeof(header_buf)) {
+        // Magic matched, but msg_len has not fully arrived yet.
+        return MakeParseError(PARSE_ERROR_NOT_ENOUGH_DATA);
+    }
+
+    // msg_len (big-endian, right after the magic) is the total hello length.
+    uint16_t hello_size = 0;
+    butil::RawUnpacker(header_buf + 
rdma::HELLO_MAGIC_LEN).unpack16(hello_size);
+    // A too-small value is malformed; a too-large one would make us wait
+    // forever for bytes that never come (the real hello is 40B).
+    if (hello_size < rdma::HELLO_V2_MSG_LEN_MIN || hello_size > 
rdma::HELLO_V2_MSG_LEN_MAX) {
+        return MakeParseError(PARSE_ERROR_ABSOLUTELY_WRONG);
+    }
+    // During the handshake the client sends exactly one hello and then blocks
+    // waiting for the server hello: fewer bytes -> wait; more bytes -> the 
peer
+    // is not a well-behaved RDMA client, bail out.
+    if (source->size() < hello_size) {
+        return MakeParseError(PARSE_ERROR_NOT_ENOUGH_DATA);
+    }
+    if (source->size() > hello_size) {
+        return MakeParseError(PARSE_ERROR_ABSOLUTELY_WRONG);
+    }
+    source->pop_front(hello_size);
+
+    // Reply an incompatible hello so the client downgrades to TCP on THIS
+    // connection: magic "RDMA" + msg_len(=40) + an invalid hello_ver. msg_len
+    // must be valid, otherwise the client errors out at the IO layer instead 
of
+    // negotiating. The rest of the 36B body stays zero (value-initialized);
+    // the client's ValidHelloMessage() rejects hello_ver!=2 -> 
negotiated=false.
+    char reply[rdma::HELLO_V2_MSG_LEN_MIN]{};
+    fast_memcpy(reply, rdma::HELLO_MAGIC, rdma::HELLO_MAGIC_LEN);
+    butil::RawPacker(reply + rdma::HELLO_MAGIC_LEN)
+        .pack16(sizeof(reply)).pack16(RDMA_HELLO_V2_VERSION_INVALID);
+
+    butil::IOBuf out;
+    out.append(reply, sizeof(reply));
+    if (socket->Write(&out) != 0) {
+        PLOG(WARNING) << "Fail to send RDMA v2 fallback hello to "
+                      << socket->description();
+        return MakeParseError(PARSE_ERROR_ABSOLUTELY_WRONG,
+                              "fail to send RDMA v2 fallback hello");
+    }
+
+    // Remember we are now draining the client's ACK across subsequent reads.
+    socket->reset_parsing_context(RdmaFallbackContext::create());
+    return MakeParseError(PARSE_ERROR_NOT_ENOUGH_DATA);
+}
+
+ParseResult HandleRdmaV3Hello(butil::IOBuf* source, Socket* socket) {
+    char header_buf[RDMA_V3_HEADER_LEN];
+    if (source->copy_to(header_buf, sizeof(header_buf)) < sizeof(header_buf)) {
+        // Magic matched, but pb_size has not fully arrived yet.
+        return MakeParseError(PARSE_ERROR_NOT_ENOUGH_DATA);
+    }
+    uint32_t pb_size = 0;
+    butil::RawUnpacker(header_buf + rdma::HELLO_MAGIC_LEN).unpack32(pb_size);
+    // Zero is malformed; an over-large value would make us wait forever. The
+    // upper bound also keeps the size_t addition below well within range.
+    if (pb_size == 0 || pb_size > rdma::HELLO_V3_MAX_PB_SIZE) {
+        return MakeParseError(PARSE_ERROR_ABSOLUTELY_WRONG);
+    }
+    // size_t (>= 64-bit on supported platforms), so this addition cannot
+    // overflow given the bound above.
+    const size_t hello_size = RDMA_V3_HEADER_LEN + pb_size;
+    // During the handshake the client sends exactly one hello and then blocks
+    // waiting for the server hello: fewer bytes -> wait; more bytes -> the 
peer
+    // is not a well-behaved RDMA client, bail out.
+    if (source->size() < hello_size) {
+        return MakeParseError(PARSE_ERROR_NOT_ENOUGH_DATA);
+    }
+    if (source->size() > hello_size) {
+        return MakeParseError(PARSE_ERROR_ABSOLUTELY_WRONG);
+    }
+    source->pop_front(hello_size);
+
+    // Build a v3 hello the client will reject so it downgrades to TCP. All
+    // `required` fields are populated so the client's ParseFromArray() 
succeeds
+    // (a parse failure would surface as an IO error, not a clean fallback), 
but
+    // block_size==0 (< MIN_BLOCK_SIZE) and qp_num==0 make its ValidRdmaHello()
+    // return false -> negotiated=false regardless of g_skip_rdma_init.
+    rdma::RdmaHello reply_msg;
+    reply_msg.set_block_size(0);
+    reply_msg.set_sq_size(0);
+    reply_msg.set_rq_size(0);
+    reply_msg.set_lid(0);
+    reply_msg.set_gid(std::string(RDMA_GID_LEN, '\0'));
+    reply_msg.set_qp_num(0);
+
+    // [ "RDM3" 4B ][ pb_size 4B (big-endian) ][ RdmaHello protobuf bytes ]
+    butil::IOBuf packet;
+    packet.append(rdma::HELLO_MAGIC_V3, rdma::HELLO_MAGIC_LEN);
+    uint32_t pb_size_be = 
butil::HostToNet32(static_cast<uint32_t>(reply_msg.ByteSizeLong()));
+    packet.append(&pb_size_be, sizeof(pb_size_be));
+
+    butil::IOBufAsZeroCopyOutputStream output(&packet);
+    if (!reply_msg.SerializeToZeroCopyStream(&output)) {
+        return MakeParseError(PARSE_ERROR_ABSOLUTELY_WRONG,
+                              "Fail to serialize RDMA v3 fallback hello");
+    }
+
+    if (socket->Write(&packet) != 0) {
+        return MakeParseError(PARSE_ERROR_ABSOLUTELY_WRONG,
+                              "Fail to write RDMA v3 fallback hello");
+    }
+
+    // Remember we are now draining the client's ACK across subsequent reads.
+    socket->reset_parsing_context(RdmaFallbackContext::create());
+    return MakeParseError(PARSE_ERROR_NOT_ENOUGH_DATA);
+}
+}  // namespace
+
+ParseResult ParseRdmaHandshake(butil::IOBuf* source, Socket* socket,
+                                       bool /*read_eof*/, const void* /*arg*/) 
{
+    // It is a small state machine kept across parse calls via Socket's
+    // parsing_context() (same mechanism HTTP uses):
+    //   - no context yet: if the leading bytes are an RDMA magic ("RDMA" for 
v2 or
+    //     "RDM3" for v3), consume the client hello, reply a version-matched 
but
+    //     un-negotiable hello (so the client downgrades to TCP on this 
connection),
+    //     store a context and return NOT_ENOUGH_DATA to wait for the client's 
ACK;
+    //   - context present: drain the 4-byte ACK, drop the context and return
+    //     TRY_OTHERS so InputMessenger parses the following real request from 
the
+    //     first protocol.
+    if (socket->parsing_context() != NULL) {
+        if (source->size() < rdma::HELLO_ACK_LEN) {
+            return MakeParseError(PARSE_ERROR_NOT_ENOUGH_DATA);
+        }
+        source->pop_front(rdma::HELLO_ACK_LEN);
+        socket->reset_parsing_context(NULL);
+        return MakeParseError(PARSE_ERROR_TRY_OTHERS);
+    }
+
+    char magic_buf[rdma::HELLO_MAGIC_LEN];
+    const size_t mn = source->copy_to(magic_buf, sizeof(magic_buf));
+    if (mn < rdma::HELLO_MAGIC_LEN) {
+        return MakeParseError(PARSE_ERROR_NOT_ENOUGH_DATA);
+    }
+
+    if (*(const uint32_t*)magic_buf == *(const uint32_t*)rdma::HELLO_MAGIC) {
+        return HandleRdmaV2Hello(source, socket);
+    }
+    if (*(const uint32_t*)magic_buf == *(const uint32_t*)rdma::HELLO_MAGIC_V3) 
{
+        return HandleRdmaV3Hello(source, socket);
+    }

Review Comment:
   Comparing the 4-byte magic via `*(const uint32_t*)` performs type-punning 
and can read unaligned data, which is undefined behavior under strict-aliasing 
/ alignment rules. Use `memcmp` against the known 4-byte magic strings instead.



##########
src/brpc/server.cpp:
##########
@@ -637,6 +641,7 @@ Acceptor* Server::BuildAcceptor() {
         }
         if (has_whitelist &&
             !is_http_protocol(protocols[i].name) &&
+            !is_rdma_handshake_fallback_protocol(protocols[i].name) &&
             !whitelist.erase(protocols[i].name)) {
             // the protocol is not allowed to serve.

Review Comment:
   `ListProtocols()` iterates protocols in `ProtocolType` numeric order 
(protocol_map index), and `BuildAcceptor()` adds handlers in that order. If 
rdma_handshake must be attempted before http, implementing it by renumbering 
the protobuf enum will break wire compatibility. Prefer keeping enum values 
stable and explicitly ordering the acceptor/input handlers so `rdma_handshake` 
is tried before HTTP regardless of `ProtocolType` value.



##########
src/brpc/options.proto:
##########
@@ -43,28 +43,29 @@ enum ProtocolType {
     PROTOCOL_SOFA_PBRPC = 4;
     PROTOCOL_RTMP = 5;
     PROTOCOL_THRIFT = 6;
-    PROTOCOL_HTTP = 7;
-    PROTOCOL_PUBLIC_PBRPC = 8;
-    PROTOCOL_NOVA_PBRPC = 9;
-    PROTOCOL_REDIS = 10;
-    PROTOCOL_NSHEAD_CLIENT = 11;       // implemented in baidu-rpc-ub
-    PROTOCOL_NSHEAD = 12;
-    PROTOCOL_HADOOP_RPC = 13;
-    PROTOCOL_HADOOP_SERVER_RPC = 14;
-    PROTOCOL_MONGO = 15;               // server side only
-    PROTOCOL_UBRPC_COMPACK = 16;
-    PROTOCOL_DIDX_CLIENT = 17;         // Client side only
-    PROTOCOL_MEMCACHE = 18;            // Client side only
-    PROTOCOL_ITP = 19;
-    PROTOCOL_NSHEAD_MCPACK = 20;
-    PROTOCOL_DISP_IDL = 21;            // Client side only
-    PROTOCOL_ERSDA_CLIENT = 22;        // Client side only
-    PROTOCOL_UBRPC_MCPACK2 = 23;       // Client side only
+    PROTOCOL_RDMA_HANDSHAKE = 7;
+    PROTOCOL_HTTP = 8;
+    PROTOCOL_PUBLIC_PBRPC = 9;
+    PROTOCOL_NOVA_PBRPC = 10;
+    PROTOCOL_REDIS = 11;

Review Comment:
   This change renumbers existing `ProtocolType` enum values (e.g. HTTP from 7 
-> 8). `ProtocolType` is serialized in other protos (e.g. rpc_dump.proto / 
span.proto), so renumbering breaks backward compatibility for any stored 
traces/dumps and any cross-version components exchanging these messages. Avoid 
renumbering: keep existing numeric IDs stable and assign 
`PROTOCOL_RDMA_HANDSHAKE` a new unused number, then implement the desired 
protocol-selection priority without relying on enum ordering.



##########
src/butil/raw_pack.h:
##########
@@ -71,21 +85,33 @@ class RawPacker {
 class RawUnpacker {
 public:
     explicit RawUnpacker(const void* stream) : _stream((const char*)stream) {}
-    ~RawUnpacker() {}
 
-    RawUnpacker& unpack32(uint32_t & host_value) {
+    RawUnpacker& unpack16(uint16_t& host_value) {
+        host_value = NetToHost16(*(const uint16_t*)_stream);
+        _stream += 2;
+        return *this;
+    }
+
+    RawUnpacker& unpack32(uint32_t& host_value) {
         host_value = NetToHost32(*(const uint32_t*)_stream);
         _stream += 4;
         return *this;
     }
 
-    RawUnpacker& unpack64(uint64_t & host_value) {
+    RawUnpacker& unpack64(uint64_t& host_value) {
         const uint32_t *p = (const uint32_t*)_stream;
         host_value = (((uint64_t)NetToHost32(p[0])) << 32) | NetToHost32(p[1]);
         _stream += 8;
         return *this;
     }

Review Comment:
   RawUnpacker reads via uint16_t*/uint32_t* casts from a `char*` stream. This 
can violate strict-aliasing rules and may fault on platforms that don't allow 
unaligned loads. Using memcpy into a properly aligned temporary before 
NetToHost{16,32,64} avoids UB.



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to