Hi all,

On 5/28/26 11:55 AM, Lorenzo Bianconi wrote:
>> On Wed, May 27, 2026 at 7:06 PM Lorenzo Bianconi <
>> [email protected]> wrote:
>>
>>>> The packet buffering, which is part of the ARP processing, required
>>>> a pinctrl lock. As a consequence we could block packet processing
>>>> when the lock is taken by main thread and possibly drop some traffic
>>>> that is heading towards pinctrl. Make sure the packet buffering is
>>>> now handled without any lock, that is done by using cmap instead of
>>>> hmap to store the objects and all concurrent members are accessed
>>>> via atomics instead. This should increase the throughput of the
>>>> packet-in processing for ARP handling.
>>>
>>> Hi Ales,
>>>
>>> I think the patch is fine. Moreover, reading the comments from Marc, I
>>> consider
>>> this one the first step toward reducing the contention in pinctrl_thread
>>> codepath.
>>>
>>> Acked-by: Lorenzo Bianconi <[email protected]>
>>>
>>> Regards,
>>> Lorenzo
>>
>>
>>>>
>>>> Reported-at: https://redhat.atlassian.net/browse/FDP-3301
>>>> Signed-off-by: Ales Musil <[email protected]>
>>>> ---
>>>>  controller/mac-cache.c | 127 +++++++++++++++++++++++------------------
>>>>  controller/mac-cache.h |  48 +++++++---------
>>>>  controller/pinctrl.c   |  81 +++++++++++---------------
>>>>  3 files changed, 125 insertions(+), 131 deletions(-)
>>>>
>>>
>>> [...]
>>>
>>>> @@ -115,25 +115,25 @@ struct bp_packet_data {
>>>>  };
>>>>
>>>>  struct buffered_packets {
>>>> -    struct hmap_node hmap_node;
>>>> +    struct cmap_node cmap_node;
>>>>
>>>> -    struct mac_binding_data mb_data;
>>>> +    struct mac_binding_data mb_data;  /* Immutable after insert. */
>>>
>>> nit: in order to make it really immutable we can do something like:
>>>
>>>         const struct mac_binding_data * const mb_data;
>>>
>>
>> That would require extra allocation because it would become
>> a pointer instead of embedded struct. Which is probably not worth
>> Also, making it const struct is a bit clunky during initialization.
> 
> ack, I agree it is uglier, but it would be safer. Anyway, just a nit, up to
> you.
> 
>>
>>
>>>>
>>>> -    /* Queue of packet_data associated with this struct. */
>>>> +    /* Queue of packet_data associated with this struct.
>>>> +     * Handler thread only. */
>>>>      struct vector queue;
>>>
>>> nit: can we make vector (and the fields below) really accessible just by
>>> pinctrl
>>> thread?
>>>
>>
>> I was thinking about that, but it didn't seem worth splitting into
>> two structs just because of this.
> 
> even here, just a nit, up to you.
> 
> Regards,
> Lorenzo
> 
>>
>>
>>>>
>>>> -    /* Timestamp in ms when the buffered packet should expire. */
>>>> +    /* Timestamp in ms when the buffered packet should expire.
>>>> +     * Handler thread only. */
>>>>      long long int expire_at_ms;
>>>>
>>>> -    /* Timestamp in ms when the buffered packet should do full SB
>>> lookup.*/
>>>> -    long long int lookup_at_ms;
>>>> -};
>>>> +    /* Resolved MAC address packed as uint64. 0 means unresolved.
>>>> +     * Written by main thread, read by handler thread. */
>>>> +    atomic_uint64_t resolved_mac;
>>>>
>>>> -struct buffered_packets_ctx {
>>>> -    /* Map of all buffered packets waiting for the MAC address. */
>>>> -    struct hmap buffered_packets;
>>>> -    /* List of packet data that are ready to be sent. */
>>>> -    struct vector ready_packets_data;
>>>> +    /* Timestamp in ms when the buffered packet should do full SB
>>> lookup.
>>>> +     * Main thread only. */
>>>> +    long long int lookup_at_ms;
>>>>  };
>>>>
>>>>  /* Thresholds. */
>>>> @@ -211,27 +211,23 @@ void fdb_stats_run(struct vector *stats_vec,
>>> uint64_t *req_delay, void *data);
>>>>  void bp_packet_data_destroy(struct bp_packet_data *pd);
>>>>
>>>>  struct buffered_packets *
>>>> -buffered_packets_add(struct buffered_packets_ctx *ctx,
>>>> +buffered_packets_add(struct cmap *bp_map,
>>>>                       struct mac_binding_data mb_data);
>>>>
>>>>  void buffered_packets_packet_data_enqueue(struct buffered_packets *bp,
>>>>                                            const struct
>>> ofputil_packet_in *pin,
>>>>                                            const struct ofpbuf
>>> *continuation);
>>>>
>>>> -void buffered_packets_ctx_run(struct buffered_packets_ctx *ctx,
>>>> -                              const struct hmap *recent_mbs,
>>>> -                              struct ovsdb_idl_index *sbrec_pb_by_key,
>>>> -                              struct ovsdb_idl_index *sbrec_dp_by_key,
>>>> -                              struct ovsdb_idl_index *sbrec_pb_by_name,
>>>> -                              struct ovsdb_idl_index
>>> *sbrec_mb_by_lport_ip);
>>>> -
>>>> -void buffered_packets_ctx_init(struct buffered_packets_ctx *ctx);
>>>> -
>>>> -void buffered_packets_ctx_destroy(struct buffered_packets_ctx *ctx);
>>>> +bool buffered_packets_lookup_run(struct cmap *bp_map,
>>>> +                                 const struct hmap *recent_mbs,
>>>> +                                 struct ovsdb_idl_index
>>> *sbrec_pb_by_key,
>>>> +                                 struct ovsdb_idl_index
>>> *sbrec_dp_by_key,
>>>> +                                 struct ovsdb_idl_index
>>> *sbrec_pb_by_name,
>>>> +                                 struct ovsdb_idl_index
>>> *sbrec_mb_by_lport_ip);
>>>>
>>>> -bool buffered_packets_ctx_is_ready_to_send(struct buffered_packets_ctx
>>> *ctx);
>>>> +void buffered_packets_run(struct cmap *bp_map, struct vector *rpd);
>>>>
>>>> -bool buffered_packets_ctx_has_packets(struct buffered_packets_ctx *ctx);
>>>> +void buffered_packets_map_destroy(struct cmap *bp_map);
>>>>
>>>>  void mac_binding_probe_stats_process_flow_stats(
>>>>          struct vector *stats_vec,
>>>> diff --git a/controller/pinctrl.c b/controller/pinctrl.c
>>>> index 4473ec668..bb8d20e7f 100644
>>>> --- a/controller/pinctrl.c
>>>> +++ b/controller/pinctrl.c
>>>> @@ -185,16 +185,15 @@ struct pinctrl {
>>>>  static struct pinctrl pinctrl;
>>>>
>>>>  static bool pinctrl_is_sb_commited(int64_t commit_cfg, int64_t cur_cfg);
>>>> -static void init_buffered_packets_ctx(void);
>>>> -static void destroy_buffered_packets_ctx(void);
>>>> +static void init_buffered_packets_map(void);
>>>> +static void destroy_buffered_packets_map(void);
>>>>  static void
>>>>  run_buffered_binding(const struct sbrec_mac_binding_table
>>> *mac_binding_table,
>>>>                       const struct hmap *local_datapaths,
>>>>                       struct ovsdb_idl_index *sbrec_port_binding_by_key,
>>>>                       struct ovsdb_idl_index
>>> *sbrec_datapath_binding_by_key,
>>>>                       struct ovsdb_idl_index *sbrec_port_binding_by_name,
>>>> -                     struct ovsdb_idl_index
>>> *sbrec_mac_binding_by_lport_ip)
>>>> -    OVS_REQUIRES(pinctrl_mutex);
>>>> +                     struct ovsdb_idl_index
>>> *sbrec_mac_binding_by_lport_ip);
>>>>
>>>>  static void pinctrl_handle_put_mac_binding(const struct flow *md,
>>>>                                             const struct flow *headers,
>>>> @@ -209,8 +208,7 @@ static void run_put_mac_bindings(
>>>>      struct ovsdb_idl_index *sbrec_mac_binding_by_lport_ip)
>>>>      OVS_REQUIRES(pinctrl_mutex);
>>>>  static void wait_put_mac_bindings(void);
>>>> -static void send_mac_binding_buffered_pkts(struct rconn *swconn)
>>>> -    OVS_REQUIRES(pinctrl_mutex);
>>>> +static void send_mac_binding_buffered_pkts(struct rconn *swconn);
>>>>
>>>>  static void pinctrl_activation_strategy_handler(const struct match *md);
>>>>
>>>> @@ -562,7 +560,7 @@ pinctrl_init(void)
>>>>      init_send_arps_nds();
>>>>      init_ipv6_ras();
>>>>      init_ipv6_prefixd();
>>>> -    init_buffered_packets_ctx();
>>>> +    init_buffered_packets_map();
>>>>      init_activated_ports();
>>>>      init_event_table();
>>>>      ip_mcast_snoop_init();
>>>> @@ -1577,18 +1575,18 @@ prepare_ipv6_prefixd(struct ovsdb_idl_txn
>>> *ovnsb_idl_txn,
>>>>      }
>>>>  }
>>>>
>>>> -static struct buffered_packets_ctx buffered_packets_ctx;
>>>> +static struct cmap buffered_packets_map;
>>>>
>>>>  static void
>>>> -init_buffered_packets_ctx(void)
>>>> +init_buffered_packets_map(void)
>>>>  {
>>>> -    buffered_packets_ctx_init(&buffered_packets_ctx);
>>>> +    cmap_init(&buffered_packets_map);
>>>>  }
>>>>
>>>>  static void
>>>> -destroy_buffered_packets_ctx(void)
>>>> +destroy_buffered_packets_map(void)
>>>>  {
>>>> -    buffered_packets_ctx_destroy(&buffered_packets_ctx);
>>>> +    buffered_packets_map_destroy(&buffered_packets_map);
>>>>  }
>>>>
>>>>  /* Called with in the pinctrl_handler thread context. */
>>>> @@ -1596,7 +1594,6 @@ static void
>>>>  pinctrl_handle_buffered_packets(const struct ofputil_packet_in *pin,
>>>>                                  const struct ofpbuf *continuation,
>>>>                                  bool is_arp)
>>>> -OVS_REQUIRES(pinctrl_mutex)
>>>>  {
>>>>      const struct match *md = &pin->flow_metadata;
>>>>      struct mac_binding_data mb_data;
>>>> @@ -1613,7 +1610,7 @@ OVS_REQUIRES(pinctrl_mutex)
>>>>                            md->flow.regs[MFF_LOG_OUTPORT - MFF_REG0],
>>>>                            ip, eth_addr_zero);
>>>>
>>>> -    struct buffered_packets *bp =
>>> buffered_packets_add(&buffered_packets_ctx,
>>>> +    struct buffered_packets *bp =
>>> buffered_packets_add(&buffered_packets_map,
>>>>                                                         mb_data);
>>>>      if (!bp) {
>>>>          COVERAGE_INC(pinctrl_drop_buffered_packets_map);
>>>> @@ -1643,9 +1640,7 @@ pinctrl_handle_arp(struct rconn *swconn, const
>>> struct flow *ip_flow,
>>>>          return;
>>>>      }
>>>>
>>>> -    ovs_mutex_lock(&pinctrl_mutex);
>>>>      pinctrl_handle_buffered_packets(pin, continuation, true);
>>>> -    ovs_mutex_unlock(&pinctrl_mutex);
>>>>
>>>>      /* Compose an ARP packet. */
>>>>      uint64_t packet_stub[128 / 8];
>>>> @@ -4081,12 +4076,12 @@ pinctrl_handler(void *arg_)
>>>>                      send_arp_nd_run(swconn, &send_arp_nd_time);
>>>>                      send_ipv6_ras(swconn, &send_ipv6_ra_time);
>>>>                      send_ipv6_prefixd(swconn, &send_prefixd_time);
>>>> -                    send_mac_binding_buffered_pkts(swconn);
>>>>                      bfd_monitor_send_msg(swconn, &bfd_time);
>>>>                      ovs_mutex_unlock(&pinctrl_mutex);
>>>>                  } else {
>>>>                      lock_failed = true;
>>>>                  }
>>>> +                send_mac_binding_buffered_pkts(swconn);
>>>>                  send_garp_rarp_run(swconn, &send_garp_rarp_time);
>>>>                  ip_mcast_querier_run(swconn, &send_mcast_query_time);
>>>>              }
>>>> @@ -4226,11 +4221,6 @@ pinctrl_run(struct ovsdb_idl_txn *ovnsb_idl_txn,
>>>>                    sbrec_port_binding_by_key,
>>>>                    sbrec_igmp_groups,
>>>>                    sbrec_ip_multicast_opts);
>>>> -    run_buffered_binding(mac_binding_table, local_datapaths,
>>>> -                         sbrec_port_binding_by_key,
>>>> -                         sbrec_datapath_binding_by_key,
>>>> -                         sbrec_port_binding_by_name,
>>>> -                         sbrec_mac_binding_by_lport_ip);
>>>>      sync_svc_monitors(ovnsb_idl_txn, svc_mon_table,
>>> sbrec_port_binding_by_name,
>>>>                        chassis);
>>>>      bfd_monitor_run(ovnsb_idl_txn, bfd_table,
>>> sbrec_port_binding_by_name,
>>>> @@ -4241,6 +4231,12 @@ pinctrl_run(struct ovsdb_idl_txn *ovnsb_idl_txn,
>>>>      run_activated_ports(ovnsb_idl_txn, sbrec_datapath_binding_by_key,
>>>>                          sbrec_port_binding_by_key, chassis);
>>>>      ovs_mutex_unlock(&pinctrl_mutex);
>>>> +
>>>> +    run_buffered_binding(mac_binding_table, local_datapaths,
>>>> +                         sbrec_port_binding_by_key,
>>>> +                         sbrec_datapath_binding_by_key,
>>>> +                         sbrec_port_binding_by_name,
>>>> +                         sbrec_mac_binding_by_lport_ip);
>>>>  }
>>>>
>>>>  /* Table of ipv6_ra_state structures, keyed on logical port name.
>>>> @@ -4758,7 +4754,7 @@ pinctrl_destroy(void)
>>>>      destroy_send_arps_nds();
>>>>      destroy_ipv6_ras();
>>>>      destroy_ipv6_prefixd();
>>>> -    destroy_buffered_packets_ctx();
>>>> +    destroy_buffered_packets_map();
>>>>      destroy_activated_ports();
>>>>      event_table_destroy();
>>>>      destroy_put_mac_bindings();
>>>> @@ -4843,30 +4839,24 @@ pinctrl_handle_put_mac_binding(const struct flow
>>> *md,
>>>>      notify_pinctrl_main();
>>>>  }
>>>>
>>>> -#define READY_PACKETS_VEC_CAPACITY_THRESHOLD 1024
>>>> -
>>>>  /* Called with in the pinctrl_handler thread context. */
>>>>  static void
>>>>  send_mac_binding_buffered_pkts(struct rconn *swconn)
>>>> -    OVS_REQUIRES(pinctrl_mutex)
>>>>  {
>>>>      enum ofp_version version = rconn_get_version(swconn);
>>>>      enum ofputil_protocol proto =
>>> ofputil_protocol_from_ofp_version(version);
>>>> -    struct vector *rpd = &buffered_packets_ctx.ready_packets_data;
>>>> +    struct vector rpd = VECTOR_EMPTY_INITIALIZER(struct bp_packet_data);
>>>> +
>>>> +    buffered_packets_run(&buffered_packets_map, &rpd);
>>>>
>>>>      struct bp_packet_data *pd;
>>>> -    VECTOR_FOR_EACH_PTR (rpd, pd) {
>>>> +    VECTOR_FOR_EACH_PTR (&rpd, pd) {
>>>>          queue_msg(swconn, ofputil_encode_resume(&pd->pin,
>>> pd->continuation,
>>>>                                                  proto));
>>>>          bp_packet_data_destroy(pd);
>>>>      }
>>>>
>>>> -    vector_clear(rpd);
>>>> -    if (vector_capacity(rpd) >= READY_PACKETS_VEC_CAPACITY_THRESHOLD) {
>>>> -        VLOG_DBG("The ready_packets_data vector capacity (%"PRIuSIZE") "
>>>> -                 "is over threshold.", vector_capacity(rpd));
>>>> -        vector_shrink_to_fit(rpd);
>>>> -    }
>>>> +    vector_destroy(&rpd);
>>>>  }
>>>>
>>>>  static void
>>>> @@ -4935,9 +4925,8 @@ run_buffered_binding(const struct
>>> sbrec_mac_binding_table *mac_binding_table,
>>>>                       struct ovsdb_idl_index
>>> *sbrec_datapath_binding_by_key,
>>>>                       struct ovsdb_idl_index *sbrec_port_binding_by_name,
>>>>                       struct ovsdb_idl_index
>>> *sbrec_mac_binding_by_lport_ip)
>>>> -    OVS_REQUIRES(pinctrl_mutex)
>>>>  {
>>>> -    if (!buffered_packets_ctx_has_packets(&buffered_packets_ctx)) {
>>>> +    if (cmap_is_empty(&buffered_packets_map)) {
>>>>          return;
>>>>      }
>>>>
>>>> @@ -4983,18 +4972,16 @@ run_buffered_binding(const struct
>>> sbrec_mac_binding_table *mac_binding_table,
>>>>          mac_binding_add(&recent_mbs, mb_data, smb, 0);
>>>>      }
>>>>
>>>> -    buffered_packets_ctx_run(&buffered_packets_ctx, &recent_mbs,
>>>> -                                 sbrec_port_binding_by_key,
>>>> -                                 sbrec_datapath_binding_by_key,
>>>> -                                 sbrec_port_binding_by_name,
>>>> -                                 sbrec_mac_binding_by_lport_ip);
>>>> +    if (buffered_packets_lookup_run(&buffered_packets_map, &recent_mbs,
>>>> +                                    sbrec_port_binding_by_key,
>>>> +                                    sbrec_datapath_binding_by_key,
>>>> +                                    sbrec_port_binding_by_name,
>>>> +                                    sbrec_mac_binding_by_lport_ip)) {
>>>> +        notify_pinctrl_handler();
>>>> +    }
>>>>
>>>>      mac_bindings_clear(&recent_mbs);
>>>>      hmap_destroy(&recent_mbs);
>>>> -
>>>> -    if (buffered_packets_ctx_is_ready_to_send(&buffered_packets_ctx)) {
>>>> -        notify_pinctrl_handler();
>>>> -    }
>>>>  }
>>>>
>>>>  static void
>>>> @@ -6438,7 +6425,7 @@ may_inject_pkts(void)
>>>>              !cmap_is_empty(&garp_rarp_get_data()->data) ||
>>>>              ipv6_prefixd_should_inject() ||
>>>>              !ovs_list_is_empty(&mcast_query_list) ||
>>>> -
>>> buffered_packets_ctx_is_ready_to_send(&buffered_packets_ctx) ||
>>>> +            !cmap_is_empty(&buffered_packets_map) ||
>>>>              bfd_monitor_should_inject());
>>>>  }
>>>>
>>>> @@ -6528,9 +6515,7 @@ pinctrl_handle_nd_ns(struct rconn *swconn, const
>>> struct flow *ip_flow,
>>>>          return;
>>>>      }
>>>>
>>>> -    ovs_mutex_lock(&pinctrl_mutex);
>>>>      pinctrl_handle_buffered_packets(pin, continuation, false);
>>>> -    ovs_mutex_unlock(&pinctrl_mutex);
>>>>
>>>>      uint64_t packet_stub[128 / 8];
>>>>      struct dp_packet packet;
>>>> --
>>>> 2.54.0
>>>>
>>>> _______________________________________________
>>>> dev mailing list
>>>> [email protected]
>>>> https://mail.openvswitch.org/mailman/listinfo/ovs-dev
>>>>
>>>
>>
>> Thank you Mark and Lorenzo,
>> applied to main. I was also considering backporting this to 26.03, are
>> there any concerns or objections?
>>

Given that 26.03 is our new LTS, I'd also apply it there.

Regards,
Dumitru

>> Regards,
>> Ales

_______________________________________________
dev mailing list
[email protected]
https://mail.openvswitch.org/mailman/listinfo/ovs-dev

Reply via email to