Hi all, On 5/28/26 11:55 AM, Lorenzo Bianconi wrote: >> On Wed, May 27, 2026 at 7:06 PM Lorenzo Bianconi < >> [email protected]> wrote: >> >>>> The packet buffering, which is part of the ARP processing, required >>>> a pinctrl lock. As a consequence we could block packet processing >>>> when the lock is taken by main thread and possibly drop some traffic >>>> that is heading towards pinctrl. Make sure the packet buffering is >>>> now handled without any lock, that is done by using cmap instead of >>>> hmap to store the objects and all concurrent members are accessed >>>> via atomics instead. This should increase the throughput of the >>>> packet-in processing for ARP handling. >>> >>> Hi Ales, >>> >>> I think the patch is fine. Moreover, reading the comments from Marc, I >>> consider >>> this one the first step toward reducing the contention in pinctrl_thread >>> codepath. >>> >>> Acked-by: Lorenzo Bianconi <[email protected]> >>> >>> Regards, >>> Lorenzo >> >> >>>> >>>> Reported-at: https://redhat.atlassian.net/browse/FDP-3301 >>>> Signed-off-by: Ales Musil <[email protected]> >>>> --- >>>> controller/mac-cache.c | 127 +++++++++++++++++++++++------------------ >>>> controller/mac-cache.h | 48 +++++++--------- >>>> controller/pinctrl.c | 81 +++++++++++--------------- >>>> 3 files changed, 125 insertions(+), 131 deletions(-) >>>> >>> >>> [...] >>> >>>> @@ -115,25 +115,25 @@ struct bp_packet_data { >>>> }; >>>> >>>> struct buffered_packets { >>>> - struct hmap_node hmap_node; >>>> + struct cmap_node cmap_node; >>>> >>>> - struct mac_binding_data mb_data; >>>> + struct mac_binding_data mb_data; /* Immutable after insert. */ >>> >>> nit: in order to make it really immutable we can do something like: >>> >>> const struct mac_binding_data * const mb_data; >>> >> >> That would require extra allocation because it would become >> a pointer instead of embedded struct. Which is probably not worth >> Also, making it const struct is a bit clunky during initialization. > > ack, I agree it is uglier, but it would be safer. Anyway, just a nit, up to > you. > >> >> >>>> >>>> - /* Queue of packet_data associated with this struct. */ >>>> + /* Queue of packet_data associated with this struct. >>>> + * Handler thread only. */ >>>> struct vector queue; >>> >>> nit: can we make vector (and the fields below) really accessible just by >>> pinctrl >>> thread? >>> >> >> I was thinking about that, but it didn't seem worth splitting into >> two structs just because of this. > > even here, just a nit, up to you. > > Regards, > Lorenzo > >> >> >>>> >>>> - /* Timestamp in ms when the buffered packet should expire. */ >>>> + /* Timestamp in ms when the buffered packet should expire. >>>> + * Handler thread only. */ >>>> long long int expire_at_ms; >>>> >>>> - /* Timestamp in ms when the buffered packet should do full SB >>> lookup.*/ >>>> - long long int lookup_at_ms; >>>> -}; >>>> + /* Resolved MAC address packed as uint64. 0 means unresolved. >>>> + * Written by main thread, read by handler thread. */ >>>> + atomic_uint64_t resolved_mac; >>>> >>>> -struct buffered_packets_ctx { >>>> - /* Map of all buffered packets waiting for the MAC address. */ >>>> - struct hmap buffered_packets; >>>> - /* List of packet data that are ready to be sent. */ >>>> - struct vector ready_packets_data; >>>> + /* Timestamp in ms when the buffered packet should do full SB >>> lookup. >>>> + * Main thread only. */ >>>> + long long int lookup_at_ms; >>>> }; >>>> >>>> /* Thresholds. */ >>>> @@ -211,27 +211,23 @@ void fdb_stats_run(struct vector *stats_vec, >>> uint64_t *req_delay, void *data); >>>> void bp_packet_data_destroy(struct bp_packet_data *pd); >>>> >>>> struct buffered_packets * >>>> -buffered_packets_add(struct buffered_packets_ctx *ctx, >>>> +buffered_packets_add(struct cmap *bp_map, >>>> struct mac_binding_data mb_data); >>>> >>>> void buffered_packets_packet_data_enqueue(struct buffered_packets *bp, >>>> const struct >>> ofputil_packet_in *pin, >>>> const struct ofpbuf >>> *continuation); >>>> >>>> -void buffered_packets_ctx_run(struct buffered_packets_ctx *ctx, >>>> - const struct hmap *recent_mbs, >>>> - struct ovsdb_idl_index *sbrec_pb_by_key, >>>> - struct ovsdb_idl_index *sbrec_dp_by_key, >>>> - struct ovsdb_idl_index *sbrec_pb_by_name, >>>> - struct ovsdb_idl_index >>> *sbrec_mb_by_lport_ip); >>>> - >>>> -void buffered_packets_ctx_init(struct buffered_packets_ctx *ctx); >>>> - >>>> -void buffered_packets_ctx_destroy(struct buffered_packets_ctx *ctx); >>>> +bool buffered_packets_lookup_run(struct cmap *bp_map, >>>> + const struct hmap *recent_mbs, >>>> + struct ovsdb_idl_index >>> *sbrec_pb_by_key, >>>> + struct ovsdb_idl_index >>> *sbrec_dp_by_key, >>>> + struct ovsdb_idl_index >>> *sbrec_pb_by_name, >>>> + struct ovsdb_idl_index >>> *sbrec_mb_by_lport_ip); >>>> >>>> -bool buffered_packets_ctx_is_ready_to_send(struct buffered_packets_ctx >>> *ctx); >>>> +void buffered_packets_run(struct cmap *bp_map, struct vector *rpd); >>>> >>>> -bool buffered_packets_ctx_has_packets(struct buffered_packets_ctx *ctx); >>>> +void buffered_packets_map_destroy(struct cmap *bp_map); >>>> >>>> void mac_binding_probe_stats_process_flow_stats( >>>> struct vector *stats_vec, >>>> diff --git a/controller/pinctrl.c b/controller/pinctrl.c >>>> index 4473ec668..bb8d20e7f 100644 >>>> --- a/controller/pinctrl.c >>>> +++ b/controller/pinctrl.c >>>> @@ -185,16 +185,15 @@ struct pinctrl { >>>> static struct pinctrl pinctrl; >>>> >>>> static bool pinctrl_is_sb_commited(int64_t commit_cfg, int64_t cur_cfg); >>>> -static void init_buffered_packets_ctx(void); >>>> -static void destroy_buffered_packets_ctx(void); >>>> +static void init_buffered_packets_map(void); >>>> +static void destroy_buffered_packets_map(void); >>>> static void >>>> run_buffered_binding(const struct sbrec_mac_binding_table >>> *mac_binding_table, >>>> const struct hmap *local_datapaths, >>>> struct ovsdb_idl_index *sbrec_port_binding_by_key, >>>> struct ovsdb_idl_index >>> *sbrec_datapath_binding_by_key, >>>> struct ovsdb_idl_index *sbrec_port_binding_by_name, >>>> - struct ovsdb_idl_index >>> *sbrec_mac_binding_by_lport_ip) >>>> - OVS_REQUIRES(pinctrl_mutex); >>>> + struct ovsdb_idl_index >>> *sbrec_mac_binding_by_lport_ip); >>>> >>>> static void pinctrl_handle_put_mac_binding(const struct flow *md, >>>> const struct flow *headers, >>>> @@ -209,8 +208,7 @@ static void run_put_mac_bindings( >>>> struct ovsdb_idl_index *sbrec_mac_binding_by_lport_ip) >>>> OVS_REQUIRES(pinctrl_mutex); >>>> static void wait_put_mac_bindings(void); >>>> -static void send_mac_binding_buffered_pkts(struct rconn *swconn) >>>> - OVS_REQUIRES(pinctrl_mutex); >>>> +static void send_mac_binding_buffered_pkts(struct rconn *swconn); >>>> >>>> static void pinctrl_activation_strategy_handler(const struct match *md); >>>> >>>> @@ -562,7 +560,7 @@ pinctrl_init(void) >>>> init_send_arps_nds(); >>>> init_ipv6_ras(); >>>> init_ipv6_prefixd(); >>>> - init_buffered_packets_ctx(); >>>> + init_buffered_packets_map(); >>>> init_activated_ports(); >>>> init_event_table(); >>>> ip_mcast_snoop_init(); >>>> @@ -1577,18 +1575,18 @@ prepare_ipv6_prefixd(struct ovsdb_idl_txn >>> *ovnsb_idl_txn, >>>> } >>>> } >>>> >>>> -static struct buffered_packets_ctx buffered_packets_ctx; >>>> +static struct cmap buffered_packets_map; >>>> >>>> static void >>>> -init_buffered_packets_ctx(void) >>>> +init_buffered_packets_map(void) >>>> { >>>> - buffered_packets_ctx_init(&buffered_packets_ctx); >>>> + cmap_init(&buffered_packets_map); >>>> } >>>> >>>> static void >>>> -destroy_buffered_packets_ctx(void) >>>> +destroy_buffered_packets_map(void) >>>> { >>>> - buffered_packets_ctx_destroy(&buffered_packets_ctx); >>>> + buffered_packets_map_destroy(&buffered_packets_map); >>>> } >>>> >>>> /* Called with in the pinctrl_handler thread context. */ >>>> @@ -1596,7 +1594,6 @@ static void >>>> pinctrl_handle_buffered_packets(const struct ofputil_packet_in *pin, >>>> const struct ofpbuf *continuation, >>>> bool is_arp) >>>> -OVS_REQUIRES(pinctrl_mutex) >>>> { >>>> const struct match *md = &pin->flow_metadata; >>>> struct mac_binding_data mb_data; >>>> @@ -1613,7 +1610,7 @@ OVS_REQUIRES(pinctrl_mutex) >>>> md->flow.regs[MFF_LOG_OUTPORT - MFF_REG0], >>>> ip, eth_addr_zero); >>>> >>>> - struct buffered_packets *bp = >>> buffered_packets_add(&buffered_packets_ctx, >>>> + struct buffered_packets *bp = >>> buffered_packets_add(&buffered_packets_map, >>>> mb_data); >>>> if (!bp) { >>>> COVERAGE_INC(pinctrl_drop_buffered_packets_map); >>>> @@ -1643,9 +1640,7 @@ pinctrl_handle_arp(struct rconn *swconn, const >>> struct flow *ip_flow, >>>> return; >>>> } >>>> >>>> - ovs_mutex_lock(&pinctrl_mutex); >>>> pinctrl_handle_buffered_packets(pin, continuation, true); >>>> - ovs_mutex_unlock(&pinctrl_mutex); >>>> >>>> /* Compose an ARP packet. */ >>>> uint64_t packet_stub[128 / 8]; >>>> @@ -4081,12 +4076,12 @@ pinctrl_handler(void *arg_) >>>> send_arp_nd_run(swconn, &send_arp_nd_time); >>>> send_ipv6_ras(swconn, &send_ipv6_ra_time); >>>> send_ipv6_prefixd(swconn, &send_prefixd_time); >>>> - send_mac_binding_buffered_pkts(swconn); >>>> bfd_monitor_send_msg(swconn, &bfd_time); >>>> ovs_mutex_unlock(&pinctrl_mutex); >>>> } else { >>>> lock_failed = true; >>>> } >>>> + send_mac_binding_buffered_pkts(swconn); >>>> send_garp_rarp_run(swconn, &send_garp_rarp_time); >>>> ip_mcast_querier_run(swconn, &send_mcast_query_time); >>>> } >>>> @@ -4226,11 +4221,6 @@ pinctrl_run(struct ovsdb_idl_txn *ovnsb_idl_txn, >>>> sbrec_port_binding_by_key, >>>> sbrec_igmp_groups, >>>> sbrec_ip_multicast_opts); >>>> - run_buffered_binding(mac_binding_table, local_datapaths, >>>> - sbrec_port_binding_by_key, >>>> - sbrec_datapath_binding_by_key, >>>> - sbrec_port_binding_by_name, >>>> - sbrec_mac_binding_by_lport_ip); >>>> sync_svc_monitors(ovnsb_idl_txn, svc_mon_table, >>> sbrec_port_binding_by_name, >>>> chassis); >>>> bfd_monitor_run(ovnsb_idl_txn, bfd_table, >>> sbrec_port_binding_by_name, >>>> @@ -4241,6 +4231,12 @@ pinctrl_run(struct ovsdb_idl_txn *ovnsb_idl_txn, >>>> run_activated_ports(ovnsb_idl_txn, sbrec_datapath_binding_by_key, >>>> sbrec_port_binding_by_key, chassis); >>>> ovs_mutex_unlock(&pinctrl_mutex); >>>> + >>>> + run_buffered_binding(mac_binding_table, local_datapaths, >>>> + sbrec_port_binding_by_key, >>>> + sbrec_datapath_binding_by_key, >>>> + sbrec_port_binding_by_name, >>>> + sbrec_mac_binding_by_lport_ip); >>>> } >>>> >>>> /* Table of ipv6_ra_state structures, keyed on logical port name. >>>> @@ -4758,7 +4754,7 @@ pinctrl_destroy(void) >>>> destroy_send_arps_nds(); >>>> destroy_ipv6_ras(); >>>> destroy_ipv6_prefixd(); >>>> - destroy_buffered_packets_ctx(); >>>> + destroy_buffered_packets_map(); >>>> destroy_activated_ports(); >>>> event_table_destroy(); >>>> destroy_put_mac_bindings(); >>>> @@ -4843,30 +4839,24 @@ pinctrl_handle_put_mac_binding(const struct flow >>> *md, >>>> notify_pinctrl_main(); >>>> } >>>> >>>> -#define READY_PACKETS_VEC_CAPACITY_THRESHOLD 1024 >>>> - >>>> /* Called with in the pinctrl_handler thread context. */ >>>> static void >>>> send_mac_binding_buffered_pkts(struct rconn *swconn) >>>> - OVS_REQUIRES(pinctrl_mutex) >>>> { >>>> enum ofp_version version = rconn_get_version(swconn); >>>> enum ofputil_protocol proto = >>> ofputil_protocol_from_ofp_version(version); >>>> - struct vector *rpd = &buffered_packets_ctx.ready_packets_data; >>>> + struct vector rpd = VECTOR_EMPTY_INITIALIZER(struct bp_packet_data); >>>> + >>>> + buffered_packets_run(&buffered_packets_map, &rpd); >>>> >>>> struct bp_packet_data *pd; >>>> - VECTOR_FOR_EACH_PTR (rpd, pd) { >>>> + VECTOR_FOR_EACH_PTR (&rpd, pd) { >>>> queue_msg(swconn, ofputil_encode_resume(&pd->pin, >>> pd->continuation, >>>> proto)); >>>> bp_packet_data_destroy(pd); >>>> } >>>> >>>> - vector_clear(rpd); >>>> - if (vector_capacity(rpd) >= READY_PACKETS_VEC_CAPACITY_THRESHOLD) { >>>> - VLOG_DBG("The ready_packets_data vector capacity (%"PRIuSIZE") " >>>> - "is over threshold.", vector_capacity(rpd)); >>>> - vector_shrink_to_fit(rpd); >>>> - } >>>> + vector_destroy(&rpd); >>>> } >>>> >>>> static void >>>> @@ -4935,9 +4925,8 @@ run_buffered_binding(const struct >>> sbrec_mac_binding_table *mac_binding_table, >>>> struct ovsdb_idl_index >>> *sbrec_datapath_binding_by_key, >>>> struct ovsdb_idl_index *sbrec_port_binding_by_name, >>>> struct ovsdb_idl_index >>> *sbrec_mac_binding_by_lport_ip) >>>> - OVS_REQUIRES(pinctrl_mutex) >>>> { >>>> - if (!buffered_packets_ctx_has_packets(&buffered_packets_ctx)) { >>>> + if (cmap_is_empty(&buffered_packets_map)) { >>>> return; >>>> } >>>> >>>> @@ -4983,18 +4972,16 @@ run_buffered_binding(const struct >>> sbrec_mac_binding_table *mac_binding_table, >>>> mac_binding_add(&recent_mbs, mb_data, smb, 0); >>>> } >>>> >>>> - buffered_packets_ctx_run(&buffered_packets_ctx, &recent_mbs, >>>> - sbrec_port_binding_by_key, >>>> - sbrec_datapath_binding_by_key, >>>> - sbrec_port_binding_by_name, >>>> - sbrec_mac_binding_by_lport_ip); >>>> + if (buffered_packets_lookup_run(&buffered_packets_map, &recent_mbs, >>>> + sbrec_port_binding_by_key, >>>> + sbrec_datapath_binding_by_key, >>>> + sbrec_port_binding_by_name, >>>> + sbrec_mac_binding_by_lport_ip)) { >>>> + notify_pinctrl_handler(); >>>> + } >>>> >>>> mac_bindings_clear(&recent_mbs); >>>> hmap_destroy(&recent_mbs); >>>> - >>>> - if (buffered_packets_ctx_is_ready_to_send(&buffered_packets_ctx)) { >>>> - notify_pinctrl_handler(); >>>> - } >>>> } >>>> >>>> static void >>>> @@ -6438,7 +6425,7 @@ may_inject_pkts(void) >>>> !cmap_is_empty(&garp_rarp_get_data()->data) || >>>> ipv6_prefixd_should_inject() || >>>> !ovs_list_is_empty(&mcast_query_list) || >>>> - >>> buffered_packets_ctx_is_ready_to_send(&buffered_packets_ctx) || >>>> + !cmap_is_empty(&buffered_packets_map) || >>>> bfd_monitor_should_inject()); >>>> } >>>> >>>> @@ -6528,9 +6515,7 @@ pinctrl_handle_nd_ns(struct rconn *swconn, const >>> struct flow *ip_flow, >>>> return; >>>> } >>>> >>>> - ovs_mutex_lock(&pinctrl_mutex); >>>> pinctrl_handle_buffered_packets(pin, continuation, false); >>>> - ovs_mutex_unlock(&pinctrl_mutex); >>>> >>>> uint64_t packet_stub[128 / 8]; >>>> struct dp_packet packet; >>>> -- >>>> 2.54.0 >>>> >>>> _______________________________________________ >>>> dev mailing list >>>> [email protected] >>>> https://mail.openvswitch.org/mailman/listinfo/ovs-dev >>>> >>> >> >> Thank you Mark and Lorenzo, >> applied to main. I was also considering backporting this to 26.03, are >> there any concerns or objections? >>
Given that 26.03 is our new LTS, I'd also apply it there. Regards, Dumitru >> Regards, >> Ales _______________________________________________ dev mailing list [email protected] https://mail.openvswitch.org/mailman/listinfo/ovs-dev
