On 08/06/2021 21:07, Flavio Leitner wrote: > > Hi Mark, > > This looks good to me. > > Since the new scheme doesn't allow users to change the number > of handlers, we must update ovs-vswitchd.conf.db(5) as well.
I updated this documentation > > Some comments below. > > On Fri, Apr 30, 2021 at 11:31:29AM -0400, Mark Gray wrote: >> The Open vSwitch kernel module uses the upcall mechanism to send >> packets from kernel space to user space when it misses in the kernel >> space flow table. The upcall sends packets via a Netlink socket. >> Currently, a Netlink socket is created for every vport. In this way, >> there is a 1:1 mapping between a vport and a Netlink socket. >> When a packet is received by a vport, if it needs to be sent to >> user space, it is sent via the corresponding Netlink socket. >> >> This mechanism, with various iterations of the corresponding user >> space code, has seen some limitations and issues: >> >> * On systems with a large number of vports, there is correspondingly >> a large number of Netlink sockets which can limit scaling. >> (https://bugzilla.redhat.com/show_bug.cgi?id=1526306) >> * Packet reordering on upcalls. >> (https://bugzilla.redhat.com/show_bug.cgi?id=1844576) >> * A thundering herd issue. >> (https://bugzilla.redhat.com/show_bug.cgi?id=1834444) >> >> This patch introduces an alternative, feature-negotiated, upcall >> mode using a per-cpu dispatch rather than a per-vport dispatch. >> >> In this mode, the Netlink socket to be used for the upcall is >> selected based on the CPU of the thread that is executing the upcall. >> In this way, it resolves the issues above as: >> >> a) The number of Netlink sockets scales with the number of CPUs >> rather than the number of vports. >> b) Ordering per-flow is maintained as packets are distributed to >> CPUs based on mechanisms such as RSS and flows are distributed >> to a single user space thread. >> c) Packets from a flow can only wake up one user space thread. 
>> >> Reported-at: https://bugzilla.redhat.com/1844576 >> Signed-off-by: Mark Gray <mark.d.g...@redhat.com> >> --- >> .../linux/compat/include/linux/openvswitch.h | 7 + >> lib/dpif-netdev.c | 1 + >> lib/dpif-netlink.c | 405 +++++++++++++++--- >> lib/dpif-provider.h | 10 + >> lib/dpif.c | 17 + >> lib/dpif.h | 1 + >> ofproto/ofproto-dpif-upcall.c | 51 ++- >> ofproto/ofproto.c | 12 - >> 8 files changed, 430 insertions(+), 74 deletions(-) >> >> diff --git a/datapath/linux/compat/include/linux/openvswitch.h >> b/datapath/linux/compat/include/linux/openvswitch.h >> index 875de20250ce..f29265df055e 100644 >> --- a/datapath/linux/compat/include/linux/openvswitch.h >> +++ b/datapath/linux/compat/include/linux/openvswitch.h >> @@ -89,6 +89,8 @@ enum ovs_datapath_cmd { >> * set on the datapath port (for OVS_ACTION_ATTR_MISS). Only valid on >> * %OVS_DP_CMD_NEW requests. A value of zero indicates that upcalls should >> * not be sent. >> + * OVS_DP_ATTR_PER_CPU_PIDS: Per-cpu array of PIDs for upcalls when >> + * OVS_DP_F_DISPATCH_UPCALL_PER_CPU feature is set. >> * @OVS_DP_ATTR_STATS: Statistics about packets that have passed through the >> * datapath. Always present in notifications. >> * @OVS_DP_ATTR_MEGAFLOW_STATS: Statistics about mega flow masks usage for >> the >> @@ -105,6 +107,8 @@ enum ovs_datapath_attr { >> OVS_DP_ATTR_MEGAFLOW_STATS, /* struct ovs_dp_megaflow_stats */ >> OVS_DP_ATTR_USER_FEATURES, /* OVS_DP_F_* */ >> OVS_DP_ATTR_PAD, >> + OVS_DP_ATTR_PAD2, >> + OVS_DP_ATTR_PER_CPU_PIDS, /* Netlink PIDS to receive upcalls */ >> __OVS_DP_ATTR_MAX >> }; >> >> @@ -146,6 +150,9 @@ struct ovs_vport_stats { >> /* Allow tc offload recirc sharing */ >> #define OVS_DP_F_TC_RECIRC_SHARING (1 << 2) >> >> +/* Allow per-cpu dispatch of upcalls */ >> +#define OVS_DP_F_DISPATCH_UPCALL_PER_CPU (1 << 3) >> + >> /* Fixed logical ports. 
*/ >> #define OVSP_LOCAL ((__u32)0) >> >> diff --git a/lib/dpif-netdev.c b/lib/dpif-netdev.c >> index 251788b04965..24e6911dd4ff 100644 >> --- a/lib/dpif-netdev.c >> +++ b/lib/dpif-netdev.c >> @@ -8488,6 +8488,7 @@ const struct dpif_class dpif_netdev_class = { >> dpif_netdev_operate, >> NULL, /* recv_set */ >> NULL, /* handlers_set */ >> + NULL, /* handlers_get */ > > That is number_handlers_required. > oops. Thanks for catching this >> dpif_netdev_set_config, >> dpif_netdev_queue_to_priority, >> NULL, /* recv */ >> diff --git a/lib/dpif-netlink.c b/lib/dpif-netlink.c >> index 2ded5fdd01b3..349897e70632 100644 >> --- a/lib/dpif-netlink.c >> +++ b/lib/dpif-netlink.c >> @@ -80,6 +80,9 @@ enum { MAX_PORTS = USHRT_MAX }; >> #define FLOW_DUMP_MAX_BATCH 50 >> #define OPERATE_MAX_OPS 50 >> >> +#define DISPATCH_MODE_PER_CPU(dpif) ((dpif)->user_features & \ >> + OVS_DP_F_DISPATCH_UPCALL_PER_CPU) >> + > > Perhaps a function like this: > static inline bool > dpif_netlink_upcall_per_cpu(struct ... dpif) { > return !!((dpif)->user_features & OVS_DP_F_DISPATCH_UPCALL_PER_CPU); > } > >> #ifndef EPOLLEXCLUSIVE >> #define EPOLLEXCLUSIVE (1u << 28) >> #endif >> @@ -98,6 +101,8 @@ struct dpif_netlink_dp { >> const struct ovs_dp_stats *stats; /* OVS_DP_ATTR_STATS. */ >> const struct ovs_dp_megaflow_stats *megaflow_stats; >> /* OVS_DP_ATTR_MEGAFLOW_STATS.*/ >> + const uint32_t *upcall_pids; /* OVS_DP_ATTR_PER_CPU_PIDS */ >> + uint32_t n_upcall_pids; >> }; >> >> static void dpif_netlink_dp_init(struct dpif_netlink_dp *); >> @@ -178,11 +183,16 @@ struct dpif_windows_vport_sock { >> #endif >> >> struct dpif_handler { >> + /* per-vport dispatch mode */ >> struct epoll_event *epoll_events; >> int epoll_fd; /* epoll fd that includes channel socks. >> */ >> int n_events; /* Num events returned by epoll_wait(). */ >> int event_offset; /* Offset into 'epoll_events'. */ >> >> + /* per-cpu dispatch mode */ >> + struct nl_sock *sock; /* Each handler thread holds one netlink >> + socket. 
*/ >> + >> #ifdef _WIN32 >> /* Pool of sockets. */ >> struct dpif_windows_vport_sock *vport_sock_pool; >> @@ -201,6 +211,8 @@ struct dpif_netlink { >> struct fat_rwlock upcall_lock; >> struct dpif_handler *handlers; >> uint32_t n_handlers; /* Num of upcall handlers. */ >> + >> + /* Per-vport dispatch mode */ >> struct dpif_channel *channels; /* Array of channels for each port. */ >> int uc_array_size; /* Size of 'handler->channels' and */ >> /* 'handler->epoll_events'. */ >> @@ -241,8 +253,12 @@ static int open_dpif(const struct dpif_netlink_dp *, >> struct dpif **); >> static uint32_t dpif_netlink_port_get_pid(const struct dpif *, >> odp_port_t port_no); >> static void dpif_netlink_handler_uninit(struct dpif_handler *handler); >> -static int dpif_netlink_refresh_channels(struct dpif_netlink *, >> - uint32_t n_handlers); >> +static int dpif_netlink_refresh_handlers_vport_dispatch(struct dpif_netlink >> *, >> + uint32_t >> n_handlers); >> +static void destroy_all_channels(struct dpif_netlink *); >> +static int dpif_netlink_refresh_handlers_cpu_dispatch(struct dpif_netlink >> *); >> +static void destroy_all_handlers(struct dpif_netlink *); >> + >> static void dpif_netlink_vport_to_ofpbuf(const struct dpif_netlink_vport *, >> struct ofpbuf *); >> static int dpif_netlink_vport_from_ofpbuf(struct dpif_netlink_vport *, >> @@ -357,11 +373,34 @@ dpif_netlink_open(const struct dpif_class *class >> OVS_UNUSED, const char *name, >> dp_request.cmd = OVS_DP_CMD_SET; >> } >> >> + /* The Open vSwitch kernel module has two modes for dispatching upcalls: >> + * per-vport and per-cpu. >> + * >> + * When dispatching upcalls per-vport, the kernel will >> + * send the upcall via a Netlink socket that has been selected based on >> the >> + * vport that received the packet that is causing the upcall. 
>> + * When dispatching upcall per-cpu, the kernel will send the upcall via >> + * a Netlink socket that has been selected based on the cpu that >> received >> + * the packet that is causing the upcall. >> + * >> + * First we test to see if the kernel module supports per-cpu >> dispatching >> + * (the preferred method). If it does not support per-cpu dispatching, >> we >> + * fall back to the per-vport dispatch mode. >> + */ >> dp_request.user_features |= OVS_DP_F_UNALIGNED; >> - dp_request.user_features |= OVS_DP_F_VPORT_PIDS; >> + dp_request.user_features |= OVS_DP_F_DISPATCH_UPCALL_PER_CPU; >> error = dpif_netlink_dp_transact(&dp_request, &dp, &buf); >> if (error) { >> - return error; >> + dp_request.user_features &= ~OVS_DP_F_DISPATCH_UPCALL_PER_CPU; >> + dp_request.user_features |= OVS_DP_F_VPORT_PIDS; >> + error = dpif_netlink_dp_transact(&dp_request, &dp, &buf); >> + if (error) { >> + return error; >> + } >> + VLOG_INFO("Dispatch mode: per-vport"); >> + } else { >> + VLOG_INFO("Dispatch mode: per-cpu"); > > > It would be nice if we could expose the mode using a command. 
I added `ovs-appctl dpif-netlink/dispatch-mode` # ovs-appctl dpif-netlink/dispatch-mode ovs-system: per-cpu dispatch mode > > >> } >> >> error = open_dpif(&dp, dpifp); >> @@ -609,6 +648,24 @@ destroy_all_channels(struct dpif_netlink *dpif) >> dpif->uc_array_size = 0; >> } >> >> +static void >> +destroy_all_handlers(struct dpif_netlink *dpif) >> + OVS_REQ_WRLOCK(dpif->upcall_lock) >> +{ >> + int i = 0; >> + >> + if (!dpif->handlers) { >> + return; >> + } >> + for (i = 0; i < dpif->n_handlers; i++) { >> + struct dpif_handler *handler = &dpif->handlers[i]; >> + close_nl_sock(handler->sock); >> + } >> + free(dpif->handlers); >> + dpif->handlers = NULL; >> + dpif->n_handlers = 0; >> +} >> + >> static void >> dpif_netlink_close(struct dpif *dpif_) >> { >> @@ -617,7 +674,11 @@ dpif_netlink_close(struct dpif *dpif_) >> nl_sock_destroy(dpif->port_notifier); >> >> fat_rwlock_wrlock(&dpif->upcall_lock); >> - destroy_all_channels(dpif); >> + if (DISPATCH_MODE_PER_CPU(dpif)) { >> + destroy_all_handlers(dpif); >> + } else { >> + destroy_all_channels(dpif); >> + } >> fat_rwlock_unlock(&dpif->upcall_lock); >> >> fat_rwlock_destroy(&dpif->upcall_lock); >> @@ -641,11 +702,14 @@ dpif_netlink_run(struct dpif *dpif_) >> { >> struct dpif_netlink *dpif = dpif_netlink_cast(dpif_); >> >> - if (dpif->refresh_channels) { >> - dpif->refresh_channels = false; >> - fat_rwlock_wrlock(&dpif->upcall_lock); >> - dpif_netlink_refresh_channels(dpif, dpif->n_handlers); >> - fat_rwlock_unlock(&dpif->upcall_lock); >> + if (!DISPATCH_MODE_PER_CPU(dpif)) { >> + if (dpif->refresh_channels) { >> + dpif->refresh_channels = false; >> + fat_rwlock_wrlock(&dpif->upcall_lock); >> + dpif_netlink_refresh_handlers_vport_dispatch(dpif, >> + dpif->n_handlers); >> + fat_rwlock_unlock(&dpif->upcall_lock); >> + } >> } >> return false; >> } >> @@ -681,6 +745,41 @@ dpif_netlink_get_stats(const struct dpif *dpif_, struct >> dpif_dp_stats *stats) >> return error; >> } >> >> +static int >> 
+dpif_netlink_set_handler_pids(struct dpif *dpif_, const uint32_t >> *upcall_pids, >> + uint32_t n_upcall_pids) >> +{ >> + struct dpif_netlink *dpif = dpif_netlink_cast(dpif_); >> + struct dpif_netlink_dp request, reply; >> + struct ofpbuf *bufp; >> + int error; >> + int n_cores; >> + >> + n_cores = count_cpu_cores(); >> + ovs_assert(n_cores == n_upcall_pids); >> + VLOG_DBG("Dispatch mode(per-cpu): Number of CPUs is %d", n_cores); >> + >> + dpif_netlink_dp_init(&request); >> + request.cmd = OVS_DP_CMD_SET; >> + request.name = dpif_->base_name; >> + request.dp_ifindex = dpif->dp_ifindex; >> + request.user_features = dpif->user_features | >> + OVS_DP_F_DISPATCH_UPCALL_PER_CPU; >> + >> + request.upcall_pids = upcall_pids; >> + request.n_upcall_pids = n_cores; >> + >> + error = dpif_netlink_dp_transact(&request, &reply, &bufp); >> + if (!error) { >> + dpif->user_features = reply.user_features; >> + ofpbuf_delete(bufp); >> + if (!(dpif->user_features & OVS_DP_F_DISPATCH_UPCALL_PER_CPU)) { >> + return -EOPNOTSUPP; >> + } >> + } >> + return error; >> +} >> + >> static int >> dpif_netlink_set_features(struct dpif *dpif_, uint32_t new_features) >> { >> @@ -741,7 +840,7 @@ get_vport_type(const struct dpif_netlink_vport *vport) >> return "erspan"; >> >> case OVS_VPORT_TYPE_IP6ERSPAN: >> - return "ip6erspan"; >> + return "ip6erspan"; >> >> case OVS_VPORT_TYPE_IP6GRE: >> return "ip6gre"; >> @@ -807,10 +906,16 @@ dpif_netlink_port_add__(struct dpif_netlink *dpif, >> const char *name, >> uint32_t upcall_pids = 0; >> int error = 0; >> >> - if (dpif->handlers) { >> - error = create_nl_sock(dpif, &sock); >> - if (error) { >> - return error; >> + /* per-cpu dispatch mode does not require a socket per vport */ >> + if (!DISPATCH_MODE_PER_CPU(dpif)) { >> + if (dpif->handlers) { >> + error = create_nl_sock(dpif, &sock); >> + if (error) { >> + return error; >> + } >> + } >> + if (sock) { >> + upcall_pids = nl_sock_pid(sock); >> } >> } >> >> @@ -821,9 +926,6 @@ 
dpif_netlink_port_add__(struct dpif_netlink *dpif, const >> char *name, >> request.name = name; >> >> request.port_no = *port_nop; >> - if (sock) { >> - upcall_pids = nl_sock_pid(sock); >> - } >> request.n_upcall_pids = 1; >> request.upcall_pids = &upcall_pids; >> >> @@ -845,19 +947,21 @@ dpif_netlink_port_add__(struct dpif_netlink *dpif, >> const char *name, >> goto exit; >> } >> >> - error = vport_add_channel(dpif, *port_nop, sock); >> - if (error) { >> - VLOG_INFO("%s: could not add channel for port %s", >> - dpif_name(&dpif->dpif), name); >> - >> - /* Delete the port. */ >> - dpif_netlink_vport_init(&request); >> - request.cmd = OVS_VPORT_CMD_DEL; >> - request.dp_ifindex = dpif->dp_ifindex; >> - request.port_no = *port_nop; >> - dpif_netlink_vport_transact(&request, NULL, NULL); >> - close_nl_sock(sock); >> - goto exit; >> + if (!DISPATCH_MODE_PER_CPU(dpif)) { >> + error = vport_add_channel(dpif, *port_nop, sock); >> + if (error) { >> + VLOG_INFO("%s: could not add channel for port %s", >> + dpif_name(&dpif->dpif), name); >> + >> + /* Delete the port. */ >> + dpif_netlink_vport_init(&request); >> + request.cmd = OVS_VPORT_CMD_DEL; >> + request.dp_ifindex = dpif->dp_ifindex; >> + request.port_no = *port_nop; >> + dpif_netlink_vport_transact(&request, NULL, NULL); >> + close_nl_sock(sock); >> + goto exit; >> + } >> } >> >> exit: >> @@ -1115,6 +1219,11 @@ dpif_netlink_port_get_pid(const struct dpif *dpif_, >> odp_port_t port_no) >> const struct dpif_netlink *dpif = dpif_netlink_cast(dpif_); >> uint32_t ret; >> >> + /* In per-cpu dispatch mode, vports do not have an associated PID */ >> + if (DISPATCH_MODE_PER_CPU(dpif)) { >> + return 0; >> + } > > I mentioned this in the kernel patch's review that this is used with > userspace() action and 0 means it is 'unset'. 
I resolved this and tested using `ovs-ofctl monitor br0 65534 -P nxt_packet_in` > > >> + >> fat_rwlock_rdlock(&dpif->upcall_lock); >> ret = dpif_netlink_port_get_pid__(dpif, port_no); >> fat_rwlock_unlock(&dpif->upcall_lock); >> @@ -2326,12 +2435,51 @@ dpif_netlink_handler_uninit(struct dpif_handler >> *handler) >> } >> #endif >> >> +static int >> +dpif_netlink_refresh_handlers_cpu_dispatch(struct dpif_netlink *dpif) >> + OVS_REQ_WRLOCK(dpif->upcall_lock) >> +{ >> + int handler_id; >> + int error = 0; >> + uint32_t n_handlers; >> + uint32_t *upcall_pids; >> + >> + n_handlers = count_cpu_cores(); >> + if (dpif->n_handlers != n_handlers) { >> + VLOG_DBG("Dispatch mode(per-cpu): initializing %d handlers", >> + n_handlers); >> + destroy_all_handlers(dpif); >> + upcall_pids = xzalloc(n_handlers * sizeof *upcall_pids); >> + dpif->handlers = xzalloc(n_handlers * sizeof *dpif->handlers); >> + for (handler_id = 0; handler_id < n_handlers; handler_id++) { >> + struct dpif_handler *handler = &dpif->handlers[handler_id]; >> + error = create_nl_sock(dpif, &handler->sock); >> + if (error) { >> + VLOG_ERR("Dispatch mode(per-cpu): Cannot create socket for" >> + "handler %d", handler_id); >> + continue; >> + } >> + upcall_pids[handler_id] = nl_sock_pid(handler->sock); >> + VLOG_DBG("Dispatch mode(per-cpu): " >> + "handler %d has Netlink PID of %u", >> + handler_id, upcall_pids[handler_id]); >> + } >> + >> + dpif->n_handlers = n_handlers; >> + error = dpif_netlink_set_handler_pids(&dpif->dpif, upcall_pids, >> + n_handlers); >> + free(upcall_pids); >> + } >> + return error; >> +} >> + >> /* Synchronizes 'channels' in 'dpif->handlers' with the set of vports >> * currently in 'dpif' in the kernel, by adding a new set of channels for >> * any kernel vport that lacks one and deleting any channels that have no >> * backing kernel vports. 
*/ >> static int >> -dpif_netlink_refresh_channels(struct dpif_netlink *dpif, uint32_t >> n_handlers) >> +dpif_netlink_refresh_handlers_vport_dispatch(struct dpif_netlink *dpif, >> + uint32_t n_handlers) >> OVS_REQ_WRLOCK(dpif->upcall_lock) >> { >> unsigned long int *keep_channels; >> @@ -2458,7 +2606,7 @@ dpif_netlink_refresh_channels(struct dpif_netlink >> *dpif, uint32_t n_handlers) >> } >> >> static int >> -dpif_netlink_recv_set__(struct dpif_netlink *dpif, bool enable) >> +dpif_netlink_recv_set_vport_dispatch(struct dpif_netlink *dpif, bool enable) >> OVS_REQ_WRLOCK(dpif->upcall_lock) >> { >> if ((dpif->handlers != NULL) == enable) { >> @@ -2467,7 +2615,21 @@ dpif_netlink_recv_set__(struct dpif_netlink *dpif, >> bool enable) >> destroy_all_channels(dpif); >> return 0; >> } else { >> - return dpif_netlink_refresh_channels(dpif, 1); >> + return dpif_netlink_refresh_handlers_vport_dispatch(dpif, 1); >> + } >> +} >> + >> +static int >> +dpif_netlink_recv_set_cpu_dispatch(struct dpif_netlink *dpif, bool enable) >> + OVS_REQ_WRLOCK(dpif->upcall_lock) >> +{ >> + if ((dpif->handlers != NULL) == enable) { >> + return 0; >> + } else if (!enable) { >> + destroy_all_handlers(dpif); >> + return 0; >> + } else { >> + return dpif_netlink_refresh_handlers_cpu_dispatch(dpif); >> } >> } >> >> @@ -2478,7 +2640,11 @@ dpif_netlink_recv_set(struct dpif *dpif_, bool enable) >> int error; >> >> fat_rwlock_wrlock(&dpif->upcall_lock); >> - error = dpif_netlink_recv_set__(dpif, enable); >> + if (DISPATCH_MODE_PER_CPU(dpif)) { >> + error = dpif_netlink_recv_set_cpu_dispatch(dpif, enable); >> + } else { >> + error = dpif_netlink_recv_set_vport_dispatch(dpif, enable); >> + } >> fat_rwlock_unlock(&dpif->upcall_lock); >> >> return error; >> @@ -2500,13 +2666,31 @@ dpif_netlink_handlers_set(struct dpif *dpif_, >> uint32_t n_handlers) >> >> fat_rwlock_wrlock(&dpif->upcall_lock); >> if (dpif->handlers) { >> - error = dpif_netlink_refresh_channels(dpif, n_handlers); >> + if 
(DISPATCH_MODE_PER_CPU(dpif)) { >> + error = dpif_netlink_refresh_handlers_cpu_dispatch(dpif); >> + } else { >> + error = dpif_netlink_refresh_handlers_vport_dispatch(dpif, >> + >> n_handlers); >> + } >> } >> fat_rwlock_unlock(&dpif->upcall_lock); >> >> return error; >> } >> >> +static bool >> +dpif_netlink_number_handlers_required(struct dpif *dpif_, uint32_t >> *n_handlers) >> +{ >> + struct dpif_netlink *dpif = dpif_netlink_cast(dpif_); >> + >> + if (DISPATCH_MODE_PER_CPU(dpif)) { >> + *n_handlers = count_cpu_cores(); >> + return true; >> + } >> + >> + return false; >> +} >> + >> static int >> dpif_netlink_queue_to_priority(const struct dpif *dpif OVS_UNUSED, >> uint32_t queue_id, uint32_t *priority) >> @@ -2669,8 +2853,59 @@ dpif_netlink_recv_windows(struct dpif_netlink *dpif, >> uint32_t handler_id, >> } >> #else >> static int >> -dpif_netlink_recv__(struct dpif_netlink *dpif, uint32_t handler_id, >> - struct dpif_upcall *upcall, struct ofpbuf *buf) >> +dpif_netlink_recv_cpu_dispatch(struct dpif_netlink *dpif, uint32_t >> handler_id, >> + struct dpif_upcall *upcall, struct ofpbuf >> *buf) >> + OVS_REQ_RDLOCK(dpif->upcall_lock) >> +{ >> + struct dpif_handler *handler; >> + int read_tries = 0; >> + >> + if (!dpif->handlers || handler_id >= dpif->n_handlers) { >> + return EAGAIN; >> + } >> + >> + handler = &dpif->handlers[handler_id]; >> + >> + for (;;) { >> + int dp_ifindex; >> + int error; >> + >> + if (++read_tries > 50) { >> + return EAGAIN; >> + } >> + error = nl_sock_recv(handler->sock, buf, NULL, false); >> + if (error == ENOBUFS) { >> + /* ENOBUFS typically means that we've received so many >> + * packets that the buffer overflowed. Try again >> + * immediately because there's almost certainly a packet >> + * waiting for us. 
*/ >> + report_loss(dpif, NULL, 0, handler_id); >> + continue; >> + } >> + >> + if (error) { >> + if (error == EAGAIN) { >> + break; >> + } >> + return error; >> + } >> + >> + error = parse_odp_packet(buf, upcall, &dp_ifindex); >> + if (!error && dp_ifindex == dpif->dp_ifindex) { >> + return 0; >> + } else if (error) { >> + return error; >> + } >> + } >> + >> + return EAGAIN; >> +} >> + >> +static int >> +dpif_netlink_recv_vport_dispatch(struct dpif_netlink *dpif, >> + uint32_t handler_id, >> + struct dpif_upcall *upcall, >> + struct ofpbuf *buf) >> OVS_REQ_RDLOCK(dpif->upcall_lock) >> { >> struct dpif_handler *handler; >> @@ -2755,18 +2990,24 @@ dpif_netlink_recv(struct dpif *dpif_, uint32_t >> handler_id, >> #ifdef _WIN32 >> error = dpif_netlink_recv_windows(dpif, handler_id, upcall, buf); >> #else >> - error = dpif_netlink_recv__(dpif, handler_id, upcall, buf); >> + if (DISPATCH_MODE_PER_CPU(dpif)) { >> + error = dpif_netlink_recv_cpu_dispatch(dpif, handler_id, upcall, >> buf); >> + } else { >> + error = dpif_netlink_recv_vport_dispatch(dpif, >> + handler_id, upcall, buf); >> + } >> #endif >> fat_rwlock_unlock(&dpif->upcall_lock); >> >> return error; >> } >> >> +#ifdef _WIN32 >> static void >> -dpif_netlink_recv_wait__(struct dpif_netlink *dpif, uint32_t handler_id) >> +dpif_netlink_recv_wait_windows(struct dpif_netlink *dpif, uint32_t >> handler_id) >> OVS_REQ_RDLOCK(dpif->upcall_lock) >> { >> -#ifdef _WIN32 >> + >> uint32_t i; >> struct dpif_windows_vport_sock *sock_pool = >> dpif->handlers[handler_id].vport_sock_pool; >> @@ -2779,13 +3020,31 @@ dpif_netlink_recv_wait__(struct dpif_netlink *dpif, >> uint32_t handler_id) >> for (i = 0; i < VPORT_SOCK_POOL_SIZE; i++) { >> nl_sock_wait(sock_pool[i].nl_sock, POLLIN); >> } >> -#else >> +} >> +#endif >> + >> +static void >> +dpif_netlink_recv_wait_vport_dispatch(struct dpif_netlink *dpif, >> + uint32_t handler_id) >> + OVS_REQ_RDLOCK(dpif->upcall_lock) >> +{ >> if (dpif->handlers && handler_id < dpif->n_handlers) { 
>> struct dpif_handler *handler = &dpif->handlers[handler_id]; >> >> poll_fd_wait(handler->epoll_fd, POLLIN); >> } >> -#endif >> +} >> + >> +static void >> +dpif_netlink_recv_wait_cpu_dispatch(struct dpif_netlink *dpif, >> + uint32_t handler_id) >> + OVS_REQ_RDLOCK(dpif->upcall_lock) >> +{ >> + if (dpif->handlers && handler_id < dpif->n_handlers) { >> + struct dpif_handler *handler = &dpif->handlers[handler_id]; >> + >> + poll_fd_wait(nl_sock_fd(handler->sock), POLLIN); >> + } >> } >> >> static void >> @@ -2794,12 +3053,20 @@ dpif_netlink_recv_wait(struct dpif *dpif_, uint32_t >> handler_id) >> struct dpif_netlink *dpif = dpif_netlink_cast(dpif_); >> >> fat_rwlock_rdlock(&dpif->upcall_lock); >> - dpif_netlink_recv_wait__(dpif, handler_id); >> +#ifdef _WIN32 >> + dpif_netlink_recv_wait_windows(dpif, handler_id); >> +#else >> + if (DISPATCH_MODE_PER_CPU(dpif)) { >> + dpif_netlink_recv_wait_cpu_dispatch(dpif, handler_id); >> + } else { >> + dpif_netlink_recv_wait_vport_dispatch(dpif, handler_id); >> + } >> +#endif >> fat_rwlock_unlock(&dpif->upcall_lock); >> } >> >> static void >> -dpif_netlink_recv_purge__(struct dpif_netlink *dpif) >> +dpif_netlink_recv_purge_vport_dispatch(struct dpif_netlink *dpif) >> OVS_REQ_WRLOCK(dpif->upcall_lock) >> { >> if (dpif->handlers) { >> @@ -2815,13 +3082,31 @@ dpif_netlink_recv_purge__(struct dpif_netlink *dpif) >> } >> } >> >> +static void >> +dpif_netlink_recv_purge_cpu_dispatch(struct dpif_netlink *dpif) >> + OVS_REQ_WRLOCK(dpif->upcall_lock) >> +{ >> + int handler_id; >> + >> + if (dpif->handlers) { >> + for (handler_id = 0; handler_id < dpif->n_handlers; handler_id++) { >> + struct dpif_handler *handler = &dpif->handlers[handler_id]; >> + nl_sock_drain(handler->sock); >> + } >> + } >> +} >> + >> static void >> dpif_netlink_recv_purge(struct dpif *dpif_) >> { >> struct dpif_netlink *dpif = dpif_netlink_cast(dpif_); >> >> fat_rwlock_wrlock(&dpif->upcall_lock); >> - dpif_netlink_recv_purge__(dpif); >> + if 
(DISPATCH_MODE_PER_CPU(dpif)) { >> + dpif_netlink_recv_purge_cpu_dispatch(dpif); >> + } else { >> + dpif_netlink_recv_purge_vport_dispatch(dpif); >> + } >> fat_rwlock_unlock(&dpif->upcall_lock); >> } >> >> @@ -3974,6 +4259,7 @@ const struct dpif_class dpif_netlink_class = { >> dpif_netlink_operate, >> dpif_netlink_recv_set, >> dpif_netlink_handlers_set, >> + dpif_netlink_number_handlers_required, >> NULL, /* set_config */ >> dpif_netlink_queue_to_priority, >> dpif_netlink_recv, >> @@ -4337,6 +4623,11 @@ dpif_netlink_dp_to_ofpbuf(const struct >> dpif_netlink_dp *dp, struct ofpbuf *buf) >> nl_msg_put_u32(buf, OVS_DP_ATTR_USER_FEATURES, dp->user_features); >> } >> >> + if (dp->upcall_pids) { >> + nl_msg_put_unspec(buf, OVS_DP_ATTR_PER_CPU_PIDS, dp->upcall_pids, >> + sizeof *dp->upcall_pids * dp->n_upcall_pids); >> + } >> + >> /* Skip OVS_DP_ATTR_STATS since we never have a reason to serialize it. >> */ >> } >> >> @@ -4642,7 +4933,6 @@ dpif_netlink_flow_get_stats(const struct >> dpif_netlink_flow *flow, >> stats->used = flow->used ? get_32aligned_u64(flow->used) : 0; >> stats->tcp_flags = flow->tcp_flags ? *flow->tcp_flags : 0; >> } >> - >> /* Logs information about a packet that was recently lost in 'ch' (in >> * 'dpif_'). 
*/ >> static void >> @@ -4656,13 +4946,18 @@ report_loss(struct dpif_netlink *dpif, struct >> dpif_channel *ch, uint32_t ch_idx, >> return; >> } >> >> - ds_init(&s); >> - if (ch->last_poll != LLONG_MIN) { >> - ds_put_format(&s, " (last polled %lld ms ago)", >> - time_msec() - ch->last_poll); >> - } >> + if (DISPATCH_MODE_PER_CPU(dpif)) { >> + VLOG_WARN("%s: lost packet on handler %u", >> + dpif_name(&dpif->dpif), handler_id); >> + } else { >> + ds_init(&s); >> + if (ch->last_poll != LLONG_MIN) { >> + ds_put_format(&s, " (last polled %lld ms ago)", >> + time_msec() - ch->last_poll); >> + } >> >> - VLOG_WARN("%s: lost packet on port channel %u of handler %u%s", >> - dpif_name(&dpif->dpif), ch_idx, handler_id, ds_cstr(&s)); >> - ds_destroy(&s); >> + VLOG_WARN("%s: lost packet on port channel %u of handler %u%s", >> + dpif_name(&dpif->dpif), ch_idx, handler_id, ds_cstr(&s)); >> + ds_destroy(&s); >> + } >> } >> diff --git a/lib/dpif-provider.h b/lib/dpif-provider.h >> index b817fceac698..32711f19d1f6 100644 >> --- a/lib/dpif-provider.h >> +++ b/lib/dpif-provider.h >> @@ -357,6 +357,16 @@ struct dpif_class { >> * */ >> int (*handlers_set)(struct dpif *dpif, uint32_t n_handlers); >> >> + /* Queries 'dpif' to see if a certain number of handlers are required by >> + * the implementation. >> + * >> + * If a certain number of handlers are required, returns 'true' and sets >> + * 'n_handlers' to that number of handler threads. >> + * >> + * If not, returns 'false'. >> + */ >> + bool (*number_handlers_required)(struct dpif *dpif, uint32_t >> *n_handlers); >> + > > It also needs to update struct dpif_class handlers_set() documentation. > > Good point. I updated it. >> /* Pass custom configuration options to the datapath. The >> implementation >> * might postpone applying the changes until run() is called. 
*/ >> int (*set_config)(struct dpif *dpif, const struct smap *other_config); >> diff --git a/lib/dpif.c b/lib/dpif.c >> index 26e8bfb7db98..511383514d5b 100644 >> --- a/lib/dpif.c >> +++ b/lib/dpif.c >> @@ -1489,6 +1489,23 @@ dpif_handlers_set(struct dpif *dpif, uint32_t >> n_handlers) >> return error; >> } >> >> +/* Checks if a certain number of handlers are required. >> + * >> + * If a certain number of handlers are required, returns 'true' and sets >> + * 'n_handlers' to that number of handler threads. >> + * >> + * If not, returns 'false' >> + */ >> +bool >> +dpif_number_handlers_required(struct dpif *dpif, uint32_t *n_handlers) { >> + bool ret = false; >> + >> + if (dpif->dpif_class->number_handlers_required) { >> + ret = dpif->dpif_class->number_handlers_required(dpif, n_handlers); >> + } >> + return ret; >> +} >> + >> void >> dpif_register_dp_purge_cb(struct dpif *dpif, dp_purge_callback *cb, void >> *aux) >> { >> diff --git a/lib/dpif.h b/lib/dpif.h >> index f9728e67393b..7c322d20e6c7 100644 >> --- a/lib/dpif.h >> +++ b/lib/dpif.h >> @@ -873,6 +873,7 @@ void dpif_register_upcall_cb(struct dpif *, >> upcall_callback *, void *aux); >> >> int dpif_recv_set(struct dpif *, bool enable); >> int dpif_handlers_set(struct dpif *, uint32_t n_handlers); >> +bool dpif_number_handlers_required(struct dpif *, uint32_t *n_handlers); >> int dpif_set_config(struct dpif *, const struct smap *cfg); >> int dpif_port_set_config(struct dpif *, odp_port_t, const struct smap *cfg); >> int dpif_recv(struct dpif *, uint32_t handler_id, struct dpif_upcall *, >> diff --git a/ofproto/ofproto-dpif-upcall.c b/ofproto/ofproto-dpif-upcall.c >> index 88406fea1391..45c22d20c183 100644 >> --- a/ofproto/ofproto-dpif-upcall.c >> +++ b/ofproto/ofproto-dpif-upcall.c >> @@ -636,24 +636,61 @@ udpif_set_threads(struct udpif *udpif, uint32_t >> n_handlers_, >> uint32_t n_revalidators_) >> { >> ovs_assert(udpif); >> - ovs_assert(n_handlers_ && n_revalidators_); >> + uint32_t n_handlers_requested; >> + 
uint32_t n_revalidators_requested; >> + bool forced = false; >> + >> + if (dpif_number_handlers_required(udpif->dpif, &n_handlers_requested)) { >> + forced = true; >> + if (!n_revalidators_) { >> + n_revalidators_requested = n_handlers_requested / 4 + 1; >> + } else { >> + n_revalidators_requested = n_revalidators_; >> + } >> + } else { >> + int threads = MAX(count_cpu_cores(), 2); >> + >> + n_revalidators_requested = MAX(n_revalidators_, 0); >> + n_handlers_requested = MAX(n_handlers_, 0); >> >> - if (udpif->n_handlers != n_handlers_ >> - || udpif->n_revalidators != n_revalidators_) { >> + if (!n_revalidators_requested) { >> + n_revalidators_requested = n_handlers_requested >> + ? MAX(threads - (int) n_handlers_requested, 1) >> + : threads / 4 + 1; >> + } >> + >> + if (!n_handlers_requested) { >> + n_handlers_requested = MAX(threads - >> + (int) n_revalidators_requested, 1); >> + } >> + } >> + >> + if (udpif->n_handlers != n_handlers_requested >> + || udpif->n_revalidators != n_revalidators_requested) { >> + if (forced) { >> + VLOG_INFO("Overriding n-handler-threads to %u, setting " >> + "n-revalidator-threads to %u", n_handlers_requested, >> + n_revalidators_requested); >> + } else { >> + VLOG_INFO("Setting n-handler-threads to %u, setting " >> + "n-revalidator-threads to %u", n_handlers_requested, >> + n_revalidators_requested); >> + } >> udpif_stop_threads(udpif, true); >> } >> >> if (!udpif->handlers && !udpif->revalidators) { >> + VLOG_INFO("Starting %u threads", n_handlers_requested + >> + n_revalidators_requested); >> int error; >> - >> - error = dpif_handlers_set(udpif->dpif, n_handlers_); >> + error = dpif_handlers_set(udpif->dpif, n_handlers_requested); >> if (error) { >> VLOG_ERR("failed to configure handlers in dpif %s: %s", >> dpif_name(udpif->dpif), ovs_strerror(error)); >> return; >> } >> - >> - udpif_start_threads(udpif, n_handlers_, n_revalidators_); >> + udpif_start_threads(udpif, n_handlers_requested, >> + n_revalidators_requested); >> } >> } >> 
>> diff --git a/ofproto/ofproto.c b/ofproto/ofproto.c >> index 69e5834e766c..1501eca9a989 100644 >> --- a/ofproto/ofproto.c >> +++ b/ofproto/ofproto.c >> @@ -792,20 +792,8 @@ ofproto_type_set_config(const char *datapath_type, >> const struct smap *cfg) >> void >> ofproto_set_threads(int n_handlers_, int n_revalidators_) >> { >> - int threads = MAX(count_cpu_cores(), 2); >> - >> n_revalidators = MAX(n_revalidators_, 0); >> n_handlers = MAX(n_handlers_, 0); >> - >> - if (!n_revalidators) { >> - n_revalidators = n_handlers >> - ? MAX(threads - (int) n_handlers, 1) >> - : threads / 4 + 1; >> - } >> - >> - if (!n_handlers) { >> - n_handlers = MAX(threads - (int) n_revalidators, 1); >> - } >> } >> >> void >> -- >> 2.27.0 >> >> _______________________________________________ >> dev mailing list >> d...@openvswitch.org >> https://mail.openvswitch.org/mailman/listinfo/ovs-dev > _______________________________________________ dev mailing list d...@openvswitch.org https://mail.openvswitch.org/mailman/listinfo/ovs-dev