Sendmmsg can reduce the CPU cycles spent sending packets to the kernel. Replace sendmsg with sendmmsg in function netdev_linux_send so that packets are sent in batches when sendmmsg is available.
If the kernel does not support sendmmsg, fall back to sendmsg. netserver |------------| | | | container | |----veth----| | | |------------| |---veth-| dpdk-ovs | netperf | | |--------------| |----dpdk----| | bare-metal | | |--------------| | | | | pnic-----------pnic Netperf was used to test the performance: 1)cmd:netperf -H remote-container -t UDP_STREAM -l 60 -- -m 1400 result: netserver received 2383.21Mb(sendmsg)/2551.64Mb(sendmmsg) 2)cmd:netperf -H remote-container -t UDP_STREAM -l 60 -- -m 60 result: netserver received 109.72Mb(sendmsg)/115.18Mb(sendmmsg) Sendmmsg shows about a 6% improvement in netperf UDP testing. Signed-off-by: Zhenyu Gao <sysugaozhe...@gmail.com> --- configure.ac | 2 +- lib/netdev-linux.c | 71 ++++++++++++++++++++++++++++++++++++++++++++++++++++-- 2 files changed, 70 insertions(+), 3 deletions(-) diff --git a/configure.ac b/configure.ac index 6404b5f..b02c7c4 100644 --- a/configure.ac +++ b/configure.ac @@ -106,7 +106,7 @@ AC_CHECK_DECLS([sys_siglist], [], [], [[#include <signal.h>]]) AC_CHECK_MEMBERS([struct stat.st_mtim.tv_nsec, struct stat.st_mtimensec], [], [], [[#include <sys/stat.h>]]) AC_CHECK_MEMBERS([struct ifreq.ifr_flagshigh], [], [], [[#include <net/if.h>]]) -AC_CHECK_FUNCS([mlockall strnlen getloadavg statvfs getmntent_r]) +AC_CHECK_FUNCS([mlockall strnlen getloadavg statvfs getmntent_r sendmmsg]) AC_CHECK_HEADERS([mntent.h sys/statvfs.h linux/types.h linux/if_ether.h stdatomic.h]) AC_CHECK_HEADERS([net/if_mib.h], [], [], [[#include <sys/types.h> #include <net/if.h>]]) diff --git a/lib/netdev-linux.c b/lib/netdev-linux.c index 1b88775..b90a22a 100644 --- a/lib/netdev-linux.c +++ b/lib/netdev-linux.c @@ -1187,6 +1187,54 @@ netdev_linux_rxq_drain(struct netdev_rxq *rxq_) } } +static inline int +netdev_linux_sock_batch_send(int sock, struct msghdr *msg, + struct dp_packet_batch *batch) +{ + int error = 0; + ssize_t retval; + uint32_t resend_idx = 0; + struct mmsghdr *mmsg; + struct iovec *iov; + + mmsg = 
xmalloc(sizeof(*mmsg) * batch->count); + iov = xmalloc(sizeof(*iov) * batch->count); + + for (int i = 0; i < batch->count; i++) { + const void *data = dp_packet_data(batch->packets[i]); + size_t size = dp_packet_size(batch->packets[i]); + + /* Truncate the packet if it is configured. */ + size -= dp_packet_get_cutlen(batch->packets[i]); + + iov[i].iov_base = CONST_CAST(void *, data); + iov[i].iov_len = size; + mmsg[i].msg_hdr = *msg; + mmsg[i].msg_hdr.msg_iov = &iov[i]; + } + +resend_batch: + retval = sendmmsg(sock, mmsg + resend_idx, + batch->count - resend_idx, 0); + if (retval < 0) { + if (errno == EINTR) { + goto resend_batch; + } + /* The Linux AF_PACKET implementation never blocks waiting for + * room for packets, instead returning ENOBUFS. Translate this + * into EAGAIN for the caller. */ + error = errno == ENOBUFS ? EAGAIN : errno; + } else if (retval != batch->count - resend_idx) { + /* Send remain packets again. */ + resend_idx += retval; + goto resend_batch; + } + + free(mmsg); + free(iov); + return error; +} + /* Sends 'buffer' on 'netdev'. Returns 0 if successful, otherwise a positive * errno value. Returns EAGAIN without blocking if the packet cannot be queued * immediately. 
Returns EMSGSIZE if a partial packet was transmitted or if @@ -1207,6 +1255,9 @@ netdev_linux_send(struct netdev *netdev_, int qid OVS_UNUSED, struct sockaddr_ll sll; struct msghdr msg; if (!is_tap_netdev(netdev_)) { +#ifdef HAVE_SENDMMSG + static bool try_sendmmsg = true; +#endif sock = af_packet_sock(); if (sock < 0) { error = -sock; @@ -1231,6 +1282,21 @@ netdev_linux_send(struct netdev *netdev_, int qid OVS_UNUSED, msg.msg_control = NULL; msg.msg_controllen = 0; msg.msg_flags = 0; + +#ifdef HAVE_SENDMMSG + if (try_sendmmsg) { + /* Try batch sending to socket */ + error = netdev_linux_sock_batch_send(sock, &msg, batch); + if (error == ENOSYS) { + /* Linux kernel does not implement this function */ + try_sendmmsg = false; + VLOG_WARN("Linux kernel doesn't implement sendmmsg, " + "going to consume sendmsg"); + } else { + goto check_error; + } + } +#endif } /* 'i' is incremented only if there's no error */ @@ -1290,9 +1356,10 @@ netdev_linux_send(struct netdev *netdev_, int qid OVS_UNUSED, i++; } +check_error: if (error && error != EAGAIN) { - VLOG_WARN_RL(&rl, "error sending Ethernet packet on %s: %s", - netdev_get_name(netdev_), ovs_strerror(error)); + VLOG_WARN_RL(&rl, "error sending Ethernet packet on %s: %s", + netdev_get_name(netdev_), ovs_strerror(error)); } free_batch: -- 1.8.3.1 _______________________________________________ dev mailing list d...@openvswitch.org https://mail.openvswitch.org/mailman/listinfo/ovs-dev