sendmmsg() can reduce the CPU cycles spent sending packets to the kernel. Replace sendmsg() with sendmmsg() in netdev_linux_send() so that packets are sent in batches.
Signed-off-by: Zhenyu Gao <sysugaozhe...@gmail.com> --- lib/netdev-linux.c | 122 ++++++++++++++++++++++++++++++++--------------------- 1 file changed, 73 insertions(+), 49 deletions(-) diff --git a/lib/netdev-linux.c b/lib/netdev-linux.c index 8ae740a..bc975cd 100644 --- a/lib/netdev-linux.c +++ b/lib/netdev-linux.c @@ -1194,10 +1194,14 @@ netdev_linux_send(struct netdev *netdev_, int qid OVS_UNUSED, { int error = 0; int sock = 0; + ssize_t retval; - struct sockaddr_ll sll; - struct msghdr msg; if (!is_tap_netdev(netdev_)) { + uint32_t resend_idx = 0; + struct mmsghdr *msg; + struct iovec *iov; + struct sockaddr_ll sll; + sock = af_packet_sock(); if (sock < 0) { error = -sock; @@ -1216,34 +1220,56 @@ netdev_linux_send(struct netdev *netdev_, int qid OVS_UNUSED, sll.sll_family = AF_PACKET; sll.sll_ifindex = ifindex; - msg.msg_name = &sll; - msg.msg_namelen = sizeof sll; - msg.msg_iovlen = 1; - msg.msg_control = NULL; - msg.msg_controllen = 0; - msg.msg_flags = 0; - } + msg = xmalloc(sizeof(*msg) * batch->count); + iov = xmalloc(sizeof(*iov) * batch->count); - /* 'i' is incremented only if there's no error */ - for (int i = 0; i < batch->count; ) { - const void *data = dp_packet_data(batch->packets[i]); - size_t size = dp_packet_size(batch->packets[i]); - ssize_t retval; + for (int i = 0; i < batch->count; i++) { + const void *data = dp_packet_data(batch->packets[i]); + size_t size = dp_packet_size(batch->packets[i]); - /* Truncate the packet if it is configured. */ - size -= dp_packet_get_cutlen(batch->packets[i]); + /* Truncate the packet if it is configured. */ + size -= dp_packet_get_cutlen(batch->packets[i]); - if (!is_tap_netdev(netdev_)) { /* Use our AF_PACKET socket to send to this device. 
*/ - struct iovec iov; - iov.iov_base = CONST_CAST(void *, data); - iov.iov_len = size; + msg[i].msg_hdr.msg_name = &sll; + msg[i].msg_hdr.msg_namelen = sizeof sll; + msg[i].msg_hdr.msg_iovlen = 1; + msg[i].msg_hdr.msg_control = NULL; + msg[i].msg_hdr.msg_controllen = 0; + msg[i].msg_hdr.msg_flags = 0; + iov[i].iov_base = CONST_CAST(void *, data); + iov[i].iov_len = size; + msg[i].msg_hdr.msg_iov = &iov[i]; + } - msg.msg_iov = &iov; +resend_batch: + retval = sendmmsg(sock, msg + resend_idx, + batch->count - resend_idx, 0); + if (retval < 0) { + if (errno == EINTR) { + goto resend_batch; + } + /* The Linux AF_PACKET implementation never blocks waiting for + * room for packets, instead returning ENOBUFS. Translate this + * into EAGAIN for the caller. */ + error = errno == ENOBUFS ? EAGAIN : errno; + } else if(retval != batch->count - resend_idx) { + resend_idx += retval; + goto resend_batch; + } + + free(msg); + free(iov); + } else { + /* 'i' is incremented only if there's no error */ + for (int i = 0; i < batch->count; ) { + const void *data = dp_packet_data(batch->packets[i]); + size_t size = dp_packet_size(batch->packets[i]); + + /* Truncate the packet if it is configured. */ + size -= dp_packet_get_cutlen(batch->packets[i]); - retval = sendmsg(sock, &msg, 0); - } else { /* Use the tap fd to send to this device. This is essential for * tap devices, because packets sent to a tap device with an * AF_PACKET socket will loop back to be *received* again on the @@ -1252,45 +1278,43 @@ netdev_linux_send(struct netdev *netdev_, int qid OVS_UNUSED, struct netdev_linux *netdev = netdev_linux_cast(netdev_); retval = write(netdev->tap_fd, data, size); - } - if (retval < 0) { - if (errno == EINTR) { - /* The send was interrupted by a signal. Retry the packet by - * continuing without incrementing 'i'.*/ - continue; - } else if (errno == EIO && is_tap_netdev(netdev_)) { - /* The Linux tap driver returns EIO if the device is not up. 
- * From the OVS side this is not an error, so ignore it. */ - } else { - /* The Linux AF_PACKET implementation never blocks waiting for - * room for packets, instead returning ENOBUFS. Translate this - * into EAGAIN for the caller. */ - error = errno == ENOBUFS ? EAGAIN : errno; + if (retval < 0) { + if (errno == EINTR) { + /* The send was interrupted by a signal. + * Retry the packet by continuing without + * incrementing 'i'.*/ + continue; + } else if (errno == EIO) { + /* The Linux tap driver returns EIO if the device + * is not up. From the OVS side this is not an error, + * so ignore it. */ + } else { + break; + } + } else if (retval != size) { + VLOG_WARN_RL(&rl, "sent partial Ethernet packet" + " (%"PRIuSIZE" bytes" + " of %"PRIuSIZE") on %s", retval, size, + netdev_get_name(netdev_)); + error = EMSGSIZE; break; } - } else if (retval != size) { - VLOG_WARN_RL(&rl, "sent partial Ethernet packet (%"PRIuSIZE" bytes" - " of %"PRIuSIZE") on %s", retval, size, - netdev_get_name(netdev_)); - error = EMSGSIZE; - break; - } - /* Process the next packet in the batch */ - i++; + /* Process the next packet in the batch */ + i++; + } } if (error && error != EAGAIN) { - VLOG_WARN_RL(&rl, "error sending Ethernet packet on %s: %s", - netdev_get_name(netdev_), ovs_strerror(error)); + VLOG_WARN_RL(&rl, "error sending Ethernet packet on %s: %s", + netdev_get_name(netdev_), ovs_strerror(error)); } free_batch: dp_packet_delete_batch(batch, may_steal); return error; - } /* Registers with the poll loop to wake up from the next call to poll_block() -- 1.8.3.1 _______________________________________________ dev mailing list d...@openvswitch.org https://mail.openvswitch.org/mailman/listinfo/ovs-dev