On 6/21/23 22:36, Mike Pattrick wrote: > From: Flavio Leitner <f...@sysclose.org> > > This provides a software implementation in the case > the egress netdev doesn't support segmentation in hardware. > > The challenge here is to guarantee packet ordering in the > original batch that may be full of TSO packets. Each TSO > packet can go up to ~64kB, so with segment size of 1440 > that means about 44 packets for each TSO. Each batch has > 32 packets, so the total batch amounts to 1408 normal > packets. > > The segmentation estimates the total number of packets > and then the total number of batches. Then allocate > enough memory and finally do the work. > > Finally each batch is sent in order to the netdev. > > Signed-off-by: Flavio Leitner <f...@sysclose.org> > Co-authored-by: Mike Pattrick <m...@redhat.com> > Signed-off-by: Mike Pattrick <m...@redhat.com> > --- > lib/automake.mk | 2 + > lib/dp-packet-gso.c | 172 ++++++++++++++++++++++++++++++++++++++++++++ > lib/dp-packet-gso.h | 24 +++++++ > lib/dp-packet.h | 11 +++ > lib/netdev-dpdk.c | 46 ++++++++---- > lib/netdev-linux.c | 58 --------------- > lib/netdev.c | 120 ++++++++++++++++++------------- > lib/packets.c | 4 +- > 8 files changed, 314 insertions(+), 123 deletions(-) > create mode 100644 lib/dp-packet-gso.c > create mode 100644 lib/dp-packet-gso.h > > diff --git a/lib/automake.mk b/lib/automake.mk > index e64ee76ce..49a92958d 100644 > --- a/lib/automake.mk > +++ b/lib/automake.mk > @@ -118,6 +118,8 @@ lib_libopenvswitch_la_SOURCES = \ > lib/dpctl.h \ > lib/dp-packet.h \ > lib/dp-packet.c \ > + lib/dp-packet-gso.c \ > + lib/dp-packet-gso.h \ > lib/dpdk.h \ > lib/dpif-netdev-extract-study.c \ > lib/dpif-netdev-lookup.h \ > diff --git a/lib/dp-packet-gso.c b/lib/dp-packet-gso.c > new file mode 100644 > index 000000000..bc72e2f90 > --- /dev/null > +++ b/lib/dp-packet-gso.c > @@ -0,0 +1,172 @@ > +/* > + * Copyright (c) 2021 Red Hat, Inc.
Need to adjust the year. :) > + * > + * Licensed under the Apache License, Version 2.0 (the "License"); > + * you may not use this file except in compliance with the License. > + * You may obtain a copy of the License at: > + * > + * http://www.apache.org/licenses/LICENSE-2.0 > + * > + * Unless required by applicable law or agreed to in writing, software > + * distributed under the License is distributed on an "AS IS" BASIS, > + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. > + * See the License for the specific language governing permissions and > + * limitations under the License. > + */ > + > +#include <config.h> > +#include <stdlib.h> > +#include <string.h> > + > +#include "coverage.h" > +#include "dp-packet.h" > +#include "dp-packet-gso.h" > +#include "netdev-provider.h" > +#include "openvswitch/vlog.h" > + > +VLOG_DEFINE_THIS_MODULE(dp_packet_gso); > + > +COVERAGE_DEFINE(soft_seg_good); > + > +/* Retuns a new packet that is a segment of packet 'p'. > + * > + * The new packet is initialized with 'hdr_len' bytes from the > + * start of packet 'p' and then appended with 'data_len' bytes > + * from the 'data' buffer. > + * > + * Note: The packet headers are not updated. */ > +static struct dp_packet * > +dp_packet_gso_seg_new(const struct dp_packet *p, size_t hdr_len, > + const char *data, size_t data_len) > +{ > + struct dp_packet *seg = dp_packet_new_with_headroom(hdr_len + data_len, > + > dp_packet_headroom(p)); > + > + /* Append the original packet headers and then the payload. */ > + dp_packet_put(seg, dp_packet_data(p), hdr_len); > + dp_packet_put(seg, data, data_len); > + > + /* The new segment should have the same offsets. */ > + seg->l2_5_ofs = p->l2_5_ofs; > + seg->l3_ofs = p->l3_ofs; > + seg->l4_ofs = p->l4_ofs; > + > + /* The protocol headers remain the same, so preserve hash and mark. */ > + *dp_packet_rss_ptr(seg) = dp_packet_get_rss_hash(p); > + *dp_packet_flow_mark_ptr(seg) = *dp_packet_flow_mark_ptr(p); > + > + /* The segment should inherit all the offloading flags from the > + * original packet, except for the TCP segmentation, external > + * buffer and indirect buffer flags. */ > + *dp_packet_ol_flags_ptr(seg) = *dp_packet_ol_flags_ptr(p) > + & ~(DP_PACKET_OL_TX_TCP_SEG | DP_PACKET_OL_EXTERNAL > + | DP_PACKET_OL_INDIRECT); We should inherit DP_PACKET_OL_SUPPORTED_MASK & ~DP_PACKET_OL_TX_TCP_SEG. So, *dp_packet_ol_flags_ptr(seg) &= DP_PACKET_OL_SUPPORTED_MASK & ~DP_PACKET_OL_TX_TCP_SEG; Is that right? > + > + dp_packet_hwol_reset_tcp_seg(seg); Also, this function just resets the DP_PACKET_OL_TX_TCP_SEG. So, above should be just: *dp_packet_ol_flags_ptr(seg) &= DP_PACKET_OL_SUPPORTED_MASK; The same as in the clone function. Either way one of these operations is redundant. > + > + return seg; > +} > + > +/* Returns the calculated number of TCP segments in packet 'p'. */ > +int > +dp_packet_gso_nr_segs(struct dp_packet *p) > +{ > + uint16_t segsz = dp_packet_get_tso_segsz(p); > + const char *data_tail; > + const char *data_pos; > + int n_segs; > + > + data_pos = dp_packet_get_tcp_payload(p); > + data_tail = (char *) dp_packet_tail(p) - dp_packet_l2_pad_size(p); > + data_pos = dp_packet_get_tcp_payload(p); Assigning twice? > + n_segs = DIV_ROUND_UP((data_tail - data_pos), segsz); Do we need this variable? Maybe just return here? > + > + return n_segs; > + Extra line. > +} > + > +/* Perform software segmentation on packet 'p'. > + * > + * Returns all the segments added to the array of preallocated > + * batches in 'batches' starting at batch position 'batch_pos'. */ > +void > +dp_packet_gso(struct dp_packet *p, struct dp_packet_batch *batches, > + size_t *batch_pos) > +{ > + struct tcp_header *tcp_hdr; > + struct ip_header *ip_hdr; > + struct dp_packet *seg; > + uint16_t tcp_offset; > + uint16_t tso_segsz; > + uint32_t tcp_seq; > + uint16_t ip_id; > + int hdr_len; > + > + tso_segsz = dp_packet_get_tso_segsz(p); > + if (!tso_segsz) { > + VLOG_WARN("GSO packet with len %d with no segment size.", > + dp_packet_size(p)); This should likely be rate limited. Also this packet is going to be leaked. And maybe we should count that drop somehow? A coverage counter? > + return; > + } > + > + tcp_hdr = dp_packet_l4(p); > + tcp_offset = TCP_OFFSET(tcp_hdr->tcp_ctl); > + tcp_seq = ntohl(get_16aligned_be32(&tcp_hdr->tcp_seq)); > + hdr_len = ((char *) dp_packet_l4(p) - (char *) dp_packet_eth(p)) > + + tcp_offset * 4; > + ip_id = 0; > + if (dp_packet_hwol_is_ipv4(p)) { > + ip_hdr = dp_packet_l3(p); > + ip_id = ntohs(ip_hdr->ip_id); > + } > + > + const char *data_tail = (char *) dp_packet_tail(p) > + - dp_packet_l2_pad_size(p); > + const char *data_pos = dp_packet_get_tcp_payload(p); > + int n_segs = dp_packet_gso_nr_segs(p); > + int seg_len; An empty line would be nice. > + for (int i = 0; i < n_segs; i++) { > + seg_len = data_tail - data_pos; > + if (seg_len > tso_segsz) { > + seg_len = tso_segsz; > + } > + > + seg = dp_packet_gso_seg_new(p, hdr_len, data_pos, seg_len); > + data_pos += seg_len; > + > + /* Update L3 header. */ > + if (dp_packet_hwol_is_ipv4(seg)) { > + ip_hdr = dp_packet_l3(seg); > + ip_hdr->ip_tot_len = htons(sizeof *ip_hdr + > + dp_packet_l4_size(seg)); > + ip_hdr->ip_id = htons(ip_id); > + ip_hdr->ip_csum = 0; > + ip_id++; If we assume that IDs are sequential in the original stream, here we will produce packets with overlapping IDs. I suppose that's not a big problem? > + } else { > + struct ovs_16aligned_ip6_hdr *ip6_hdr = dp_packet_l3(seg); > + > + ip6_hdr->ip6_ctlun.ip6_un1.ip6_un1_plen = htons(sizeof *ip_hdr > + + dp_packet_l4_size(seg)); Maybe break the line before or after '=' and start the next one with smaller indentation. > + Extra line. > + } > + > + /* Update L4 header. */ > + tcp_hdr = dp_packet_l4(seg); > + put_16aligned_be32(&tcp_hdr->tcp_seq, htonl(tcp_seq)); > + tcp_seq += seg_len; > + if (OVS_LIKELY(i < (n_segs - 1))) { > + /* Reset flags PUSH and FIN unless it is the last segment. */ > + uint16_t tcp_flags = TCP_FLAGS(tcp_hdr->tcp_ctl) > + & ~(TCP_PSH | TCP_FIN); > + tcp_hdr->tcp_ctl = TCP_CTL(tcp_flags, tcp_offset); > + } > + > + if (dp_packet_batch_is_full(&batches[ *batch_pos])) { > + *batch_pos += 1; > + } > + > + dp_packet_batch_add(&batches[ *batch_pos], seg); The extra space looks strange. We should remove it. Alternative approach to counting batches will be to just increment a batch pointer. i.e. to have a **curr_batch pointer and do (*curr_batch)++ whenever it gets full: if (dp_packet_batch_is_full(*curr_batch)) { (*curr_batch)++; } dp_packet_batch_add(*curr_batch, seg); What do you think? > + } > + > + COVERAGE_INC(soft_seg_good); > +} > diff --git a/lib/dp-packet-gso.h b/lib/dp-packet-gso.h > new file mode 100644 > index 000000000..81cb52742 > --- /dev/null > +++ b/lib/dp-packet-gso.h > @@ -0,0 +1,24 @@ > +/* > + * Copyright (c) 2021 Red Hat, Inc. Year. > + * > + * Licensed under the Apache License, Version 2.0 (the "License"); > + * you may not use this file except in compliance with the License. > + * You may obtain a copy of the License at: > + * > + * http://www.apache.org/licenses/LICENSE-2.0 > + * > + * Unless required by applicable law or agreed to in writing, software > + * distributed under the License is distributed on an "AS IS" BASIS, > + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. > + * See the License for the specific language governing permissions and > + * limitations under the License. > + */ > + > +#ifndef DP_PACKET_GSO_H > +#define DP_PACKET_GSO_H 1 > + > +void dp_packet_gso(struct dp_packet *p, struct dp_packet_batch *batches, > + size_t *batch_pos); > +int dp_packet_gso_nr_segs(struct dp_packet *p); No need for struct names in prototypes. > + > +#endif /* dp-packet-gso.h */ > diff --git a/lib/dp-packet.h b/lib/dp-packet.h > index 6a924f3ff..dfa9a0d6b 100644 > --- a/lib/dp-packet.h > +++ b/lib/dp-packet.h > @@ -86,6 +86,10 @@ enum dp_packet_offload_mask { > DEF_OL_FLAG(DP_PACKET_OL_TX_SCTP_CKSUM, RTE_MBUF_F_TX_SCTP_CKSUM, 0x800), > /* Offload IP checksum. */ > DEF_OL_FLAG(DP_PACKET_OL_TX_IP_CKSUM, RTE_MBUF_F_TX_IP_CKSUM, 0x1000), > + /* External Buffer attached. */ > + DEF_OL_FLAG(DP_PACKET_OL_EXTERNAL, RTE_MBUF_F_EXTERNAL, 0x4000), > + /* Indirect Buffer attached. */ > + DEF_OL_FLAG(DP_PACKET_OL_INDIRECT, RTE_MBUF_F_INDIRECT, 0x8000), These are DPDK-specific and shouldn't be here. DP_PACKET_OL_SUPPORTED_MASK can be used for clearing unknown bits. > /* Adding new field requires adding to DP_PACKET_OL_SUPPORTED_MASK. */ > }; > > @@ -1131,6 +1135,13 @@ dp_packet_hwol_set_tcp_seg(struct dp_packet *b) > *dp_packet_ol_flags_ptr(b) |= DP_PACKET_OL_TX_TCP_SEG; > } > > +/* Resets TCP Segmentation flag in packet 'p'. */ > +static inline void > +dp_packet_hwol_reset_tcp_seg(struct dp_packet *p) > +{ > + *dp_packet_ol_flags_ptr(p) &= ~DP_PACKET_OL_TX_TCP_SEG; > +} > + > /* Returns 'true' if the IP header has good integrity and the > * checksum in it is complete. */ > static inline bool > diff --git a/lib/netdev-dpdk.c b/lib/netdev-dpdk.c > index 1c100c48e..82ce74c45 100644 > --- a/lib/netdev-dpdk.c > +++ b/lib/netdev-dpdk.c > @@ -2288,6 +2288,7 @@ static bool > netdev_dpdk_prep_hwol_packet(struct netdev_dpdk *dev, struct rte_mbuf *mbuf) > { > struct dp_packet *pkt = CONTAINER_OF(mbuf, struct dp_packet, mbuf); > + struct tcp_header *th; > > if (!(mbuf->ol_flags & (RTE_MBUF_F_TX_IP_CKSUM | RTE_MBUF_F_TX_L4_MASK > | RTE_MBUF_F_TX_TCP_SEG))) { > @@ -2299,27 +2300,38 @@ netdev_dpdk_prep_hwol_packet(struct netdev_dpdk *dev, > struct rte_mbuf *mbuf) > mbuf->l4_len = 0; > mbuf->outer_l2_len = 0; > mbuf->outer_l3_len = 0; > + th = dp_packet_l4(pkt); > > if (mbuf->ol_flags & RTE_MBUF_F_TX_TCP_SEG) { > - struct tcp_header *th = dp_packet_l4(pkt); > - int hdr_len; > - > if (!th) { > VLOG_WARN_RL(&rl, "%s: TCP Segmentation without L4 header" > + " pkt len: %"PRIu32"", dev->up.name, > mbuf->pkt_len); > + return false; > + } > + > + mbuf->ol_flags |= RTE_MBUF_F_TX_TCP_CKSUM; How can that happen that packet marked for TSO is not marked for CKSUM? > + } > + > + if (mbuf->ol_flags & RTE_MBUF_F_TX_TCP_CKSUM) { > + if (!th) { > + VLOG_WARN_RL(&rl, "%s: TCP offloading without L4 header" > " pkt len: %"PRIu32"", dev->up.name, mbuf->pkt_len); > return false; > } > > mbuf->l4_len = TCP_OFFSET(th->tcp_ctl) * 4; > - mbuf->ol_flags |= RTE_MBUF_F_TX_TCP_CKSUM; > - hdr_len = mbuf->l2_len + mbuf->l3_len + mbuf->l4_len; > mbuf->tso_segsz = dev->mtu - mbuf->l3_len - mbuf->l4_len; > - if (OVS_UNLIKELY((hdr_len + mbuf->tso_segsz) > dev->max_packet_len)) > { > - VLOG_WARN_RL(&rl, "%s: Oversized TSO packet. " > - "hdr: %"PRIu32", gso: %"PRIu32", max len: > %"PRIu32"", > - dev->up.name, hdr_len, mbuf->tso_segsz, > - dev->max_packet_len); > - return false; > + > + if (mbuf->ol_flags & RTE_MBUF_F_TX_TCP_SEG) { > + int hdr_len = mbuf->l2_len + mbuf->l3_len + mbuf->l4_len; > + if (OVS_UNLIKELY((hdr_len + > + mbuf->tso_segsz) > dev->max_packet_len)) { > + VLOG_WARN_RL(&rl, "%s: Oversized TSO packet. hdr: %"PRIu32", > " > + "gso: %"PRIu32", max len: %"PRIu32"", > + dev->up.name, hdr_len, mbuf->tso_segsz, > + dev->max_packet_len); > + return false; > + } > } > > if (mbuf->ol_flags & RTE_MBUF_F_TX_IPV4) { > @@ -2707,6 +2719,7 @@ dpdk_copy_dp_packet_to_mbuf(struct rte_mempool *mp, > struct dp_packet *pkt_orig) > mbuf_dest->packet_type = pkt_orig->mbuf.packet_type; > mbuf_dest->ol_flags |= (pkt_orig->mbuf.ol_flags & > ~(RTE_MBUF_F_EXTERNAL | RTE_MBUF_F_INDIRECT)); > + mbuf_dest->tso_segsz = pkt_orig->mbuf.tso_segsz; > > memcpy(&pkt_dest->l2_pad_size, &pkt_orig->l2_pad_size, > sizeof(struct dp_packet) - offsetof(struct dp_packet, > l2_pad_size)); > @@ -2765,11 +2778,20 @@ netdev_dpdk_common_send(struct netdev *netdev, struct > dp_packet_batch *batch, > struct rte_mbuf **pkts = (struct rte_mbuf **) batch->packets; > struct netdev_dpdk *dev = netdev_dpdk_cast(netdev); > size_t cnt, pkt_cnt = dp_packet_batch_size(batch); > + struct dp_packet *packet; > + bool need_copy = false; > > memset(stats, 0, sizeof *stats); > > + DP_PACKET_BATCH_FOR_EACH (i, packet, batch) { > + if (packet->source != DPBUF_DPDK) { > + need_copy = true; > + break; > + } > + } > + > /* Copy dp-packets to mbufs. */ > - if (OVS_UNLIKELY(batch->packets[0]->source != DPBUF_DPDK)) { > + if (OVS_UNLIKELY(need_copy)) { > cnt = dpdk_copy_batch_to_mbuf(netdev, batch); > stats->tx_failure_drops += pkt_cnt - cnt; > pkt_cnt = cnt; > diff --git a/lib/netdev-linux.c b/lib/netdev-linux.c > index 52ce4947d..7a92ee534 100644 > --- a/lib/netdev-linux.c > +++ b/lib/netdev-linux.c > @@ -6849,55 +6849,6 @@ af_packet_sock(void) > return sock; > } > > -static int > -netdev_linux_parse_l2(struct dp_packet *b, uint16_t *l4proto) > -{ > - struct eth_header *eth_hdr; > - ovs_be16 eth_type; > - int l2_len; > - > - eth_hdr = dp_packet_at(b, 0, ETH_HEADER_LEN); > - if (!eth_hdr) { > - return -EINVAL; > - } > - > - l2_len = ETH_HEADER_LEN; > - eth_type = eth_hdr->eth_type; > - if (eth_type_vlan(eth_type)) { > - struct vlan_header *vlan = dp_packet_at(b, l2_len, VLAN_HEADER_LEN); > - > - if (!vlan) { > - return -EINVAL; > - } > - > - eth_type = vlan->vlan_next_type; > - l2_len += VLAN_HEADER_LEN; > - } > - > - if (eth_type == htons(ETH_TYPE_IP)) { > - struct ip_header *ip_hdr = dp_packet_at(b, l2_len, IP_HEADER_LEN); > - > - if (!ip_hdr) { > - return -EINVAL; > - } > - > - *l4proto = ip_hdr->ip_proto; > - dp_packet_hwol_set_tx_ipv4(b); > - } else if (eth_type == htons(ETH_TYPE_IPV6)) { > - struct ovs_16aligned_ip6_hdr *nh6; > - > - nh6 = dp_packet_at(b, l2_len, IPV6_HEADER_LEN); > - if (!nh6) { > - return -EINVAL; > - } > - > - *l4proto = nh6->ip6_ctlun.ip6_un1.ip6_un1_nxt; > - dp_packet_hwol_set_tx_ipv6(b); > - } > - > - return 0; > -} > - > /* Initializes packet 'b' with features enabled in the prepended > * struct virtio_net_hdr. Returns 0 if successful, otherwise a > * positive errno value. */ > @@ -6915,15 +6866,6 @@ netdev_linux_parse_vnet_hdr(struct dp_packet *b) > } > > if (vnet->flags == VIRTIO_NET_HDR_F_NEEDS_CSUM) { > - uint16_t l4proto = 0; > - > - if (netdev_linux_parse_l2(b, &l4proto)) { > - return EINVAL; > - } > - > - if (l4proto == IPPROTO_UDP) { > - dp_packet_hwol_set_csum_udp(b); > - } Why all that is no longer needed? > /* The packet has offloaded checksum. However, there is no > * additional information like the protocol used, so it would > * require to parse the packet here. The checksum starting point > diff --git a/lib/netdev.c b/lib/netdev.c > index 8df7f8737..8eda6873d 100644 > --- a/lib/netdev.c > +++ b/lib/netdev.c > @@ -35,6 +35,7 @@ > #include "coverage.h" > #include "dpif.h" > #include "dp-packet.h" > +#include "dp-packet-gso.h" > #include "openvswitch/dynamic-string.h" > #include "fatal-signal.h" > #include "hash.h" > @@ -56,6 +57,7 @@ > #include "svec.h" > #include "openvswitch/vlog.h" > #include "flow.h" > +#include "userspace-tso.h" > #include "util.h" > #ifdef __linux__ > #include "tc.h" > @@ -67,7 +69,6 @@ COVERAGE_DEFINE(netdev_received); > COVERAGE_DEFINE(netdev_sent); > COVERAGE_DEFINE(netdev_add_router); > COVERAGE_DEFINE(netdev_get_stats); > -COVERAGE_DEFINE(netdev_send_prepare_drops); > COVERAGE_DEFINE(netdev_push_header_drops); > > struct netdev_saved_flags { > @@ -792,60 +793,67 @@ netdev_get_pt_mode(const struct netdev *netdev) > : NETDEV_PT_LEGACY_L2); > } > > -/* Check if a 'packet' is compatible with 'netdev_flags'. > - * If a packet is incompatible, return 'false' with the 'errormsg' > - * pointing to a reason. */ > -static bool > -netdev_send_prepare_packet(const uint64_t netdev_flags, > - struct dp_packet *packet, char **errormsg) > -{ > - if (dp_packet_hwol_is_tso(packet) > - && !(netdev_flags & NETDEV_TX_OFFLOAD_TCP_TSO)) { > - /* Fall back to GSO in software. */ > - VLOG_ERR_BUF(errormsg, "No TSO support"); > - return false; > - } > - > - /* Packet with IP csum offloading enabled was received with verified > csum. > - * Leave the IP csum offloading enabled even with good checksum to the > - * netdev to decide what would be the best to do. > - * Provide a software fallback in case the device doesn't support IP csum > - * offloading. Note: Encapsulated packet must have the inner IP header > - * csum already calculated. > - * Packet with L4 csum offloading enabled was received with verified > csum. > - * Leave the L4 csum offloading enabled even with good checksum for the > - * netdev to decide what would be the best to do. > - * Netdev that requires pseudo header csum needs to calculate that. > - * Provide a software fallback in case the netdev doesn't support L4 csum > - * offloading. Note: Encapsulated packet must have the inner L4 header > - * csum already calculated. */ > - dp_packet_ol_send_prepare(packet, netdev_flags); > - > - return true; > -} > - > -/* Check if each packet in 'batch' is compatible with 'netdev' features, > - * otherwise either fall back to software implementation or drop it. */ > -static void > -netdev_send_prepare_batch(const struct netdev *netdev, > - struct dp_packet_batch *batch) > +static int > +netdev_send_tso(struct netdev *netdev, int qid, > + struct dp_packet_batch *batch, bool concurrent_txq) A comment for this function would be nice. > { > + struct dp_packet_batch *batches; > struct dp_packet *packet; > - size_t i, size = dp_packet_batch_size(batch); > + int n_packets; > + int n_batches; > + int error; > > - DP_PACKET_BATCH_REFILL_FOR_EACH (i, size, packet, batch) { > - char *errormsg = NULL; > + /* Calculate the total number of packets in the batch after > + * the segmentation. */ > + n_packets = 0; > + DP_PACKET_BATCH_FOR_EACH (i, packet, batch) { > + if (dp_packet_hwol_is_tso(packet)) { > + n_packets += dp_packet_gso_nr_segs(packet); > + } else { > + n_packets++; > + } > + } > > - if (netdev_send_prepare_packet(netdev->ol_flags, packet, &errormsg)) > { > - dp_packet_batch_refill(batch, packet, i); > + if (!n_packets) { > + return 0; > + } > + > + /* Allocate enough batches to store all the packets in order. */ > + n_batches = DIV_ROUND_UP(n_packets, NETDEV_MAX_BURST); > + batches = xmalloc(n_batches * sizeof(struct dp_packet_batch)); Use 'sizeof *batches' or 'sizeof batches[0]' instead. > + size_t batch_pos = 0; > + for (batch_pos = 0; batch_pos < n_batches; batch_pos++) { > + dp_packet_batch_init(&batches[batch_pos]); > + } > + /* Do the packet segmentation if TSO is flagged. */ > + size_t size = dp_packet_batch_size(batch); > + size_t k; > + batch_pos = 0; > + DP_PACKET_BATCH_REFILL_FOR_EACH (k, size, packet, batch) { > + if (dp_packet_hwol_is_tso(packet)) { > + dp_packet_gso(packet, batches, &batch_pos); > } else { > - dp_packet_delete(packet); > - COVERAGE_INC(netdev_send_prepare_drops); > - VLOG_WARN_RL(&rl, "%s: Packet dropped: %s", > - netdev_get_name(netdev), errormsg); > - free(errormsg); > + if (dp_packet_batch_is_full(&batches[batch_pos])) { > + batch_pos++; > + } > + > + dp_packet_batch_add(&batches[batch_pos], packet); > + } > + } > + > + for (batch_pos = 0; batch_pos < n_batches; batch_pos++) { > + DP_PACKET_BATCH_FOR_EACH (i, packet, (&batches[batch_pos])) { You shouldn't need to parethesize the macro argument. We fixed that. Fix the macro instead if it's still a problem. > + dp_packet_ol_send_prepare(packet, netdev->ol_flags); > + } > + > + error = netdev->netdev_class->send(netdev, qid, &batches[batch_pos], > + concurrent_txq); > + if (!error) { > + COVERAGE_INC(netdev_sent); > } > } > + free(batches); > + return 0; This function never returns an error, even if all the send() calls failed. It should return an error even if one send() failed. > } > > /* Sends 'batch' on 'netdev'. Returns 0 if successful (for every packet), > @@ -877,11 +885,21 @@ int > netdev_send(struct netdev *netdev, int qid, struct dp_packet_batch *batch, > bool concurrent_txq) > { > + const uint64_t netdev_flags = netdev->ol_flags; > + struct dp_packet *packet; > int error; > > - netdev_send_prepare_batch(netdev, batch); > - if (OVS_UNLIKELY(dp_packet_batch_is_empty(batch))) { > - return 0; > + if (userspace_tso_enabled() && > + !(netdev_flags & NETDEV_TX_OFFLOAD_TCP_TSO)) { > + DP_PACKET_BATCH_FOR_EACH (i, packet, batch) { > + if (dp_packet_hwol_is_tso(packet)) { > + return netdev_send_tso(netdev, qid, batch, concurrent_txq); > + } > + } > + } > + > + DP_PACKET_BATCH_FOR_EACH (i, packet, batch) { > + dp_packet_ol_send_prepare(packet, netdev_flags); > } > > error = netdev->netdev_class->send(netdev, qid, batch, concurrent_txq); > diff --git a/lib/packets.c b/lib/packets.c > index 462b51f92..dab823ba2 100644 > --- a/lib/packets.c > +++ b/lib/packets.c > @@ -427,7 +427,7 @@ add_mpls(struct dp_packet *packet, ovs_be16 ethtype, > ovs_be32 lse, > } > > if (!l3_encap) { > - struct mpls_hdr *header = dp_packet_push_uninit(packet, MPLS_HLEN); > + struct mpls_hdr *header = dp_packet_resize_l2(packet, MPLS_HLEN); > > put_16aligned_be32(&header->mpls_lse, lse); > packet->l2_5_ofs = 0; > @@ -513,7 +513,7 @@ push_nsh(struct dp_packet *packet, const struct nsh_hdr > *nsh_hdr_src) > OVS_NOT_REACHED(); > } > > - nsh = (struct nsh_hdr *) dp_packet_push_uninit(packet, length); > + nsh = (struct nsh_hdr *) dp_packet_resize_l2(packet, length); > memcpy(nsh, nsh_hdr_src, length); > nsh->next_proto = next_proto; > packet->packet_type = htonl(PT_NSH); _______________________________________________ dev mailing list d...@openvswitch.org https://mail.openvswitch.org/mailman/listinfo/ovs-dev