On Tue, Jul 4, 2023 at 9:00 PM Ilya Maximets <i.maxim...@ovn.org> wrote: > > On 6/21/23 22:36, Mike Pattrick wrote: > > From: Flavio Leitner <f...@sysclose.org> > > > > This provides a software implementation in the case > > the egress netdev doesn't support segmentation in hardware. > > > > The challenge here is to guarantee packet ordering in the > > original batch that may be full of TSO packets. Each TSO > > packet can go up to ~64kB, so with segment size of 1440 > > that means about 44 packets for each TSO. Each batch has > > 32 packets, so the total batch amounts to 1408 normal > > packets. > > > > The segmentation estimates the total number of packets > > and then the total number of batches. Then allocate > > enough memory and finally do the work. > > > > Finally each batch is sent in order to the netdev. > > > > Signed-off-by: Flavio Leitner <f...@sysclose.org> > > Co-authored-by: Mike Pattrick <m...@redhat.com> > > Signed-off-by: Mike Pattrick <m...@redhat.com> > > --- > > lib/automake.mk | 2 + > > lib/dp-packet-gso.c | 172 ++++++++++++++++++++++++++++++++++++++++++++ > > lib/dp-packet-gso.h | 24 +++++++ > > lib/dp-packet.h | 11 +++ > > lib/netdev-dpdk.c | 46 ++++++++---- > > lib/netdev-linux.c | 58 --------------- > > lib/netdev.c | 120 ++++++++++++++++++------------- > > lib/packets.c | 4 +- > > 8 files changed, 314 insertions(+), 123 deletions(-) > > create mode 100644 lib/dp-packet-gso.c > > create mode 100644 lib/dp-packet-gso.h > > > > diff --git a/lib/automake.mk b/lib/automake.mk > > index e64ee76ce..49a92958d 100644 > > --- a/lib/automake.mk > > +++ b/lib/automake.mk > > @@ -118,6 +118,8 @@ lib_libopenvswitch_la_SOURCES = \ > > lib/dpctl.h \ > > lib/dp-packet.h \ > > lib/dp-packet.c \ > > + lib/dp-packet-gso.c \ > > + lib/dp-packet-gso.h \ > > lib/dpdk.h \ > > lib/dpif-netdev-extract-study.c \ > > lib/dpif-netdev-lookup.h \ > > diff --git a/lib/dp-packet-gso.c b/lib/dp-packet-gso.c > > new file mode 100644 > > index 000000000..bc72e2f90 > > --- /dev/null > > +++ b/lib/dp-packet-gso.c > > @@ -0,0 +1,172 @@ > > +/* > > + * Copyright (c) 2021 Red Hat, Inc. > > Need to adjust the year. :) > > > + * > > + * Licensed under the Apache License, Version 2.0 (the "License"); > > + * you may not use this file except in compliance with the License. > > + * You may obtain a copy of the License at: > > + * > > + * http://www.apache.org/licenses/LICENSE-2.0 > > + * > > + * Unless required by applicable law or agreed to in writing, software > > + * distributed under the License is distributed on an "AS IS" BASIS, > > + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. > > + * See the License for the specific language governing permissions and > > + * limitations under the License. > > + */ > > + > > +#include <config.h> > > +#include <stdlib.h> > > +#include <string.h> > > + > > +#include "coverage.h" > > +#include "dp-packet.h" > > +#include "dp-packet-gso.h" > > +#include "netdev-provider.h" > > +#include "openvswitch/vlog.h" > > + > > +VLOG_DEFINE_THIS_MODULE(dp_packet_gso); > > + > > +COVERAGE_DEFINE(soft_seg_good); > > + > > +/* Retuns a new packet that is a segment of packet 'p'. > > + * > > + * The new packet is initialized with 'hdr_len' bytes from the > > + * start of packet 'p' and then appended with 'data_len' bytes > > + * from the 'data' buffer. > > + * > > + * Note: The packet headers are not updated. */ > > +static struct dp_packet * > > +dp_packet_gso_seg_new(const struct dp_packet *p, size_t hdr_len, > > + const char *data, size_t data_len) > > +{ > > + struct dp_packet *seg = dp_packet_new_with_headroom(hdr_len + data_len, > > + > > dp_packet_headroom(p)); > > + > > + /* Append the original packet headers and then the payload. */ > > + dp_packet_put(seg, dp_packet_data(p), hdr_len); > > + dp_packet_put(seg, data, data_len); > > + > > + /* The new segment should have the same offsets. */ > > + seg->l2_5_ofs = p->l2_5_ofs; > > + seg->l3_ofs = p->l3_ofs; > > + seg->l4_ofs = p->l4_ofs; > > + > > + /* The protocol headers remain the same, so preserve hash and mark. */ > > + *dp_packet_rss_ptr(seg) = dp_packet_get_rss_hash(p); > > + *dp_packet_flow_mark_ptr(seg) = *dp_packet_flow_mark_ptr(p); > > + > > + /* The segment should inherit all the offloading flags from the > > + * original packet, except for the TCP segmentation, external > > + * buffer and indirect buffer flags. */ > > + *dp_packet_ol_flags_ptr(seg) = *dp_packet_ol_flags_ptr(p) > > + & ~(DP_PACKET_OL_TX_TCP_SEG | DP_PACKET_OL_EXTERNAL > > + | DP_PACKET_OL_INDIRECT); > > We should inherit DP_PACKET_OL_SUPPORTED_MASK & ~DP_PACKET_OL_TX_TCP_SEG. > So, > > *dp_packet_ol_flags_ptr(seg) &= DP_PACKET_OL_SUPPORTED_MASK > & ~DP_PACKET_OL_TX_TCP_SEG; > > Is that right? > > > + > > + dp_packet_hwol_reset_tcp_seg(seg); > > Also, this function just resets the DP_PACKET_OL_TX_TCP_SEG. > So, above should be just: > > *dp_packet_ol_flags_ptr(seg) &= DP_PACKET_OL_SUPPORTED_MASK; > > The same as in the clone function. Either way one of these operations > is redundant. > > > + > > + return seg; > > +} > > + > > +/* Returns the calculated number of TCP segments in packet 'p'. */ > > +int > > +dp_packet_gso_nr_segs(struct dp_packet *p) > > +{ > > + uint16_t segsz = dp_packet_get_tso_segsz(p); > > + const char *data_tail; > > + const char *data_pos; > > + int n_segs; > > + > > + data_pos = dp_packet_get_tcp_payload(p); > > + data_tail = (char *) dp_packet_tail(p) - dp_packet_l2_pad_size(p); > > + data_pos = dp_packet_get_tcp_payload(p); > > Assigning twice? > > > + n_segs = DIV_ROUND_UP((data_tail - data_pos), segsz); > > Do we need this variable? Maybe just return here? > > > + > > + return n_segs; > > + > > Extra line. > > > +} > > + > > +/* Perform software segmentation on packet 'p'. > > + * > > + * Returns all the segments added to the array of preallocated > > + * batches in 'batches' starting at batch position 'batch_pos'. */ > > +void > > +dp_packet_gso(struct dp_packet *p, struct dp_packet_batch *batches, > > + size_t *batch_pos) > > +{ > > + struct tcp_header *tcp_hdr; > > + struct ip_header *ip_hdr; > > + struct dp_packet *seg; > > + uint16_t tcp_offset; > > + uint16_t tso_segsz; > > + uint32_t tcp_seq; > > + uint16_t ip_id; > > + int hdr_len; > > + > > + tso_segsz = dp_packet_get_tso_segsz(p); > > + if (!tso_segsz) { > > + VLOG_WARN("GSO packet with len %d with no segment size.", > > + dp_packet_size(p)); > > This should likely be rate limited. Also this packet is going to > be leaked. And maybe we should count that drop somehow? A coverage > counter? > > > + return; > > + } > > + > > + tcp_hdr = dp_packet_l4(p); > > + tcp_offset = TCP_OFFSET(tcp_hdr->tcp_ctl); > > + tcp_seq = ntohl(get_16aligned_be32(&tcp_hdr->tcp_seq)); > > + hdr_len = ((char *) dp_packet_l4(p) - (char *) dp_packet_eth(p)) > > + + tcp_offset * 4; > > + ip_id = 0; > > + if (dp_packet_hwol_is_ipv4(p)) { > > + ip_hdr = dp_packet_l3(p); > > + ip_id = ntohs(ip_hdr->ip_id); > > + } > > + > > + const char *data_tail = (char *) dp_packet_tail(p) > > + - dp_packet_l2_pad_size(p); > > + const char *data_pos = dp_packet_get_tcp_payload(p); > > + int n_segs = dp_packet_gso_nr_segs(p); > > + int seg_len; > > An empty line would be nice. > > > + for (int i = 0; i < n_segs; i++) { > > + seg_len = data_tail - data_pos; > > + if (seg_len > tso_segsz) { > > + seg_len = tso_segsz; > > + } > > + > > + seg = dp_packet_gso_seg_new(p, hdr_len, data_pos, seg_len); > > + data_pos += seg_len; > > + > > + /* Update L3 header. */ > > + if (dp_packet_hwol_is_ipv4(seg)) { > > + ip_hdr = dp_packet_l3(seg); > > + ip_hdr->ip_tot_len = htons(sizeof *ip_hdr + > > + dp_packet_l4_size(seg)); > > + ip_hdr->ip_id = htons(ip_id); > > + ip_hdr->ip_csum = 0; > > + ip_id++; > > If we assume that IDs are sequential in the original stream, > here we will produce packets with overlapping IDs. I suppose > that's not a big problem?
If the source stream came from Linux/Windows, ipid will be random. This sequential behaviour is similar to the GSO implementation in DPDK and Linux. I'll send in an updated version with the other concerns addressed. Cheers, Mike > > > + } else { > > + struct ovs_16aligned_ip6_hdr *ip6_hdr = dp_packet_l3(seg); > > + > > + ip6_hdr->ip6_ctlun.ip6_un1.ip6_un1_plen = htons(sizeof *ip_hdr > > + + dp_packet_l4_size(seg)); > > Maybe break the line before or after '=' and start the next one with smaller > indentation. > > > + > > Extra line. > > > + } > > + > > + /* Update L4 header. */ > > + tcp_hdr = dp_packet_l4(seg); > > + put_16aligned_be32(&tcp_hdr->tcp_seq, htonl(tcp_seq)); > > + tcp_seq += seg_len; > > + if (OVS_LIKELY(i < (n_segs - 1))) { > > + /* Reset flags PUSH and FIN unless it is the last segment. */ > > + uint16_t tcp_flags = TCP_FLAGS(tcp_hdr->tcp_ctl) > > + & ~(TCP_PSH | TCP_FIN); > > + tcp_hdr->tcp_ctl = TCP_CTL(tcp_flags, tcp_offset); > > + } > > + > > + if (dp_packet_batch_is_full(&batches[ *batch_pos])) { > > + *batch_pos += 1; > > + } > > + > > + dp_packet_batch_add(&batches[ *batch_pos], seg); > > The extra space looks strange. We should remove it. > > Alternative approach to counting batches will be to just increment a batch > pointer. i.e. to have a **curr_batch pointer and do (*curr_batch)++ > whenever it gets full: > > if (dp_packet_batch_is_full(*curr_batch)) { > (*curr_batch)++; > } > dp_packet_batch_add(*curr_batch, seg); > > What do you think? > > > + } > > + > > + COVERAGE_INC(soft_seg_good); > > +} > > diff --git a/lib/dp-packet-gso.h b/lib/dp-packet-gso.h > > new file mode 100644 > > index 000000000..81cb52742 > > --- /dev/null > > +++ b/lib/dp-packet-gso.h > > @@ -0,0 +1,24 @@ > > +/* > > + * Copyright (c) 2021 Red Hat, Inc. > > Year. > > > + * > > + * Licensed under the Apache License, Version 2.0 (the "License"); > > + * you may not use this file except in compliance with the License. > > + * You may obtain a copy of the License at: > > + * > > + * http://www.apache.org/licenses/LICENSE-2.0 > > + * > > + * Unless required by applicable law or agreed to in writing, software > > + * distributed under the License is distributed on an "AS IS" BASIS, > > + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. > > + * See the License for the specific language governing permissions and > > + * limitations under the License. > > + */ > > + > > +#ifndef DP_PACKET_GSO_H > > +#define DP_PACKET_GSO_H 1 > > + > > +void dp_packet_gso(struct dp_packet *p, struct dp_packet_batch *batches, > > + size_t *batch_pos); > > +int dp_packet_gso_nr_segs(struct dp_packet *p); > > No need for struct names in prototypes. > > > + > > +#endif /* dp-packet-gso.h */ > > diff --git a/lib/dp-packet.h b/lib/dp-packet.h > > index 6a924f3ff..dfa9a0d6b 100644 > > --- a/lib/dp-packet.h > > +++ b/lib/dp-packet.h > > @@ -86,6 +86,10 @@ enum dp_packet_offload_mask { > > DEF_OL_FLAG(DP_PACKET_OL_TX_SCTP_CKSUM, RTE_MBUF_F_TX_SCTP_CKSUM, > > 0x800), > > /* Offload IP checksum. */ > > DEF_OL_FLAG(DP_PACKET_OL_TX_IP_CKSUM, RTE_MBUF_F_TX_IP_CKSUM, 0x1000), > > + /* External Buffer attached. */ > > + DEF_OL_FLAG(DP_PACKET_OL_EXTERNAL, RTE_MBUF_F_EXTERNAL, 0x4000), > > + /* Indirect Buffer attached. */ > > + DEF_OL_FLAG(DP_PACKET_OL_INDIRECT, RTE_MBUF_F_INDIRECT, 0x8000), > > These are DPDK-specific and shouldn't be here. DP_PACKET_OL_SUPPORTED_MASK > can be used for clearing unknown bits. > > > /* Adding new field requires adding to DP_PACKET_OL_SUPPORTED_MASK. */ > > }; > > > > @@ -1131,6 +1135,13 @@ dp_packet_hwol_set_tcp_seg(struct dp_packet *b) > > *dp_packet_ol_flags_ptr(b) |= DP_PACKET_OL_TX_TCP_SEG; > > } > > > > +/* Resets TCP Segmentation flag in packet 'p'. */ > > +static inline void > > +dp_packet_hwol_reset_tcp_seg(struct dp_packet *p) > > +{ > > + *dp_packet_ol_flags_ptr(p) &= ~DP_PACKET_OL_TX_TCP_SEG; > > +} > > + > > /* Returns 'true' if the IP header has good integrity and the > > * checksum in it is complete. */ > > static inline bool > > diff --git a/lib/netdev-dpdk.c b/lib/netdev-dpdk.c > > index 1c100c48e..82ce74c45 100644 > > --- a/lib/netdev-dpdk.c > > +++ b/lib/netdev-dpdk.c > > @@ -2288,6 +2288,7 @@ static bool > > netdev_dpdk_prep_hwol_packet(struct netdev_dpdk *dev, struct rte_mbuf > > *mbuf) > > { > > struct dp_packet *pkt = CONTAINER_OF(mbuf, struct dp_packet, mbuf); > > + struct tcp_header *th; > > > > if (!(mbuf->ol_flags & (RTE_MBUF_F_TX_IP_CKSUM | RTE_MBUF_F_TX_L4_MASK > > | RTE_MBUF_F_TX_TCP_SEG))) { > > @@ -2299,27 +2300,38 @@ netdev_dpdk_prep_hwol_packet(struct netdev_dpdk > > *dev, struct rte_mbuf *mbuf) > > mbuf->l4_len = 0; > > mbuf->outer_l2_len = 0; > > mbuf->outer_l3_len = 0; > > + th = dp_packet_l4(pkt); > > > > if (mbuf->ol_flags & RTE_MBUF_F_TX_TCP_SEG) { > > - struct tcp_header *th = dp_packet_l4(pkt); > > - int hdr_len; > > - > > if (!th) { > > VLOG_WARN_RL(&rl, "%s: TCP Segmentation without L4 header" > > + " pkt len: %"PRIu32"", dev->up.name, > > mbuf->pkt_len); > > + return false; > > + } > > + > > + mbuf->ol_flags |= RTE_MBUF_F_TX_TCP_CKSUM; > > How can that happen that packet marked for TSO is not marked for CKSUM? I've seen "((ol_flags & RTE_MBUF_F_TX_TCP_CKSUM) || (ol_flags & RTE_MBUF_F_TX_TCP_SEG))" logic in DPDK source code, but the documentation suggests that one implies the other, and looking deeper that seems to be the case. > > > + } > > + > > + if (mbuf->ol_flags & RTE_MBUF_F_TX_TCP_CKSUM) { > > + if (!th) { > > + VLOG_WARN_RL(&rl, "%s: TCP offloading without L4 header" > > " pkt len: %"PRIu32"", dev->up.name, > > mbuf->pkt_len); > > return false; > > } > > > > mbuf->l4_len = TCP_OFFSET(th->tcp_ctl) * 4; > > - mbuf->ol_flags |= RTE_MBUF_F_TX_TCP_CKSUM; > > - hdr_len = mbuf->l2_len + mbuf->l3_len + mbuf->l4_len; > > mbuf->tso_segsz = dev->mtu - mbuf->l3_len - mbuf->l4_len; > > - if (OVS_UNLIKELY((hdr_len + mbuf->tso_segsz) > > > dev->max_packet_len)) { > > - VLOG_WARN_RL(&rl, "%s: Oversized TSO packet. " > > - "hdr: %"PRIu32", gso: %"PRIu32", max len: > > %"PRIu32"", > > - dev->up.name, hdr_len, mbuf->tso_segsz, > > - dev->max_packet_len); > > - return false; > > + > > + if (mbuf->ol_flags & RTE_MBUF_F_TX_TCP_SEG) { > > + int hdr_len = mbuf->l2_len + mbuf->l3_len + mbuf->l4_len; > > + if (OVS_UNLIKELY((hdr_len + > > + mbuf->tso_segsz) > dev->max_packet_len)) { > > + VLOG_WARN_RL(&rl, "%s: Oversized TSO packet. hdr: > > %"PRIu32", " > > + "gso: %"PRIu32", max len: %"PRIu32"", > > + dev->up.name, hdr_len, mbuf->tso_segsz, > > + dev->max_packet_len); > > + return false; > > + } > > } > > > > if (mbuf->ol_flags & RTE_MBUF_F_TX_IPV4) { > > @@ -2707,6 +2719,7 @@ dpdk_copy_dp_packet_to_mbuf(struct rte_mempool *mp, > > struct dp_packet *pkt_orig) > > mbuf_dest->packet_type = pkt_orig->mbuf.packet_type; > > mbuf_dest->ol_flags |= (pkt_orig->mbuf.ol_flags & > > ~(RTE_MBUF_F_EXTERNAL | RTE_MBUF_F_INDIRECT)); > > + mbuf_dest->tso_segsz = pkt_orig->mbuf.tso_segsz; > > > > memcpy(&pkt_dest->l2_pad_size, &pkt_orig->l2_pad_size, > > sizeof(struct dp_packet) - offsetof(struct dp_packet, > > l2_pad_size)); > > @@ -2765,11 +2778,20 @@ netdev_dpdk_common_send(struct netdev *netdev, > > struct dp_packet_batch *batch, > > struct rte_mbuf **pkts = (struct rte_mbuf **) batch->packets; > > struct netdev_dpdk *dev = netdev_dpdk_cast(netdev); > > size_t cnt, pkt_cnt = dp_packet_batch_size(batch); > > + struct dp_packet *packet; > > + bool need_copy = false; > > > > memset(stats, 0, sizeof *stats); > > > > + DP_PACKET_BATCH_FOR_EACH (i, packet, batch) { > > + if (packet->source != DPBUF_DPDK) { > > + need_copy = true; > > + break; > > + } > > + } > > + > > /* Copy dp-packets to mbufs. */ > > - if (OVS_UNLIKELY(batch->packets[0]->source != DPBUF_DPDK)) { > > + if (OVS_UNLIKELY(need_copy)) { > > cnt = dpdk_copy_batch_to_mbuf(netdev, batch); > > stats->tx_failure_drops += pkt_cnt - cnt; > > pkt_cnt = cnt; > > diff --git a/lib/netdev-linux.c b/lib/netdev-linux.c > > index 52ce4947d..7a92ee534 100644 > > --- a/lib/netdev-linux.c > > +++ b/lib/netdev-linux.c > > @@ -6849,55 +6849,6 @@ af_packet_sock(void) > > return sock; > > } > > > > -static int > > -netdev_linux_parse_l2(struct dp_packet *b, uint16_t *l4proto) > > -{ > > - struct eth_header *eth_hdr; > > - ovs_be16 eth_type; > > - int l2_len; > > - > > - eth_hdr = dp_packet_at(b, 0, ETH_HEADER_LEN); > > - if (!eth_hdr) { > > - return -EINVAL; > > - } > > - > > - l2_len = ETH_HEADER_LEN; > > - eth_type = eth_hdr->eth_type; > > - if (eth_type_vlan(eth_type)) { > > - struct vlan_header *vlan = dp_packet_at(b, l2_len, > > VLAN_HEADER_LEN); > > - > > - if (!vlan) { > > - return -EINVAL; > > - } > > - > > - eth_type = vlan->vlan_next_type; > > - l2_len += VLAN_HEADER_LEN; > > - } > > - > > - if (eth_type == htons(ETH_TYPE_IP)) { > > - struct ip_header *ip_hdr = dp_packet_at(b, l2_len, IP_HEADER_LEN); > > - > > - if (!ip_hdr) { > > - return -EINVAL; > > - } > > - > > - *l4proto = ip_hdr->ip_proto; > > - dp_packet_hwol_set_tx_ipv4(b); > > - } else if (eth_type == htons(ETH_TYPE_IPV6)) { > > - struct ovs_16aligned_ip6_hdr *nh6; > > - > > - nh6 = dp_packet_at(b, l2_len, IPV6_HEADER_LEN); > > - if (!nh6) { > > - return -EINVAL; > > - } > > - > > - *l4proto = nh6->ip6_ctlun.ip6_un1.ip6_un1_nxt; > > - dp_packet_hwol_set_tx_ipv6(b); > > - } > > - > > - return 0; > > -} > > - > > /* Initializes packet 'b' with features enabled in the prepended > > * struct virtio_net_hdr. Returns 0 if successful, otherwise a > > * positive errno value. */ > > @@ -6915,15 +6866,6 @@ netdev_linux_parse_vnet_hdr(struct dp_packet *b) > > } > > > > if (vnet->flags == VIRTIO_NET_HDR_F_NEEDS_CSUM) { > > - uint16_t l4proto = 0; > > - > > - if (netdev_linux_parse_l2(b, &l4proto)) { > > - return EINVAL; > > - } > > - > > - if (l4proto == IPPROTO_UDP) { > > - dp_packet_hwol_set_csum_udp(b); > > - } > > Why all that is no longer needed? > > > /* The packet has offloaded checksum. However, there is no > > * additional information like the protocol used, so it would > > * require to parse the packet here. The checksum starting point > > diff --git a/lib/netdev.c b/lib/netdev.c > > index 8df7f8737..8eda6873d 100644 > > --- a/lib/netdev.c > > +++ b/lib/netdev.c > > @@ -35,6 +35,7 @@ > > #include "coverage.h" > > #include "dpif.h" > > #include "dp-packet.h" > > +#include "dp-packet-gso.h" > > #include "openvswitch/dynamic-string.h" > > #include "fatal-signal.h" > > #include "hash.h" > > @@ -56,6 +57,7 @@ > > #include "svec.h" > > #include "openvswitch/vlog.h" > > #include "flow.h" > > +#include "userspace-tso.h" > > #include "util.h" > > #ifdef __linux__ > > #include "tc.h" > > @@ -67,7 +69,6 @@ COVERAGE_DEFINE(netdev_received); > > COVERAGE_DEFINE(netdev_sent); > > COVERAGE_DEFINE(netdev_add_router); > > COVERAGE_DEFINE(netdev_get_stats); > > -COVERAGE_DEFINE(netdev_send_prepare_drops); > > COVERAGE_DEFINE(netdev_push_header_drops); > > > > struct netdev_saved_flags { > > @@ -792,60 +793,67 @@ netdev_get_pt_mode(const struct netdev *netdev) > > : NETDEV_PT_LEGACY_L2); > > } > > > > -/* Check if a 'packet' is compatible with 'netdev_flags'. > > - * If a packet is incompatible, return 'false' with the 'errormsg' > > - * pointing to a reason. */ > > -static bool > > -netdev_send_prepare_packet(const uint64_t netdev_flags, > > - struct dp_packet *packet, char **errormsg) > > -{ > > - if (dp_packet_hwol_is_tso(packet) > > - && !(netdev_flags & NETDEV_TX_OFFLOAD_TCP_TSO)) { > > - /* Fall back to GSO in software. */ > > - VLOG_ERR_BUF(errormsg, "No TSO support"); > > - return false; > > - } > > - > > - /* Packet with IP csum offloading enabled was received with verified > > csum. > > - * Leave the IP csum offloading enabled even with good checksum to the > > - * netdev to decide what would be the best to do. > > - * Provide a software fallback in case the device doesn't support IP > > csum > > - * offloading. Note: Encapsulated packet must have the inner IP header > > - * csum already calculated. > > - * Packet with L4 csum offloading enabled was received with verified > > csum. > > - * Leave the L4 csum offloading enabled even with good checksum for the > > - * netdev to decide what would be the best to do. > > - * Netdev that requires pseudo header csum needs to calculate that. > > - * Provide a software fallback in case the netdev doesn't support L4 > > csum > > - * offloading. Note: Encapsulated packet must have the inner L4 header > > - * csum already calculated. */ > > - dp_packet_ol_send_prepare(packet, netdev_flags); > > - > > - return true; > > -} > > - > > -/* Check if each packet in 'batch' is compatible with 'netdev' features, > > - * otherwise either fall back to software implementation or drop it. */ > > -static void > > -netdev_send_prepare_batch(const struct netdev *netdev, > > - struct dp_packet_batch *batch) > > +static int > > +netdev_send_tso(struct netdev *netdev, int qid, > > + struct dp_packet_batch *batch, bool concurrent_txq) > > A comment for this function would be nice. > > > { > > + struct dp_packet_batch *batches; > > struct dp_packet *packet; > > - size_t i, size = dp_packet_batch_size(batch); > > + int n_packets; > > + int n_batches; > > + int error; > > > > - DP_PACKET_BATCH_REFILL_FOR_EACH (i, size, packet, batch) { > > - char *errormsg = NULL; > > + /* Calculate the total number of packets in the batch after > > + * the segmentation. */ > > + n_packets = 0; > > + DP_PACKET_BATCH_FOR_EACH (i, packet, batch) { > > + if (dp_packet_hwol_is_tso(packet)) { > > + n_packets += dp_packet_gso_nr_segs(packet); > > + } else { > > + n_packets++; > > + } > > + } > > > > - if (netdev_send_prepare_packet(netdev->ol_flags, packet, > > &errormsg)) { > > - dp_packet_batch_refill(batch, packet, i); > > + if (!n_packets) { > > + return 0; > > + } > > + > > + /* Allocate enough batches to store all the packets in order. */ > > + n_batches = DIV_ROUND_UP(n_packets, NETDEV_MAX_BURST); > > + batches = xmalloc(n_batches * sizeof(struct dp_packet_batch)); > > Use 'sizeof *batches' or 'sizeof batches[0]' instead. > > > + size_t batch_pos = 0; > > + for (batch_pos = 0; batch_pos < n_batches; batch_pos++) { > > + dp_packet_batch_init(&batches[batch_pos]); > > + } > > + /* Do the packet segmentation if TSO is flagged. */ > > + size_t size = dp_packet_batch_size(batch); > > + size_t k; > > + batch_pos = 0; > > + DP_PACKET_BATCH_REFILL_FOR_EACH (k, size, packet, batch) { > > + if (dp_packet_hwol_is_tso(packet)) { > > + dp_packet_gso(packet, batches, &batch_pos); > > } else { > > - dp_packet_delete(packet); > > - COVERAGE_INC(netdev_send_prepare_drops); > > - VLOG_WARN_RL(&rl, "%s: Packet dropped: %s", > > - netdev_get_name(netdev), errormsg); > > - free(errormsg); > > + if (dp_packet_batch_is_full(&batches[batch_pos])) { > > + batch_pos++; > > + } > > + > > + dp_packet_batch_add(&batches[batch_pos], packet); > > + } > > + } > > + > > + for (batch_pos = 0; batch_pos < n_batches; batch_pos++) { > > + DP_PACKET_BATCH_FOR_EACH (i, packet, (&batches[batch_pos])) { > > You shouldn't need to parethesize the macro argument. We fixed that. > Fix the macro instead if it's still a problem. > > > + dp_packet_ol_send_prepare(packet, netdev->ol_flags); > > + } > > + > > + error = netdev->netdev_class->send(netdev, qid, > > &batches[batch_pos], > > + concurrent_txq); > > + if (!error) { > > + COVERAGE_INC(netdev_sent); > > } > > } > > + free(batches); > > + return 0; > > This function never returns an error, even if all the send() calls failed. > It should return an error even if one send() failed. > > > } > > > > /* Sends 'batch' on 'netdev'. Returns 0 if successful (for every packet), > > @@ -877,11 +885,21 @@ int > > netdev_send(struct netdev *netdev, int qid, struct dp_packet_batch *batch, > > bool concurrent_txq) > > { > > + const uint64_t netdev_flags = netdev->ol_flags; > > + struct dp_packet *packet; > > int error; > > > > - netdev_send_prepare_batch(netdev, batch); > > - if (OVS_UNLIKELY(dp_packet_batch_is_empty(batch))) { > > - return 0; > > + if (userspace_tso_enabled() && > > + !(netdev_flags & NETDEV_TX_OFFLOAD_TCP_TSO)) { > > + DP_PACKET_BATCH_FOR_EACH (i, packet, batch) { > > + if (dp_packet_hwol_is_tso(packet)) { > > + return netdev_send_tso(netdev, qid, batch, concurrent_txq); > > + } > > + } > > + } > > + > > + DP_PACKET_BATCH_FOR_EACH (i, packet, batch) { > > + dp_packet_ol_send_prepare(packet, netdev_flags); > > } > > > > error = netdev->netdev_class->send(netdev, qid, batch, concurrent_txq); > > diff --git a/lib/packets.c b/lib/packets.c > > index 462b51f92..dab823ba2 100644 > > --- a/lib/packets.c > > +++ b/lib/packets.c > > @@ -427,7 +427,7 @@ add_mpls(struct dp_packet *packet, ovs_be16 ethtype, > > ovs_be32 lse, > > } > > > > if (!l3_encap) { > > - struct mpls_hdr *header = dp_packet_push_uninit(packet, MPLS_HLEN); > > + struct mpls_hdr *header = dp_packet_resize_l2(packet, MPLS_HLEN); > > > > put_16aligned_be32(&header->mpls_lse, lse); > > packet->l2_5_ofs = 0; > > @@ -513,7 +513,7 @@ push_nsh(struct dp_packet *packet, const struct nsh_hdr > > *nsh_hdr_src) > > OVS_NOT_REACHED(); > > } > > > > - nsh = (struct nsh_hdr *) dp_packet_push_uninit(packet, length); > > + nsh = (struct nsh_hdr *) dp_packet_resize_l2(packet, length); > > memcpy(nsh, nsh_hdr_src, length); > > nsh->next_proto = next_proto; > > packet->packet_type = htonl(PT_NSH); > _______________________________________________ dev mailing list d...@openvswitch.org https://mail.openvswitch.org/mailman/listinfo/ovs-dev