From: Ilya Maximets <i.maxim...@ovn.org> On 11/24/22 06:30, Mike Pattrick wrote: > From: Flavio Leitner <f...@sysclose.org> > > The netdev receiving packets is supposed to provide the flags > indicating if the L4 checksum was verified and it is OK or BAD, > otherwise the stack will check when appropriate by software. > > If the packet comes with a good checksum, then postpone the > checksum calculation to the egress device if needed. > > When encapsulating a packet with that flag, set the checksum > of the inner L4 header since that is not yet supported. > > Calculate the L4 checksum when the packet is going to be sent > over a device that doesn't support the feature. > > Linux tap devices allow enabling L3 and L4 offload, so this > patch enables the feature. However, the Linux socket interface > remains disabled because the API doesn't allow enabling > those two features without enabling TSO too. > > Signed-off-by: Flavio Leitner <f...@sysclose.org> > Co-authored-by: Mike Pattrick <m...@redhat.com> > Signed-off-by: Mike Pattrick <m...@redhat.com> > ---
Didn't test this as well. Only visual review. Should we enable checksum offloading in CONFIGURE_VETH_OFFLOADS for check-system-userspace testsuite since support is enabled by default? More comments inline. Best regards, Ilya Maximets. > lib/conntrack.c | 15 +-- > lib/dp-packet.c | 25 ++++ > lib/dp-packet.h | 78 ++++++++++++- > lib/flow.c | 23 ++++ > lib/netdev-dpdk.c | 188 ++++++++++++++++++++---------- > lib/netdev-linux.c | 252 ++++++++++++++++++++++++++-------------- > lib/netdev-native-tnl.c | 32 +---- > lib/netdev.c | 46 ++------ > lib/packets.c | 175 ++++++++++++++++++++++------ > lib/packets.h | 3 + > 10 files changed, 580 insertions(+), 257 deletions(-) > > diff --git a/lib/conntrack.c b/lib/conntrack.c > index 12194cce8..57e6a55e0 100644 > --- a/lib/conntrack.c > +++ b/lib/conntrack.c > @@ -2118,13 +2118,12 @@ conn_key_extract(struct conntrack *ct, struct > dp_packet *pkt, ovs_be16 dl_type, > } > > if (ok) { > - bool hwol_bad_l4_csum = dp_packet_l4_checksum_bad(pkt); > - if (!hwol_bad_l4_csum) { > - bool hwol_good_l4_csum = dp_packet_l4_checksum_good(pkt) > - || dp_packet_hwol_tx_l4_checksum(pkt); > + if (!dp_packet_l4_checksum_bad(pkt)) { > /* Validate the checksum only when hwol is not supported. 
*/ > if (extract_l4(&ctx->key, l4, dp_packet_l4_size(pkt), > - &ctx->icmp_related, l3, !hwol_good_l4_csum, > + &ctx->icmp_related, l3, > + !dp_packet_l4_checksum_good(pkt) && > + !dp_packet_hwol_tx_l4_checksum(pkt), > NULL)) { > ctx->hash = conn_key_hash(&ctx->key, ct->hash_basis); > return true; > @@ -3453,8 +3452,10 @@ handle_ftp_ctl(struct conntrack *ct, const struct > conn_lookup_ctx *ctx, > adj_seqnum(&th->tcp_seq, ec->seq_skew); > } > > - th->tcp_csum = 0; > - if (!dp_packet_hwol_tx_l4_checksum(pkt)) { > + if (dp_packet_hwol_tx_l4_checksum(pkt)) { > + dp_packet_ol_reset_l4_csum_good(pkt); > + } else { > + th->tcp_csum = 0; > if (ctx->key.dl_type == htons(ETH_TYPE_IPV6)) { > th->tcp_csum = packet_csum_upperlayer6(nh6, th, > ctx->key.nw_proto, > dp_packet_l4_size(pkt)); > diff --git a/lib/dp-packet.c b/lib/dp-packet.c > index 90ef85de3..2cfaf5274 100644 > --- a/lib/dp-packet.c > +++ b/lib/dp-packet.c > @@ -38,6 +38,9 @@ dp_packet_init__(struct dp_packet *b, size_t allocated, > enum dp_packet_source so > dp_packet_init_specific(b); > /* By default assume the packet type to be Ethernet. */ > b->packet_type = htonl(PT_ETH); > + /* Reset csum start and offset. */ > + b->csum_start = 0; > + b->csum_offset = 0; > } > > static void > @@ -544,4 +547,26 @@ dp_packet_ol_send_prepare(struct dp_packet *p, const > uint64_t flags) > dp_packet_ol_set_ip_csum_good(p); > dp_packet_hwol_reset_tx_ip_csum(p); > } > + > + if (dp_packet_l4_checksum_good(p) || !dp_packet_hwol_tx_l4_checksum(p)) { > + dp_packet_hwol_reset_tx_l4_csum(p); > + return; > + } > + > + if (dp_packet_hwol_l4_is_tcp(p) > + && !(flags & NETDEV_TX_OFFLOAD_TCP_CKSUM)) { > + packet_tcp_complete_csum(p); > + dp_packet_ol_set_l4_csum_good(p); > + dp_packet_hwol_reset_tx_l4_csum(p); > + } else if (dp_packet_hwol_l4_is_udp(p) > + && !(flags & NETDEV_TX_OFFLOAD_UDP_CKSUM)) { Indentation. 
> + packet_udp_complete_csum(p); > + dp_packet_ol_set_l4_csum_good(p); > + dp_packet_hwol_reset_tx_l4_csum(p); > + } else if (!(flags & NETDEV_TX_OFFLOAD_SCTP_CKSUM) > + && dp_packet_hwol_l4_is_sctp(p)) { Indentation. > + packet_sctp_complete_csum(p); > + dp_packet_ol_set_l4_csum_good(p); > + dp_packet_hwol_reset_tx_l4_csum(p); > + } > } > diff --git a/lib/dp-packet.h b/lib/dp-packet.h > index f60618716..d550b099c 100644 > --- a/lib/dp-packet.h > +++ b/lib/dp-packet.h > @@ -140,6 +140,8 @@ struct dp_packet { > or UINT16_MAX. */ > uint32_t cutlen; /* length in bytes to cut from the end. */ > ovs_be32 packet_type; /* Packet type as defined in OpenFlow */ > + uint16_t csum_start; /* Position to start checksumming from. */ > + uint16_t csum_offset; /* Offset to place checksum. */ > union { > struct pkt_metadata md; > uint64_t data[DP_PACKET_CONTEXT_SIZE / 8]; > @@ -995,6 +997,13 @@ dp_packet_hwol_is_ipv4(const struct dp_packet *b) > return !!(*dp_packet_ol_flags_ptr(b) & DP_PACKET_OL_TX_IPV4); > } > > +/* Returns 'true' if packet 'p' is marked as IPv6. */ > +static inline bool > +dp_packet_hwol_tx_ipv6(const struct dp_packet *p) > +{ > + return !!(*dp_packet_ol_flags_ptr(p) & DP_PACKET_OL_TX_IPV6); > +} > + > /* Returns 'true' if packet 'b' is marked for TCP checksum offloading. */ > static inline bool > dp_packet_hwol_l4_is_tcp(const struct dp_packet *b) > @@ -1019,18 +1028,26 @@ dp_packet_hwol_l4_is_sctp(struct dp_packet *b) > DP_PACKET_OL_TX_SCTP_CKSUM; > } > > -/* Mark packet 'b' for IPv4 checksum offloading. */ > static inline void > -dp_packet_hwol_set_tx_ipv4(struct dp_packet *b) > +dp_packet_hwol_reset_tx_l4_csum(struct dp_packet *p) > +{ > + *dp_packet_ol_flags_ptr(p) &= ~DP_PACKET_OL_TX_L4_MASK; > +} > + > +/* Mark packet 'p' as IPv4. 
*/ > +static inline void > +dp_packet_hwol_set_tx_ipv4(struct dp_packet *p) > { > - *dp_packet_ol_flags_ptr(b) |= DP_PACKET_OL_TX_IPV4; > + *dp_packet_ol_flags_ptr(p) &= ~DP_PACKET_OL_TX_IPV6; > + *dp_packet_ol_flags_ptr(p) |= DP_PACKET_OL_TX_IPV4; > } > > -/* Mark packet 'b' for IPv6 checksum offloading. */ > +/* Mark packet 'a' as IPv6. */ > static inline void > -dp_packet_hwol_set_tx_ipv6(struct dp_packet *b) > +dp_packet_hwol_set_tx_ipv6(struct dp_packet *a) > { > - *dp_packet_ol_flags_ptr(b) |= DP_PACKET_OL_TX_IPV6; > + *dp_packet_ol_flags_ptr(a) &= ~DP_PACKET_OL_TX_IPV4; > + *dp_packet_ol_flags_ptr(a) |= DP_PACKET_OL_TX_IPV6; > } > > /* Returns 'true' if packet 'p' is marked for IPv4 checksum offloading. */ > @@ -1129,6 +1146,8 @@ dp_packet_ip_set_header_csum(struct dp_packet *p) > ip->ip_csum = csum(ip, sizeof *ip); > } > > +/* Returns 'true' if the packet 'p' has good integrity and the > + * checksum in it is correct. */ Should be in a previous patch? > static inline bool > dp_packet_l4_checksum_good(const struct dp_packet *p) > { > @@ -1143,6 +1162,53 @@ dp_packet_l4_checksum_bad(const struct dp_packet *p) > DP_PACKET_OL_RX_L4_CKSUM_BAD; > } > > +/* Returns 'true' if the packet has good integrity though the > + * checksum in the packet 'p' is not complete. */ > +static inline bool > +dp_packet_ol_l4_csum_partial(const struct dp_packet *p) > +{ > + return (*dp_packet_ol_flags_ptr(p) & DP_PACKET_OL_RX_L4_CKSUM_MASK) == > + DP_PACKET_OL_RX_L4_CKSUM_MASK; > +} > + > +/* Marks packet 'p' with good integrity though the checksum in the > + * packet is not complete. */ > +static inline void > +dp_packet_ol_set_l4_csum_partial(const struct dp_packet *p) s/const// > +{ > + *dp_packet_ol_flags_ptr(p) |= DP_PACKET_OL_RX_L4_CKSUM_MASK; > +} > + > +/* Marks packet 'p' with good L4 checksum. 
*/ > +static inline void > +dp_packet_ol_set_l4_csum_good(const struct dp_packet *p) > +{ > + *dp_packet_ol_flags_ptr(p) &= ~DP_PACKET_OL_RX_L4_CKSUM_BAD; > + *dp_packet_ol_flags_ptr(p) |= DP_PACKET_OL_RX_L4_CKSUM_GOOD; > +} > + > +/* Marks packet 'p' with good L4 checksum as modified. */ > +static inline void > +dp_packet_ol_reset_l4_csum_good(const struct dp_packet *p) > +{ > + if (!dp_packet_ol_l4_csum_partial(p)) { > + *dp_packet_ol_flags_ptr(p) &= ~DP_PACKET_OL_RX_L4_CKSUM_GOOD; > + } > +} > + > +/* Marks packet 'p' with good integrity if the 'start' and 'offset' > + * matches with the 'csum_start' and 'csum_offset' in packet 'p'. > + * The 'start' is the offset from the begin of the packet headers. > + * The 'offset' is the offset from start to place the checksum. */ > +static inline void > +dp_packet_ol_vnet_csum_check(const struct dp_packet *p, uint16_t start, 'vnet' part looks strange here. Unclear what it supposed to mean. > + uint16_t offset) > +{ > + if (p->csum_start == start && p->csum_offset == offset) { > + dp_packet_ol_set_l4_csum_partial(p); > + } > +} > + > static inline void ALWAYS_INLINE > dp_packet_update_rss_hash_ipv4_tcp_udp(struct dp_packet *packet) > { > diff --git a/lib/flow.c b/lib/flow.c > index 6c8bf7fc0..5aaf3b420 100644 > --- a/lib/flow.c > +++ b/lib/flow.c > @@ -1027,6 +1027,13 @@ miniflow_extract(struct dp_packet *packet, struct > miniflow *dst) > } else if (dl_type == htons(ETH_TYPE_IPV6)) { > dp_packet_update_rss_hash_ipv6_tcp_udp(packet); > } > + dp_packet_ol_vnet_csum_check(packet, packet->l4_ofs, > + offsetof(struct tcp_header, > + tcp_csum)); > + if (dp_packet_l4_checksum_good(packet) > + || dp_packet_ol_l4_csum_partial(packet)) { > + dp_packet_hwol_set_csum_tcp(packet); > + } > } > } > } else if (OVS_LIKELY(nw_proto == IPPROTO_UDP)) { > @@ -1042,6 +1049,13 @@ miniflow_extract(struct dp_packet *packet, struct > miniflow *dst) > } else if (dl_type == htons(ETH_TYPE_IPV6)) { > dp_packet_update_rss_hash_ipv6_tcp_udp(packet); > } 
> + dp_packet_ol_vnet_csum_check(packet, packet->l4_ofs, > + offsetof(struct udp_header, > + udp_csum)); > + if (dp_packet_l4_checksum_good(packet) > + || dp_packet_ol_l4_csum_partial(packet)) { > + dp_packet_hwol_set_csum_udp(packet); > + } > } > } else if (OVS_LIKELY(nw_proto == IPPROTO_SCTP)) { > if (OVS_LIKELY(size >= SCTP_HEADER_LEN)) { > @@ -1051,6 +1065,13 @@ miniflow_extract(struct dp_packet *packet, struct > miniflow *dst) > miniflow_push_be16(mf, tp_dst, sctp->sctp_dst); > miniflow_push_be16(mf, ct_tp_src, ct_tp_src); > miniflow_push_be16(mf, ct_tp_dst, ct_tp_dst); > + dp_packet_ol_vnet_csum_check(packet, packet->l4_ofs, > + offsetof(struct sctp_header, > + sctp_csum)); > + if (dp_packet_l4_checksum_good(packet) > + || dp_packet_ol_l4_csum_partial(packet)) { > + dp_packet_hwol_set_csum_sctp(packet); > + } avx512 implementation changes also needed, AFAIU. > } > } else if (OVS_LIKELY(nw_proto == IPPROTO_ICMP)) { > if (OVS_LIKELY(size >= ICMP_HEADER_LEN)) { > @@ -3170,6 +3191,7 @@ flow_compose_l4_csum(struct dp_packet *p, const struct > flow *flow, > tcp->tcp_csum = 0; > tcp->tcp_csum = csum_finish(csum_continue(pseudo_hdr_csum, > tcp, l4_len)); > + dp_packet_ol_set_l4_csum_good(p); > } else if (flow->nw_proto == IPPROTO_UDP) { > struct udp_header *udp = dp_packet_l4(p); > > @@ -3179,6 +3201,7 @@ flow_compose_l4_csum(struct dp_packet *p, const struct > flow *flow, > if (!udp->udp_csum) { > udp->udp_csum = htons(0xffff); > } > + dp_packet_ol_set_l4_csum_good(p); > } else if (flow->nw_proto == IPPROTO_ICMP) { > struct icmp_header *icmp = dp_packet_l4(p); > > diff --git a/lib/netdev-dpdk.c b/lib/netdev-dpdk.c > index 4ccc56b0e..d36d5a75a 100644 > --- a/lib/netdev-dpdk.c > +++ b/lib/netdev-dpdk.c > @@ -146,17 +146,6 @@ typedef uint16_t dpdk_port_t; > > #define IF_NAME_SZ (PATH_MAX > IFNAMSIZ ? PATH_MAX : IFNAMSIZ) > > -/* List of required flags advertised by the hardware that will be used > - * if TSO is enabled. 
Ideally this should include > - * RTE_ETH_TX_OFFLOAD_SCTP_CKSUM. However, very few drivers support that > - * at the moment and SCTP is not a widely used protocol like TCP and UDP, > - * so it's optional. */ > -#define DPDK_TX_TSO_OFFLOAD_FLAGS (RTE_ETH_TX_OFFLOAD_TCP_TSO \ > - | RTE_ETH_TX_OFFLOAD_TCP_CKSUM \ > - | RTE_ETH_TX_OFFLOAD_UDP_CKSUM \ > - | RTE_ETH_TX_OFFLOAD_IPV4_CKSUM) > - > - > static const struct rte_eth_conf port_conf = { > .rxmode = { > .split_hdr_size = 0, > @@ -407,8 +396,10 @@ enum dpdk_hw_ol_features { > NETDEV_RX_HW_CRC_STRIP = 1 << 1, > NETDEV_RX_HW_SCATTER = 1 << 2, > NETDEV_TX_IPV4_CKSUM_OFFLOAD = 1 << 3, > - NETDEV_TX_TSO_OFFLOAD = 1 << 4, > - NETDEV_TX_SCTP_CHECKSUM_OFFLOAD = 1 << 5, > + NETDEV_TX_TCP_CKSUM_OFFLOAD = 1 << 4, > + NETDEV_TX_UDP_CKSUM_OFFLOAD = 1 << 5, > + NETDEV_TX_SCTP_CKSUM_OFFLOAD = 1 << 6, > + NETDEV_TX_TSO_OFFLOAD = 1 << 7, > }; > > /* > @@ -1004,6 +995,35 @@ dpdk_watchdog(void *dummy OVS_UNUSED) > return NULL; > } > > +static void > +netdev_dpdk_update_netdev_flag(struct netdev_dpdk *dev, > + enum dpdk_hw_ol_features hw_ol_features, > + enum netdev_ol_flags flag) > +{ > + struct netdev *netdev = &dev->up; > + > + if (dev->hw_ol_features & hw_ol_features) { > + netdev->ol_flags |= flag; > + } else { > + netdev->ol_flags &= ~flag; > + } > +} > + > +static void > +netdev_dpdk_update_netdev_flags(struct netdev_dpdk *dev) > +{ > + netdev_dpdk_update_netdev_flag(dev, NETDEV_TX_IPV4_CKSUM_OFFLOAD, > + NETDEV_TX_OFFLOAD_IPV4_CKSUM); > + netdev_dpdk_update_netdev_flag(dev, NETDEV_TX_TCP_CKSUM_OFFLOAD, > + NETDEV_TX_OFFLOAD_TCP_CKSUM); > + netdev_dpdk_update_netdev_flag(dev, NETDEV_TX_UDP_CKSUM_OFFLOAD, > + NETDEV_TX_OFFLOAD_UDP_CKSUM); > + netdev_dpdk_update_netdev_flag(dev, NETDEV_TX_SCTP_CKSUM_OFFLOAD, > + NETDEV_TX_OFFLOAD_SCTP_CKSUM); > + netdev_dpdk_update_netdev_flag(dev, NETDEV_TX_TSO_OFFLOAD, > + NETDEV_TX_OFFLOAD_TCP_TSO); > +} > + > static int > dpdk_eth_dev_port_config(struct netdev_dpdk *dev, int n_rxq, int n_txq) 
> { > @@ -1040,11 +1060,20 @@ dpdk_eth_dev_port_config(struct netdev_dpdk *dev, int > n_rxq, int n_txq) > conf.txmode.offloads |= RTE_ETH_TX_OFFLOAD_IPV4_CKSUM; > } > > + if (dev->hw_ol_features & NETDEV_TX_TCP_CKSUM_OFFLOAD) { > + conf.txmode.offloads |= RTE_ETH_TX_OFFLOAD_TCP_CKSUM; > + } > + > + if (dev->hw_ol_features & NETDEV_TX_UDP_CKSUM_OFFLOAD) { > + conf.txmode.offloads |= RTE_ETH_TX_OFFLOAD_UDP_CKSUM; > + } > + > + if (dev->hw_ol_features & NETDEV_TX_SCTP_CKSUM_OFFLOAD) { > + conf.txmode.offloads |= RTE_ETH_TX_OFFLOAD_SCTP_CKSUM; > + } > + > if (dev->hw_ol_features & NETDEV_TX_TSO_OFFLOAD) { > - conf.txmode.offloads |= DPDK_TX_TSO_OFFLOAD_FLAGS; > - if (dev->hw_ol_features & NETDEV_TX_SCTP_CHECKSUM_OFFLOAD) { > - conf.txmode.offloads |= RTE_ETH_TX_OFFLOAD_SCTP_CKSUM; > - } > + conf.txmode.offloads |= RTE_ETH_TX_OFFLOAD_TCP_TSO; > } > > /* Limit configured rss hash functions to only those supported > @@ -1150,7 +1179,6 @@ dpdk_eth_dev_init(struct netdev_dpdk *dev) > struct rte_ether_addr eth_addr; > int diag; > int n_rxq, n_txq; > - uint32_t tx_tso_offload_capa = DPDK_TX_TSO_OFFLOAD_FLAGS; > uint32_t rx_chksm_offload_capa = RTE_ETH_RX_OFFLOAD_UDP_CKSUM | > RTE_ETH_RX_OFFLOAD_TCP_CKSUM | > RTE_ETH_RX_OFFLOAD_IPV4_CKSUM; > @@ -1186,18 +1214,28 @@ dpdk_eth_dev_init(struct netdev_dpdk *dev) > dev->hw_ol_features &= ~NETDEV_TX_IPV4_CKSUM_OFFLOAD; > } > > + if (info.tx_offload_capa & RTE_ETH_TX_OFFLOAD_TCP_CKSUM) { > + dev->hw_ol_features |= NETDEV_TX_TCP_CKSUM_OFFLOAD; > + } else { > + dev->hw_ol_features &= ~NETDEV_TX_TCP_CKSUM_OFFLOAD; > + } > + > + if (info.tx_offload_capa & RTE_ETH_TX_OFFLOAD_UDP_CKSUM) { > + dev->hw_ol_features |= NETDEV_TX_UDP_CKSUM_OFFLOAD; > + } else { > + dev->hw_ol_features &= ~NETDEV_TX_UDP_CKSUM_OFFLOAD; > + } > + > + if (info.tx_offload_capa & RTE_ETH_TX_OFFLOAD_SCTP_CKSUM) { > + dev->hw_ol_features |= NETDEV_TX_SCTP_CKSUM_OFFLOAD; > + } else { > + dev->hw_ol_features &= ~NETDEV_TX_SCTP_CKSUM_OFFLOAD; > + } > + > 
dev->hw_ol_features &= ~NETDEV_TX_TSO_OFFLOAD; > if (userspace_tso_enabled()) { > - if ((info.tx_offload_capa & tx_tso_offload_capa) > - == tx_tso_offload_capa) { > + if (info.tx_offload_capa & RTE_ETH_TX_OFFLOAD_TCP_TSO) { > dev->hw_ol_features |= NETDEV_TX_TSO_OFFLOAD; > - if (info.tx_offload_capa & RTE_ETH_TX_OFFLOAD_SCTP_CKSUM) { > - dev->hw_ol_features |= NETDEV_TX_SCTP_CHECKSUM_OFFLOAD; > - } else { > - VLOG_WARN("%s: Tx SCTP checksum offload is not supported, " > - "SCTP packets sent to this device will be dropped", > - netdev_get_name(&dev->up)); > - } > } else { > VLOG_WARN("%s: Tx TSO offload is not supported.", > netdev_get_name(&dev->up)); > @@ -1759,6 +1797,9 @@ netdev_dpdk_get_config(const struct netdev *netdev, > struct smap *args) > smap_add(args, FIELD, dev->hw_ol_features & FLAG ? "true" : "false"); > HWOL_SMAP_ADD("rx_csum_offload", NETDEV_RX_CHECKSUM_OFFLOAD); > HWOL_SMAP_ADD("tx_ip_csum_offload", NETDEV_TX_IPV4_CKSUM_OFFLOAD); > + HWOL_SMAP_ADD("tx_tcp_csum_offload", NETDEV_TX_TCP_CKSUM_OFFLOAD); > + HWOL_SMAP_ADD("tx_udp_csum_offload", NETDEV_TX_UDP_CKSUM_OFFLOAD); > + HWOL_SMAP_ADD("tx_sctp_csum_offload", NETDEV_TX_SCTP_CKSUM_OFFLOAD); Probably, should not be here. See the comments for the previous patch. 
> HWOL_SMAP_ADD("tx_tso_offload", NETDEV_TX_TSO_OFFLOAD); > #undef HWOL_SMAP_ADD > smap_add(args, "lsc_interrupt_mode", > @@ -2210,6 +2251,7 @@ netdev_dpdk_prep_hwol_packet(struct netdev_dpdk *dev, > struct rte_mbuf *mbuf) > > mbuf->l2_len = (char *) dp_packet_l3(pkt) - (char *) dp_packet_eth(pkt); > mbuf->l3_len = (char *) dp_packet_l4(pkt) - (char *) dp_packet_l3(pkt); > + mbuf->l4_len = 0; > mbuf->outer_l2_len = 0; > mbuf->outer_l3_len = 0; > > @@ -3968,6 +4010,7 @@ new_device(int vid) > ovs_mutex_lock(&dev->mutex); > if (nullable_string_is_equal(ifname, dev->vhost_id)) { > uint32_t qp_num = rte_vhost_get_vring_num(vid) / VIRTIO_QNUM; > + uint64_t features; > > /* Get NUMA information */ > newnode = rte_vhost_get_numa_node(vid); > @@ -3992,6 +4035,36 @@ new_device(int vid) > dev->vhost_reconfigured = true; > } > > + if (rte_vhost_get_negotiated_features(vid, &features)) { > + VLOG_INFO("Error checking guest features for " > + "vHost Device '%s'", dev->vhost_id); > + } else { > + if (features & (1ULL << VIRTIO_NET_F_GUEST_CSUM)) { > + dev->hw_ol_features |= NETDEV_TX_TCP_CKSUM_OFFLOAD; > + dev->hw_ol_features |= NETDEV_TX_UDP_CKSUM_OFFLOAD; > + dev->hw_ol_features |= NETDEV_TX_SCTP_CKSUM_OFFLOAD; > + } > + > + if (userspace_tso_enabled()) { > + if (features & (1ULL << VIRTIO_NET_F_GUEST_TSO4) > + && features & (1ULL << VIRTIO_NET_F_GUEST_TSO6)) { > + > + dev->hw_ol_features |= NETDEV_TX_TSO_OFFLOAD; > + VLOG_DBG("%s: TSO enabled on vhost port", > + netdev_get_name(&dev->up)); > + } else { > + VLOG_WARN("%s: Tx TSO offload is not supported.", > + netdev_get_name(&dev->up)); > + } > + } > + } > + > + /* There is no support in virtio net to offload IPv4 csum, > + * but the vhost library handles IPv4 csum offloading fine. 
*/ > + dev->hw_ol_features |= NETDEV_TX_IPV4_CKSUM_OFFLOAD; > + > + netdev_dpdk_update_netdev_flags(dev); > + > ovsrcu_index_set(&dev->vid, vid); > exists = true; > > @@ -4055,6 +4128,14 @@ destroy_device(int vid) > dev->up.n_rxq * sizeof *dev->vhost_rxq_enabled); > netdev_dpdk_txq_map_clear(dev); > > + /* Clear offload capabilities before next new_device. */ > + dev->hw_ol_features &= ~NETDEV_TX_IPV4_CKSUM_OFFLOAD; > + dev->hw_ol_features &= ~NETDEV_TX_TCP_CKSUM_OFFLOAD; > + dev->hw_ol_features &= ~NETDEV_TX_UDP_CKSUM_OFFLOAD; > + dev->hw_ol_features &= ~NETDEV_TX_SCTP_CKSUM_OFFLOAD; > + dev->hw_ol_features &= ~NETDEV_TX_TSO_OFFLOAD; > + netdev_dpdk_update_netdev_flags(dev); > + > netdev_change_seq_changed(&dev->up); > ovs_mutex_unlock(&dev->mutex); > exists = true; > @@ -4992,22 +5073,7 @@ netdev_dpdk_reconfigure(struct netdev *netdev) > } > > err = dpdk_eth_dev_init(dev); > - > - if (dev->hw_ol_features & NETDEV_TX_IPV4_CKSUM_OFFLOAD) { > - netdev->ol_flags |= NETDEV_TX_OFFLOAD_IPV4_CKSUM; > - } else { > - netdev->ol_flags &= ~NETDEV_TX_OFFLOAD_IPV4_CKSUM; > - } > - > - if (dev->hw_ol_features & NETDEV_TX_TSO_OFFLOAD) { > - netdev->ol_flags |= NETDEV_TX_OFFLOAD_TCP_TSO; > - netdev->ol_flags |= NETDEV_TX_OFFLOAD_TCP_CKSUM; > - netdev->ol_flags |= NETDEV_TX_OFFLOAD_UDP_CKSUM; > - netdev->ol_flags |= NETDEV_TX_OFFLOAD_IPV4_CKSUM; > - if (dev->hw_ol_features & NETDEV_TX_SCTP_CHECKSUM_OFFLOAD) { > - netdev->ol_flags |= NETDEV_TX_OFFLOAD_SCTP_CKSUM; > - } > - } > + netdev_dpdk_update_netdev_flags(dev); > > /* If both requested and actual hwaddr were previously > * unset (initialized to 0), then first device init above > @@ -5049,11 +5115,6 @@ dpdk_vhost_reconfigure_helper(struct netdev_dpdk *dev) > dev->tx_q[0].map = 0; > } > > - if (userspace_tso_enabled()) { > - dev->hw_ol_features |= NETDEV_TX_TSO_OFFLOAD; > - VLOG_DBG("%s: TSO enabled on vhost port", netdev_get_name(&dev->up)); > - } > - > netdev_dpdk_remap_txqs(dev); > > if (netdev_dpdk_get_vid(dev) >= 0) { > @@ 
-5074,6 +5135,8 @@ dpdk_vhost_reconfigure_helper(struct netdev_dpdk *dev) > } > } > > + netdev_dpdk_update_netdev_flags(dev); > + > return 0; > } > > @@ -5095,8 +5158,6 @@ netdev_dpdk_vhost_client_reconfigure(struct netdev > *netdev) > { > struct netdev_dpdk *dev = netdev_dpdk_cast(netdev); > int err; > - uint64_t vhost_flags = 0; > - uint64_t vhost_unsup_flags; > > ovs_mutex_lock(&dev->mutex); > > @@ -5106,6 +5167,9 @@ netdev_dpdk_vhost_client_reconfigure(struct netdev > *netdev) > * 2. A path has been specified. > */ > if (!(dev->vhost_driver_flags & RTE_VHOST_USER_CLIENT) && dev->vhost_id) > { > + uint64_t virtio_unsup_features = 0; > + uint64_t vhost_flags = 0; > + > /* Register client-mode device. */ > vhost_flags |= RTE_VHOST_USER_CLIENT; > > @@ -5149,22 +5213,22 @@ netdev_dpdk_vhost_client_reconfigure(struct netdev > *netdev) > } > > if (userspace_tso_enabled()) { > - netdev->ol_flags |= NETDEV_TX_OFFLOAD_TCP_TSO; > - netdev->ol_flags |= NETDEV_TX_OFFLOAD_TCP_CKSUM; > - netdev->ol_flags |= NETDEV_TX_OFFLOAD_UDP_CKSUM; > - netdev->ol_flags |= NETDEV_TX_OFFLOAD_SCTP_CKSUM; > - netdev->ol_flags |= NETDEV_TX_OFFLOAD_IPV4_CKSUM; > - vhost_unsup_flags = 1ULL << VIRTIO_NET_F_HOST_ECN > - | 1ULL << VIRTIO_NET_F_HOST_UFO; > + virtio_unsup_features = 1ULL << VIRTIO_NET_F_HOST_ECN > + | 1ULL << VIRTIO_NET_F_HOST_UFO; > + VLOG_DBG("%s: TSO enabled on vhost port", > + netdev_get_name(&dev->up)); > } else { > - /* This disables checksum offloading and all the features > - * that depends on it (TSO, UFO, ECN) according to virtio > - * specification. */ > - vhost_unsup_flags = 1ULL << VIRTIO_NET_F_CSUM; > + /* Advertise checksum offloading to the guest, but explicitly > + * disable TSO and friends. > + * NOTE: we can't disable HOST_ECN which may have been wrongly > + * negotiated by a running guest. 
*/ > + virtio_unsup_features = 1ULL << VIRTIO_NET_F_HOST_TSO4 > + | 1ULL << VIRTIO_NET_F_HOST_TSO6 > + | 1ULL << VIRTIO_NET_F_HOST_UFO; > } > > err = rte_vhost_driver_disable_features(dev->vhost_id, > - vhost_unsup_flags); > + virtio_unsup_features); > if (err) { > VLOG_ERR("rte_vhost_driver_disable_features failed for " > "vhost user client port: %s\n", dev->up.name); > diff --git a/lib/netdev-linux.c b/lib/netdev-linux.c > index 59e8dc0ae..4d8ebdae5 100644 > --- a/lib/netdev-linux.c > +++ b/lib/netdev-linux.c > @@ -938,14 +938,6 @@ netdev_linux_common_construct(struct netdev *netdev_) > netnsid_unset(&netdev->netnsid); > ovs_mutex_init(&netdev->mutex); > > - if (userspace_tso_enabled()) { > - netdev_->ol_flags |= NETDEV_TX_OFFLOAD_TCP_TSO; > - netdev_->ol_flags |= NETDEV_TX_OFFLOAD_TCP_CKSUM; > - netdev_->ol_flags |= NETDEV_TX_OFFLOAD_UDP_CKSUM; > - netdev_->ol_flags |= NETDEV_TX_OFFLOAD_SCTP_CKSUM; > - netdev_->ol_flags |= NETDEV_TX_OFFLOAD_IPV4_CKSUM; > - } > - > return 0; > } > > @@ -959,6 +951,16 @@ netdev_linux_construct(struct netdev *netdev_) > return error; > } > > + /* The socket interface doesn't offer the option to enable only > + * csum offloading without TSO. 
*/ > + if (userspace_tso_enabled()) { > + netdev_->ol_flags |= NETDEV_TX_OFFLOAD_TCP_TSO; > + netdev_->ol_flags |= NETDEV_TX_OFFLOAD_TCP_CKSUM; > + netdev_->ol_flags |= NETDEV_TX_OFFLOAD_UDP_CKSUM; > + netdev_->ol_flags |= NETDEV_TX_OFFLOAD_SCTP_CKSUM; > + netdev_->ol_flags |= NETDEV_TX_OFFLOAD_IPV4_CKSUM; > + } > + > error = get_flags(&netdev->up, &netdev->ifi_flags); > if (error == ENODEV) { > if (netdev->up.netdev_class != &netdev_internal_class) { > @@ -987,6 +989,7 @@ netdev_linux_construct_tap(struct netdev *netdev_) > struct netdev_linux *netdev = netdev_linux_cast(netdev_); > static const char tap_dev[] = "/dev/net/tun"; > const char *name = netdev_->name; > + unsigned long oflags; > struct ifreq ifr; > > int error = netdev_linux_common_construct(netdev_); > @@ -1004,10 +1007,7 @@ netdev_linux_construct_tap(struct netdev *netdev_) > > /* Create tap device. */ > get_flags(&netdev->up, &netdev->ifi_flags); > - ifr.ifr_flags = IFF_TAP | IFF_NO_PI; > - if (userspace_tso_enabled()) { > - ifr.ifr_flags |= IFF_VNET_HDR; > - } > + ifr.ifr_flags = IFF_TAP | IFF_NO_PI | IFF_VNET_HDR; > > ovs_strzcpy(ifr.ifr_name, name, sizeof ifr.ifr_name); > if (ioctl(netdev->tap_fd, TUNSETIFF, &ifr) == -1) { > @@ -1030,21 +1030,22 @@ netdev_linux_construct_tap(struct netdev *netdev_) > goto error_close; > } > > + oflags = TUN_F_CSUM; > if (userspace_tso_enabled()) { > - /* Old kernels don't support TUNSETOFFLOAD. If TUNSETOFFLOAD is > - * available, it will return EINVAL when a flag is unknown. > - * Therefore, try enabling offload with no flags to check > - * if TUNSETOFFLOAD support is available or not. 
*/ > - if (ioctl(netdev->tap_fd, TUNSETOFFLOAD, 0) == 0 || errno != EINVAL) > { > - unsigned long oflags = TUN_F_CSUM | TUN_F_TSO4 | TUN_F_TSO6; > - > - if (ioctl(netdev->tap_fd, TUNSETOFFLOAD, oflags) == -1) { > - VLOG_WARN("%s: enabling tap offloading failed: %s", name, > - ovs_strerror(errno)); > - error = errno; > - goto error_close; > - } > - } > + oflags |= (TUN_F_TSO4 | TUN_F_TSO6); > + } > + > + if (ioctl(netdev->tap_fd, TUNSETOFFLOAD, oflags) == 0) { > + netdev_->ol_flags |= (NETDEV_TX_OFFLOAD_IPV4_CKSUM > + | NETDEV_TX_OFFLOAD_TCP_CKSUM > + | NETDEV_TX_OFFLOAD_UDP_CKSUM); > + > + if (userspace_tso_enabled()) { > + netdev_->ol_flags |= NETDEV_TX_OFFLOAD_TCP_TSO; > + } > + } else { > + VLOG_WARN("%s: Disabling hardware offloading: %s", name, > + ovs_strerror(errno)); > } > > netdev->present = true; > @@ -1344,18 +1345,22 @@ netdev_linux_batch_rxq_recv_sock(struct > netdev_rxq_linux *rx, int mtu, > pkt = buffers[i]; > } > > - if (virtio_net_hdr_size && netdev_linux_parse_vnet_hdr(pkt)) { > - struct netdev *netdev_ = netdev_rxq_get_netdev(&rx->up); > - struct netdev_linux *netdev = netdev_linux_cast(netdev_); > + if (virtio_net_hdr_size) { > + int ret = netdev_linux_parse_vnet_hdr(pkt); > + if (OVS_UNLIKELY(ret)) { > + struct netdev *netdev_ = netdev_rxq_get_netdev(&rx->up); > + struct netdev_linux *netdev = netdev_linux_cast(netdev_); > > - /* Unexpected error situation: the virtio header is not present > - * or corrupted. Drop the packet but continue in case next ones > - * are correct. */ > - dp_packet_delete(pkt); > - netdev->rx_dropped += 1; > - VLOG_WARN_RL(&rl, "%s: Dropped packet: Invalid virtio net > header", > - netdev_get_name(netdev_)); > - continue; > + /* Unexpected error situation: the virtio header is not > + * present or corrupted or contains unsupported features. > + * Drop the packet but continue in case next ones are > + * correct. 
*/ > + dp_packet_delete(pkt); > + netdev->rx_dropped += 1; > + VLOG_WARN_RL(&rl, "%s: Dropped packet: %s", > + netdev_get_name(netdev_), ovs_strerror(ret)); > + continue; > + } > } > > for (cmsg = CMSG_FIRSTHDR(&mmsgs[i].msg_hdr); cmsg; > @@ -1403,7 +1408,6 @@ static int > netdev_linux_batch_rxq_recv_tap(struct netdev_rxq_linux *rx, int mtu, > struct dp_packet_batch *batch) > { > - int virtio_net_hdr_size; > ssize_t retval; > size_t std_len; > int iovlen; > @@ -1413,16 +1417,14 @@ netdev_linux_batch_rxq_recv_tap(struct > netdev_rxq_linux *rx, int mtu, > /* Use the buffer from the allocated packet below to receive MTU > * sized packets and an aux_buf for extra TSO data. */ > iovlen = IOV_TSO_SIZE; > - virtio_net_hdr_size = sizeof(struct virtio_net_hdr); > } else { > /* Use only the buffer from the allocated packet. */ > iovlen = IOV_STD_SIZE; > - virtio_net_hdr_size = 0; > } > > /* The length here needs to be accounted in the same way when the > * aux_buf is allocated so that it can be prepended to TSO buffer. */ > - std_len = virtio_net_hdr_size + VLAN_ETH_HEADER_LEN + mtu; > + std_len = sizeof(struct virtio_net_hdr) + VLAN_ETH_HEADER_LEN + mtu; > for (i = 0; i < NETDEV_MAX_BURST; i++) { > struct dp_packet *buffer; > struct dp_packet *pkt; > @@ -1462,7 +1464,7 @@ netdev_linux_batch_rxq_recv_tap(struct netdev_rxq_linux > *rx, int mtu, > pkt = buffer; > } > > - if (virtio_net_hdr_size && netdev_linux_parse_vnet_hdr(pkt)) { > + if (netdev_linux_parse_vnet_hdr(pkt)) { > struct netdev *netdev_ = netdev_rxq_get_netdev(&rx->up); > struct netdev_linux *netdev = netdev_linux_cast(netdev_); > > @@ -1611,7 +1613,7 @@ netdev_linux_sock_batch_send(int sock, int ifindex, > bool tso, int mtu, > * on other interface types because we attach a socket filter to the rx > * socket. 
*/ > static int > -netdev_linux_tap_batch_send(struct netdev *netdev_, bool tso, int mtu, > +netdev_linux_tap_batch_send(struct netdev *netdev_, int mtu, > struct dp_packet_batch *batch) > { > struct netdev_linux *netdev = netdev_linux_cast(netdev_); > @@ -1632,9 +1634,7 @@ netdev_linux_tap_batch_send(struct netdev *netdev_, > bool tso, int mtu, > ssize_t retval; > int error; > > - if (tso) { > - netdev_linux_prepend_vnet_hdr(packet, mtu); > - } > + netdev_linux_prepend_vnet_hdr(packet, mtu); > > size = dp_packet_size(packet); > do { > @@ -1765,7 +1765,7 @@ netdev_linux_send(struct netdev *netdev_, int qid > OVS_UNUSED, > > error = netdev_linux_sock_batch_send(sock, ifindex, tso, mtu, batch); > } else { > - error = netdev_linux_tap_batch_send(netdev_, tso, mtu, batch); > + error = netdev_linux_tap_batch_send(netdev_, mtu, batch); > } > if (error) { > if (error == ENOBUFS) { > @@ -6819,53 +6819,73 @@ netdev_linux_parse_l2(struct dp_packet *b, uint16_t > *l4proto) > return 0; > } > > +/* Initializes packet 'b' with features enabled in the prepended > + * struct virtio_net_hdr. Returns 0 if successful, otherwise a > + * positive errno value. 
*/ > static int > netdev_linux_parse_vnet_hdr(struct dp_packet *b) > { > struct virtio_net_hdr *vnet = dp_packet_pull(b, sizeof *vnet); > - uint16_t l4proto = 0; > > if (OVS_UNLIKELY(!vnet)) { > - return -EINVAL; > + return EINVAL; > } > > if (vnet->flags == 0 && vnet->gso_type == VIRTIO_NET_HDR_GSO_NONE) { > return 0; > } > > - if (netdev_linux_parse_l2(b, &l4proto)) { > - return -EINVAL; > - } > - > if (vnet->flags == VIRTIO_NET_HDR_F_NEEDS_CSUM) { > - if (l4proto == IPPROTO_TCP) { > - dp_packet_hwol_set_csum_tcp(b); > - } else if (l4proto == IPPROTO_UDP) { > - dp_packet_hwol_set_csum_udp(b); > - } else if (l4proto == IPPROTO_SCTP) { > - dp_packet_hwol_set_csum_sctp(b); > - } > - } > + uint16_t l4proto = 0; > > - if (l4proto && vnet->gso_type != VIRTIO_NET_HDR_GSO_NONE) { > - uint8_t allowed_mask = VIRTIO_NET_HDR_GSO_TCPV4 > - | VIRTIO_NET_HDR_GSO_TCPV6 > - | VIRTIO_NET_HDR_GSO_UDP; > - uint8_t type = vnet->gso_type & allowed_mask; > - > - if (type == VIRTIO_NET_HDR_GSO_TCPV4 > - || type == VIRTIO_NET_HDR_GSO_TCPV6) { > - dp_packet_hwol_set_tcp_seg(b); > + if (netdev_linux_parse_l2(b, &l4proto)) { > + return EINVAL; > } > - } > > - return 0; > + if (l4proto == IPPROTO_UDP) { > + dp_packet_hwol_set_csum_udp(b); > + } > + /* The packet has offloaded checksum. However, there is no > + * additional information like the protocol used, so it would > + * require to parse the packet here. The checksum starting point > + * and offset are going to be verified when the packet headers > + * are parsed during miniflow extraction. */> + b->csum_start > = (OVS_FORCE uint16_t) vnet->csum_start; > + b->csum_offset = (OVS_FORCE uint16_t) vnet->csum_offset; > + } else { > + b->csum_start = 0; > + b->csum_offset = 0; > + } > + > + int ret = 0; > + switch (vnet->gso_type) { > + case VIRTIO_NET_HDR_GSO_TCPV4: > + case VIRTIO_NET_HDR_GSO_TCPV6: > + /* FIXME: The packet has offloaded TCP segmentation. The gso_size > + * is given and needs to be respected. 
*/ > + dp_packet_hwol_set_tcp_seg(b); > + break; An empty line should separate cases. > + case VIRTIO_NET_HDR_GSO_UDP: > + /* UFO is not supported. */ > + VLOG_WARN_RL(&rl, "Received an unsupported packet with UFO > enabled."); > + ret = ENOTSUP; > + break; > + case VIRTIO_NET_HDR_GSO_NONE: > + break; > + default: > + ret = ENOTSUP; > + VLOG_WARN_RL(&rl, "Received an unsupported packet with GSO type: > 0x%x", > + vnet->gso_type); > + } > + > + return ret; > } > > static void > netdev_linux_prepend_vnet_hdr(struct dp_packet *b, int mtu) > { > - struct virtio_net_hdr *vnet = dp_packet_push_zeros(b, sizeof *vnet); > + struct virtio_net_hdr v; > + struct virtio_net_hdr *vnet = &v; > > if (dp_packet_hwol_is_tso(b)) { > uint16_t hdr_len = ((char *)dp_packet_l4(b) - (char > *)dp_packet_eth(b)) > @@ -6875,30 +6895,92 @@ netdev_linux_prepend_vnet_hdr(struct dp_packet *b, > int mtu) > vnet->gso_size = (OVS_FORCE __virtio16)(mtu - hdr_len); > if (dp_packet_hwol_is_ipv4(b)) { > vnet->gso_type = VIRTIO_NET_HDR_GSO_TCPV4; > - } else { > + } else if (dp_packet_hwol_tx_ipv6(b)) { > vnet->gso_type = VIRTIO_NET_HDR_GSO_TCPV6; > } > > } else { > - vnet->flags = VIRTIO_NET_HDR_GSO_NONE; > - } > - > - if (dp_packet_hwol_l4_mask(b)) { > - vnet->flags = VIRTIO_NET_HDR_F_NEEDS_CSUM; > - vnet->csum_start = (OVS_FORCE __virtio16)((char *)dp_packet_l4(b) > - - (char > *)dp_packet_eth(b)); > - > + vnet->hdr_len = 0; > + vnet->gso_size = 0; > + vnet->gso_type = VIRTIO_NET_HDR_GSO_NONE; > + } > + > + if (dp_packet_l4_checksum_good(b)) { > + /* The packet has good checksum in the packet. 'in the header' ? > + * No need to validate again. */ > + vnet->csum_start = vnet->csum_offset = (OVS_FORCE __virtio16) 0; > + vnet->flags = VIRTIO_NET_HDR_F_DATA_VALID; > + } else if (dp_packet_hwol_tx_l4_checksum(b)) { > + /* The csum calculation is offloaded. 
*/ > if (dp_packet_hwol_l4_is_tcp(b)) { > + /* Virtual I/O Device (VIRTIO) Version 1.1 > + * 5.1.6.2 Packet Transmission > + If the driver negotiated VIRTIO_NET_F_CSUM, it can skip > + checksumming the packet: > + - flags has the VIRTIO_NET_HDR_F_NEEDS_CSUM set, > + - csum_start is set to the offset within the packet > + to begin checksumming, and > + - csum_offset indicates how many bytes after the > + csum_start the new (16 bit ones complement) checksum > + is placed by the device. > + The TCP checksum field in the packet is set to the sum of > + the TCP pseudo header, so that replacing it by the ones > + complement checksum of the TCP header and body will give > + the correct result. */ Comment style is strange. > + > + struct tcp_header *tcp_hdr = dp_packet_l4(b); > + ovs_be16 csum = 0; > + if (dp_packet_hwol_is_ipv4(b)) { > + const struct ip_header *ip_hdr = dp_packet_l3(b); > + csum = ~csum_finish(packet_csum_pseudoheader(ip_hdr)); > + } else if (dp_packet_hwol_tx_ipv6(b)) { > + const struct ovs_16aligned_ip6_hdr *ip6_hdr = > dp_packet_l3(b); > + csum = ~csum_finish(packet_csum_pseudoheader6(ip6_hdr)); > + } > + > + tcp_hdr->tcp_csum = csum; > + vnet->flags = VIRTIO_NET_HDR_F_NEEDS_CSUM; > + vnet->csum_start = (OVS_FORCE __virtio16) b->l4_ofs; > vnet->csum_offset = (OVS_FORCE __virtio16) __builtin_offsetof( > struct tcp_header, tcp_csum); > } else if (dp_packet_hwol_l4_is_udp(b)) { > + struct udp_header *udp_hdr = dp_packet_l4(b); > + ovs_be16 csum = 0; > + > + if (dp_packet_hwol_is_ipv4(b)) { > + const struct ip_header *ip_hdr = dp_packet_l3(b); > + csum = ~csum_finish(packet_csum_pseudoheader(ip_hdr)); > + } else if (dp_packet_hwol_tx_ipv6(b)) { > + const struct ovs_16aligned_ip6_hdr *ip6_hdr = > dp_packet_l3(b); > + csum = ~csum_finish(packet_csum_pseudoheader6(ip6_hdr)); > + } > + > + udp_hdr->udp_csum = csum; > + vnet->flags = VIRTIO_NET_HDR_F_NEEDS_CSUM; > + vnet->csum_start = (OVS_FORCE __virtio16) b->l4_ofs; > vnet->csum_offset = (OVS_FORCE 
__virtio16) __builtin_offsetof( I wonder why we're using __builtin_offsetof() instead of just offsetof(). Not an issue of this patch though. > struct udp_header, udp_csum); > } else if (dp_packet_hwol_l4_is_sctp(b)) { > - vnet->csum_offset = (OVS_FORCE __virtio16) __builtin_offsetof( > - struct sctp_header, sctp_csum); > + /* The Linux kernel networking stack only supports csum_start > + * and csum_offset when SCTP GSO is enabled. See kernel's > + * skb_csum_hwoffload_help(). Currently there is no SCTP > + * segmentation offload support in OVS. */ > + vnet->csum_start = vnet->csum_offset = (OVS_FORCE __virtio16) 0; > + vnet->flags = 0; > } else { > - VLOG_WARN_RL(&rl, "Unsupported L4 protocol"); > + /* This should only happen when DP_PACKET_OL_TX_L4_MASK includes > + * a new flag that is not covered in above checks. */ > + VLOG_WARN_RL(&rl, "Unsupported L4 checksum offload. " > + "Flags: %"PRIu64, > + (uint64_t)*dp_packet_ol_flags_ptr(b)); > + vnet->csum_start = vnet->csum_offset = (OVS_FORCE __virtio16) 0; > + vnet->flags = 0; > } > + } else { > + /* Packet L4 csum is unknown. 
*/ > + vnet->csum_start = vnet->csum_offset = (OVS_FORCE __virtio16) 0; > + vnet->flags = 0; > } > + > + dp_packet_push(b, vnet, sizeof *vnet); > } > diff --git a/lib/netdev-native-tnl.c b/lib/netdev-native-tnl.c > index 754e2d78d..dc054336a 100644 > --- a/lib/netdev-native-tnl.c > +++ b/lib/netdev-native-tnl.c > @@ -224,28 +224,6 @@ udp_extract_tnl_md(struct dp_packet *packet, struct > flow_tnl *tnl, > return udp + 1; > } > > -static void > -netdev_tnl_calc_udp_csum(struct udp_header *udp, struct dp_packet *packet, > - int ip_tot_size) > -{ > - uint32_t csum; > - > - if (netdev_tnl_is_header_ipv6(dp_packet_data(packet))) { > - csum = packet_csum_pseudoheader6(netdev_tnl_ipv6_hdr( > - dp_packet_data(packet))); > - } else { > - csum = packet_csum_pseudoheader(netdev_tnl_ip_hdr( > - dp_packet_data(packet))); > - } > - > - csum = csum_continue(csum, udp, ip_tot_size); > - udp->udp_csum = csum_finish(csum); > - > - if (!udp->udp_csum) { > - udp->udp_csum = htons(0xffff); > - } > -} > - > void > netdev_tnl_push_udp_header(const struct netdev *netdev OVS_UNUSED, > struct dp_packet *packet, > @@ -260,9 +238,9 @@ netdev_tnl_push_udp_header(const struct netdev *netdev > OVS_UNUSED, > udp->udp_src = netdev_tnl_get_src_port(packet); > udp->udp_len = htons(ip_tot_size); > > - if (udp->udp_csum) { > - netdev_tnl_calc_udp_csum(udp, packet, ip_tot_size); > - } > + /* Postpone checksum to the egress netdev. */ > + dp_packet_hwol_set_csum_udp(packet); > + dp_packet_ol_reset_l4_csum_good(packet); > } > > static void * > @@ -806,7 +784,9 @@ netdev_gtpu_push_header(const struct netdev *netdev, > data->header_len, &ip_tot_size); > udp->udp_src = netdev_tnl_get_src_port(packet); > udp->udp_len = htons(ip_tot_size); > - netdev_tnl_calc_udp_csum(udp, packet, ip_tot_size); > + /* Postpone checksum to the egress netdev. 
*/ > + dp_packet_hwol_set_csum_udp(packet); > + dp_packet_ol_reset_l4_csum_good(packet); > > gtpuh = ALIGNED_CAST(struct gtpuhdr *, udp + 1); > > diff --git a/lib/netdev.c b/lib/netdev.c > index 6d3f678f0..12e1cb948 100644 > --- a/lib/netdev.c > +++ b/lib/netdev.c > @@ -798,8 +798,6 @@ static bool > netdev_send_prepare_packet(const uint64_t netdev_flags, > struct dp_packet *packet, char **errormsg) > { > - uint64_t l4_mask; > - > if (dp_packet_hwol_is_tso(packet) > && !(netdev_flags & NETDEV_TX_OFFLOAD_TCP_TSO)) { > /* Fall back to GSO in software. */ > @@ -812,36 +810,16 @@ netdev_send_prepare_packet(const uint64_t netdev_flags, > * netdev to decide what would be the best to do. > * Provide a software fallback in case the device doesn't support IP csum > * offloading. Note: Encapsulated packet must have the inner IP header > + * csum already calculated. > + * Packet with L4 csum offloading enabled was received with verified > csum. > + * Leave the L4 csum offloading enabled even with good checksum for the > + * netdev to decide what would be the best to do. > + * Netdev that requires pseudo header csum needs to calculate that. > + * Provide a software fallback in case the netdev doesn't support L4 csum > + * offloading. Note: Encapsulated packet must have the inner L4 header > * csum already calculated. */ > dp_packet_ol_send_prepare(packet, netdev_flags); > > - l4_mask = dp_packet_hwol_l4_mask(packet); > - if (l4_mask) { > - if (dp_packet_hwol_l4_is_tcp(packet)) { > - if (!(netdev_flags & NETDEV_TX_OFFLOAD_TCP_CKSUM)) { > - /* Fall back to TCP csum in software. */ > - VLOG_ERR_BUF(errormsg, "No TCP checksum support"); > - return false; > - } > - } else if (dp_packet_hwol_l4_is_udp(packet)) { > - if (!(netdev_flags & NETDEV_TX_OFFLOAD_UDP_CKSUM)) { > - /* Fall back to UDP csum in software. 
*/ > - VLOG_ERR_BUF(errormsg, "No UDP checksum support"); > - return false; > - } > - } else if (dp_packet_hwol_l4_is_sctp(packet)) { > - if (!(netdev_flags & NETDEV_TX_OFFLOAD_SCTP_CKSUM)) { > - /* Fall back to SCTP csum in software. */ > - VLOG_ERR_BUF(errormsg, "No SCTP checksum support"); > - return false; > - } > - } else { > - VLOG_ERR_BUF(errormsg, "No L4 checksum support: mask: %"PRIu64, > - l4_mask); > - return false; > - } > - } > - > return true; > } > > @@ -974,20 +952,16 @@ netdev_push_header(const struct netdev *netdev, > size_t i, size = dp_packet_batch_size(batch); > > DP_PACKET_BATCH_REFILL_FOR_EACH (i, size, packet, batch) { > - if (OVS_UNLIKELY(dp_packet_hwol_is_tso(packet) > - || dp_packet_hwol_l4_mask(packet))) { > + if (OVS_UNLIKELY(dp_packet_hwol_is_tso(packet))) { > COVERAGE_INC(netdev_push_header_drops); > dp_packet_delete(packet); > - VLOG_WARN_RL(&rl, "%s: Tunneling packets with HW offload flags > is " > + VLOG_WARN_RL(&rl, "%s: Tunneling packets with TSO offloading is " TSO already contains the word 'offloading'. > "not supported: packet dropped", > netdev_get_name(netdev)); > } else { > /* The packet is going to be encapsulated and there is > * no support yet for inner network header csum offloading. 
*/ > - if (dp_packet_hwol_tx_ip_csum(packet) > - && !dp_packet_ip_checksum_good(packet)) { > - dp_packet_ip_set_header_csum(packet); > - } > + dp_packet_ol_send_prepare(packet, 0); > > netdev->netdev_class->push_header(netdev, packet, data); > > diff --git a/lib/packets.c b/lib/packets.c > index a1d668190..8c69e6e3e 100644 > --- a/lib/packets.c > +++ b/lib/packets.c > @@ -1131,16 +1131,22 @@ packet_set_ipv4_addr(struct dp_packet *packet, > pkt_metadata_init_conn(&packet->md); > > if (nh->ip_proto == IPPROTO_TCP && l4_size >= TCP_HEADER_LEN) { > - struct tcp_header *th = dp_packet_l4(packet); > - > - th->tcp_csum = recalc_csum32(th->tcp_csum, old_addr, new_addr); > + if (dp_packet_hwol_l4_is_tcp(packet)) { > + dp_packet_ol_reset_l4_csum_good(packet); > + } else { > + struct tcp_header *th = dp_packet_l4(packet); > + th->tcp_csum = recalc_csum32(th->tcp_csum, old_addr, new_addr); > + } > } else if (nh->ip_proto == IPPROTO_UDP && l4_size >= UDP_HEADER_LEN ) { > - struct udp_header *uh = dp_packet_l4(packet); > - > - if (uh->udp_csum) { > - uh->udp_csum = recalc_csum32(uh->udp_csum, old_addr, new_addr); > - if (!uh->udp_csum) { > - uh->udp_csum = htons(0xffff); > + if (dp_packet_hwol_l4_is_udp(packet)) { > + dp_packet_ol_reset_l4_csum_good(packet); > + } else { > + struct udp_header *uh = dp_packet_l4(packet); > + if (uh->udp_csum) { > + uh->udp_csum = recalc_csum32(uh->udp_csum, old_addr, > new_addr); > + if (!uh->udp_csum) { > + uh->udp_csum = htons(0xffff); > + } > } > } > } > @@ -1246,16 +1252,24 @@ packet_update_csum128(struct dp_packet *packet, > uint8_t proto, > size_t l4_size = dp_packet_l4_size(packet); > > if (proto == IPPROTO_TCP && l4_size >= TCP_HEADER_LEN) { > - struct tcp_header *th = dp_packet_l4(packet); > + if (dp_packet_hwol_l4_is_tcp(packet)) { > + dp_packet_ol_reset_l4_csum_good(packet); > + } else { > + struct tcp_header *th = dp_packet_l4(packet); > > - th->tcp_csum = recalc_csum128(th->tcp_csum, addr, new_addr); > + th->tcp_csum = 
recalc_csum128(th->tcp_csum, addr, new_addr); > + } > } else if (proto == IPPROTO_UDP && l4_size >= UDP_HEADER_LEN) { > - struct udp_header *uh = dp_packet_l4(packet); > + if (dp_packet_hwol_l4_is_udp(packet)) { > + dp_packet_ol_reset_l4_csum_good(packet); > + } else { > + struct udp_header *uh = dp_packet_l4(packet); > > - if (uh->udp_csum) { > - uh->udp_csum = recalc_csum128(uh->udp_csum, addr, new_addr); > - if (!uh->udp_csum) { > - uh->udp_csum = htons(0xffff); > + if (uh->udp_csum) { > + uh->udp_csum = recalc_csum128(uh->udp_csum, addr, new_addr); > + if (!uh->udp_csum) { > + uh->udp_csum = htons(0xffff); > + } > } > } > } else if (proto == IPPROTO_ICMPV6 && > @@ -1375,7 +1389,9 @@ static void > packet_set_port(ovs_be16 *port, ovs_be16 new_port, ovs_be16 *csum) > { > if (*port != new_port) { > - *csum = recalc_csum16(*csum, *port, new_port); > + if (csum) { > + *csum = recalc_csum16(*csum, *port, new_port); > + } > *port = new_port; > } > } > @@ -1387,9 +1403,16 @@ void > packet_set_tcp_port(struct dp_packet *packet, ovs_be16 src, ovs_be16 dst) > { > struct tcp_header *th = dp_packet_l4(packet); > + ovs_be16 *csum = NULL; > + > + if (dp_packet_hwol_l4_is_tcp(packet)) { > + dp_packet_ol_reset_l4_csum_good(packet); > + } else { > + csum = &th->tcp_csum; > + } > > - packet_set_port(&th->tcp_src, src, &th->tcp_csum); > - packet_set_port(&th->tcp_dst, dst, &th->tcp_csum); > + packet_set_port(&th->tcp_src, src, csum); > + packet_set_port(&th->tcp_dst, dst, csum); > pkt_metadata_init_conn(&packet->md); > } > > @@ -1401,17 +1424,21 @@ packet_set_udp_port(struct dp_packet *packet, > ovs_be16 src, ovs_be16 dst) > { > struct udp_header *uh = dp_packet_l4(packet); > > - if (uh->udp_csum) { > - packet_set_port(&uh->udp_src, src, &uh->udp_csum); > - packet_set_port(&uh->udp_dst, dst, &uh->udp_csum); > + if (dp_packet_hwol_l4_is_udp(packet)) { > + dp_packet_ol_reset_l4_csum_good(packet); > + packet_set_port(&uh->udp_src, src, NULL); > + packet_set_port(&uh->udp_dst, dst, 
NULL); > + } else { > + ovs_be16 *csum = uh->udp_csum ? &uh->udp_csum : NULL; > + > + packet_set_port(&uh->udp_src, src, csum); > + packet_set_port(&uh->udp_dst, dst, csum); > > - if (!uh->udp_csum) { > + if (csum && !uh->udp_csum) { > uh->udp_csum = htons(0xffff); > } > - } else { > - uh->udp_src = src; > - uh->udp_dst = dst; > } > + > pkt_metadata_init_conn(&packet->md); > } > > @@ -1422,18 +1449,27 @@ void > packet_set_sctp_port(struct dp_packet *packet, ovs_be16 src, ovs_be16 dst) > { > struct sctp_header *sh = dp_packet_l4(packet); > - ovs_be32 old_csum, old_correct_csum, new_csum; > - uint16_t tp_len = dp_packet_l4_size(packet); > > - old_csum = get_16aligned_be32(&sh->sctp_csum); > - put_16aligned_be32(&sh->sctp_csum, 0); > - old_correct_csum = crc32c((void *)sh, tp_len); > + if (dp_packet_hwol_l4_is_sctp(packet)) { > + dp_packet_ol_reset_l4_csum_good(packet); > + sh->sctp_src = src; > + sh->sctp_dst = dst; > + } else { > + ovs_be32 old_csum, old_correct_csum, new_csum; > + uint16_t tp_len = dp_packet_l4_size(packet); > > - sh->sctp_src = src; > - sh->sctp_dst = dst; > + old_csum = get_16aligned_be32(&sh->sctp_csum); > + put_16aligned_be32(&sh->sctp_csum, 0); > + old_correct_csum = crc32c((void *) sh, tp_len); > + > + sh->sctp_src = src; > + sh->sctp_dst = dst; > + > + new_csum = crc32c((void *) sh, tp_len); > + put_16aligned_be32(&sh->sctp_csum, old_csum ^ old_correct_csum > + ^ new_csum); > + } > > - new_csum = crc32c((void *)sh, tp_len); > - put_16aligned_be32(&sh->sctp_csum, old_csum ^ old_correct_csum ^ > new_csum); > pkt_metadata_init_conn(&packet->md); > } > > @@ -1957,3 +1993,72 @@ IP_ECN_set_ce(struct dp_packet *pkt, bool is_ipv6) > } > } > } > + > +/* Set TCP checksum field in packet 'p' with complete checksum. > + * The packet must have the L3 and L4 offsets. 
*/ > +void > +packet_tcp_complete_csum(struct dp_packet *p) > +{ > + struct tcp_header *tcp = dp_packet_l4(p); > + > + tcp->tcp_csum = 0; > + if (dp_packet_hwol_is_ipv4(p)) { > + struct ip_header *ip = dp_packet_l3(p); > + > + tcp->tcp_csum = > csum_finish(csum_continue(packet_csum_pseudoheader(ip), > + tcp, > dp_packet_l4_size(p))); > + } else if (dp_packet_hwol_tx_ipv6(p)) { > + struct ovs_16aligned_ip6_hdr *ip6 = dp_packet_l3(p); > + > + tcp->tcp_csum = packet_csum_upperlayer6(ip6, tcp, ip6->ip6_nxt, > + dp_packet_l4_size(p)); > + } else { > + OVS_NOT_REACHED(); > + } > +} > + > +/* Set UDP checksum field in packet 'p' with complete checksum. > + * The packet must have the L3 and L4 offsets. */ > +void > +packet_udp_complete_csum(struct dp_packet *p) > +{ > + struct udp_header *udp = dp_packet_l4(p); > + > + /* Skip csum calculation if the udp_csum is zero. */ > + if (!udp->udp_csum) { > + return; > + } > + > + udp->udp_csum = 0; > + if (dp_packet_hwol_is_ipv4(p)) { > + struct ip_header *ip = dp_packet_l3(p); > + > + udp->udp_csum = > csum_finish(csum_continue(packet_csum_pseudoheader(ip), > + udp, > dp_packet_l4_size(p))); > + } else if (dp_packet_hwol_tx_ipv6(p)) { > + struct ovs_16aligned_ip6_hdr *ip6 = dp_packet_l3(p); > + > + udp->udp_csum = packet_csum_upperlayer6(ip6, udp, ip6->ip6_nxt, > + dp_packet_l4_size(p)); > + } else { > + OVS_NOT_REACHED(); > + } > + > + if (!udp->udp_csum) { > + udp->udp_csum = htons(0xffff); > + } > +} > + > +/* Set SCTP checksum field in packet 'p' with complete checksum. > + * The packet must have the L3 and L4 offsets. 
*/ > +void > +packet_sctp_complete_csum(struct dp_packet *p) > +{ > + struct sctp_header *sh = dp_packet_l4(p); > + uint16_t tp_len = dp_packet_l4_size(p); > + ovs_be32 csum; > + > + put_16aligned_be32(&sh->sctp_csum, 0); > + csum = crc32c((void *) sh, tp_len); > + put_16aligned_be32(&sh->sctp_csum, csum); > +} > diff --git a/lib/packets.h b/lib/packets.h > index 5bdf6e4bb..28950b8b1 100644 > --- a/lib/packets.h > +++ b/lib/packets.h > @@ -1643,6 +1643,9 @@ void packet_put_ra_prefix_opt(struct dp_packet *, > const ovs_be128 router_prefix); > uint32_t packet_csum_pseudoheader(const struct ip_header *); > void IP_ECN_set_ce(struct dp_packet *pkt, bool is_ipv6); > +void packet_tcp_complete_csum(struct dp_packet *); > +void packet_udp_complete_csum(struct dp_packet *); > +void packet_sctp_complete_csum(struct dp_packet *); > > #define DNS_HEADER_LEN 12 > struct dns_header { _______________________________________________ dev mailing list d...@openvswitch.org https://mail.openvswitch.org/mailman/listinfo/ovs-dev