> -----Original Message----- > From: dev [mailto:dev-bounces at dpdk.org] On Behalf Of Olivier Matz > Sent: Friday, November 14, 2014 5:03 PM > To: dev at dpdk.org > Cc: jigsaw at gmail.com > Subject: [dpdk-dev] [PATCH v2 09/13] mbuf: introduce new checksum API > > Introduce new functions to calculate checksums. These new functions > are derivated from the ones provided csumonly.c but slightly reworked. > There is still some room for future optimization of these functions > (maybe SSE/AVX, ...). > > This API will be modified in tbe next commits by the introduction of > TSO that requires a different pseudo header checksum to be set in the > packet. > > Signed-off-by: Olivier Matz <olivier.matz at 6wind.com>
Acked-by: Konstantin Ananyev <konstantin.ananyev at intel.com> Just 2 nits from me: 1) > +static inline uint16_t > +rte_raw_cksum(const char *buf, size_t len) > +{ ... > + while (len >= 8) { > + sum += u16[0]; sum += u16[1]; sum += u16[2]; sum += u16[3]; Can you put each expression into a new line? sum += u16[0]; sum += u16[1]; ... To make it easier to read. Or can it be rewritten just like: sum = (uint32_t)u16[0] + u16[1] + u16[2] + u16[3]; here? 2) > + while (len >= 8) { > + sum += u16[0]; sum += u16[1]; sum += u16[2]; sum += u16[3]; > + len -= 8; > + u16 += 4; > + } > + while (len >= 2) { > + sum += *u16; > + len -= 2; > + u16 += 1; > + } In the code above, probably use sizeof(u16[0]) wherever appropriate. To make things a bit more clearer and consistent. ... while (len >= 4 * sizeof(u16[0])) len -= 4 * sizeof(u16[0]); u16 += 4; ... Same for second loop. > --- > app/test-pmd/csumonly.c | 133 ++------------------------------- > lib/librte_mbuf/rte_mbuf.h | 3 +- > lib/librte_net/rte_ip.h | 179 > +++++++++++++++++++++++++++++++++++++++++++++ > 3 files changed, 189 insertions(+), 126 deletions(-) > > diff --git a/app/test-pmd/csumonly.c b/app/test-pmd/csumonly.c > index dda5d9e..39f974d 100644 > --- a/app/test-pmd/csumonly.c > +++ b/app/test-pmd/csumonly.c > @@ -86,137 +86,22 @@ > #define _htons(x) (x) > #endif > > -static inline uint16_t > -get_16b_sum(uint16_t *ptr16, uint32_t nr) > -{ > - uint32_t sum = 0; > - while (nr > 1) > - { > - sum +=*ptr16; > - nr -= sizeof(uint16_t); > - ptr16++; > - if (sum > UINT16_MAX) > - sum -= UINT16_MAX; > - } > - > - /* If length is in odd bytes */ > - if (nr) > - sum += *((uint8_t*)ptr16); > - > - sum = ((sum & 0xffff0000) >> 16) + (sum & 0xffff); > - sum &= 0x0ffff; > - return (uint16_t)sum; > -} > - > -static inline uint16_t > -get_ipv4_cksum(struct ipv4_hdr *ipv4_hdr) > -{ > - uint16_t cksum; > - cksum = get_16b_sum((uint16_t*)ipv4_hdr, sizeof(struct ipv4_hdr)); > - return (uint16_t)((cksum == 0xffff)?cksum:~cksum); > -} > - > - > -static inline uint16_t > -get_ipv4_psd_sum(struct ipv4_hdr *ip_hdr) > -{ > - /* Pseudo Header for IPv4/UDP/TCP checksum */ > - union ipv4_psd_header { > - struct { > - uint32_t src_addr; /* IP address of source host. */ > - uint32_t dst_addr; /* IP address of destination > host(s). */ > - uint8_t zero; /* zero. */ > - uint8_t proto; /* L4 protocol type. */ > - uint16_t len; /* L4 length. */ > - } __attribute__((__packed__)); > - uint16_t u16_arr[0]; > - } psd_hdr; > - > - psd_hdr.src_addr = ip_hdr->src_addr; > - psd_hdr.dst_addr = ip_hdr->dst_addr; > - psd_hdr.zero = 0; > - psd_hdr.proto = ip_hdr->next_proto_id; > - psd_hdr.len = > rte_cpu_to_be_16((uint16_t)(rte_be_to_cpu_16(ip_hdr->total_length) > - - sizeof(struct ipv4_hdr))); > - return get_16b_sum(psd_hdr.u16_arr, sizeof(psd_hdr)); > -} > - > -static inline uint16_t > -get_ipv6_psd_sum(struct ipv6_hdr *ip_hdr) > -{ > - /* Pseudo Header for IPv6/UDP/TCP checksum */ > - union ipv6_psd_header { > - struct { > - uint8_t src_addr[16]; /* IP address of source host. */ > - uint8_t dst_addr[16]; /* IP address of destination > host(s). */ > - uint32_t len; /* L4 length. */ > - uint32_t proto; /* L4 protocol - top 3 bytes must > be zero */ > - } __attribute__((__packed__)); > - > - uint16_t u16_arr[0]; /* allow use as 16-bit values with safe > aliasing */ > - } psd_hdr; > - > - rte_memcpy(&psd_hdr.src_addr, ip_hdr->src_addr, > - sizeof(ip_hdr->src_addr) + sizeof(ip_hdr->dst_addr)); > - psd_hdr.len = ip_hdr->payload_len; > - psd_hdr.proto = (ip_hdr->proto << 24); > - > - return get_16b_sum(psd_hdr.u16_arr, sizeof(psd_hdr)); > -} > - > static uint16_t > get_psd_sum(void *l3_hdr, uint16_t ethertype) > { > if (ethertype == _htons(ETHER_TYPE_IPv4)) > - return get_ipv4_psd_sum(l3_hdr); > + return rte_ipv4_phdr_cksum(l3_hdr); > else /* assume ethertype == ETHER_TYPE_IPv6 */ > - return get_ipv6_psd_sum(l3_hdr); > -} > - > -static inline uint16_t > -get_ipv4_udptcp_checksum(struct ipv4_hdr *ipv4_hdr, uint16_t *l4_hdr) > -{ > - uint32_t cksum; > - uint32_t l4_len; > - > - l4_len = rte_be_to_cpu_16(ipv4_hdr->total_length) - sizeof(struct > ipv4_hdr); > - > - cksum = get_16b_sum(l4_hdr, l4_len); > - cksum += get_ipv4_psd_sum(ipv4_hdr); > - > - cksum = ((cksum & 0xffff0000) >> 16) + (cksum & 0xffff); > - cksum = (~cksum) & 0xffff; > - if (cksum == 0) > - cksum = 0xffff; > - return (uint16_t)cksum; > -} > - > -static inline uint16_t > -get_ipv6_udptcp_checksum(struct ipv6_hdr *ipv6_hdr, uint16_t *l4_hdr) > -{ > - uint32_t cksum; > - uint32_t l4_len; > - > - l4_len = rte_be_to_cpu_16(ipv6_hdr->payload_len); > - > - cksum = get_16b_sum(l4_hdr, l4_len); > - cksum += get_ipv6_psd_sum(ipv6_hdr); > - > - cksum = ((cksum & 0xffff0000) >> 16) + (cksum & 0xffff); > - cksum = (~cksum) & 0xffff; > - if (cksum == 0) > - cksum = 0xffff; > - > - return (uint16_t)cksum; > + return rte_ipv6_phdr_cksum(l3_hdr); > } > > static uint16_t > get_udptcp_checksum(void *l3_hdr, void *l4_hdr, uint16_t ethertype) > { > if (ethertype == _htons(ETHER_TYPE_IPv4)) > - return get_ipv4_udptcp_checksum(l3_hdr, l4_hdr); > + return rte_ipv4_udptcp_cksum(l3_hdr, l4_hdr); > else /* assume ethertype == ETHER_TYPE_IPv6 */ > - return get_ipv6_udptcp_checksum(l3_hdr, l4_hdr); > + return rte_ipv6_udptcp_cksum(l3_hdr, l4_hdr); > } > > /* > @@ -294,7 +179,7 @@ process_inner_cksums(void *l3_hdr, uint16_t ethertype, > uint16_t l3_len, > if (testpmd_ol_flags & TESTPMD_TX_OFFLOAD_IP_CKSUM) > ol_flags |= PKT_TX_IP_CKSUM; > else > - ipv4_hdr->hdr_checksum = get_ipv4_cksum(ipv4_hdr); > + ipv4_hdr->hdr_checksum = rte_ipv4_cksum(ipv4_hdr); > > } > else if (ethertype != _htons(ETHER_TYPE_IPv6)) > @@ -366,7 +251,7 @@ process_outer_cksums(void *outer_l3_hdr, uint16_t > outer_ethertype, > ipv4_hdr->hdr_checksum = 0; > > if ((testpmd_ol_flags & TESTPMD_TX_OFFLOAD_VXLAN_CKSUM) == 0) > - ipv4_hdr->hdr_checksum = get_ipv4_cksum(ipv4_hdr); > + ipv4_hdr->hdr_checksum = rte_ipv4_cksum(ipv4_hdr); > } > > udp_hdr = (struct udp_hdr *)((char *)outer_l3_hdr + outer_l3_len); > @@ -376,12 +261,10 @@ process_outer_cksums(void *outer_l3_hdr, uint16_t > outer_ethertype, > if ((testpmd_ol_flags & TESTPMD_TX_OFFLOAD_VXLAN_CKSUM) == 0) { > if (outer_ethertype == _htons(ETHER_TYPE_IPv4)) > udp_hdr->dgram_cksum = > - get_ipv4_udptcp_checksum(ipv4_hdr, > - (uint16_t *)udp_hdr); > + rte_ipv4_udptcp_cksum(ipv4_hdr, > udp_hdr); > else > udp_hdr->dgram_cksum = > - get_ipv6_udptcp_checksum(ipv6_hdr, > - (uint16_t *)udp_hdr); > + rte_ipv6_udptcp_cksum(ipv6_hdr, > udp_hdr); > } > } > > diff --git a/lib/librte_mbuf/rte_mbuf.h b/lib/librte_mbuf/rte_mbuf.h > index e76617f..3c8e825 100644 > --- a/lib/librte_mbuf/rte_mbuf.h > +++ b/lib/librte_mbuf/rte_mbuf.h > @@ -114,7 +114,8 @@ extern "C" { > * - fill l2_len and l3_len in mbuf > * - set the flags PKT_TX_TCP_CKSUM, PKT_TX_SCTP_CKSUM or PKT_TX_UDP_CKSUM > * - calculate the pseudo header checksum and set it in the L4 header (only > - * for TCP or UDP). For SCTP, set the crc field to 0. > + * for TCP or UDP). See rte_ipv4_phdr_cksum() and rte_ipv6_phdr_cksum(). > + * For SCTP, set the crc field to 0. > */ > #define PKT_TX_L4_NO_CKSUM (0ULL << 52) /* Disable L4 cksum of TX pkt. */ > #define PKT_TX_TCP_CKSUM (1ULL << 52) /**< TCP cksum of TX pkt. computed > by NIC. */ > diff --git a/lib/librte_net/rte_ip.h b/lib/librte_net/rte_ip.h > index e3f65c1..9cfca7f 100644 > --- a/lib/librte_net/rte_ip.h > +++ b/lib/librte_net/rte_ip.h > @@ -78,6 +78,9 @@ > > #include <stdint.h> > > +#include <rte_memcpy.h> > +#include <rte_byteorder.h> > + > #ifdef __cplusplus > extern "C" { > #endif > @@ -247,6 +250,124 @@ struct ipv4_hdr { > ((x) >= IPV4_MIN_MCAST && (x) <= IPV4_MAX_MCAST) /**< check if IPv4 > address is multicast */ > > /** > + * Process the non-complemented checksum of a buffer. > + * > + * @param buf > + * Pointer to the buffer. > + * @param len > + * Length of the buffer. > + * @return > + * The non-complemented checksum. > + */ > +static inline uint16_t > +rte_raw_cksum(const char *buf, size_t len) > +{ > + const uint16_t *u16 = (const uint16_t *)buf; > + uint32_t sum = 0; > + > + while (len >= 8) { > + sum += u16[0]; sum += u16[1]; sum += u16[2]; sum += u16[3]; > + len -= 8; > + u16 += 4; > + } > + while (len >= 2) { > + sum += *u16; > + len -= 2; > + u16 += 1; > + } > + > + /* if length is in odd bytes */ > + if (len == 1) > + sum += *((const uint8_t *)u16); > + > + sum = ((sum & 0xffff0000) >> 16) + (sum & 0xffff); > + sum = ((sum & 0xffff0000) >> 16) + (sum & 0xffff); > + return (uint16_t)sum; > +} > + > +/** > + * Process the IPv4 checksum of an IPv4 header. > + * > + * The checksum field must be set to 0 by the caller. > + * > + * @param ipv4_hdr > + * The pointer to the contiguous IPv4 header. > + * @return > + * The complemented checksum to set in the IP packet. > + */ > +static inline uint16_t > +rte_ipv4_cksum(const struct ipv4_hdr *ipv4_hdr) > +{ > + uint16_t cksum; > + cksum = rte_raw_cksum((const char *)ipv4_hdr, sizeof(struct ipv4_hdr)); > + return ((cksum == 0xffff) ? cksum : ~cksum); > +} > + > +/** > + * Process the pseudo-header checksum of an IPv4 header. > + * > + * The checksum field must be set to 0 by the caller. > + * > + * @param ipv4_hdr > + * The pointer to the contiguous IPv4 header. > + * @return > + * The non-complemented checksum to set in the L4 header. > + */ > +static inline uint16_t > +rte_ipv4_phdr_cksum(const struct ipv4_hdr *ipv4_hdr) > +{ > + struct ipv4_psd_header { > + uint32_t src_addr; /* IP address of source host. */ > + uint32_t dst_addr; /* IP address of destination host. */ > + uint8_t zero; /* zero. */ > + uint8_t proto; /* L4 protocol type. */ > + uint16_t len; /* L4 length. */ > + } psd_hdr; > + > + psd_hdr.src_addr = ipv4_hdr->src_addr; > + psd_hdr.dst_addr = ipv4_hdr->dst_addr; > + psd_hdr.zero = 0; > + psd_hdr.proto = ipv4_hdr->next_proto_id; > + psd_hdr.len = rte_cpu_to_be_16( > + (uint16_t)(rte_be_to_cpu_16(ipv4_hdr->total_length) > + - sizeof(struct ipv4_hdr))); > + return rte_raw_cksum((const char *)&psd_hdr, sizeof(psd_hdr)); > +} > + > +/** > + * Process the IPv4 UDP or TCP checksum. > + * > + * The IPv4 header should not contains options. The IP and layer 4 > + * checksum must be set to 0 in the packet by the caller. > + * > + * @param ipv4_hdr > + * The pointer to the contiguous IPv4 header. > + * @param l4_hdr > + * The pointer to the beginning of the L4 header. > + * @return > + * The complemented checksum to set in the IP packet. > + */ > +static inline uint16_t > +rte_ipv4_udptcp_cksum(const struct ipv4_hdr *ipv4_hdr, const void *l4_hdr) > +{ > + uint32_t cksum; > + uint32_t l4_len; > + > + l4_len = rte_be_to_cpu_16(ipv4_hdr->total_length) - > + sizeof(struct ipv4_hdr); > + > + cksum = rte_raw_cksum(l4_hdr, l4_len); > + cksum += rte_ipv4_phdr_cksum(ipv4_hdr); > + > + cksum = ((cksum & 0xffff0000) >> 16) + (cksum & 0xffff); > + cksum = (~cksum) & 0xffff; > + if (cksum == 0) > + cksum = 0xffff; > + > + return cksum; > +} > + > +/** > * IPv6 Header > */ > struct ipv6_hdr { > @@ -258,6 +379,64 @@ struct ipv6_hdr { > uint8_t dst_addr[16]; /**< IP address of destination host(s). */ > } __attribute__((__packed__)); > > +/** > + * Process the pseudo-header checksum of an IPv6 header. > + * > + * @param ipv6_hdr > + * The pointer to the contiguous IPv6 header. > + * @return > + * The non-complemented checksum to set in the L4 header. > + */ > +static inline uint16_t > +rte_ipv6_phdr_cksum(const struct ipv6_hdr *ipv6_hdr) > +{ > + struct ipv6_psd_header { > + uint8_t src_addr[16]; /* IP address of source host. */ > + uint8_t dst_addr[16]; /* IP address of destination host. */ > + uint32_t len; /* L4 length. */ > + uint32_t proto; /* L4 protocol - top 3 bytes must be zero > */ > + } psd_hdr; > + > + rte_memcpy(&psd_hdr.src_addr, ipv6_hdr->src_addr, > + sizeof(ipv6_hdr->src_addr) + sizeof(ipv6_hdr->dst_addr)); > + psd_hdr.proto = (ipv6_hdr->proto << 24); > + psd_hdr.len = ipv6_hdr->payload_len; > + > + return rte_raw_cksum((const char *)&psd_hdr, sizeof(psd_hdr)); > +} > + > +/** > + * Process the IPv6 UDP or TCP checksum. > + * > + * The IPv4 header should not contains options. The layer 4 checksum > + * must be set to 0 in the packet by the caller. > + * > + * @param ipv6_hdr > + * The pointer to the contiguous IPv6 header. > + * @param l4_hdr > + * The pointer to the beginning of the L4 header. > + * @return > + * The complemented checksum to set in the IP packet. > + */ > +static inline uint16_t > +rte_ipv6_udptcp_cksum(const struct ipv6_hdr *ipv6_hdr, const void *l4_hdr) > +{ > + uint32_t cksum; > + uint32_t l4_len; > + > + l4_len = rte_be_to_cpu_16(ipv6_hdr->payload_len); > + > + cksum = rte_raw_cksum(l4_hdr, l4_len); > + cksum += rte_ipv6_phdr_cksum(ipv6_hdr, 0); > + > + cksum = ((cksum & 0xffff0000) >> 16) + (cksum & 0xffff); > + cksum = (~cksum) & 0xffff; > + if (cksum == 0) > + cksum = 0xffff; > + > + return cksum; > +} > + > #ifdef __cplusplus > } > #endif > -- > 2.1.0