On 03/15/2016 05:17 PM, w...@redhat.com wrote: > From: Wei Xu <w...@redhat.com> > > Most things like ipv4 except there is a significant difference between ipv4 > and ipv6, the fragment lenght in ipv4 header includes itself, while it's not > included for ipv6, thus means ipv6 can carry a real '65535' unit. > > Signed-off-by: Wei Xu <w...@redhat.com> > --- > hw/net/virtio-net.c | 146 > ++++++++++++++++++++++++++++++++++++++++----- > include/hw/virtio/virtio.h | 5 +- > 2 files changed, 135 insertions(+), 16 deletions(-) > > diff --git a/hw/net/virtio-net.c b/hw/net/virtio-net.c > index c23b45f..ef61b74 100644 > --- a/hw/net/virtio-net.c > +++ b/hw/net/virtio-net.c > @@ -52,9 +52,14 @@ > #define MAX_IP4_PAYLOAD (65535 - IP4_HDR_SZ) > #define MAX_TCP_PAYLOAD 65535 > > -/* max payload with virtio header */ > +#define IP6_HDR_SZ (sizeof(struct ip6_header)) > +#define ETH_IP6_HDR_SZ (ETH_HDR_SZ + IP6_HDR_SZ) > +#define IP6_ADDR_SIZE 32 /* ipv6 saddr + daddr */ > +#define MAX_IP6_PAYLOAD MAX_TCP_PAYLOAD > + > +/* ip6 max payload, payload in ipv6 don't include the header */ > #define MAX_VIRTIO_PAYLOAD (sizeof(struct virtio_net_hdr_mrg_rxbuf) \ > - + ETH_HDR_SZ + MAX_TCP_PAYLOAD) > + + ETH_IP6_HDR_SZ + MAX_IP6_PAYLOAD) > > #define IP4_HEADER_LEN 5 /* header lenght value in ip header without option > */ > > @@ -1722,14 +1727,27 @@ static void virtio_net_rsc_extract_unit4(NetRscChain > *chain, > { > uint16_t ip_hdrlen; > > - unit->ip = (struct ip_header *)(buf + chain->hdr_size + ETH_HDR_SZ); > - ip_hdrlen = ((0xF & unit->ip->ip_ver_len) << 2); > - unit->ip_plen = &unit->ip->ip_len; > - unit->tcp = (struct tcp_header *)(((uint8_t *)unit->ip) + ip_hdrlen); > + unit->u_ip.ip = (struct ip_header *)(buf + chain->hdr_size + ETH_HDR_SZ); > + ip_hdrlen = ((0xF & unit->u_ip.ip->ip_ver_len) << 2); > + unit->ip_plen = &unit->u_ip.ip->ip_len; > + unit->tcp = (struct tcp_header *)(((uint8_t *)unit->u_ip.ip) + > ip_hdrlen); > unit->tcp_hdrlen = (htons(unit->tcp->th_offset_flags) & 0xF000) 
>> 10; > unit->payload = htons(*unit->ip_plen) - ip_hdrlen - unit->tcp_hdrlen; > } > > +static void virtio_net_rsc_extract_unit6(NetRscChain *chain, > + const uint8_t *buf, NetRscUnit* > unit) > +{ > + unit->u_ip.ip6 = (struct ip6_header *)(buf + chain->hdr_size + > ETH_HDR_SZ);
The u_ip seems a little bit redundant. How about using a simple void * and casting it to ipv4/ipv6 in the proto-specific callbacks? Introducing u_ip leads to unnecessary ipv4 code changes for the ipv6 coalescing implementation. > + unit->ip_plen = &(unit->u_ip.ip6->ip6_ctlun.ip6_un1.ip6_un1_plen); > + unit->tcp = (struct tcp_header *)(((uint8_t *)unit->u_ip.ip6)\ > + + IP6_HDR_SZ); > + unit->tcp_hdrlen = (htons(unit->tcp->th_offset_flags) & 0xF000) >> 10; > + /* There is a difference between payload lenght in ipv4 and v6, > + ip header is excluded in ipv6 */ > + unit->payload = htons(*unit->ip_plen) - unit->tcp_hdrlen; > +} > + > static void virtio_net_rsc_ipv4_checksum(struct ip_header *ip) > { > uint32_t sum; > @@ -1743,7 +1761,10 @@ static size_t virtio_net_rsc_drain_seg(NetRscChain > *chain, NetRscSeg *seg) > { > int ret; > > - virtio_net_rsc_ipv4_checksum(seg->unit.ip); > + if ((chain->proto == ETH_P_IP) && seg->is_coalesced) { > + virtio_net_rsc_ipv4_checksum(seg->unit.u_ip.ip); > + } > + > ret = virtio_net_do_receive(seg->nc, seg->buf, seg->size); > QTAILQ_REMOVE(&chain->buffers, seg, next); > g_free(seg->buf); > @@ -1807,7 +1828,11 @@ static void virtio_net_rsc_cache_buf(NetRscChain > *chain, NetClientState *nc, > QTAILQ_INSERT_TAIL(&chain->buffers, seg, next); > chain->stat.cache++; > > - virtio_net_rsc_extract_unit4(chain, seg->buf, &seg->unit); > + if (chain->proto == ETH_P_IP) { > + virtio_net_rsc_extract_unit4(chain, seg->buf, &seg->unit); > + } else { A switch with a g_assert_not_reached() default would be better than this. 
> + virtio_net_rsc_extract_unit6(chain, seg->buf, &seg->unit); > + } > } > > static int32_t virtio_net_rsc_handle_ack(NetRscChain *chain, NetRscSeg *seg, > @@ -1930,8 +1955,8 @@ coalesce: > static int32_t virtio_net_rsc_coalesce4(NetRscChain *chain, NetRscSeg *seg, > const uint8_t *buf, size_t size, NetRscUnit *unit) > { > - if ((unit->ip->ip_src ^ seg->unit.ip->ip_src) > - || (unit->ip->ip_dst ^ seg->unit.ip->ip_dst) > + if ((unit->u_ip.ip->ip_src ^ seg->unit.u_ip.ip->ip_src) > + || (unit->u_ip.ip->ip_dst ^ seg->unit.u_ip.ip->ip_dst) > || (unit->tcp->th_sport ^ seg->unit.tcp->th_sport) > || (unit->tcp->th_dport ^ seg->unit.tcp->th_dport)) { > chain->stat.no_match++; > @@ -1941,6 +1966,22 @@ static int32_t virtio_net_rsc_coalesce4(NetRscChain > *chain, NetRscSeg *seg, > return virtio_net_rsc_coalesce_data(chain, seg, buf, unit); > } > > +static int32_t virtio_net_rsc_coalesce6(NetRscChain *chain, NetRscSeg *seg, > + const uint8_t *buf, size_t size, NetRscUnit *unit) > +{ > + if (memcmp(&unit->u_ip.ip6->ip6_src, &seg->unit.u_ip.ip6->ip6_src, > + sizeof(struct in6_address)) > + || memcmp(&unit->u_ip.ip6->ip6_dst, &seg->unit.u_ip.ip6->ip6_dst, > + sizeof(struct in6_address)) > + || (unit->tcp->th_sport ^ seg->unit.tcp->th_sport) > + || (unit->tcp->th_dport ^ seg->unit.tcp->th_dport)) { > + chain->stat.no_match++; > + return RSC_NO_MATCH; > + } > + > + return virtio_net_rsc_coalesce_data(chain, seg, buf, unit); > +} > + > /* Pakcets with 'SYN' should bypass, other flag should be sent after drain > * to prevent out of order */ > static int virtio_net_rsc_tcp_ctrl_check(NetRscChain *chain, > @@ -1983,7 +2024,11 @@ static size_t virtio_net_rsc_do_coalesce(NetRscChain > *chain, NetClientState *nc, > NetRscSeg *seg, *nseg; > > QTAILQ_FOREACH_SAFE(seg, &chain->buffers, next, nseg) { > - ret = virtio_net_rsc_coalesce4(chain, seg, buf, size, unit); > + if (chain->proto == ETH_P_IP) { > + ret = virtio_net_rsc_coalesce4(chain, seg, buf, size, unit); > + } else { > + ret = 
virtio_net_rsc_coalesce6(chain, seg, buf, size, unit); > + } > > if (ret == RSC_FINAL) { > if (virtio_net_rsc_drain_seg(chain, seg) == 0) { > @@ -2082,7 +2127,8 @@ static size_t virtio_net_rsc_receive4(void *opq, > NetClientState* nc, > > chain = (NetRscChain *)opq; > virtio_net_rsc_extract_unit4(chain, buf, &unit); > - if (RSC_WANT != virtio_net_rsc_sanity_check4(chain, unit.ip, buf, size)) > { > + if (RSC_WANT != virtio_net_rsc_sanity_check4(chain, > + unit.u_ip.ip, buf, size)) { > return virtio_net_do_receive(nc, buf, size); > } > > @@ -2102,13 +2148,74 @@ static size_t virtio_net_rsc_receive4(void *opq, > NetClientState* nc, > return virtio_net_rsc_do_coalesce(chain, nc, buf, size, &unit); > } > > +static int32_t virtio_net_rsc_sanity_check6(NetRscChain *chain, > + struct ip6_header *ip, const uint8_t *buf, size_t > size) The indentation is wrong here. > +{ > + uint16_t ip_len; > + > + if (size < (chain->hdr_size + ETH_IP6_HDR_SZ + TCP_HDR_SZ)) { > + return RSC_BYPASS; > + } > + > + if (((0xF0 & ip->ip6_ctlun.ip6_un1.ip6_un1_flow) >> 4) > + != IP_HEADER_VERSION_6) { > + return RSC_BYPASS; > + } > + > + /* Both option and protocol is checked in this */ > + if (ip->ip6_ctlun.ip6_un1.ip6_un1_nxt != IPPROTO_TCP) { > + chain->stat.bypass_not_tcp++; > + return RSC_BYPASS; > + } > + > + /* Sanity check */ This comment is useless. > + ip_len = htons(ip->ip6_ctlun.ip6_un1.ip6_un1_plen); > + if (ip_len < TCP_HDR_SZ > + || ip_len > (size - chain->hdr_size - ETH_IP6_HDR_SZ)) { > + chain->stat.ip_hacked++; > + return RSC_BYPASS; > + } > + > + return RSC_WANT; > +} > + > +static size_t virtio_net_rsc_receive6(void *opq, NetClientState* nc, > + const uint8_t *buf, size_t size) > +{ This is rather similar to the ipv4 version; the code needs to be unified. 
> + int32_t ret; > + NetRscChain *chain; > + NetRscUnit unit; > + > + chain = (NetRscChain *)opq; > + virtio_net_rsc_extract_unit6(chain, buf, &unit); > + if (RSC_WANT != virtio_net_rsc_sanity_check6(chain, > + unit.u_ip.ip6, buf, size)) { > + return virtio_net_do_receive(nc, buf, size); > + } > + > + ret = virtio_net_rsc_tcp_ctrl_check(chain, unit.tcp); > + if (ret == RSC_BYPASS) { > + return virtio_net_do_receive(nc, buf, size); > + } else if (ret == RSC_FINAL) { > + return virtio_net_rsc_drain_flow(chain, nc, buf, size, > + ((chain->hdr_size + ETH_HDR_SZ) + 8), IP6_ADDR_SIZE, > + (chain->hdr_size + ETH_IP6_HDR_SZ), TCP_PORT_SIZE); > + } > + > + if (virtio_net_rsc_empty_cache(chain, nc, buf, size)) { > + return size; > + } > + > + return virtio_net_rsc_do_coalesce(chain, nc, buf, size, &unit); > +} > + > static NetRscChain *virtio_net_rsc_lookup_chain(VirtIONet * n, > NetClientState *nc, uint16_t > proto) > { > NetRscChain *chain; > > /* Only handle IPv4/6 */ > - if (proto != (uint16_t)ETH_P_IP) { > + if ((proto != (uint16_t)ETH_P_IP) && (proto != (uint16_t)ETH_P_IPV6)) { > return NULL; > } > > @@ -2121,7 +2228,11 @@ static NetRscChain > *virtio_net_rsc_lookup_chain(VirtIONet * n, > chain = g_malloc(sizeof(*chain)); > chain->hdr_size = n->guest_hdr_len; > chain->proto = proto; > - chain->max_payload = MAX_IP4_PAYLOAD; > + if (proto == (uint16_t)ETH_P_IP) { > + chain->max_payload = MAX_IP4_PAYLOAD; > + } else { > + chain->max_payload = MAX_IP6_PAYLOAD; > + } > chain->drain_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL, > virtio_net_rsc_purge, chain); > memset(&chain->stat, 0, sizeof(chain->stat)); > @@ -2153,7 +2264,12 @@ static ssize_t virtio_net_rsc_receive(NetClientState > *nc, > return virtio_net_do_receive(nc, buf, size); > } else { > chain->stat.received++; > - return virtio_net_rsc_receive4(chain, nc, buf, size); > + > + if (proto == (uint16_t)ETH_P_IP) { > + return virtio_net_rsc_receive4(chain, nc, buf, size); > + } else { > + return 
virtio_net_rsc_receive6(chain, nc, buf, size); > + } > } > } > > diff --git a/include/hw/virtio/virtio.h b/include/hw/virtio/virtio.h > index 3b1dfa8..13d20a4 100644 > --- a/include/hw/virtio/virtio.h > +++ b/include/hw/virtio/virtio.h > @@ -170,7 +170,10 @@ typedef struct NetRscStat { > > /* Rsc unit general info used to checking if can coalescing */ > typedef struct NetRscUnit { > - struct ip_header *ip; /* ip header */ > + union { > + struct ip_header *ip; /* ip header */ > + struct ip6_header *ip6; /* ip6 header */ > + } u_ip; > uint16_t *ip_plen; /* data len pointer in ip header field */ > struct tcp_header *tcp; /* tcp header */ > uint16_t tcp_hdrlen; /* tcp header len */