On Wed, Apr 28, 2010 at 01:57:12PM -0700, David L Stevens wrote: > @@ -218,18 +248,19 @@ static void handle_rx(struct vhost_net * > use_mm(net->dev.mm); > mutex_lock(&vq->mutex); > vhost_disable_notify(vq); > - hdr_size = vq->hdr_size; > + vhost_hlen = vq->vhost_hlen; > > vq_log = unlikely(vhost_has_feature(&net->dev, VHOST_F_LOG_ALL)) ? > vq->log : NULL; > > - for (;;) { > - head = vhost_get_vq_desc(&net->dev, vq, vq->iov, > - ARRAY_SIZE(vq->iov), > - &out, &in, > - vq_log, &log); > + while ((datalen = vhost_head_len(vq, sock->sk))) { > + headcount = vhost_get_desc_n(vq, vq->heads, > + datalen + vhost_hlen, > + &in, vq_log, &log); > + if (headcount < 0) > + break; > /* OK, now we need to know about added descriptors. */ > - if (head == vq->num) { > + if (!headcount) { > if (unlikely(vhost_enable_notify(vq))) { > /* They have slipped one in as we were > * doing that: check again. */ > @@ -241,46 +272,53 @@ static void handle_rx(struct vhost_net * > break; > } > /* We don't need to be notified again. */ > - if (out) { > - vq_err(vq, "Unexpected descriptor format for RX: " > - "out %d, int %d\n", > - out, in); > - break; > - } > - /* Skip header. TODO: support TSO/mergeable rx buffers. */ > - s = move_iovec_hdr(vq->iov, vq->hdr, hdr_size, in); > + if (vhost_hlen) > + /* Skip header. TODO: support TSO. */ > + s = move_iovec_hdr(vq->iov, vq->hdr, vhost_hlen, in); > + else > + s = copy_iovec_hdr(vq->iov, vq->hdr, vq->sock_hlen, in); > msg.msg_iovlen = in; > len = iov_length(vq->iov, in); > /* Sanity check */ > if (!len) { > vq_err(vq, "Unexpected header len for RX: " > "%zd expected %zd\n", > - iov_length(vq->hdr, s), hdr_size); > + iov_length(vq->hdr, s), vhost_hlen); > break; > } > err = sock->ops->recvmsg(NULL, sock, &msg, > len, MSG_DONTWAIT | MSG_TRUNC); > /* TODO: Check specific error and bomb out unless EAGAIN? */ > if (err < 0) { > - vhost_discard_vq_desc(vq); > + vhost_discard_desc(vq, headcount); > break; > } > - /* TODO: Should check and handle checksum. */ > - if (err > len) { > - pr_err("Discarded truncated rx packet: " > - " len %d > %zd\n", err, len); > - vhost_discard_vq_desc(vq); > + if (err != datalen) { > + pr_err("Discarded rx packet: " > + " len %d, expected %zd\n", err, datalen); > + vhost_discard_desc(vq, headcount); > continue; > } > len = err; > - err = memcpy_toiovec(vq->hdr, (unsigned char *)&hdr, hdr_size); > - if (err) { > - vq_err(vq, "Unable to write vnet_hdr at addr %p: %d\n", > - vq->iov->iov_base, err); > + if (vhost_hlen && > + memcpy_toiovecend(vq->hdr, (unsigned char *)&hdr, 0, > + vhost_hlen)) { > + vq_err(vq, "Unable to write vnet_hdr at addr %p\n", > + vq->iov->iov_base); > break; > } > - len += hdr_size; > - vhost_add_used_and_signal(&net->dev, vq, head, len); > + /* TODO: Should check and handle checksum. */ > + if (vhost_has_feature(&net->dev, VIRTIO_NET_F_MRG_RXBUF) && > + memcpy_toiovecend(vq->hdr, (unsigned char *)&headcount, > + offsetof(typeof(hdr), num_buffers), > + sizeof(hdr.num_buffers))) { > + vq_err(vq, "Failed num_buffers write"); > + vhost_discard_desc(vq, headcount); > + break; > + } > + len += vhost_hlen; > + vhost_add_used_and_signal_n(&net->dev, vq, vq->heads, > + headcount); > if (unlikely(vq_log)) > vhost_log_write(vq, vq_log, log, len); > total_len += len;
OK I think I see the bug here: vhost_add_used_and_signal_n does not get the actual length, it gets the iovec length from vhost. Guest virtio uses this as packet length, with bad results. So I have applied the follows and it seems to have fixed the problem: diff --git a/drivers/vhost/net.c b/drivers/vhost/net.c index c16db02..9d7496d 100644 --- a/drivers/vhost/net.c +++ b/drivers/vhost/net.c @@ -219,7 +219,7 @@ static int peek_head_len(struct vhost_virtqueue *vq, struct sock *sk) /* This is a multi-buffer version of vhost_get_desc, that works if * vq has read descriptors only. * @vq - the relevant virtqueue - * @datalen - data length we'll be reading + * @datalen - data length we'll be reading. must be > 0 * @iovcount - returned count of io vectors we fill * @log - vhost log * @log_num - log offset @@ -236,9 +236,10 @@ static int get_rx_bufs(struct vhost_virtqueue *vq, int seg = 0; int headcount = 0; unsigned d; + size_t len; int r, nlogs = 0; - while (datalen > 0) { + for (;;) { if (unlikely(headcount >= VHOST_NET_MAX_SG)) { r = -ENOBUFS; goto err; @@ -260,16 +261,20 @@ static int get_rx_bufs(struct vhost_virtqueue *vq, nlogs += *log_num; log += *log_num; } + len = iov_length(vq->iov + seg, in); + seg += in; heads[headcount].id = d; - heads[headcount].len = iov_length(vq->iov + seg, in); - datalen -= heads[headcount].len; + if (datalen <= len) + break; + heads[headcount].len = len; ++headcount; - seg += in; + datalen -= len; } + heads[headcount].len = datalen; *iovcount = seg; if (unlikely(log)) *log_num = nlogs; - return headcount; + return headcount + 1; err: vhost_discard_desc(vq, headcount); return r; -- MST -- To unsubscribe from this list: send the line "unsubscribe kvm" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html