On Tue, Nov 4, 2025 at 1:59 AM Peter Maydell <[email protected]> wrote:
>
> In e1000e_write_packet_to_guest() we attempt to ensure that we don't
> write more of a packet to a descriptor than will fit in the guest
> configured receive buffers. However, this code does not allow for
> the "packet split" feature. When packet splitting is enabled, the
> first of up to 4 buffers in the descriptor is used for the packet
> header only, with the payload going into buffers 2, 3 and 4. Our
> length check only checks against the total sizes of all 4 buffers,
> which meant that if an incoming packet was large enough to fit in (1
> + 2 + 3 + 4) but not into (2 + 3 + 4) and packet splitting was
> enabled, we would run into the assertion in
> e1000e_write_hdr_frag_to_rx_buffers() that we had enough buffers for
> the data:
>
> qemu-system-i386: ../../hw/net/e1000e_core.c:1418: void
> e1000e_write_payload_frag_to_rx_buffers(E1000ECore *, hwaddr *, E1000EBAState
> *, const char *, dma_addr_t): Assertion `bastate->cur_idx < MAX_PS_BUFFERS'
> failed.
>
> A malicious guest could provoke this assertion by configuring the
> device into loopback mode, and then sending itself a suitably sized
> packet into a suitably arrange rx descriptor.
>
> The code also fails to deal with the possibility that the descriptor
> buffers are sized such that the trailing checksum word does not fit
> into the last descriptor which has actual data, which might also
> trigger this assertion.
>
> Rework the length handling to use two variables:
> * desc_size is the total amount of data DMA'd to the guest
> for the descriptor being processed in this iteration of the loop
> * rx_desc_buf_size is the total amount of space left in it
>
> As we copy data to the guest (packet header, payload, checksum),
> update these two variables. (Previously we attempted to calculate
> desc_size once at the top of the loop, but this is too difficult to
> do correctly.) Then we can use the variables to ensure that we clamp
> the amount of copied payload data to the remaining space in the
> descriptor's buffers, even if we've used one of the buffers up in the
> packet-split code, and we can tell whether we have enough space for
> the full checksum word in this descriptor or whether we're going to
> need to split that to the following descriptor.
>
> I have included comments that hopefully help to make the loop
> logic a little clearer.
>
> Cc: [email protected]
> Resolves: https://gitlab.com/qemu-project/qemu/-/issues/537
> Signed-off-by: Peter Maydell <[email protected]>
> ---
> hw/net/e1000e_core.c | 61 ++++++++++++++++++++++++++++++++++----------
> 1 file changed, 47 insertions(+), 14 deletions(-)
>
> diff --git a/hw/net/e1000e_core.c b/hw/net/e1000e_core.c
> index ba77cb6011f..471c3ed20b8 100644
> --- a/hw/net/e1000e_core.c
> +++ b/hw/net/e1000e_core.c
> @@ -1495,6 +1495,13 @@ e1000e_write_packet_to_guest(E1000ECore *core, struct
> NetRxPkt *pkt,
> rxi = rxr->i;
>
> do {
> + /*
> + * Loop processing descriptors while we have packet data to
> + * DMA to the guest. desc_offset tracks how much data we have
> + * sent to the guest in total over all descriptors, and goes
> + * from 0 up to total_size (the size of everything to send to
> + * the guest including possible trailing 4 bytes of CRC data).
> + */
> hwaddr ba[MAX_PS_BUFFERS];
> E1000EBAState bastate = { { 0 } };
> bool is_last = false;
> @@ -1512,23 +1519,27 @@ e1000e_write_packet_to_guest(E1000ECore *core, struct
> NetRxPkt *pkt,
> e1000e_read_rx_descr(core, &desc, ba);
>
> if (ba[0]) {
> - size_t desc_size = total_size - desc_offset;
> -
> - if (desc_size > core->rx_desc_buf_size) {
> - desc_size = core->rx_desc_buf_size;
> - }
> + /* Total amount of data DMA'd to the guest in this iteration */
> + size_t desc_size = 0;
> + /*
> + * Total space available in this descriptor (we will update
> + * this as we use it up)
> + */
> + size_t rx_desc_buf_size = core->rx_desc_buf_size;
>
> if (desc_offset < size) {
> - static const uint32_t fcs_pad;
> size_t iov_copy;
> + /* Amount of data to copy from the incoming packet */
> size_t copy_size = size - desc_offset;
> - if (copy_size > core->rx_desc_buf_size) {
> - copy_size = core->rx_desc_buf_size;
> - }
>
> /* For PS mode copy the packet header first */
> if (do_ps) {
> if (is_first) {
> + /*
> + * e1000e_do_ps() guarantees that buffer 0 has enough
> + * space for the header; otherwise we will not split
> + * the packet (i.e. do_ps is false).
> + */
> size_t ps_hdr_copied = 0;
> do {
> iov_copy = MIN(ps_hdr_len - ps_hdr_copied,
> @@ -1550,14 +1561,26 @@ e1000e_write_packet_to_guest(E1000ECore *core, struct
> NetRxPkt *pkt,
> } while (ps_hdr_copied < ps_hdr_len);
>
> is_first = false;
> + desc_size += ps_hdr_len;
> } else {
> /* Leave buffer 0 of each descriptor except first */
> /* empty as per spec 7.1.5.1 */
> e1000e_write_hdr_frag_to_rx_buffers(core, ba,
> &bastate,
> NULL, 0);
> }
> + rx_desc_buf_size -= core->rxbuf_sizes[0];
> }
>
> + /*
> + * Clamp the amount of packet data we copy into what will fit
> + * into the remaining buffers in the descriptor.
> + */
> + if (copy_size > rx_desc_buf_size) {
> + copy_size = rx_desc_buf_size;
> + }
> + desc_size += copy_size;
> + rx_desc_buf_size -= copy_size;
> +
> /* Copy packet payload */
> while (copy_size) {
> iov_copy = MIN(copy_size, iov->iov_len - iov_ofs);
> @@ -1574,12 +1597,22 @@ e1000e_write_packet_to_guest(E1000ECore *core, struct
> NetRxPkt *pkt,
> iov_ofs = 0;
> }
> }
> + }
>
> - if (desc_offset + desc_size >= total_size) {
> - /* Simulate FCS checksum presence in the last descriptor
> */
> - e1000e_write_payload_frag_to_rx_buffers(core, ba,
> &bastate,
> - (const char *) &fcs_pad,
> e1000x_fcs_len(core->mac));
> - }
> + if (rx_desc_buf_size &&
> + desc_offset >= size && desc_offset < total_size) {
> + /*
> + * We are in the last 4 bytes corresponding to the FCS
> checksum.
> + * We only ever write zeroes here (unlike the hardware).
> + */
> + static const uint32_t fcs_pad;
> + /* Amount of space for the trailing checksum */
> + size_t fcs_len = MIN(rx_desc_buf_size,
> + total_size - desc_offset);
> + e1000e_write_payload_frag_to_rx_buffers(core, ba, &bastate,
> + (const char
> *)&fcs_pad,
> + fcs_len);
> + desc_size += fcs_len;
> }
> desc_offset += desc_size;
This should be done before the if (rx_desc_bufs_size && ... ?
> if (desc_offset >= total_size) {
> --
> 2.43.0
>
Thanks