On Tue, Nov 4, 2025 at 1:59 AM Peter Maydell <[email protected]> wrote:
>
> In e1000e_write_packet_to_guest() we attempt to ensure that we don't
> write more of a packet to a descriptor than will fit in the guest
> configured receive buffers.  However, this code does not allow for
> the "packet split" feature.  When packet splitting is enabled, the
> first of up to 4 buffers in the descriptor is used for the packet
> header only, with the payload going into buffers 2, 3 and 4.  Our
> length check only checks against the total sizes of all 4 buffers,
> which meant that if an incoming packet was large enough to fit in (1
> + 2 + 3 + 4) but not into (2 + 3 + 4) and packet splitting was
> enabled, we would run into the assertion in
> e1000e_write_hdr_frag_to_rx_buffers() that we had enough buffers for
> the data:
>
> qemu-system-i386: ../../hw/net/e1000e_core.c:1418: void 
> e1000e_write_payload_frag_to_rx_buffers(E1000ECore *, hwaddr *, E1000EBAState 
> *, const char *, dma_addr_t): Assertion `bastate->cur_idx < MAX_PS_BUFFERS' 
> failed.
>
> A malicious guest could provoke this assertion by configuring the
> device into loopback mode, and then sending itself a suitably sized
> packet into a suitably arrange rx descriptor.
>
> The code also fails to deal with the possibility that the descriptor
> buffers are sized such that the trailing checksum word does not fit
> into the last descriptor which has actual data, which might also
> trigger this assertion.
>
> Rework the length handling to use two variables:
>  * desc_size is the total amount of data DMA'd to the guest
>    for the descriptor being processed in this iteration of the loop
>  * rx_desc_buf_size is the total amount of space left in it
>
> As we copy data to the guest (packet header, payload, checksum),
> update these two variables.  (Previously we attempted to calculate
> desc_size once at the top of the loop, but this is too difficult to
> do correctly.) Then we can use the variables to ensure that we clamp
> the amount of copied payload data to the remaining space in the
> descriptor's buffers, even if we've used one of the buffers up in the
> packet-split code, and we can tell whether we have enough space for
> the full checksum word in this descriptor or whether we're going to
> need to split that to the following descriptor.
>
> I have included comments that hopefully help to make the loop
> logic a little clearer.
>
> Cc: [email protected]
> Resolves: https://gitlab.com/qemu-project/qemu/-/issues/537
> Signed-off-by: Peter Maydell <[email protected]>
> ---
>  hw/net/e1000e_core.c | 61 ++++++++++++++++++++++++++++++++++----------
>  1 file changed, 47 insertions(+), 14 deletions(-)
>
> diff --git a/hw/net/e1000e_core.c b/hw/net/e1000e_core.c
> index ba77cb6011f..471c3ed20b8 100644
> --- a/hw/net/e1000e_core.c
> +++ b/hw/net/e1000e_core.c
> @@ -1495,6 +1495,13 @@ e1000e_write_packet_to_guest(E1000ECore *core, struct 
> NetRxPkt *pkt,
>      rxi = rxr->i;
>
>      do {
> +        /*
> +         * Loop processing descriptors while we have packet data to
> +         * DMA to the guest.  desc_offset tracks how much data we have
> +         * sent to the guest in total over all descriptors, and goes
> +         * from 0 up to total_size (the size of everything to send to
> +         * the guest including possible trailing 4 bytes of CRC data).
> +         */
>          hwaddr ba[MAX_PS_BUFFERS];
>          E1000EBAState bastate = { { 0 } };
>          bool is_last = false;
> @@ -1512,23 +1519,27 @@ e1000e_write_packet_to_guest(E1000ECore *core, struct 
> NetRxPkt *pkt,
>          e1000e_read_rx_descr(core, &desc, ba);
>
>          if (ba[0]) {
> -            size_t desc_size = total_size - desc_offset;
> -
> -            if (desc_size > core->rx_desc_buf_size) {
> -                desc_size = core->rx_desc_buf_size;
> -            }
> +            /* Total amount of data DMA'd to the guest in this iteration */
> +            size_t desc_size = 0;
> +            /*
> +             * Total space available in this descriptor (we will update
> +             * this as we use it up)
> +             */
> +            size_t rx_desc_buf_size = core->rx_desc_buf_size;
>
>              if (desc_offset < size) {
> -                static const uint32_t fcs_pad;
>                  size_t iov_copy;
> +                /* Amount of data to copy from the incoming packet */
>                  size_t copy_size = size - desc_offset;
> -                if (copy_size > core->rx_desc_buf_size) {
> -                    copy_size = core->rx_desc_buf_size;
> -                }
>
>                  /* For PS mode copy the packet header first */
>                  if (do_ps) {
>                      if (is_first) {
> +                        /*
> +                         * e1000e_do_ps() guarantees that buffer 0 has enough
> +                         * space for the header; otherwise we will not split
> +                         * the packet (i.e. do_ps is false).
> +                         */
>                          size_t ps_hdr_copied = 0;
>                          do {
>                              iov_copy = MIN(ps_hdr_len - ps_hdr_copied,
> @@ -1550,14 +1561,26 @@ e1000e_write_packet_to_guest(E1000ECore *core, struct 
> NetRxPkt *pkt,
>                          } while (ps_hdr_copied < ps_hdr_len);
>
>                          is_first = false;
> +                        desc_size += ps_hdr_len;
>                      } else {
>                          /* Leave buffer 0 of each descriptor except first */
>                          /* empty as per spec 7.1.5.1                      */
>                          e1000e_write_hdr_frag_to_rx_buffers(core, ba, 
> &bastate,
>                                                              NULL, 0);
>                      }
> +                    rx_desc_buf_size -= core->rxbuf_sizes[0];
>                  }
>
> +                /*
> +                 * Clamp the amount of packet data we copy into what will fit
> +                 * into the remaining buffers in the descriptor.
> +                 */
> +                if (copy_size > rx_desc_buf_size) {
> +                    copy_size = rx_desc_buf_size;
> +                }
> +                desc_size += copy_size;
> +                rx_desc_buf_size -= copy_size;
> +
>                  /* Copy packet payload */
>                  while (copy_size) {
>                      iov_copy = MIN(copy_size, iov->iov_len - iov_ofs);
> @@ -1574,12 +1597,22 @@ e1000e_write_packet_to_guest(E1000ECore *core, struct 
> NetRxPkt *pkt,
>                          iov_ofs = 0;
>                      }
>                  }
> +            }
>
> -                if (desc_offset + desc_size >= total_size) {
> -                    /* Simulate FCS checksum presence in the last descriptor 
> */
> -                    e1000e_write_payload_frag_to_rx_buffers(core, ba, 
> &bastate,
> -                          (const char *) &fcs_pad, 
> e1000x_fcs_len(core->mac));
> -                }
> +            if (rx_desc_buf_size &&
> +                desc_offset >= size && desc_offset < total_size) {
> +                /*
> +                 * We are in the last 4 bytes corresponding to the FCS 
> checksum.
> +                 * We only ever write zeroes here (unlike the hardware).
> +                 */
> +                static const uint32_t fcs_pad;
> +                /* Amount of space for the trailing checksum */
> +                size_t fcs_len = MIN(rx_desc_buf_size,
> +                                     total_size - desc_offset);
> +                e1000e_write_payload_frag_to_rx_buffers(core, ba, &bastate,
> +                                                        (const char 
> *)&fcs_pad,
> +                                                        fcs_len);
> +                desc_size += fcs_len;
>              }
>              desc_offset += desc_size;

This should be done before the if (rx_desc_bufs_size && ... ?

>              if (desc_offset >= total_size) {
> --
> 2.43.0
>

Thanks


Reply via email to