Re: [lng-odp] [PATCH 1/2] linux-generic: pktio: recover from transmit errors
Merged, thanks! Maxim. On 11/13/2015 11:37, Elo, Matias (Nokia - FI/Espoo) wrote: The patch fixes socket mmap transmit bug. For both patches: Reviewed-and-Tested-by: Matias Elo While testing the patch set I noticed that the pktio_test_send_failure() doesn't pass when using a directly attached cable between NIC ports. However, this seems to be unrelated to the patch set. $ sudo ODP_PKTIO_IF0=p1p1 ODP_PKTIO_IF1=p1p2 ./test/validation/pktio/pktio_main ... Test: pktio_test_send_failure ... Received 0 packets FAILED 1. pktio.c:394 - CU_FAIL("failed to receive transmitted packet") 2. pktio.c:943 - CU_FAIL("failed to receive transmitted packets\n") 3. pktio.c:394 - CU_FAIL("failed to receive transmitted packet") 4. pktio.c:963 - i == TX_BATCH_LEN -Matias -Original Message- From: lng-odp [mailto:lng-odp-boun...@lists.linaro.org] On Behalf Of EXT Stuart Haslam Sent: Tuesday, November 10, 2015 8:24 PM To: lng-odp@lists.linaro.org Subject: [lng-odp] [PATCH 1/2] linux-generic: pktio: recover from transmit errors Fix the way transmit errors are handled to avoid getting out of sync between kernel and user space, which causes transmission to hang. Fixes bug https://bugs.linaro.org/show_bug.cgi?id=1890 Signed-off-by: Stuart Haslam --- platform/linux-generic/pktio/socket_mmap.c | 59 +--- -- 1 file changed, 41 insertions(+), 18 deletions(-) diff --git a/platform/linux-generic/pktio/socket_mmap.c b/platform/linux- generic/pktio/socket_mmap.c index 79ff82d..2bdb72b 100644 --- a/platform/linux-generic/pktio/socket_mmap.c +++ b/platform/linux-generic/pktio/socket_mmap.c @@ -182,6 +182,7 @@ static inline unsigned pkt_mmap_v2_tx(int sock, struct ring *ring, unsigned n, i = 0; unsigned nb_tx = 0; int send_errno; + int total_len = 0; first_frame_num = ring->frame_num; frame_num = first_frame_num; @@ -195,6 +196,7 @@ static inline unsigned pkt_mmap_v2_tx(int sock, struct ring *ring, pkt_len = odp_packet_len(pkt_table[i]); ppd.v2->tp_h.tp_snaplen = pkt_len; ppd.v2->tp_h.tp_len = pkt_len; + total_len += pkt_len; buf = (uint8_t *)ppd.raw + TPACKET2_HDRLEN - sizeof(struct sockaddr_ll); @@ -215,28 +217,49 @@ static inline unsigned pkt_mmap_v2_tx(int sock, struct ring *ring, * failure a value of -1 is returned, even if the failure occurred * after some of the packets in the ring have already been sent, so we * need to inspect the packet status to determine which were sent. */ - for (frame_num = first_frame_num, n = 0; n < i; ++n) { - struct tpacket2_hdr *hdr = ring->rd[frame_num].iov_base; + if (odp_likely(ret == total_len)) { + nb_tx = i; + ring->frame_num = frame_num; + } else if (ret == -1) { + for (frame_num = first_frame_num, n = 0; n < i; ++n) { + struct tpacket2_hdr *hdr = ring- rd[frame_num].iov_base; + + if (odp_likely(hdr->tp_status == TP_STATUS_AVAILABLE || + hdr->tp_status == TP_STATUS_SENDING)) { + nb_tx++; + } else { + /* The remaining frames weren't sent, clear +* their status to indicate we're not waiting +* for the kernel to process them. */ + hdr->tp_status = TP_STATUS_AVAILABLE; + } - if (odp_likely(hdr->tp_status == TP_STATUS_AVAILABLE)) { - nb_tx++; - } else if (hdr->tp_status & TP_STATUS_WRONG_FORMAT) { - /* status will be cleared on the next send request */ - break; + if (++frame_num >= frame_count) + frame_num = 0; } - if (++frame_num >= frame_count) - frame_num = 0; - } - - ring->frame_num = (ring->frame_num + nb_tx) % frame_count; + ring->frame_num = (first_frame_num + nb_tx) % frame_count; + + if (nb_tx == 0 && SOCK_ERR_REPORT(send_errno)) { + __odp_errno = send_errno; + /* ENOBUFS indicates that the transmit queue is full, +* which will happen regularly when overloaded so don't +* print it */ + if (errno != ENOBUFS) + ODP_ERR("sendto(pkt mmap): %s\n", + strerror(send_errno)); + return -1; + } + } else { + /*
Re: [lng-odp] [PATCH 1/2] linux-generic: pktio: recover from transmit errors
The patch fixes socket mmap transmit bug. For both patches: Reviewed-and-Tested-by: Matias Elo While testing the patch set I noticed that the pktio_test_send_failure() doesn't pass when using a directly attached cable between NIC ports. However, this seems to be unrelated to the patch set. $ sudo ODP_PKTIO_IF0=p1p1 ODP_PKTIO_IF1=p1p2 ./test/validation/pktio/pktio_main ... Test: pktio_test_send_failure ... Received 0 packets FAILED 1. pktio.c:394 - CU_FAIL("failed to receive transmitted packet") 2. pktio.c:943 - CU_FAIL("failed to receive transmitted packets\n") 3. pktio.c:394 - CU_FAIL("failed to receive transmitted packet") 4. pktio.c:963 - i == TX_BATCH_LEN -Matias > -Original Message- > From: lng-odp [mailto:lng-odp-boun...@lists.linaro.org] On Behalf Of EXT > Stuart > Haslam > Sent: Tuesday, November 10, 2015 8:24 PM > To: lng-odp@lists.linaro.org > Subject: [lng-odp] [PATCH 1/2] linux-generic: pktio: recover from transmit > errors > > Fix the way transmit errors are handled to avoid getting out of sync > between kernel and user space, which causes transmission to hang. > > Fixes bug https://bugs.linaro.org/show_bug.cgi?id=1890 > > Signed-off-by: Stuart Haslam > --- > platform/linux-generic/pktio/socket_mmap.c | 59 +--- > -- > 1 file changed, 41 insertions(+), 18 deletions(-) > > diff --git a/platform/linux-generic/pktio/socket_mmap.c b/platform/linux- > generic/pktio/socket_mmap.c > index 79ff82d..2bdb72b 100644 > --- a/platform/linux-generic/pktio/socket_mmap.c > +++ b/platform/linux-generic/pktio/socket_mmap.c > @@ -182,6 +182,7 @@ static inline unsigned pkt_mmap_v2_tx(int sock, struct > ring *ring, > unsigned n, i = 0; > unsigned nb_tx = 0; > int send_errno; > + int total_len = 0; > > first_frame_num = ring->frame_num; > frame_num = first_frame_num; > @@ -195,6 +196,7 @@ static inline unsigned pkt_mmap_v2_tx(int sock, struct > ring *ring, > pkt_len = odp_packet_len(pkt_table[i]); > ppd.v2->tp_h.tp_snaplen = pkt_len; > ppd.v2->tp_h.tp_len = pkt_len; > + total_len += pkt_len; > > buf = (uint8_t *)ppd.raw + TPACKET2_HDRLEN - > sizeof(struct sockaddr_ll); > @@ -215,28 +217,49 @@ static inline unsigned pkt_mmap_v2_tx(int sock, struct > ring *ring, >* failure a value of -1 is returned, even if the failure occurred >* after some of the packets in the ring have already been sent, so we >* need to inspect the packet status to determine which were sent. */ > - for (frame_num = first_frame_num, n = 0; n < i; ++n) { > - struct tpacket2_hdr *hdr = ring->rd[frame_num].iov_base; > + if (odp_likely(ret == total_len)) { > + nb_tx = i; > + ring->frame_num = frame_num; > + } else if (ret == -1) { > + for (frame_num = first_frame_num, n = 0; n < i; ++n) { > + struct tpacket2_hdr *hdr = ring- > >rd[frame_num].iov_base; > + > + if (odp_likely(hdr->tp_status == TP_STATUS_AVAILABLE > || > +hdr->tp_status == TP_STATUS_SENDING)) { > + nb_tx++; > + } else { > + /* The remaining frames weren't sent, clear > + * their status to indicate we're not waiting > + * for the kernel to process them. */ > + hdr->tp_status = TP_STATUS_AVAILABLE; > + } > > - if (odp_likely(hdr->tp_status == TP_STATUS_AVAILABLE)) { > - nb_tx++; > - } else if (hdr->tp_status & TP_STATUS_WRONG_FORMAT) { > - /* status will be cleared on the next send request */ > - break; > + if (++frame_num >= frame_count) > + frame_num = 0; > } > > - if (++frame_num >= frame_count) > - frame_num = 0; > - } > - > - ring->frame_num = (ring->frame_num + nb_tx) % frame_count; > + ring->frame_num = (first_frame_num + nb_tx) % frame_count; > + > + if (nb_tx == 0 && SOCK_ERR_REPORT(send_errno)) { > + __odp_errno = send_errno; > + /* ENOBUFS indicates that the transmit queue is full, > + * which will happen regularly when overloaded so don't > + * print it */ &
Re: [lng-odp] [PATCH 1/2] linux-generic: pktio: recover from transmit errors
Sure, I'll check them. -Matias > -Original Message- > From: EXT Maxim Uvarov [mailto:maxim.uva...@linaro.org] > Sent: Wednesday, November 11, 2015 1:53 PM > To: lng-odp@lists.linaro.org; Elo, Matias (Nokia - FI/Espoo) > > Subject: Re: [lng-odp] [PATCH 1/2] linux-generic: pktio: recover from transmit > errors > > Hello Matias, > > can you please review and test that patches in your env? To be sure that > bug is totally fixed. > > Thank you, > Maxim. > > On 11/10/2015 21:24, Stuart Haslam wrote: > > Fix the way transmit errors are handled to avoid getting out of sync > > between kernel and user space, which causes transmission to hang. > > > > Fixes bug https://bugs.linaro.org/show_bug.cgi?id=1890 > > > > Signed-off-by: Stuart Haslam > > --- > > platform/linux-generic/pktio/socket_mmap.c | 59 > +- > > 1 file changed, 41 insertions(+), 18 deletions(-) > > > > diff --git a/platform/linux-generic/pktio/socket_mmap.c b/platform/linux- > generic/pktio/socket_mmap.c > > index 79ff82d..2bdb72b 100644 > > --- a/platform/linux-generic/pktio/socket_mmap.c > > +++ b/platform/linux-generic/pktio/socket_mmap.c > > @@ -182,6 +182,7 @@ static inline unsigned pkt_mmap_v2_tx(int sock, struct > ring *ring, > > unsigned n, i = 0; > > unsigned nb_tx = 0; > > int send_errno; > > + int total_len = 0; > > > > first_frame_num = ring->frame_num; > > frame_num = first_frame_num; > > @@ -195,6 +196,7 @@ static inline unsigned pkt_mmap_v2_tx(int sock, struct > ring *ring, > > pkt_len = odp_packet_len(pkt_table[i]); > > ppd.v2->tp_h.tp_snaplen = pkt_len; > > ppd.v2->tp_h.tp_len = pkt_len; > > + total_len += pkt_len; > > > > buf = (uint8_t *)ppd.raw + TPACKET2_HDRLEN - > >sizeof(struct sockaddr_ll); > > @@ -215,28 +217,49 @@ static inline unsigned pkt_mmap_v2_tx(int sock, > struct ring *ring, > > * failure a value of -1 is returned, even if the failure occurred > > * after some of the packets in the ring have already been sent, so we > > * need to inspect the packet status to determine which were sent. */ > > - for (frame_num = first_frame_num, n = 0; n < i; ++n) { > > - struct tpacket2_hdr *hdr = ring->rd[frame_num].iov_base; > > + if (odp_likely(ret == total_len)) { > > + nb_tx = i; > > + ring->frame_num = frame_num; > > + } else if (ret == -1) { > > + for (frame_num = first_frame_num, n = 0; n < i; ++n) { > > + struct tpacket2_hdr *hdr = ring- > >rd[frame_num].iov_base; > > + > > + if (odp_likely(hdr->tp_status == TP_STATUS_AVAILABLE > || > > + hdr->tp_status == TP_STATUS_SENDING)) { > > + nb_tx++; > > + } else { > > + /* The remaining frames weren't sent, clear > > +* their status to indicate we're not waiting > > +* for the kernel to process them. */ > > + hdr->tp_status = TP_STATUS_AVAILABLE; > > + } > > > > - if (odp_likely(hdr->tp_status == TP_STATUS_AVAILABLE)) { > > - nb_tx++; > > - } else if (hdr->tp_status & TP_STATUS_WRONG_FORMAT) { > > - /* status will be cleared on the next send request */ > > - break; > > + if (++frame_num >= frame_count) > > + frame_num = 0; > > } > > > > - if (++frame_num >= frame_count) > > - frame_num = 0; > > - } > > - > > - ring->frame_num = (ring->frame_num + nb_tx) % frame_count; > > + ring->frame_num = (first_frame_num + nb_tx) % frame_count; > > + > > + if (nb_tx == 0 && SOCK_ERR_REPORT(send_errno)) { > > + __odp_errno = send_errno; > > + /* ENOBUFS indicates that the transmit queue is full, > > +* which will happen regularly when overloaded so don't > > +* print it */ > > + if (errno != ENOBUFS) > > + ODP_ERR("sendto(pkt mmap): %s\n", > > + strerror(send_errno)); > > + return
Re: [lng-odp] [PATCH 1/2] linux-generic: pktio: recover from transmit errors
Hello Matias, can you please review and test that patches in your env? To be sure that bug is totally fixed. Thank you, Maxim. On 11/10/2015 21:24, Stuart Haslam wrote: Fix the way transmit errors are handled to avoid getting out of sync between kernel and user space, which causes transmission to hang. Fixes bug https://bugs.linaro.org/show_bug.cgi?id=1890 Signed-off-by: Stuart Haslam --- platform/linux-generic/pktio/socket_mmap.c | 59 +- 1 file changed, 41 insertions(+), 18 deletions(-) diff --git a/platform/linux-generic/pktio/socket_mmap.c b/platform/linux-generic/pktio/socket_mmap.c index 79ff82d..2bdb72b 100644 --- a/platform/linux-generic/pktio/socket_mmap.c +++ b/platform/linux-generic/pktio/socket_mmap.c @@ -182,6 +182,7 @@ static inline unsigned pkt_mmap_v2_tx(int sock, struct ring *ring, unsigned n, i = 0; unsigned nb_tx = 0; int send_errno; + int total_len = 0; first_frame_num = ring->frame_num; frame_num = first_frame_num; @@ -195,6 +196,7 @@ static inline unsigned pkt_mmap_v2_tx(int sock, struct ring *ring, pkt_len = odp_packet_len(pkt_table[i]); ppd.v2->tp_h.tp_snaplen = pkt_len; ppd.v2->tp_h.tp_len = pkt_len; + total_len += pkt_len; buf = (uint8_t *)ppd.raw + TPACKET2_HDRLEN - sizeof(struct sockaddr_ll); @@ -215,28 +217,49 @@ static inline unsigned pkt_mmap_v2_tx(int sock, struct ring *ring, * failure a value of -1 is returned, even if the failure occurred * after some of the packets in the ring have already been sent, so we * need to inspect the packet status to determine which were sent. */ - for (frame_num = first_frame_num, n = 0; n < i; ++n) { - struct tpacket2_hdr *hdr = ring->rd[frame_num].iov_base; + if (odp_likely(ret == total_len)) { + nb_tx = i; + ring->frame_num = frame_num; + } else if (ret == -1) { + for (frame_num = first_frame_num, n = 0; n < i; ++n) { + struct tpacket2_hdr *hdr = ring->rd[frame_num].iov_base; + + if (odp_likely(hdr->tp_status == TP_STATUS_AVAILABLE || + hdr->tp_status == TP_STATUS_SENDING)) { + nb_tx++; + } else { + /* The remaining frames weren't sent, clear +* their status to indicate we're not waiting +* for the kernel to process them. */ + hdr->tp_status = TP_STATUS_AVAILABLE; + } - if (odp_likely(hdr->tp_status == TP_STATUS_AVAILABLE)) { - nb_tx++; - } else if (hdr->tp_status & TP_STATUS_WRONG_FORMAT) { - /* status will be cleared on the next send request */ - break; + if (++frame_num >= frame_count) + frame_num = 0; } - if (++frame_num >= frame_count) - frame_num = 0; - } - - ring->frame_num = (ring->frame_num + nb_tx) % frame_count; + ring->frame_num = (first_frame_num + nb_tx) % frame_count; + + if (nb_tx == 0 && SOCK_ERR_REPORT(send_errno)) { + __odp_errno = send_errno; + /* ENOBUFS indicates that the transmit queue is full, +* which will happen regularly when overloaded so don't +* print it */ + if (errno != ENOBUFS) + ODP_ERR("sendto(pkt mmap): %s\n", + strerror(send_errno)); + return -1; + } + } else { + /* Short send, return value is number of bytes sent so use this +* to determine number of complete frames sent. */ + for (n = 0; n < i && ret > 0; ++n) { + ret -= odp_packet_len(pkt_table[n]); + if (ret >= 0) + nb_tx++; + } - if (odp_unlikely(ret == -1 && -nb_tx == 0 && -SOCK_ERR_REPORT(send_errno))) { - __odp_errno = send_errno; - ODP_ERR("sendto(pkt mmap): %s\n", strerror(send_errno)); - return -1; + ring->frame_num = (first_frame_num + nb_tx) % frame_count; } for (i = 0; i < nb_tx; ++i) ___ lng-odp mailing list lng-odp@lists.linaro.org https://lists.linaro.org/mailman/listinfo/lng-odp
[lng-odp] [PATCH 1/2] linux-generic: pktio: recover from transmit errors
Fix the way transmit errors are handled to avoid getting out of sync between kernel and user space, which causes transmission to hang. Fixes bug https://bugs.linaro.org/show_bug.cgi?id=1890 Signed-off-by: Stuart Haslam --- platform/linux-generic/pktio/socket_mmap.c | 59 +- 1 file changed, 41 insertions(+), 18 deletions(-) diff --git a/platform/linux-generic/pktio/socket_mmap.c b/platform/linux-generic/pktio/socket_mmap.c index 79ff82d..2bdb72b 100644 --- a/platform/linux-generic/pktio/socket_mmap.c +++ b/platform/linux-generic/pktio/socket_mmap.c @@ -182,6 +182,7 @@ static inline unsigned pkt_mmap_v2_tx(int sock, struct ring *ring, unsigned n, i = 0; unsigned nb_tx = 0; int send_errno; + int total_len = 0; first_frame_num = ring->frame_num; frame_num = first_frame_num; @@ -195,6 +196,7 @@ static inline unsigned pkt_mmap_v2_tx(int sock, struct ring *ring, pkt_len = odp_packet_len(pkt_table[i]); ppd.v2->tp_h.tp_snaplen = pkt_len; ppd.v2->tp_h.tp_len = pkt_len; + total_len += pkt_len; buf = (uint8_t *)ppd.raw + TPACKET2_HDRLEN - sizeof(struct sockaddr_ll); @@ -215,28 +217,49 @@ static inline unsigned pkt_mmap_v2_tx(int sock, struct ring *ring, * failure a value of -1 is returned, even if the failure occurred * after some of the packets in the ring have already been sent, so we * need to inspect the packet status to determine which were sent. */ - for (frame_num = first_frame_num, n = 0; n < i; ++n) { - struct tpacket2_hdr *hdr = ring->rd[frame_num].iov_base; + if (odp_likely(ret == total_len)) { + nb_tx = i; + ring->frame_num = frame_num; + } else if (ret == -1) { + for (frame_num = first_frame_num, n = 0; n < i; ++n) { + struct tpacket2_hdr *hdr = ring->rd[frame_num].iov_base; + + if (odp_likely(hdr->tp_status == TP_STATUS_AVAILABLE || + hdr->tp_status == TP_STATUS_SENDING)) { + nb_tx++; + } else { + /* The remaining frames weren't sent, clear +* their status to indicate we're not waiting +* for the kernel to process them. */ + hdr->tp_status = TP_STATUS_AVAILABLE; + } - if (odp_likely(hdr->tp_status == TP_STATUS_AVAILABLE)) { - nb_tx++; - } else if (hdr->tp_status & TP_STATUS_WRONG_FORMAT) { - /* status will be cleared on the next send request */ - break; + if (++frame_num >= frame_count) + frame_num = 0; } - if (++frame_num >= frame_count) - frame_num = 0; - } - - ring->frame_num = (ring->frame_num + nb_tx) % frame_count; + ring->frame_num = (first_frame_num + nb_tx) % frame_count; + + if (nb_tx == 0 && SOCK_ERR_REPORT(send_errno)) { + __odp_errno = send_errno; + /* ENOBUFS indicates that the transmit queue is full, +* which will happen regularly when overloaded so don't +* print it */ + if (errno != ENOBUFS) + ODP_ERR("sendto(pkt mmap): %s\n", + strerror(send_errno)); + return -1; + } + } else { + /* Short send, return value is number of bytes sent so use this +* to determine number of complete frames sent. */ + for (n = 0; n < i && ret > 0; ++n) { + ret -= odp_packet_len(pkt_table[n]); + if (ret >= 0) + nb_tx++; + } - if (odp_unlikely(ret == -1 && -nb_tx == 0 && -SOCK_ERR_REPORT(send_errno))) { - __odp_errno = send_errno; - ODP_ERR("sendto(pkt mmap): %s\n", strerror(send_errno)); - return -1; + ring->frame_num = (first_frame_num + nb_tx) % frame_count; } for (i = 0; i < nb_tx; ++i) -- 2.1.1 ___ lng-odp mailing list lng-odp@lists.linaro.org https://lists.linaro.org/mailman/listinfo/lng-odp