Re: iwm(4): Improve iwm_rx_addbuf() error handling.
> Date: Thu, 6 Oct 2016 12:56:37 +0200 > From: Stefan Sperling > > On Thu, Oct 06, 2016 at 10:49:21AM +0200, Imre Vadasz wrote: > > Hi, > > This patch improves the error handling iwm_rx_addbuf(), specifically in > > out-of-memory and mbuf exhaustion cases. > > > > Keep an additional/spare bus_dmamap_t object around to make error handling > > for bus_dmamap_load_mbuf() failures easier in iwm_rx_addbuf(). > > This seems like an easy way to avoid having weird panic() cases in the code > > (like in iwn, which then tries to bus_dmamap_load_mbuf() the old mbuf and > > panics when that fails). > > > > Move mbuf modifications that are needed before calling ieee80211_input() > > in iwm_rx_rx_mpdu() to below the iwm_rx_addbuf call. Otherwise we can end > > up with a broken mbuf in the rx ring when iwm_rx_rx_mpdu() aborts. > > If memory is so tight that this panic triggers is it reasonable to > even try to recover? I honestly don't know. > > This problem exists in many drivers. > Are we going to add a spare map to all of them? > > $ egrep -i 'panic.*rx.*buf' pci/*c ic/*c > pci/if_ipw.c: panic("%s: could not load old rx mbuf", > pci/if_iwi.c: panic("%s: could not load old rx mbuf", > pci/if_iwm.c: panic("iwm: could not load RX mbuf"); > pci/if_iwn.c: panic("%s: could not load old RX mbuf", > pci/if_ix.c: panic("%s: ixgbe_rxeof: NULL mbuf in slot %d " > pci/if_nfe.c: panic("%s: could not load old rx mbuf", > pci/if_pcn.c: panic("pcn_add_rxbuf"); > pci/if_rtwn.c:panic("%s: could not load old RX mbuf", > pci/if_stge.c:panic("stge_add_rxbuf");/* XXX */ > pci/if_vio.c: panic("enqueue_prep for rx buffers: %d", r); > pci/if_wpi.c: panic("%s: could not load old RX mbuf", > ic/aic6915.c: panic("sf_add_rxbuf"); /* XXX */ > ic/atw.c: panic("atw_add_rxbuf"); /* XXX */ > ic/dwc_gmac.c:panic("%s: could not load old > rx mbuf", > ic/gem.c: panic("gem_add_rxbuf"); /* XXX */ > ic/malo.c:panic("%s: could not load old rx mbuf", > ic/rt2560.c: panic("%s: could not load old rx mbuf", > ic/rt2661.c: panic("%s: could not load old rx mbuf", > ic/rt2860.c: panic("%s: could not load old rx mbuf", > ic/smc83c170.c: panic("epic_add_rxbuf");/* XXX */ I think we're at the point where bus_dmamap_load() should never fail because of memory shortage. As part of the effort to make interrupt handlers mpsafe, we pre-allocate all the necessary resources. I think the sparc64 iommu code can still return ENOMEM, but only if you screw up and bus_dmam_load() something that doesn't fit into the resources created by bus_dmamap_create(). And that would almost certainly be a driver bug.
Re: iwm(4): Improve iwm_rx_addbuf() error handling.
On Thu, Oct 06, 2016 at 10:49:21AM +0200, Imre Vadasz wrote: > Hi, > This patch improves the error handling iwm_rx_addbuf(), specifically in > out-of-memory and mbuf exhaustion cases. > > Keep an additional/spare bus_dmamap_t object around to make error handling > for bus_dmamap_load_mbuf() failures easier in iwm_rx_addbuf(). > This seems like an easy way to avoid having weird panic() cases in the code > (like in iwn, which then tries to bus_dmamap_load_mbuf() the old mbuf and > panics when that fails). > > Move mbuf modifications that are needed before calling ieee80211_input() > in iwm_rx_rx_mpdu() to below the iwm_rx_addbuf call. Otherwise we can end > up with a broken mbuf in the rx ring when iwm_rx_rx_mpdu() aborts. If memory is so tight that this panic triggers is it reasonable to even try to recover? I honestly don't know. This problem exists in many drivers. Are we going to add a spare map to all of them? $ egrep -i 'panic.*rx.*buf' pci/*c ic/*c pci/if_ipw.c: panic("%s: could not load old rx mbuf", pci/if_iwi.c: panic("%s: could not load old rx mbuf", pci/if_iwm.c: panic("iwm: could not load RX mbuf"); pci/if_iwn.c: panic("%s: could not load old RX mbuf", pci/if_ix.c:panic("%s: ixgbe_rxeof: NULL mbuf in slot %d " pci/if_nfe.c: panic("%s: could not load old rx mbuf", pci/if_pcn.c: panic("pcn_add_rxbuf"); pci/if_rtwn.c: panic("%s: could not load old RX mbuf", pci/if_stge.c: panic("stge_add_rxbuf");/* XXX */ pci/if_vio.c: panic("enqueue_prep for rx buffers: %d", r); pci/if_wpi.c: panic("%s: could not load old RX mbuf", ic/aic6915.c: panic("sf_add_rxbuf"); /* XXX */ ic/atw.c: panic("atw_add_rxbuf"); /* XXX */ ic/dwc_gmac.c: panic("%s: could not load old rx mbuf", ic/gem.c: panic("gem_add_rxbuf"); /* XXX */ ic/malo.c: panic("%s: could not load old rx mbuf", ic/rt2560.c:panic("%s: could not load old rx mbuf", ic/rt2661.c:panic("%s: could not load old rx mbuf", ic/rt2860.c:panic("%s: could not load old rx mbuf", ic/smc83c170.c: panic("epic_add_rxbuf");/* XXX */
iwm(4): Improve iwm_rx_addbuf() error handling.
Hi, This patch improves the error handling iwm_rx_addbuf(), specifically in out-of-memory and mbuf exhaustion cases. Keep an additional/spare bus_dmamap_t object around to make error handling for bus_dmamap_load_mbuf() failures easier in iwm_rx_addbuf(). This seems like an easy way to avoid having weird panic() cases in the code (like in iwn, which then tries to bus_dmamap_load_mbuf() the old mbuf and panics when that fails). Move mbuf modifications that are needed before calling ieee80211_input() in iwm_rx_rx_mpdu() to below the iwm_rx_addbuf call. Otherwise we can end up with a broken mbuf in the rx ring when iwm_rx_rx_mpdu() aborts. Index: sys/dev/pci/if_iwm.c === RCS file: /cvs/src/sys/dev/pci/if_iwm.c,v retrieving revision 1.142 diff -u -r1.142 if_iwm.c --- sys/dev/pci/if_iwm.c27 Sep 2016 15:54:33 - 1.142 +++ sys/dev/pci/if_iwm.c6 Oct 2016 08:38:18 - @@ -1009,6 +1009,18 @@ } ring->stat = ring->stat_dma.vaddr; + /* +* Allocate one more bus_dmamap_t for iwm_rx_addbuf() usage from +* iwm_rx_rx_mpdu(). +*/ + err = bus_dmamap_create(sc->sc_dmat, IWM_RBUF_SIZE, 1, + IWM_RBUF_SIZE, 0, BUS_DMA_NOWAIT | BUS_DMA_ALLOCNOW, + &ring->spare_map); + if (err) { + printf("%s: could not create RX buf DMA map\n", + DEVNAME(sc)); + goto fail; + } for (i = 0; i < IWM_RX_RING_COUNT; i++) { struct iwm_rx_data *data = &ring->data[i]; @@ -1069,6 +1081,10 @@ iwm_dma_contig_free(&ring->desc_dma); iwm_dma_contig_free(&ring->stat_dma); + if (ring->spare_map != NULL) { + bus_dmamap_destroy(sc->sc_dmat, ring->spare_map); + ring->spare_map = NULL; + } for (i = 0; i < IWM_RX_RING_COUNT; i++) { struct iwm_rx_data *data = &ring->data[i]; @@ -3116,8 +3132,8 @@ struct iwm_rx_ring *ring = &sc->rxq; struct iwm_rx_data *data = &ring->data[idx]; struct mbuf *m; + bus_dmamap_t dmamap; int err; - int fatal = 0; m = m_gethdr(M_DONTWAIT, MT_DATA); if (m == NULL) @@ -3133,21 +3149,23 @@ return ENOBUFS; } - if (data->m != NULL) { - bus_dmamap_unload(sc->sc_dmat, data->map); - fatal = 1; - } - m->m_len = m->m_pkthdr.len = m->m_ext.ext_size; - err = bus_dmamap_load_mbuf(sc->sc_dmat, data->map, m, + err = bus_dmamap_load_mbuf(sc->sc_dmat, ring->spare_map, m, BUS_DMA_READ|BUS_DMA_NOWAIT); if (err) { - /* XXX */ - if (fatal) - panic("iwm: could not load RX mbuf"); + printf("%s: could not load RX mbuf\n", DEVNAME(sc)); m_freem(m); return err; } + + if (data->m != NULL) + bus_dmamap_unload(sc->sc_dmat, data->map); + + /* Swap ring->spare_map with data->map */ + dmamap = data->map; + data->map = ring->spare_map; + ring->spare_map = dmamap; + data->m = m; bus_dmamap_sync(sc->sc_dmat, data->map, 0, size, BUS_DMASYNC_PREREAD); @@ -3255,7 +3273,7 @@ struct ieee80211_node *ni; struct ieee80211_channel *c = NULL; struct ieee80211_rxinfo rxi; - struct mbuf *m; + struct mbuf *m = data->m; struct iwm_rx_phy_info *phy_info; struct iwm_rx_mpdu_res_start *rx_res; int device_timestamp; @@ -3273,10 +3291,6 @@ rx_pkt_status = le32toh(*(uint32_t *)(pkt->data + sizeof(*rx_res) + len)); - m = data->m; - m->m_data = pkt->data + sizeof(*rx_res); - m->m_pkthdr.len = m->m_len = len; - if (__predict_false(phy_info->cfg_phy_cnt > 20)) return; @@ -3296,6 +3310,9 @@ if (iwm_rx_addbuf(sc, IWM_RBUF_SIZE, sc->rxq.cur) != 0) return; + + m->m_data = pkt->data + sizeof(*rx_res); + m->m_pkthdr.len = m->m_len = len; if (le32toh(phy_info->channel) < nitems(ic->ic_channels)) c = &ic->ic_channels[le32toh(phy_info->channel)]; Index: sys/dev/pci/if_iwmvar.h === RCS file: /cvs/src/sys/dev/pci/if_iwmvar.h,v retrieving revision 1.24 diff -u -r1.24 if_iwmvar.h --- sys/dev/pci/if_iwmvar.h 21 Sep 2016 13:53:18 - 1.24 +++ sys/dev/pci/if_iwmvar.h 6 Oct 2016 08:38:18 - @@ -276,6 +276,7 @@ uint32_t*desc; struct iwm_rb_status*stat; struct iwm_rx_data data[IWM_RX_RING_COUNT]; + bus_dmamap_tspare_map; int cur; };