Re: [PATCH net-next v2 3/3] tun: AF_XDP Tx zero-copy support
On Wed, Feb 28, 2024 at 07:05:56PM +0800, Yunjian Wang wrote:
> This patch set allows TUN to support the AF_XDP Tx zero-copy feature,
> which can significantly reduce CPU utilization for XDP programs.

Why no Rx ZC support though? What will happen if I try rxdrop xdpsock
against tun with this patch? You clearly allow for that.

> Since commit fc72d1d54dd9 ("tuntap: XDP transmission"), the pointer
> ring has been utilized to queue different types of pointers by encoding
> the type into the lower bits. Therefore, we introduce a new flag,
> TUN_XDP_DESC_FLAG(0x2UL), which allows us to enqueue XDP descriptors
> and differentiate them from XDP buffers and sk_buffs. Additionally, a
> spin lock is added for enabling and disabling operations on the xsk pool.
>
> The performance testing was performed on an Intel E5-2620 2.40GHz machine.
> Traffic was generated/sent through TUN (testpmd txonly with AF_XDP)
> to a VM (testpmd rxonly in the guest).
>
> +------+------+-----------+---------+
> |      | copy | zero-copy | speedup |
> +------+------+-----------+---------+
> | UDP  | Mpps | Mpps      | %       |
> | 64   | 2.5  | 4.0       | 60%     |
> | 512  | 2.1  | 3.6       | 71%     |
> | 1024 | 1.9  | 3.3       | 73%     |
> +------+------+-----------+---------+
>
> Signed-off-by: Yunjian Wang
> ---
>  drivers/net/tun.c      | 177 +++--
>  drivers/vhost/net.c    |   4 +
>  include/linux/if_tun.h |  32
>  3 files changed, 208 insertions(+), 5 deletions(-)
>
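The lower-bit tagging the commit message leans on is easiest to see in a
standalone sketch. TUN_XDP_FLAG (0x1UL) matches the existing definition in
include/linux/if_tun.h; TUN_XDP_DESC_FLAG is the value this patch
introduces, and the helper names below are illustrative, modeled on the
existing tun_xdp_to_ptr()/tun_ptr_to_xdp() pair:

#include <stdbool.h>

/* Sketch of the lower-bit pointer tagging described above. Slab/page
 * allocations are at least 4-byte aligned, so the two low bits of a
 * pointer are free to carry a type tag for the ptr_ring entry.
 */
#define TUN_XDP_FLAG       0x1UL  /* entry is an xdp_frame */
#define TUN_XDP_DESC_FLAG  0x2UL  /* entry is an AF_XDP tx descriptor */
#define TUN_PTR_MASK       (TUN_XDP_FLAG | TUN_XDP_DESC_FLAG)

static inline void *tun_desc_to_ptr(void *desc)
{
	/* Tag a descriptor before enqueueing it on the pointer ring. */
	return (void *)((unsigned long)desc | TUN_XDP_DESC_FLAG);
}

static inline bool tun_ptr_is_desc(void *ptr)
{
	/* Consumers check the tag to pick the right dequeue path. */
	return ((unsigned long)ptr & TUN_PTR_MASK) == TUN_XDP_DESC_FLAG;
}

static inline void *tun_ptr_untag(void *ptr)
{
	/* Strip the tag to recover the real pointer. */
	return (void *)((unsigned long)ptr & ~TUN_PTR_MASK);
}

Untagged entries (tag bits 00) remain plain sk_buff pointers, so the three
entry types coexist on one ring without widening it.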
Re: [PATCH net-next] sfc: reduce the number of requested xdp ev queues
On Thu, Jan 21, 2021 at 06:11:30PM +0100, Jesper Dangaard Brouer wrote:
> On Wed, 20 Jan 2021 13:27:59 -0800
> Ivan Babrou wrote:
>
> > Without this change the driver tries to allocate too many queues,
> > breaching the number of available msi-x interrupts on machines
> > with many logical cpus and default adapter settings:
> >
> >   Insufficient resources for 12 XDP event queues (24 other channels, max 32)
> >
> > Which in turn triggers EINVAL on XDP processing:
> >
> >   sfc 0000:86:00.0 ext0: XDP TX failed (-22)

Please mention in the commit message *how* you are addressing/fixing this issue.

> > Signed-off-by: Ivan Babrou
> > ---
>
> I guess the patch is good in itself due to available msi-x interrupts.
>
> Per earlier discussion: What will happen if a CPU with an ID higher
> than available XDP TX-queues redirects a packet out this driver?

+1 on that question

> >  drivers/net/ethernet/sfc/efx_channels.c | 6 --
> >  1 file changed, 4 insertions(+), 2 deletions(-)
> >
> > diff --git a/drivers/net/ethernet/sfc/efx_channels.c b/drivers/net/ethernet/sfc/efx_channels.c
> > index a4a626e9cd9a..1bfeee283ea9 100644
> > --- a/drivers/net/ethernet/sfc/efx_channels.c
> > +++ b/drivers/net/ethernet/sfc/efx_channels.c
> > @@ -17,6 +17,7 @@
> >  #include "rx_common.h"
> >  #include "nic.h"
> >  #include "sriov.h"
> > +#include "workarounds.h"
> >
> >  /* This is the first interrupt mode to try out of:
> >   * 0 => MSI-X
> > @@ -137,6 +138,7 @@ static int efx_allocate_msix_channels(struct efx_nic *efx,
> >  {
> >  	unsigned int n_channels = parallelism;
> >  	int vec_count;
> > +	int tx_per_ev;
> >  	int n_xdp_tx;
> >  	int n_xdp_ev;
> >
> > @@ -149,9 +151,9 @@ static int efx_allocate_msix_channels(struct efx_nic *efx,
> >  	 * multiple tx queues, assuming tx and ev queues are both
> >  	 * maximum size.
> >  	 */
> > -
> > +	tx_per_ev = EFX_MAX_EVQ_SIZE / EFX_TXQ_MAX_ENT(efx);
> >  	n_xdp_tx = num_possible_cpus();
> > -	n_xdp_ev = DIV_ROUND_UP(n_xdp_tx, EFX_MAX_TXQ_PER_CHANNEL);
> > +	n_xdp_ev = DIV_ROUND_UP(n_xdp_tx, tx_per_ev);
> >
> >  	vec_count = pci_msix_vec_count(efx->pci_dev);
> >  	if (vec_count < 0)
> >

--
Best regards,
Jesper Dangaard Brouer
MSc.CS, Principal Kernel Engineer at Red Hat
LinkedIn: http://www.linkedin.com/in/brouer
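For concreteness, the shape of the computation the patch changes, with
hypothetical constants (the real EFX_MAX_EVQ_SIZE and EFX_TXQ_MAX_ENT
values live in the sfc headers; the numbers here are chosen only to show
how packing more tx queues per event queue shrinks the MSI-X demand):

#include <stdio.h>

#define DIV_ROUND_UP(n, d) (((n) + (d) - 1) / (d))

int main(void)
{
	/* Hypothetical values standing in for the driver constants. */
	unsigned int max_evq_size = 32768; /* EFX_MAX_EVQ_SIZE */
	unsigned int txq_max_ent  = 8192;  /* EFX_TXQ_MAX_ENT(efx) */
	unsigned int n_cpus       = 96;    /* num_possible_cpus() */

	/* How many maximum-size tx queues one event queue can service. */
	unsigned int tx_per_ev = max_evq_size / txq_max_ent;      /* 4  */
	unsigned int n_xdp_ev  = DIV_ROUND_UP(n_cpus, tx_per_ev); /* 24 */

	printf("%u tx queues per event queue -> %u XDP event queues\n",
	       tx_per_ev, n_xdp_ev);
	return 0;
}

When tx_per_ev works out larger than the old divisor
EFX_MAX_TXQ_PER_CHANNEL, the driver previously requested more event queues
(hence more MSI-X vectors) than it actually needed.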
Re: [PATCH net-next 3/4] mvpp2: add basic XDP support
On Thu, Jul 02, 2020 at 11:08:19AM +0300, ilias.apalodi...@linaro.org wrote:
> On Tue, Jun 30, 2020 at 08:09:29PM +0200, Matteo Croce wrote:
> > From: Matteo Croce
> >
> > Add XDP native support.
> > For now only the XDP_DROP, XDP_PASS and XDP_REDIRECT
> > verdicts are supported.
> >
> > Co-developed-by: Sven Auhagen
> > Signed-off-by: Sven Auhagen
> > Signed-off-by: Matteo Croce
> > ---
>
> [...]
>
> >  }
> >
> > +static int
> > +mvpp2_run_xdp(struct mvpp2_port *port, struct mvpp2_rx_queue *rxq,
> > +	      struct bpf_prog *prog, struct xdp_buff *xdp,
> > +	      struct page_pool *pp)
> > +{
> > +	unsigned int len, sync, err;
> > +	struct page *page;
> > +	u32 ret, act;
> > +
> > +	len = xdp->data_end - xdp->data_hard_start - MVPP2_SKB_HEADROOM;
> > +	act = bpf_prog_run_xdp(prog, xdp);
> > +
> > +	/* Due xdp_adjust_tail: DMA sync for_device cover max len CPU touch */
> > +	sync = xdp->data_end - xdp->data_hard_start - MVPP2_SKB_HEADROOM;
> > +	sync = max(sync, len);
> > +
> > +	switch (act) {
> > +	case XDP_PASS:
> > +		ret = MVPP2_XDP_PASS;
> > +		break;
> > +	case XDP_REDIRECT:
> > +		err = xdp_do_redirect(port->dev, xdp, prog);
> > +		if (unlikely(err)) {
> > +			ret = MVPP2_XDP_DROPPED;
> > +			page = virt_to_head_page(xdp->data);
> > +			page_pool_put_page(pp, page, sync, true);
> > +		} else {
> > +			ret = MVPP2_XDP_REDIR;
> > +		}
> > +		break;
> > +	default:
> > +		bpf_warn_invalid_xdp_action(act);
> > +		fallthrough;
> > +	case XDP_ABORTED:
> > +		trace_xdp_exception(port->dev, prog, act);
> > +		fallthrough;
> > +	case XDP_DROP:
> > +		page = virt_to_head_page(xdp->data);
> > +		page_pool_put_page(pp, page, sync, true);
> > +		ret = MVPP2_XDP_DROPPED;
> > +		break;
> > +	}
> > +
> > +	return ret;
> > +}
> > +
> >  /* Main rx processing */
> >  static int mvpp2_rx(struct mvpp2_port *port, struct napi_struct *napi,
> >  		    int rx_todo, struct mvpp2_rx_queue *rxq)
> >  {
> >  	struct net_device *dev = port->dev;
> > +	struct bpf_prog *xdp_prog;
> > +	struct xdp_buff xdp;
> >  	int rx_received;
> >  	int rx_done = 0;
> > +	u32 xdp_ret = 0;
> >  	u32 rcvd_pkts = 0;
> >  	u32 rcvd_bytes = 0;
> >
> > +	rcu_read_lock();
> > +
> > +	xdp_prog = READ_ONCE(port->xdp_prog);
> > +
> >  	/* Get number of received packets and clamp the to-do */
> >  	rx_received = mvpp2_rxq_received(port, rxq->id);
> >  	if (rx_todo > rx_received)
> > @@ -3060,7 +3115,7 @@ static int mvpp2_rx(struct mvpp2_port *port, struct napi_struct *napi,
> >  		dma_addr_t dma_addr;
> >  		phys_addr_t phys_addr;
> >  		u32 rx_status;
> > -		int pool, rx_bytes, err;
> > +		int pool, rx_bytes, err, ret;
> >  		void *data;
> >
> >  		rx_done++;
> > @@ -3096,6 +3151,33 @@ static int mvpp2_rx(struct mvpp2_port *port, struct napi_struct *napi,
> >  		else
> >  			frag_size = bm_pool->frag_size;
> >
> > +		if (xdp_prog) {
> > +			xdp.data_hard_start = data;
> > +			xdp.data = data + MVPP2_MH_SIZE + MVPP2_SKB_HEADROOM;
> > +			xdp.data_end = xdp.data + rx_bytes;
> > +			xdp.frame_sz = PAGE_SIZE;
> > +
> > +			if (bm_pool->pkt_size == MVPP2_BM_SHORT_PKT_SIZE)
> > +				xdp.rxq = &rxq->xdp_rxq_short;
> > +			else
> > +				xdp.rxq = &rxq->xdp_rxq_long;
> > +
> > +			xdp_set_data_meta_invalid(&xdp);
> > +
> > +			ret = mvpp2_run_xdp(port, rxq, xdp_prog, &xdp, pp);
> > +
> > +			if (ret) {
> > +				xdp_ret |= ret;
> > +				err = mvpp2_rx_refill(port, bm_pool, pp, pool);
> > +				if (err) {
> > +					netdev_err(port->dev, "failed to refill BM pools\n");
> > +					goto err_drop_frame;
> > +				}
> > +
> > +				continue;
> > +			}
> > +		}
> > +
> >  		skb = build_skb(data, frag_size);
> >  		if (!skb) {
> >  			netdev_warn(port->dev, "skb build failed\n");
> > @@ -3118,7 +3200,7 @@ static int mvpp2_rx(struct mvpp2_port *port, struct napi_struct *napi,
> >  		rcvd_pkts++;
> >  		rcvd_bytes += rx_bytes;
> >
> > -		skb_reserve(skb, MVPP2_MH_SIZE + NET_SKB_PAD);
> > +		skb_reserve(skb, MVPP2_MH_SIZE + MVPP2_SKB_HEADROOM);
> >  		skb_put(skb, rx_bytes);
> >  		skb->protocol = eth_type_trans(skb, dev);
> >  		mvpp2_rx_csum(port, rx_status, skb);
> > @@ -3133,6 +3215,8 @@ static int mvpp2_rx(struct
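A minimal program for exercising the supported verdicts on this driver.
The section name follows the libbpf convention and the attach command is
iproute2's generic XDP loader; both are independent of mvpp2, and the
interface name is illustrative:

#include <linux/bpf.h>
#include <bpf/bpf_helpers.h>

/* Return one of the verdicts the driver handles; swap in XDP_PASS, or a
 * bpf_redirect_map() call, to exercise the other code paths.
 */
SEC("xdp")
int xdp_drop_all(struct xdp_md *ctx)
{
	return XDP_DROP;
}

char _license[] SEC("license") = "GPL";

Build with clang -O2 -target bpf -c xdp_drop.c -o xdp_drop.o and attach
with, e.g., ip link set dev eth0 xdp obj xdp_drop.o sec xdp.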
Re: [PATCH net v3] ixgbe: fix double clean of tx descriptors with xdp
On Thu, 22 Aug 2019 20:12:37 +0300 Ilya Maximets wrote:
> Tx code doesn't clear the descriptors' status after cleaning.
> So, if the budget is larger than the number of used elems in a ring, some
> descriptors will be accounted twice and xsk_umem_complete_tx will move
> prod_tail far beyond the prod_head, breaking the completion queue ring.
>
> Fix that by limiting the number of descriptors to clean to the number
> of used descriptors in the tx ring.
>
> The 'ixgbe_clean_xdp_tx_irq()' function was refactored to look more like
> 'ixgbe_xsk_clean_tx_ring()' since we're allowed to directly use the
> 'next_to_clean' and 'next_to_use' indexes.
>
> Fixes: 8221c5eba8c1 ("ixgbe: add AF_XDP zero-copy Tx support")
> Signed-off-by: Ilya Maximets
> ---
>
> Version 3:
>   * Reverted some refactoring made for v2.
>   * Eliminated 'budget' for tx clean.
>   * prefetch returned.
>
> Version 2:
>   * 'ixgbe_clean_xdp_tx_irq()' refactored to look more like
>     'ixgbe_xsk_clean_tx_ring()'.
>
>  drivers/net/ethernet/intel/ixgbe/ixgbe_xsk.c | 29
>  1 file changed, 11 insertions(+), 18 deletions(-)
>
> diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_xsk.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_xsk.c
> index 6b609553329f..a3b6d8c89127 100644
> --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_xsk.c
> +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_xsk.c
> @@ -633,19 +633,17 @@ static void ixgbe_clean_xdp_tx_buffer(struct ixgbe_ring *tx_ring,
>  bool ixgbe_clean_xdp_tx_irq(struct ixgbe_q_vector *q_vector,
>  			    struct ixgbe_ring *tx_ring, int napi_budget)

While you're at it, can you please also remove the 'napi_budget'
argument? It wasn't used at all even before your patch.

I'm jumping in late, but I was really wondering about this and hesitated
to take part in the discussion since v1 of this patch - can you elaborate
on why simply clearing the DD bit wasn't sufficient?

Maciej

>  {
> +	u16 ntc = tx_ring->next_to_clean, ntu = tx_ring->next_to_use;
>  	unsigned int total_packets = 0, total_bytes = 0;
> -	u32 i = tx_ring->next_to_clean, xsk_frames = 0;
> -	unsigned int budget = q_vector->tx.work_limit;
>  	struct xdp_umem *umem = tx_ring->xsk_umem;
>  	union ixgbe_adv_tx_desc *tx_desc;
>  	struct ixgbe_tx_buffer *tx_bi;
> -	bool xmit_done;
> +	u32 xsk_frames = 0;
>
> -	tx_bi = &tx_ring->tx_buffer_info[i];
> -	tx_desc = IXGBE_TX_DESC(tx_ring, i);
> -	i -= tx_ring->count;
> +	tx_bi = &tx_ring->tx_buffer_info[ntc];
> +	tx_desc = IXGBE_TX_DESC(tx_ring, ntc);
>
> -	do {
> +	while (ntc != ntu) {
>  		if (!(tx_desc->wb.status & cpu_to_le32(IXGBE_TXD_STAT_DD)))
>  			break;
>
> @@ -661,22 +659,18 @@ bool ixgbe_clean_xdp_tx_irq(struct ixgbe_q_vector *q_vector,
>
>  		tx_bi++;
>  		tx_desc++;
> -		i++;
> -		if (unlikely(!i)) {
> -			i -= tx_ring->count;
> +		ntc++;
> +		if (unlikely(ntc == tx_ring->count)) {
> +			ntc = 0;
>  			tx_bi = tx_ring->tx_buffer_info;
>  			tx_desc = IXGBE_TX_DESC(tx_ring, 0);
>  		}
>
>  		/* issue prefetch for next Tx descriptor */
>  		prefetch(tx_desc);
> +	}
>
> -		/* update budget accounting */
> -		budget--;
> -	} while (likely(budget));
> -
> -	i += tx_ring->count;
> -	tx_ring->next_to_clean = i;
> +	tx_ring->next_to_clean = ntc;
>
>  	u64_stats_update_begin(&tx_ring->syncp);
>  	tx_ring->stats.bytes += total_bytes;
> @@ -688,8 +682,7 @@ bool ixgbe_clean_xdp_tx_irq(struct ixgbe_q_vector *q_vector,
>  	if (xsk_frames)
>  		xsk_umem_complete_tx(umem, xsk_frames);
>
> -	xmit_done = ixgbe_xmit_zc(tx_ring, q_vector->tx.work_limit);
> -	return budget > 0 && xmit_done;
> +	return ixgbe_xmit_zc(tx_ring, q_vector->tx.work_limit);
>  }
>
>  int ixgbe_xsk_async_xmit(struct net_device *dev, u32 qid)
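To make the failure mode and the fix concrete, a userspace toy model of
the bounded clean loop (the ring size, names, and stale-DD setup are
illustrative, not the driver's):

#include <stdio.h>

#define RING_SIZE 8
#define STAT_DD   0x1	/* "descriptor done" bit, never cleared by sw */

int main(void)
{
	unsigned int status[RING_SIZE];
	unsigned int ntc = 0, ntu = 3, completed = 0;

	/* Hardware finished the 3 in-flight descriptors (0..2), while
	 * entries 3..7 still carry DD bits from a previous pass. A
	 * budget-driven loop with budget >= 8 would walk into those
	 * stale entries, see DD set, and complete them a second time,
	 * pushing the completion queue's prod_tail past prod_head.
	 */
	for (unsigned int i = 0; i < RING_SIZE; i++)
		status[i] = STAT_DD;

	/* Bounded clean (the fix): stop at next_to_use, so descriptors
	 * outside the in-flight window are never examined at all.
	 */
	while (ntc != ntu) {
		if (!(status[ntc] & STAT_DD))
			break;		/* hardware not done yet */
		completed++;		/* return one umem frame */
		if (++ntc == RING_SIZE)
			ntc = 0;
	}

	printf("completed %u of 3 in-flight descriptors\n", completed);
	return 0;
}

This also hints at the answer to Maciej's question: clearing DD after
each clean would equally prevent double accounting, at the cost of an
extra descriptor write per completion; bounding the walk by ntc/ntu
avoids touching the descriptor at all.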