On Mon, Dec 16, 2019 at 03:22:00PM +0100, Ilya Maximets wrote: > Marking the structure as 64 bytes aligned forces compiler to produce > big holes in the containing structures in order to fulfill this > requirement. Also, any structure that contains this one as a member > automatically inherits this huge alignment making resulted memory > layout not efficient. For example, 'struct umem_pool' currently > uses 3 full cache lines (192 bytes) with only 32 bytes of actual data: > > struct umem_pool { > int index; /* 0 4 */ > unsigned int size; /* 4 4 */ > > /* XXX 56 bytes hole, try to pack */ > > /* --- cacheline 1 boundary (64 bytes) --- */ > struct ovs_spin lock __attribute__((__aligned__(64))); /* 64 64 */ > > /* XXX last struct has 48 bytes of padding */ > > /* --- cacheline 2 boundary (128 bytes) --- */ > void * * array; /* 128 8 */ > > /* size: 192, cachelines: 3, members: 4 */ > /* sum members: 80, holes: 1, sum holes: 56 */ > /* padding: 56 */ > /* paddings: 1, sum paddings: 48 */ > /* forced alignments: 1, forced holes: 1, sum forced holes: 56 */ > } __attribute__((__aligned__(64))); > > Actual alignment of a spin lock is required only for Tx queue locks > inside netdev-afxdp to avoid false sharing, in all other cases > alignment only produces inefficient memory usage.
yes, now I realize umem->lock does not have the false sharing problem. > > Also, CACHE_LINE_SIZE macro should be used instead of 64 as different > platforms may have different cache line sizes. > > Using PADDED_MEMBERS to avoid alignment inheritance. > > CC: William Tu <u9012...@gmail.com> > Fixes: ae36d63d7e3c ("ovs-thread: Make struct spin lock cache aligned.") > Signed-off-by: Ilya Maximets <i.maxim...@ovn.org> > --- Somehow my pahole stops working, but the patch looks good to me, and thanks for also fixing the include ordering. Acked-by: William Tu <u9012...@gmail.com> > include/openvswitch/thread.h | 2 +- > lib/netdev-afxdp.c | 20 ++++++++++++++------ > lib/netdev-afxdp.h | 13 +++++++------ > lib/netdev-linux-private.h | 2 +- > 4 files changed, 23 insertions(+), 14 deletions(-) > > diff --git a/include/openvswitch/thread.h b/include/openvswitch/thread.h > index 5053cb309..acc822904 100644 > --- a/include/openvswitch/thread.h > +++ b/include/openvswitch/thread.h > @@ -34,7 +34,7 @@ struct OVS_LOCKABLE ovs_mutex { > }; > > #ifdef HAVE_PTHREAD_SPIN_LOCK > -OVS_ALIGNED_STRUCT(64, OVS_LOCKABLE ovs_spin) { > +struct OVS_LOCKABLE ovs_spin { > pthread_spinlock_t lock; > const char *where; /* NULL if and only if uninitialized. */ > }; > diff --git a/lib/netdev-afxdp.c b/lib/netdev-afxdp.c > index ca2dfd005..c59a671e7 100644 > --- a/lib/netdev-afxdp.c > +++ b/lib/netdev-afxdp.c > @@ -40,6 +40,7 @@ > #include "openvswitch/compiler.h" > #include "openvswitch/dynamic-string.h" > #include "openvswitch/list.h" > +#include "openvswitch/thread.h" > #include "openvswitch/vlog.h" > #include "packets.h" > #include "socket-util.h" > @@ -158,6 +159,13 @@ struct xsk_socket_info { > atomic_uint64_t tx_dropped; > }; > > +struct netdev_afxdp_tx_lock { > + /* Padding to make netdev_afxdp_tx_lock exactly one cache line long. */ > + PADDED_MEMBERS(CACHE_LINE_SIZE, > + struct ovs_spin lock; > + ); > +}; > + > #ifdef HAVE_XDP_NEED_WAKEUP > static inline void > xsk_rx_wakeup_if_needed(struct xsk_umem_info *umem, > @@ -512,10 +520,10 @@ xsk_configure_all(struct netdev *netdev) > } > > n_txq = netdev_n_txq(netdev); > - dev->tx_locks = xcalloc(n_txq, sizeof *dev->tx_locks); > + dev->tx_locks = xzalloc_cacheline(n_txq * sizeof *dev->tx_locks); > > for (i = 0; i < n_txq; i++) { > - ovs_spin_init(&dev->tx_locks[i]); > + ovs_spin_init(&dev->tx_locks[i].lock); > } > > return 0; > @@ -577,9 +585,9 @@ xsk_destroy_all(struct netdev *netdev) > > if (dev->tx_locks) { > for (i = 0; i < netdev_n_txq(netdev); i++) { > - ovs_spin_destroy(&dev->tx_locks[i]); > + ovs_spin_destroy(&dev->tx_locks[i].lock); > } > - free(dev->tx_locks); > + free_cacheline(dev->tx_locks); > dev->tx_locks = NULL; > } > } > @@ -1076,9 +1084,9 @@ netdev_afxdp_batch_send(struct netdev *netdev, int qid, > dev = netdev_linux_cast(netdev); > qid = qid % netdev_n_txq(netdev); > > - ovs_spin_lock(&dev->tx_locks[qid]); > + ovs_spin_lock(&dev->tx_locks[qid].lock); > ret = __netdev_afxdp_batch_send(netdev, qid, batch); > - ovs_spin_unlock(&dev->tx_locks[qid]); > + ovs_spin_unlock(&dev->tx_locks[qid].lock); > } else { > ret = __netdev_afxdp_batch_send(netdev, qid, batch); > } > diff --git a/lib/netdev-afxdp.h b/lib/netdev-afxdp.h > index 8188bc669..ae6971efd 100644 > --- a/lib/netdev-afxdp.h > +++ b/lib/netdev-afxdp.h > @@ -34,15 +34,16 @@ enum afxdp_mode { > OVS_AF_XDP_MODE_MAX, > }; > > -struct netdev; > -struct xsk_socket_info; > -struct xdp_umem; > -struct dp_packet_batch; > -struct smap; > struct dp_packet; > +struct dp_packet_batch; > +struct netdev; > +struct netdev_afxdp_tx_lock; > +struct netdev_custom_stats; > struct netdev_rxq; > struct netdev_stats; > -struct netdev_custom_stats; > +struct smap; > +struct xdp_umem; > +struct xsk_socket_info; > > int netdev_afxdp_rxq_construct(struct netdev_rxq *rxq_); > void netdev_afxdp_rxq_destruct(struct netdev_rxq *rxq_); > diff --git a/lib/netdev-linux-private.h b/lib/netdev-linux-private.h > index 8873caa9d..f08159aa7 100644 > --- a/lib/netdev-linux-private.h > +++ b/lib/netdev-linux-private.h > @@ -108,7 +108,7 @@ struct netdev_linux { > bool use_need_wakeup; > bool requested_need_wakeup; > > - struct ovs_spin *tx_locks; /* spin lock array for TX queues. */ > + struct netdev_afxdp_tx_lock *tx_locks; /* Array of locks for TX queues. > */ > #endif > }; > > -- > 2.17.1 > _______________________________________________ dev mailing list d...@openvswitch.org https://mail.openvswitch.org/mailman/listinfo/ovs-dev