Hi Slava

Questions inline.

From: Viacheslav Ovsiienko
> The rdma core library can map doorbell register in two ways, depending on
> the environment variable "MLX5_SHUT_UP_BF":
> 
>   - as regular cached memory, the variable is either missing or
>     set to zero. This type of mapping may cause the significant
>     doorbell register writing latency and requires explicit
>     memory write barrier to mitigate this issue and prevent
>     write combining.
> 
>   - as non-cached memory, the variable is present and set to
>     not "0" value. This type of mapping may cause performance
>     impact under heavy loading conditions but the explicit write
>     memory barrier is not required and it may improve core
>     performance.
> 
> This patch checks the mapping type and provides the memory barrier after
> writing to tx doorbell register if it is needed.
> The mapping type is extracted directly from the uar_mmap_offset field in
> the queue properties.
> 
> Fixes: 18a1c20044c0 ("net/mlx5: implement Tx burst template")
> 
> Signed-off-by: Viacheslav Ovsiienko <[email protected]>
> ---
>  drivers/net/mlx5/Makefile    |  5 +++++
>  drivers/net/mlx5/meson.build |  2 ++
>  drivers/net/mlx5/mlx5_defs.h |  8 ++++++++  drivers/net/mlx5/mlx5_rxtx.c
> | 17 ++++++++++++++++-  drivers/net/mlx5/mlx5_rxtx.h |  1 +
> drivers/net/mlx5/mlx5_txq.c  | 17 ++++++++++++++++-
>  6 files changed, 48 insertions(+), 2 deletions(-)
> 
> diff --git a/drivers/net/mlx5/Makefile b/drivers/net/mlx5/Makefile index
> d89a7b5..7100fa3 100644
> --- a/drivers/net/mlx5/Makefile
> +++ b/drivers/net/mlx5/Makefile
> @@ -189,6 +189,11 @@ mlx5_autoconf.h.new: $(RTE_SDK)/buildtools/auto-
> config-h.sh
>               func mlx5dv_dr_action_create_dest_devx_tir \
>               $(AUTOCONF_OUTPUT)
>       $Q sh -- '$<' '$@' \
> +             HAVE_MLX5DV_MMAP_GET_NC_PAGES_CMD \
> +             infiniband/mlx5dv.h \
> +             enum MLX5_MMAP_GET_NC_PAGES_CMD \
> +             $(AUTOCONF_OUTPUT)
> +     $Q sh -- '$<' '$@' \
>               HAVE_ETHTOOL_LINK_MODE_25G \
>               /usr/include/linux/ethtool.h \
>               enum ETHTOOL_LINK_MODE_25000baseCR_Full_BIT \ diff --
> git a/drivers/net/mlx5/meson.build b/drivers/net/mlx5/meson.build index
> fb764fa..e56e018 100644
> --- a/drivers/net/mlx5/meson.build
> +++ b/drivers/net/mlx5/meson.build
> @@ -128,6 +128,8 @@ if build
>               'mlx5dv_devx_obj_query_async' ],
>               [ 'HAVE_MLX5DV_DR_ACTION_DEST_DEVX_TIR',
> 'infiniband/mlx5dv.h',
>               'mlx5dv_dr_action_create_dest_devx_tir' ],
> +             [ 'HAVE_MLX5DV_MMAP_GET_NC_PAGES_CMD',
> 'infiniband/mlx5dv.h',
> +             'MLX5_MMAP_GET_NC_PAGES_CMD' ],
>               [ 'HAVE_MLX5DV_DR', 'infiniband/mlx5dv.h',
>               'MLX5DV_DR_DOMAIN_TYPE_NIC_RX' ],
>               [ 'HAVE_MLX5DV_DR_ESWITCH', 'infiniband/mlx5dv.h', diff --
> git a/drivers/net/mlx5/mlx5_defs.h b/drivers/net/mlx5/mlx5_defs.h index
> d7440fd..03a8086 100644
> --- a/drivers/net/mlx5/mlx5_defs.h
> +++ b/drivers/net/mlx5/mlx5_defs.h
> @@ -115,6 +115,14 @@
>  #define MLX5_UAR_PAGE_NUM_MAX 64
>  #define MLX5_UAR_PAGE_NUM_MASK ((MLX5_UAR_PAGE_NUM_MAX) -
> 1)
> 
> +/* Fields of memory mapping type in offset parameter of mmap() */
> +#define MLX5_UAR_MMAP_CMD_SHIFT 8 #define
> MLX5_UAR_MMAP_CMD_MASK 0xff
> +
> +#ifndef HAVE_MLX5DV_MMAP_GET_NC_PAGES_CMD #define
> +MLX5_MMAP_GET_NC_PAGES_CMD 3 #endif
> +
>  /* Log 2 of the default number of strides per WQE for Multi-Packet RQ. */
> #define MLX5_MPRQ_STRIDE_NUM_N 6U
> 
> diff --git a/drivers/net/mlx5/mlx5_rxtx.c b/drivers/net/mlx5/mlx5_rxtx.c
> index f540977..fa3aa15 100644
> --- a/drivers/net/mlx5/mlx5_rxtx.c
> +++ b/drivers/net/mlx5/mlx5_rxtx.c
> @@ -4730,8 +4730,23 @@ enum mlx5_txcmp_code {
>        * to improve latencies. The pure software related data treatment
>        * can be completed after doorbell. Tx CQEs for this SQ are
>        * processed in this thread only by the polling.
> +      *
> +      * The rdma core library can map doorbell register in two ways,
> +      * depending on the environment variable "MLX5_SHUT_UP_BF":
> +      *
> +      * - as regular cached memory, the variable is either missing or
> +      *   set to zero. This type of mapping may cause the significant
> +      *   doorbell register writing latency and requires explicit
> +      *   memory write barrier to mitigate this issue and prevent
> +      *   write combining.
> +      *
> +      * - as non-cached memory, the variable is present and set to
> +      *   not "0" value. This type of mapping may cause performance
> +      *   impact under heavy loading conditions but the explicit write
> +      *   memory barrier is not required and it may improve core
> +      *   performance.
>        */
> -     mlx5_tx_dbrec_cond_wmb(txq, loc.wqe_last, 0);
> +     mlx5_tx_dbrec_cond_wmb(txq, loc.wqe_last, !txq->db_nc);
>       /* Not all of the mbufs may be stored into elts yet. */
>       part = MLX5_TXOFF_CONFIG(INLINE) ? 0 : loc.pkts_sent -
> loc.pkts_copy;
>       if (!MLX5_TXOFF_CONFIG(INLINE) && part) { diff --git
> a/drivers/net/mlx5/mlx5_rxtx.h b/drivers/net/mlx5/mlx5_rxtx.h index
> 4f73d91..ae3a763 100644
> --- a/drivers/net/mlx5/mlx5_rxtx.h
> +++ b/drivers/net/mlx5/mlx5_rxtx.h
> @@ -277,6 +277,7 @@ struct mlx5_txq_data {
>       /* When set TX offload for tunneled packets are supported. */
>       uint16_t swp_en:1; /* Whether SW parser is enabled. */
>       uint16_t vlan_en:1; /* VLAN insertion in WQE is supported. */
> +     uint16_t db_nc:1; /* Doorbell mapped to non-cached region. */
>       uint16_t inlen_send; /* Ordinary send data inline size. */
>       uint16_t inlen_empw; /* eMPW max packet size to inline. */
>       uint16_t inlen_mode; /* Minimal data length to inline. */ diff --git
> a/drivers/net/mlx5/mlx5_txq.c b/drivers/net/mlx5/mlx5_txq.c index
> 2b7d6c0..7ec2ef3 100644
> --- a/drivers/net/mlx5/mlx5_txq.c
> +++ b/drivers/net/mlx5/mlx5_txq.c
> @@ -18,6 +18,7 @@
>  #pragma GCC diagnostic ignored "-Wpedantic"
>  #endif
>  #include <infiniband/verbs.h>
> +#include <infiniband/mlx5dv.h>
>  #ifdef PEDANTIC
>  #pragma GCC diagnostic error "-Wpedantic"
>  #endif
> @@ -241,14 +242,21 @@
>  {
>       struct mlx5_priv *priv = txq_ctrl->priv;
>       struct mlx5_proc_priv *ppriv = MLX5_PROC_PRIV(PORT_ID(priv));
> +     const size_t page_size = sysconf(_SC_PAGESIZE);
> +     unsigned int cmd;
>  #ifndef RTE_ARCH_64
>       unsigned int lock_idx;
> -     const size_t page_size = sysconf(_SC_PAGESIZE);
>  #endif
> 
>       assert(rte_eal_process_type() == RTE_PROC_PRIMARY);
>       assert(ppriv);
>       ppriv->uar_table[txq_ctrl->txq.idx] = txq_ctrl->bf_reg;
> +     /* Check the doorbell register mapping type. */
> +     cmd = txq_ctrl->uar_mmap_offset / page_size;
> +     cmd >>= MLX5_UAR_MMAP_CMD_SHIFT;
> +     cmd &= MLX5_UAR_MMAP_CMD_MASK;
> +     if (cmd == MLX5_MMAP_GET_NC_PAGES_CMD)
> +             txq_ctrl->txq.db_nc = 1;

Are you sure we can't retrieve the value in compile time?

>  #ifndef RTE_ARCH_64
>       /* Assign an UAR lock according to UAR page number */
>       lock_idx = (txq_ctrl->uar_mmap_offset / page_size) & @@ -281,6
> +289,7 @@
>       uintptr_t uar_va;
>       uintptr_t offset;
>       const size_t page_size = sysconf(_SC_PAGESIZE);
> +     unsigned int cmd;
> 
>       assert(ppriv);
>       /*
> @@ -300,6 +309,12 @@
>       }
>       addr = RTE_PTR_ADD(addr, offset);
>       ppriv->uar_table[txq->idx] = addr;
> +     /* Check the doorbell register mapping type. */
> +     cmd = txq_ctrl->uar_mmap_offset / page_size;
> +     cmd >>= MLX5_UAR_MMAP_CMD_SHIFT;
> +     cmd &= MLX5_UAR_MMAP_CMD_MASK;
> +     if (cmd == MLX5_MMAP_GET_NC_PAGES_CMD)
> +             txq_ctrl->txq.db_nc = 1;

Looks like double code.
Maybe it is better to get the value by a MACRO?


>       return 0;
>  }
> 
> --
> 1.8.3.1

Reply via email to