On Wed, 17 Apr 2024 23:55:44 +0800 Heng Qi wrote:
> $ ethtool -c ethx
> ...
> rx-eqe-profile:
> {.usec =   1, .pkts = 256, .comps =   0,},
> {.usec =   8, .pkts = 256, .comps =   0,},
> {.usec =  64, .pkts = 256, .comps =   0,},
> {.usec = 128, .pkts = 256, .comps =   0,},
> {.usec = 256, .pkts = 256, .comps =   0,}
> rx-cqe-profile:   n/a
> tx-eqe-profile:   n/a
> tx-cqe-profile:   n/a

I don't think that exposing CQE vs EQE mode makes much sense here.
We enable the right mode via:

struct kernel_ethtool_coalesce {
        u8 use_cqe_mode_tx;
        u8 use_cqe_mode_rx;

the user needs to set the packets and usecs in an educated way 
(matching the mode). The kernel never changes the mode.

Am I missing something?

> +  -
> +    name: moderation

irq-moderation ?

> +    attributes:
> +      -
> +        name: usec
> +        type: u32
> +      -
> +        name: pkts
> +        type: u32
> +      -
> +        name: comps
> +        type: u32

> +++ b/include/linux/ethtool.h
> @@ -284,7 +284,11 @@ bool ethtool_convert_link_mode_to_legacy_u32(u32 
> *legacy_u32,
>  #define ETHTOOL_COALESCE_TX_AGGR_MAX_BYTES   BIT(24)
>  #define ETHTOOL_COALESCE_TX_AGGR_MAX_FRAMES  BIT(25)
>  #define ETHTOOL_COALESCE_TX_AGGR_TIME_USECS  BIT(26)
> -#define ETHTOOL_COALESCE_ALL_PARAMS          GENMASK(26, 0)
> +#define ETHTOOL_COALESCE_RX_EQE_PROFILE         BIT(27)
> +#define ETHTOOL_COALESCE_RX_CQE_PROFILE         BIT(28)
> +#define ETHTOOL_COALESCE_TX_EQE_PROFILE         BIT(29)
> +#define ETHTOOL_COALESCE_TX_CQE_PROFILE         BIT(30)
> +#define ETHTOOL_COALESCE_ALL_PARAMS          GENMASK(30, 0)

I think it's better to add these to netdev_ops, see below.

> diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
> index d45f330d083d..a1c7e9c2be86 100644
> --- a/include/linux/netdevice.h
> +++ b/include/linux/netdevice.h
> @@ -80,6 +80,25 @@ struct xdp_frame;
>  struct xdp_metadata_ops;
>  struct xdp_md;
>  
> +#if IS_ENABLED(CONFIG_DIMLIB)

Don't wrap type definitions in an ifdef.

> +struct dim_cq_moder;

Unnecessary.

> +#define NETDEV_PROFILE_USEC  BIT(0)  /* device supports usec field 
> modification */
> +#define NETDEV_PROFILE_PKTS  BIT(1)  /* device supports pkts field 
> modification */
> +#define NETDEV_PROFILE_COMPS BIT(2)  /* device supports comps field 
> modification */
> +
> +struct netdev_profile_moder {
> +     /* See NETDEV_PROFILE_* */
> +     unsigned int flags;
> +
> +     /* DIM profile lists for different dim cq modes */
> +     struct dim_cq_moder *rx_eqe_profile;
> +     struct dim_cq_moder *rx_cqe_profile;
> +     struct dim_cq_moder *tx_eqe_profile;
> +     struct dim_cq_moder *tx_cqe_profile;
> +};
> +#endif

All of this can move to the dim header. No need to grow netdevice.h

>  typedef u32 xdp_features_t;
>  
>  void synchronize_net(void);


> +static int dev_dim_profile_init(struct net_device *dev)
> +{
> +#if IS_ENABLED(CONFIG_DIMLIB)
> +     u32 supported = dev->ethtool_ops->supported_coalesce_params;
> +
> +     struct netdev_profile_moder *moder;
> +     int length;
> +
> +     dev->moderation = kzalloc(sizeof(*dev->moderation), GFP_KERNEL);
> +     if (!dev->moderation)
> +             goto err_moder;

if the device has no DIM we should allocate nothing

> +     moder = dev->moderation;
> +     length = NET_DIM_PARAMS_NUM_PROFILES * sizeof(*moder->rx_eqe_profile);
> +
> +     if (supported & ETHTOOL_COALESCE_RX_EQE_PROFILE) {
> +             moder->rx_eqe_profile = kmemdup(dim_rx_profile[0], length, 
> GFP_KERNEL);
> +             if (!moder->rx_eqe_profile)
> +                     goto err_rx_eqe;
> +     }
> +     if (supported & ETHTOOL_COALESCE_RX_CQE_PROFILE) {
> +             moder->rx_cqe_profile = kmemdup(dim_rx_profile[1], length, 
> GFP_KERNEL);
> +             if (!moder->rx_cqe_profile)
> +                     goto err_rx_cqe;
> +     }
> +     if (supported & ETHTOOL_COALESCE_TX_EQE_PROFILE) {
> +             moder->tx_eqe_profile = kmemdup(dim_tx_profile[0], length, 
> GFP_KERNEL);
> +             if (!moder->tx_eqe_profile)
> +                     goto err_tx_eqe;
> +     }
> +     if (supported & ETHTOOL_COALESCE_TX_CQE_PROFILE) {
> +             moder->tx_cqe_profile = kmemdup(dim_tx_profile[1], length, 
> GFP_KERNEL);
> +             if (!moder->tx_cqe_profile)
> +                     goto err_tx_cqe;
> +     }

This code should also live in dim rather than dev.c.

> +#endif
> +     return 0;
> +
> +#if IS_ENABLED(CONFIG_DIMLIB)
> +err_tx_cqe:
> +     kfree(moder->tx_eqe_profile);
> +err_tx_eqe:
> +     kfree(moder->rx_cqe_profile);
> +err_rx_cqe:
> +     kfree(moder->rx_eqe_profile);
> +err_rx_eqe:
> +     kfree(moder);
> +err_moder:
> +     return -ENOMEM;
> +#endif
> +}
> +
>  /**
>   * register_netdevice() - register a network device
>   * @dev: device to register
> @@ -10258,6 +10310,10 @@ int register_netdevice(struct net_device *dev)
>       if (ret)
>               return ret;
>  
> +     ret = dev_dim_profile_init(dev);
> +     if (ret)
> +             return ret;

This is fine but the driver still has to manually do bunch of init:

                INIT_WORK(&vi->rq[i].dim.work, virtnet_rx_dim_work);
                vi->rq[i].dim.mode = DIM_CQ_PERIOD_MODE_START_FROM_EQE;

It'd be better to collect all this static info (flags, mode, work func)
in one place / struct, attached to netdev or netdev_ops. Then the
driver can call a helper like which only needs to take netdev and dim
as arguments.

>       spin_lock_init(&dev->addr_list_lock);
>       netdev_set_addr_lockdep_class(dev);
>  
> @@ -11011,6 +11067,27 @@ struct net_device *alloc_netdev_mqs(int sizeof_priv, 
> const char *name,
>  }
>  EXPORT_SYMBOL(alloc_netdev_mqs);
>  
> +static void netif_free_profile(struct net_device *dev)
> +{
> +#if IS_ENABLED(CONFIG_DIMLIB)
> +     u32 supported = dev->ethtool_ops->supported_coalesce_params;
> +
> +     if (supported & ETHTOOL_COALESCE_RX_EQE_PROFILE)
> +             kfree(dev->moderation->rx_eqe_profile);

kfree(NULL) is valid, you don't have to check the flags

> +     if (supported & ETHTOOL_COALESCE_RX_CQE_PROFILE)
> +             kfree(dev->moderation->rx_cqe_profile);
> +
> +     if (supported & ETHTOOL_COALESCE_TX_EQE_PROFILE)
> +             kfree(dev->moderation->tx_eqe_profile);
> +
> +     if (supported & ETHTOOL_COALESCE_TX_CQE_PROFILE)
> +             kfree(dev->moderation->tx_cqe_profile);
> +
> +     kfree(dev->moderation);
> +#endif
> +}
> +
>  /**
>   * free_netdev - free network device
>   * @dev: device
> @@ -11036,6 +11113,8 @@ void free_netdev(struct net_device *dev)
>               return;
>       }
>  
> +     netif_free_profile(dev);
> +
>       netif_free_tx_queues(dev);
>       netif_free_rx_queues(dev);
>  
> diff --git a/net/ethtool/coalesce.c b/net/ethtool/coalesce.c
> index 83112c1a71ae..3a41840fbcc7 100644
> --- a/net/ethtool/coalesce.c
> +++ b/net/ethtool/coalesce.c
> @@ -1,5 +1,6 @@
>  // SPDX-License-Identifier: GPL-2.0-only
>  
> +#include <linux/dim.h>
>  #include "netlink.h"
>  #include "common.h"
>  
> @@ -51,6 +52,10 @@ __CHECK_SUPPORTED_OFFSET(COALESCE_RX_MAX_FRAMES_HIGH);
>  __CHECK_SUPPORTED_OFFSET(COALESCE_TX_USECS_HIGH);
>  __CHECK_SUPPORTED_OFFSET(COALESCE_TX_MAX_FRAMES_HIGH);
>  __CHECK_SUPPORTED_OFFSET(COALESCE_RATE_SAMPLE_INTERVAL);
> +__CHECK_SUPPORTED_OFFSET(COALESCE_RX_EQE_PROFILE);
> +__CHECK_SUPPORTED_OFFSET(COALESCE_RX_CQE_PROFILE);
> +__CHECK_SUPPORTED_OFFSET(COALESCE_TX_EQE_PROFILE);
> +__CHECK_SUPPORTED_OFFSET(COALESCE_TX_CQE_PROFILE);
>  
>  const struct nla_policy ethnl_coalesce_get_policy[] = {
>       [ETHTOOL_A_COALESCE_HEADER]             =
> @@ -82,6 +87,14 @@ static int coalesce_prepare_data(const struct 
> ethnl_req_info *req_base,
>  static int coalesce_reply_size(const struct ethnl_req_info *req_base,
>                              const struct ethnl_reply_data *reply_base)
>  {
> +     int modersz = nla_total_size(0) + /* _MODERATIONS_MODERATION, nest */
> +                   nla_total_size(sizeof(u32)) + /* _MODERATION_USEC */
> +                   nla_total_size(sizeof(u32)) + /* _MODERATION_PKTS */
> +                   nla_total_size(sizeof(u32));  /* _MODERATION_COMPS */
> +
> +     int total_modersz = nla_total_size(0) +  /* _{R,T}X_{E,C}QE_PROFILE, 
> nest */
> +                     modersz * NET_DIM_PARAMS_NUM_PROFILES;
> +
>       return nla_total_size(sizeof(u32)) +    /* _RX_USECS */
>              nla_total_size(sizeof(u32)) +    /* _RX_MAX_FRAMES */
>              nla_total_size(sizeof(u32)) +    /* _RX_USECS_IRQ */
> @@ -108,7 +121,8 @@ static int coalesce_reply_size(const struct 
> ethnl_req_info *req_base,
>              nla_total_size(sizeof(u8)) +     /* _USE_CQE_MODE_RX */
>              nla_total_size(sizeof(u32)) +    /* _TX_AGGR_MAX_BYTES */
>              nla_total_size(sizeof(u32)) +    /* _TX_AGGR_MAX_FRAMES */
> -            nla_total_size(sizeof(u32));     /* _TX_AGGR_TIME_USECS */
> +            nla_total_size(sizeof(u32)) +    /* _TX_AGGR_TIME_USECS */
> +            total_modersz * 4;               /* _{R,T}X_{E,C}QE_PROFILE */
>  }
>  
>  static bool coalesce_put_u32(struct sk_buff *skb, u16 attr_type, u32 val,
> @@ -127,6 +141,62 @@ static bool coalesce_put_bool(struct sk_buff *skb, u16 
> attr_type, u32 val,
>       return nla_put_u8(skb, attr_type, !!val);
>  }
>  
> +#if IS_ENABLED(CONFIG_DIMLIB)

prefer using

        if (IS_ENABLED(CONFIG_DIMLIB))
                return -EOPNOTSUPP;

rather than hiding code from the compiler, where possible 

> +/**
> + * coalesce_put_profile - fill reply with a nla nest with four child nla 
> nests.
> + * @skb: socket buffer the message is stored in
> + * @attr_type: nest attr type ETHTOOL_A_COALESCE_*X_*QE_PROFILE
> + * @profile: data passed to userspace
> + * @supported_params: modifiable parameters supported by the driver
> + *
> + * Put a dim profile nest attribute. Refer to 
> ETHTOOL_A_MODERATIONS_MODERATION.
> + *
> + * Return: false to indicate successful placement or no placement, and
> + * true to pass the -EMSGSIZE error to the wrapper.

Why the bool? Doesn't most of the similar code return the error?

> + */
> +static bool coalesce_put_profile(struct sk_buff *skb, u16 attr_type,
> +                              const struct dim_cq_moder *profile,
> +                              u32 supported_params)
> +{
> +     struct nlattr *profile_attr, *moder_attr;
> +     bool emsg = !!-EMSGSIZE;
> +     int i;
> +
> +     if (!profile)
> +             return false;
> +
> +     if (!(supported_params & attr_to_mask(attr_type)))
> +             return false;
> +
> +     profile_attr = nla_nest_start(skb, attr_type);
> +     if (!profile_attr)
> +             return emsg;
> +
> +     for (i = 0; i < NET_DIM_PARAMS_NUM_PROFILES; i++) {
> +             moder_attr = nla_nest_start(skb, 
> ETHTOOL_A_MODERATIONS_MODERATION);
> +             if (!moder_attr)
> +                     goto nla_cancel_profile;
> +
> +             if (nla_put_u32(skb, ETHTOOL_A_MODERATION_USEC, 
> profile[i].usec) ||
> +                 nla_put_u32(skb, ETHTOOL_A_MODERATION_PKTS, 
> profile[i].pkts) ||
> +                 nla_put_u32(skb, ETHTOOL_A_MODERATION_COMPS, 
> profile[i].comps))

Only put attrs for supported fields

> +                     goto nla_cancel_moder;
> +
> +             nla_nest_end(skb, moder_attr);
> +     }
> +
> +     nla_nest_end(skb, profile_attr);
> +
> +     return 0;
> +
> +nla_cancel_moder:
> +     nla_nest_cancel(skb, moder_attr);
> +nla_cancel_profile:
> +     nla_nest_cancel(skb, profile_attr);
> +     return emsg;
> +}
> +#endif
> +
>  static int coalesce_fill_reply(struct sk_buff *skb,
>                              const struct ethnl_req_info *req_base,
>                              const struct ethnl_reply_data *reply_base)
> @@ -134,6 +204,9 @@ static int coalesce_fill_reply(struct sk_buff *skb,
>       const struct coalesce_reply_data *data = COALESCE_REPDATA(reply_base);
>       const struct kernel_ethtool_coalesce *kcoal = &data->kernel_coalesce;
>       const struct ethtool_coalesce *coal = &data->coalesce;
> +#if IS_ENABLED(CONFIG_DIMLIB)
> +     struct net_device *dev = req_base->dev;
> +#endif
>       u32 supported = data->supported_params;
>  
>       if (coalesce_put_u32(skb, ETHTOOL_A_COALESCE_RX_USECS,
> @@ -192,6 +265,21 @@ static int coalesce_fill_reply(struct sk_buff *skb,
>                            kcoal->tx_aggr_time_usecs, supported))
>               return -EMSGSIZE;
>  
> +#if IS_ENABLED(CONFIG_DIMLIB)
> +     if (!(dev->moderation->flags & (NETDEV_PROFILE_USEC | 
> NETDEV_PROFILE_PKTS |
> +                                     NETDEV_PROFILE_COMPS)))
> +             return 0;
> +
> +     if (coalesce_put_profile(skb, ETHTOOL_A_COALESCE_RX_EQE_PROFILE,
> +                              dev->moderation->rx_eqe_profile, supported) ||
> +         coalesce_put_profile(skb, ETHTOOL_A_COALESCE_RX_CQE_PROFILE,
> +                              dev->moderation->rx_cqe_profile, supported) ||
> +         coalesce_put_profile(skb, ETHTOOL_A_COALESCE_TX_EQE_PROFILE,
> +                              dev->moderation->tx_eqe_profile, supported) ||
> +         coalesce_put_profile(skb, ETHTOOL_A_COALESCE_TX_CQE_PROFILE,
> +                              dev->moderation->tx_cqe_profile, supported))
> +             return -EMSGSIZE;
> +#endif
>       return 0;
>  }
>  
> @@ -227,7 +315,19 @@ const struct nla_policy ethnl_coalesce_set_policy[] = {
>       [ETHTOOL_A_COALESCE_TX_AGGR_MAX_BYTES] = { .type = NLA_U32 },
>       [ETHTOOL_A_COALESCE_TX_AGGR_MAX_FRAMES] = { .type = NLA_U32 },
>       [ETHTOOL_A_COALESCE_TX_AGGR_TIME_USECS] = { .type = NLA_U32 },
> +     [ETHTOOL_A_COALESCE_RX_EQE_PROFILE]     = { .type = NLA_NESTED },
> +     [ETHTOOL_A_COALESCE_RX_CQE_PROFILE]     = { .type = NLA_NESTED },
> +     [ETHTOOL_A_COALESCE_TX_EQE_PROFILE]     = { .type = NLA_NESTED },
> +     [ETHTOOL_A_COALESCE_TX_CQE_PROFILE]     = { .type = NLA_NESTED },

NLA_POLICY_NESTED(coalesce_set_profile_policy)

> +};
> +
> +#if IS_ENABLED(CONFIG_DIMLIB)
> +static const struct nla_policy coalesce_set_profile_policy[] = {
> +     [ETHTOOL_A_MODERATION_USEC]     = {.type = NLA_U32},
> +     [ETHTOOL_A_MODERATION_PKTS]     = {.type = NLA_U32},
> +     [ETHTOOL_A_MODERATION_COMPS]    = {.type = NLA_U32},
>  };
> +#endif
>  
>  static int
>  ethnl_set_coalesce_validate(struct ethnl_req_info *req_info,
> @@ -253,6 +353,76 @@ ethnl_set_coalesce_validate(struct ethnl_req_info 
> *req_info,
>       return 1;
>  }
>  
> +#if IS_ENABLED(CONFIG_DIMLIB)
> +/**
> + * ethnl_update_profile - get a nla nest with four child nla nests from 
> userspace.
> + * @dev: netdevice to update the profile
> + * @dst: data get from the driver and modified by ethnl_update_profile.
> + * @nests: nest attr ETHTOOL_A_COALESCE_*X_*QE_PROFILE to set driver's 
> profile.
> + * @extack: Netlink extended ack
> + *
> + * Layout of nests:
> + *   Nested ETHTOOL_A_COALESCE_*X_*QE_PROFILE attr
> + *     Nested ETHTOOL_A_MODERATIONS_MODERATION attr
> + *       ETHTOOL_A_MODERATION_USEC attr
> + *       ETHTOOL_A_MODERATION_PKTS attr
> + *       ETHTOOL_A_MODERATION_COMPS attr
> + *     ...
> + *     Nested ETHTOOL_A_MODERATIONS_MODERATION attr
> + *       ETHTOOL_A_MODERATION_USEC attr
> + *       ETHTOOL_A_MODERATION_PKTS attr
> + *       ETHTOOL_A_MODERATION_COMPS attr
> + *
> + * Return: 0 on success or a negative error code.
> + */
> +static int ethnl_update_profile(struct net_device *dev,
> +                             struct dim_cq_moder *dst,
> +                             const struct nlattr *nests,
> +                             struct netlink_ext_ack *extack)
> +{
> +     struct nlattr *tb_moder[ARRAY_SIZE(coalesce_set_profile_policy)];
> +     struct dim_cq_moder profile[NET_DIM_PARAMS_NUM_PROFILES];
> +     struct netdev_profile_moder *moder = dev->moderation;
> +     struct nlattr *nest;
> +     int ret, rem, i = 0;
> +
> +     if (!nests)
> +             return 0;
> +
> +     if (!dst)
> +             return -EOPNOTSUPP;
> +
> +     nla_for_each_nested_type(nest, ETHTOOL_A_MODERATIONS_MODERATION, nests, 
> rem) {
> +             ret = nla_parse_nested(tb_moder,
> +                                    ARRAY_SIZE(coalesce_set_profile_policy) 
> - 1,
> +                                    nest, coalesce_set_profile_policy,
> +                                    extack);
> +             if (ret)
> +                     return ret;
> +
> +             if (NL_REQ_ATTR_CHECK(extack, nest, tb_moder, 
> ETHTOOL_A_MODERATION_USEC) ||
> +                 NL_REQ_ATTR_CHECK(extack, nest, tb_moder, 
> ETHTOOL_A_MODERATION_PKTS) ||
> +                 NL_REQ_ATTR_CHECK(extack, nest, tb_moder, 
> ETHTOOL_A_MODERATION_COMPS))

Only require what the device supports, reject what it doesn't

> +                     return -EINVAL;
> +
> +             profile[i].usec = 
> nla_get_u32(tb_moder[ETHTOOL_A_MODERATION_USEC]);
> +             profile[i].pkts = 
> nla_get_u32(tb_moder[ETHTOOL_A_MODERATION_PKTS]);
> +             profile[i].comps = 
> nla_get_u32(tb_moder[ETHTOOL_A_MODERATION_COMPS]);
> +
> +             if ((dst[i].usec != profile[i].usec && !(moder->flags & 
> NETDEV_PROFILE_USEC)) ||
> +                 (dst[i].pkts != profile[i].pkts && !(moder->flags & 
> NETDEV_PROFILE_PKTS)) ||
> +                 (dst[i].comps != profile[i].comps && !(moder->flags & 
> NETDEV_PROFILE_COMPS)))
> +                     return -EOPNOTSUPP;
> +
> +             i++;
> +     }
> +
> +     memcpy(dst, profile, sizeof(profile));

Is this safe? I think you need to use some synchronization when
swapping profiles, maybe RCU?..

> +     return 0;
> +}
> +#endif
> +
>  static int
>  __ethnl_set_coalesce(struct ethnl_req_info *req_info, struct genl_info *info,
>                    bool *dual_change)
> @@ -317,6 +487,35 @@ __ethnl_set_coalesce(struct ethnl_req_info *req_info, 
> struct genl_info *info,
>       ethnl_update_u32(&kernel_coalesce.tx_aggr_time_usecs,
>                        tb[ETHTOOL_A_COALESCE_TX_AGGR_TIME_USECS], &mod);
>  
> +#if IS_ENABLED(CONFIG_DIMLIB)
> +     ret = ethnl_update_profile(dev, dev->moderation->rx_eqe_profile,
> +                                tb[ETHTOOL_A_COALESCE_RX_EQE_PROFILE],
> +                                info->extack);
> +     if (ret < 0)
> +             return ret;
> +     ret = ethnl_update_profile(dev, dev->moderation->rx_cqe_profile,
> +                                tb[ETHTOOL_A_COALESCE_RX_CQE_PROFILE],
> +                                info->extack);
> +     if (ret < 0)
> +             return ret;
> +     ret = ethnl_update_profile(dev, dev->moderation->tx_eqe_profile,
> +                                tb[ETHTOOL_A_COALESCE_TX_EQE_PROFILE],
> +                                info->extack);
> +     if (ret < 0)
> +             return ret;
> +     ret = ethnl_update_profile(dev, dev->moderation->tx_cqe_profile,
> +                                tb[ETHTOOL_A_COALESCE_TX_CQE_PROFILE],
> +                                info->extack);
> +     if (ret < 0)
> +             return ret;
> +#else
> +     if (tb[ETHTOOL_A_COALESCE_RX_EQE_PROFILE] ||
> +         tb[ETHTOOL_A_COALESCE_RX_CQE_PROFILE] ||
> +         tb[ETHTOOL_A_COALESCE_TX_EQE_PROFILE] ||
> +         tb[ETHTOOL_A_COALESCE_TX_CQE_PROFILE])
> +             return -EOPNOTSUPP;
> +
> +#endif
>       /* Update operation modes */
>       ethnl_update_bool32(&coalesce.use_adaptive_rx_coalesce,
>                           tb[ETHTOOL_A_COALESCE_USE_ADAPTIVE_RX], &mod_mode);


Reply via email to