> -----Original Message-----
> From: netdev-ow...@vger.kernel.org [mailto:netdev-ow...@vger.kernel.org]
> On Behalf Of Jiri Pirko
> Sent: Wednesday, October 11, 2017 1:11 PM
> To: netdev@vger.kernel.org
> Cc: da...@davemloft.net; j...@mojatatu.com; xiyou.wangc...@gmail.com;
> sae...@mellanox.com; mat...@mellanox.com; leo...@mellanox.com;
> ml...@mellanox.com; david.lai...@aculab.com; gerlitz...@gmail.com
> Subject: [patch net-next v2 2/4] net: sched: introduce per-egress action 
> device
> callbacks
> 
> From: Jiri Pirko <j...@mellanox.com>
> 
> Introduce infrastructure that allows drivers to register callbacks that are 
> called
> whenever tc would offload inserted rule and specified device acts as tc action
> egress device.
> 
> Signed-off-by: Jiri Pirko <j...@mellanox.com>
> ---
> v1->v2:
> - take rtnl for register/unregister
> ---
>  include/net/act_api.h |  34 ++++++++
>  include/net/pkt_cls.h |   2 +
>  net/sched/act_api.c   | 220
> ++++++++++++++++++++++++++++++++++++++++++++++++++
>  net/sched/cls_api.c   |  30 +++++++
>  4 files changed, 286 insertions(+)
> 
> diff --git a/include/net/act_api.h b/include/net/act_api.h index
> 900168a..f5e8c90 100644
> --- a/include/net/act_api.h
> +++ b/include/net/act_api.h
> @@ -174,4 +174,38 @@ static inline void tcf_action_stats_update(struct
> tc_action *a, u64 bytes,  #endif  }
> 
> +typedef int tc_setup_cb_t(enum tc_setup_type type,
> +                       void *type_data, void *cb_priv);
> +
> +#ifdef CONFIG_NET_CLS_ACT
> +int tc_setup_cb_egdev_register(const struct net_device *dev,
> +                            tc_setup_cb_t *cb, void *cb_priv); void
> +tc_setup_cb_egdev_unregister(const struct net_device *dev,
> +                               tc_setup_cb_t *cb, void *cb_priv); int
> +tc_setup_cb_egdev_call(const struct net_device *dev,
> +                        enum tc_setup_type type, void *type_data,
> +                        bool err_stop);
> +#else
> +static inline
> +int tc_setup_cb_egdev_register(const struct net_device *dev,
> +                            tc_setup_cb_t *cb, void *cb_priv) {
> +     return 0;
> +}
> +
> +static inline
> +void tc_setup_cb_egdev_unregister(const struct net_device *dev,
> +                               tc_setup_cb_t *cb, void *cb_priv) { }
> +
> +static inline
> +int tc_setup_cb_egdev_call(const struct net_device *dev,
> +                        enum tc_setup_type type, void *type_data,
> +                        bool err_stop)
> +{
> +     return 0;
> +}
> +#endif
> +
>  #endif
> diff --git a/include/net/pkt_cls.h b/include/net/pkt_cls.h index
> e80edd8..6f8149c 100644
> --- a/include/net/pkt_cls.h
> +++ b/include/net/pkt_cls.h
> @@ -206,6 +206,8 @@ int tcf_exts_dump(struct sk_buff *skb, struct tcf_exts
> *exts);  int tcf_exts_dump_stats(struct sk_buff *skb, struct tcf_exts *exts); 
>  int
> tcf_exts_get_dev(struct net_device *dev, struct tcf_exts *exts,
>                    struct net_device **hw_dev);
> +int tcf_exts_egdev_cb_call(struct tcf_exts *exts, enum tc_setup_type type,
> +                        void *type_data, bool err_stop);
> 
>  /**
>   * struct tcf_pkt_info - packet information diff --git a/net/sched/act_api.c
> b/net/sched/act_api.c index da6fa82..ac97db9 100644
> --- a/net/sched/act_api.c
> +++ b/net/sched/act_api.c
> @@ -21,6 +21,8 @@
>  #include <linux/kmod.h>
>  #include <linux/err.h>
>  #include <linux/module.h>
> +#include <linux/rhashtable.h>
> +#include <linux/list.h>
>  #include <net/net_namespace.h>
>  #include <net/sock.h>
>  #include <net/sch_generic.h>
> @@ -1249,8 +1251,226 @@ static int tc_dump_action(struct sk_buff *skb,
> struct netlink_callback *cb)
>       return skb->len;
>  }
> 
> +struct tcf_action_net {
> +     struct rhashtable egdev_ht;
> +};
> +
> +static unsigned int tcf_action_net_id;
> +
> +struct tcf_action_egdev_cb {
> +     struct list_head list;
> +     tc_setup_cb_t *cb;
> +     void *cb_priv;
> +};
> +
> +struct tcf_action_egdev {
> +     struct rhash_head ht_node;
> +     const struct net_device *dev;
> +     unsigned int refcnt;
> +     struct list_head cb_list;
> +};
> +
> +static const struct rhashtable_params tcf_action_egdev_ht_params = {
> +     .key_offset = offsetof(struct tcf_action_egdev, dev),
> +     .head_offset = offsetof(struct tcf_action_egdev, ht_node),
> +     .key_len = sizeof(const struct net_device *), };
> +
> +static struct tcf_action_egdev *
> +tcf_action_egdev_lookup(const struct net_device *dev) {
> +     struct net *net = dev_net(dev);
> +     struct tcf_action_net *tan = net_generic(net, tcf_action_net_id);
> +
> +     return rhashtable_lookup_fast(&tan->egdev_ht, &dev,
> +                                   tcf_action_egdev_ht_params);
> +}
> +
> +static struct tcf_action_egdev *
> +tcf_action_egdev_get(const struct net_device *dev) {
> +     struct tcf_action_egdev *egdev;
> +     struct tcf_action_net *tan;
> +
> +     egdev = tcf_action_egdev_lookup(dev);
> +     if (egdev)
> +             goto inc_ref;
> +
> +     egdev = kzalloc(sizeof(*egdev), GFP_KERNEL);
> +     if (!egdev)
> +             return NULL;
> +     INIT_LIST_HEAD(&egdev->cb_list);
> +     tan = net_generic(dev_net(dev), tcf_action_net_id);
> +     rhashtable_insert_fast(&tan->egdev_ht, &egdev->ht_node,
> +                            tcf_action_egdev_ht_params);
> +
> +inc_ref:
> +     egdev->refcnt++;
> +     return egdev;
> +}
> +
> +static void tcf_action_egdev_put(struct tcf_action_egdev *egdev) {
> +     struct tcf_action_net *tan;
> +
> +     if (--egdev->refcnt)
> +             return;
> +     tan = net_generic(dev_net(egdev->dev), tcf_action_net_id);
> +     rhashtable_remove_fast(&tan->egdev_ht, &egdev->ht_node,
> +                            tcf_action_egdev_ht_params);
> +     kfree(egdev);
> +}
> +
> +static struct tcf_action_egdev_cb *
> +tcf_action_egdev_cb_lookup(struct tcf_action_egdev *egdev,
> +                        tc_setup_cb_t *cb, void *cb_priv) {
> +     struct tcf_action_egdev_cb *egdev_cb;
> +
> +     list_for_each_entry(egdev_cb, &egdev->cb_list, list)
> +             if (egdev_cb->cb == cb && egdev_cb->cb_priv == cb_priv)
> +                     return egdev_cb;
> +     return NULL;
> +}
> +
> +static int tcf_action_egdev_cb_call(struct tcf_action_egdev *egdev,
> +                                 enum tc_setup_type type,
> +                                 void *type_data, bool err_stop) {
> +     struct tcf_action_egdev_cb *egdev_cb;
> +     int ok_count = 0;
> +     int err;
> +
> +     list_for_each_entry(egdev_cb, &egdev->cb_list, list) {
> +             err = egdev_cb->cb(type, type_data, egdev_cb->cb_priv);
> +             if (err) {
> +                     if (err_stop)
> +                             return err;
> +             } else {
> +                     ok_count++;
> +             }
> +     }
> +     return ok_count;
> +}
> +
> +static int tcf_action_egdev_cb_add(struct tcf_action_egdev *egdev,
> +                                tc_setup_cb_t *cb, void *cb_priv) {
> +     struct tcf_action_egdev_cb *egdev_cb;
> +
> +     egdev_cb = tcf_action_egdev_cb_lookup(egdev, cb, cb_priv);
> +     if (WARN_ON(egdev_cb))
> +             return -EEXIST;
> +     egdev_cb = kzalloc(sizeof(*egdev_cb), GFP_KERNEL);
> +     if (!egdev_cb)
> +             return -ENOMEM;
> +     egdev_cb->cb = cb;
> +     egdev_cb->cb_priv = cb_priv;
> +     list_add(&egdev_cb->list, &egdev->cb_list);
> +     return 0;
> +}
> +
> +static void tcf_action_egdev_cb_del(struct tcf_action_egdev *egdev,
> +                                 tc_setup_cb_t *cb, void *cb_priv) {
> +     struct tcf_action_egdev_cb *egdev_cb;
> +
> +     egdev_cb = tcf_action_egdev_cb_lookup(egdev, cb, cb_priv);
> +     if (WARN_ON(!egdev_cb))
> +             return;
> +     list_del(&egdev_cb->list);
> +     kfree(egdev_cb);
> +}
> +
> +static int __tc_setup_cb_egdev_register(const struct net_device *dev,
> +                                     tc_setup_cb_t *cb, void *cb_priv)
> +{
> +     struct tcf_action_egdev *egdev = tcf_action_egdev_get(dev);
> +     int err;
> +
> +     if (!egdev)
> +             return -ENOMEM;
> +     err = tcf_action_egdev_cb_add(egdev, cb, cb_priv);
> +     if (err)
> +             goto err_cb_add;
> +     return 0;
> +
> +err_cb_add:
> +     tcf_action_egdev_put(egdev);
> +     return err;
> +}
> +int tc_setup_cb_egdev_register(const struct net_device *dev,
> +                            tc_setup_cb_t *cb, void *cb_priv) {
> +     int err;
> +
> +     rtnl_lock();
> +     err = __tc_setup_cb_egdev_register(dev, cb, cb_priv);
> +     rtnl_unlock();
> +     return err;
> +}
> +EXPORT_SYMBOL_GPL(tc_setup_cb_egdev_register);
> +
> +static void __tc_setup_cb_egdev_unregister(const struct net_device *dev,
> +                                        tc_setup_cb_t *cb, void *cb_priv) {
> +     struct tcf_action_egdev *egdev = tcf_action_egdev_lookup(dev);
> +
> +     if (WARN_ON(!egdev))
> +             return;
> +     tcf_action_egdev_cb_del(egdev, cb, cb_priv);
> +     tcf_action_egdev_put(egdev);
> +}
> +void tc_setup_cb_egdev_unregister(const struct net_device *dev,
> +                               tc_setup_cb_t *cb, void *cb_priv) {
> +     rtnl_lock();
> +     __tc_setup_cb_egdev_unregister(dev, cb, cb_priv);
> +     rtnl_unlock();
> +}
> +EXPORT_SYMBOL_GPL(tc_setup_cb_egdev_unregister);
> +
> +int tc_setup_cb_egdev_call(const struct net_device *dev,
> +                        enum tc_setup_type type, void *type_data,
> +                        bool err_stop)
> +{
> +     struct tcf_action_egdev *egdev = tcf_action_egdev_lookup(dev);
> +
> +     if (!egdev)
> +             return 0;
> +     return tcf_action_egdev_cb_call(egdev, type, type_data, err_stop); }
> +EXPORT_SYMBOL_GPL(tc_setup_cb_egdev_call);
> +
> +static __net_init int tcf_action_net_init(struct net *net) {
> +     struct tcf_action_net *tan = net_generic(net, tcf_action_net_id);
> +
> +     return rhashtable_init(&tan->egdev_ht, &tcf_action_egdev_ht_params);
> }
> +
> +static void __net_exit tcf_action_net_exit(struct net *net) {
> +     struct tcf_action_net *tan = net_generic(net, tcf_action_net_id);
> +
> +     rhashtable_destroy(&tan->egdev_ht);
> +}
> +
> +static struct pernet_operations tcf_action_net_ops = {
> +     .init = tcf_action_net_init,
> +     .exit = tcf_action_net_exit,
> +     .id = &tcf_action_net_id,
> +     .size = sizeof(struct tcf_action_net), };
> +
>  static int __init tc_action_init(void)
>  {
> +     int err;
> +
> +     err = register_pernet_subsys(&tcf_action_net_ops);
> +     if (err)
> +             return err;
> +
>       rtnl_register(PF_UNSPEC, RTM_NEWACTION, tc_ctl_action, NULL, 0);
>       rtnl_register(PF_UNSPEC, RTM_DELACTION, tc_ctl_action, NULL, 0);
>       rtnl_register(PF_UNSPEC, RTM_GETACTION, tc_ctl_action,
> tc_dump_action, diff --git a/net/sched/cls_api.c b/net/sched/cls_api.c index
> 450873b..99f9432 100644
> --- a/net/sched/cls_api.c
> +++ b/net/sched/cls_api.c
> @@ -1026,6 +1026,36 @@ int tcf_exts_get_dev(struct net_device *dev, struct
> tcf_exts *exts,  }  EXPORT_SYMBOL(tcf_exts_get_dev);
> 
> +int tcf_exts_egdev_cb_call(struct tcf_exts *exts, enum tc_setup_type type,
> +                        void *type_data, bool err_stop)
> +{
> +     int ok_count = 0;
> +#ifdef CONFIG_NET_CLS_ACT
> +     const struct tc_action *a;
> +     struct net_device *dev;
> +     LIST_HEAD(actions);
> +     int ret;
> +
> +     if (!tcf_exts_has_actions(exts))
> +             return 0;
> +
> +     tcf_exts_to_list(exts, &actions);
> +     list_for_each_entry(a, &actions, list) {
> +             if (!a->ops->get_dev)
> +                     continue;
> +             dev = a->ops->get_dev(a);
> +             if (!dev || !tc_can_offload(dev))
> +                     continue;
> +             ret = tc_setup_cb_egdev_call(dev, type, type_data, err_stop);
> +             if (ret < 0)
> +                     return ret;
> +             ok_count += ret;
> +     }
> +#endif
> +     return ok_count;
> +}
> +EXPORT_SYMBOL(tcf_exts_egdev_cb_call);
> +
>  static int __init tc_filter_init(void)
>  {
>       rtnl_register(PF_UNSPEC, RTM_NEWTFILTER, tc_ctl_tfilter, NULL, 0);
> --
> 2.9.5

Hi Jiri,  I am just trying to understand this a bit specially in context of 
offloaded macvlan devices.
I am assuming that any action device [fox example - offloaded macvlan device] 
can register for tc callback function using " tc_setup_cb_egdev_register()"
It was hard for me to call this above API from offloaded macvlan flow 
[.ndo_dfwd_add_station] in PF driver,
since it already grabs rtnl_lock(), so I tried to register it from different 
context in PF driver somehow for the offloaded macvlan device.

Now, I didn't understand actually the usage of egdev callback registration here 
-
Consider this example -

tc qdisc add dev <pf_device> ingress
tc filter add dev <pf_device> prio 1 protocol ip parent ffff: flower skip_sw 
dst_ip 192.168.55.55/24 action mirred egress redirect dev mvlan_0

The above filter add command is received by the PF registered callback [which 
was supplied by tcf_block_cb_register()], the same command is also
received by the callback function which was supplied in 
tc_setup_cb_egdev_register().

What is the significance of getting two callbacks here ? Since, I can add the 
above said filter in first callback function even.
What am I supposed to do in second callback ? is there any special motivation 
for egdev callback registration ?

Thanks,
Manish














Reply via email to