[PATCH net-next] net/sched: cls_flower: Introduce support in SKIP SW flag

2016-06-05 Thread Amir Vadai
From: Amir Vadai 

To have a filter processed only by hardware, the skip_sw flag should be
supplied. This is in addition to the already existing skip_hw flag
(filter is processed by software only). If neither flag is specified,
the filter will be processed by both software and hardware.

If only hardware-offloaded filters exist (i.e. the software hash table is
empty), fl_classify() returns -1 immediately, before doing any other work.

A follow-up userspace patch will be sent once the kernel patch is accepted.

Example:

tc filter add dev enp0s9 protocol ip prio 20 parent ffff: \
flower \
ip_proto 6 \
indev enp0s9 \
skip_sw \
action skbedit mark 0x1234

Signed-off-by: Amir Vadai 
---
 net/sched/cls_flower.c | 31 ++-
 1 file changed, 22 insertions(+), 9 deletions(-)

diff --git a/net/sched/cls_flower.c b/net/sched/cls_flower.c
index 730aaca..d737492 100644
--- a/net/sched/cls_flower.c
+++ b/net/sched/cls_flower.c
@@ -66,6 +66,7 @@ struct cls_fl_filter {
struct fl_flow_key key;
struct list_head list;
u32 handle;
+   u32 flags;
struct rcu_head rcu;
 };
 
@@ -123,6 +124,9 @@ static int fl_classify(struct sk_buff *skb, const struct 
tcf_proto *tp,
struct fl_flow_key skb_key;
struct fl_flow_key skb_mkey;
 
+   if (!atomic_read(&head->ht.nelems))
+   return -1;
+
fl_clear_masked_range(&skb_key, &head->mask);
skb_key.indev_ifindex = skb->skb_iif;
/* skb_flow_dissect() does not set n_proto in case an unknown protocol,
@@ -136,7 +140,7 @@ static int fl_classify(struct sk_buff *skb, const struct 
tcf_proto *tp,
f = rhashtable_lookup_fast(&head->ht,
   fl_key_get_start(&skb_mkey, &head->mask),
   head->ht_params);
-   if (f) {
+   if (f && !(f->flags & TCA_CLS_FLAGS_SKIP_SW)) {
*res = f->res;
return tcf_exts_exec(skb, &f->exts, res);
}
@@ -524,7 +528,6 @@ static int fl_change(struct net *net, struct sk_buff 
*in_skb,
struct cls_fl_filter *fnew;
struct nlattr *tb[TCA_FLOWER_MAX + 1];
struct fl_flow_mask mask = {};
-   u32 flags = 0;
int err;
 
if (!tca[TCA_OPTIONS])
@@ -552,8 +555,14 @@ static int fl_change(struct net *net, struct sk_buff 
*in_skb,
}
fnew->handle = handle;
 
-   if (tb[TCA_FLOWER_FLAGS])
-   flags = nla_get_u32(tb[TCA_FLOWER_FLAGS]);
+   if (tb[TCA_FLOWER_FLAGS]) {
+   fnew->flags = nla_get_u32(tb[TCA_FLOWER_FLAGS]);
+
+   if (!tc_flags_valid(fnew->flags)) {
+   err = -EINVAL;
+   goto errout;
+   }
+   }
 
err = fl_set_parms(net, tp, fnew, &mask, base, tb, tca[TCA_RATE], ovr);
if (err)
@@ -563,10 +572,12 @@ static int fl_change(struct net *net, struct sk_buff 
*in_skb,
if (err)
goto errout;
 
-   err = rhashtable_insert_fast(&head->ht, &fnew->ht_node,
-head->ht_params);
-   if (err)
-   goto errout;
+   if (!(fnew->flags & TCA_CLS_FLAGS_SKIP_SW)) {
+   err = rhashtable_insert_fast(&head->ht, &fnew->ht_node,
+head->ht_params);
+   if (err)
+   goto errout;
+   }
 
fl_hw_replace_filter(tp,
 &head->dissector,
@@ -574,7 +585,7 @@ static int fl_change(struct net *net, struct sk_buff 
*in_skb,
 &fnew->key,
 &fnew->exts,
 (unsigned long)fnew,
-flags);
+fnew->flags);
 
if (fold) {
rhashtable_remove_fast(&head->ht, &fold->ht_node,
@@ -734,6 +745,8 @@ static int fl_dump(struct net *net, struct tcf_proto *tp, 
unsigned long fh,
  sizeof(key->tp.dst
goto nla_put_failure;
 
+   nla_put_u32(skb, TCA_FLOWER_FLAGS, f->flags);
+
if (tcf_exts_dump(skb, &f->exts))
goto nla_put_failure;
 
-- 
2.8.3



Re: [PATCH net-next] net/sched: cls_flower: Introduce support in SKIP SW flag

2016-06-07 Thread Amir Vadai
On Tue, Jun 07, 2016 at 08:37:40AM -0700, John Fastabend wrote:
> On 16-06-05 07:11 AM, Amir Vadai wrote:
> > From: Amir Vadai 
> > 
> > In order to make a filter processed only by hardware, skip_sw flag
> > should be supplied. This is an addition to the already existing skip_hw
> > flag (filter will be processed by software only). If no flag is
> > specified, filter will be processed by both software and hardware.
> > 
> > If only hardware offloaded filters exist, fl_classify() will return
> > without doing anything.
> > 
> > A following userspace patch will be sent once kernel patch is accepted.
> > 
> > Example:
> > 
> > tc filter add dev enp0s9 protocol ip prio 20 parent : \
> > flower \
> > ip_proto 6 \
> >     indev enp0s9 \
> > skip_sw \
> > action skbedit mark 0x1234
> > 
> > Signed-off-by: Amir Vadai 
> > ---
> 
> 
> 
> Looks good to me. Although we need to do the same error propagation in
> flower that Jakub just added to cls_u32.
Thanks John,
I will send a patch that returns an error when adding the filter to
hardware fails and skip_sw is set.

> 
> Acked-by: John Fastabend 
> 


[PATCH net-next] net/sched: cls_flower: Introduce support in SKIP SW flag

2016-06-13 Thread Amir Vadai
From: Amir Vadai 

To have a filter processed only by hardware, the skip_sw flag should be
supplied. This is in addition to the already existing skip_hw flag
(filter is processed by software only). If neither flag is specified,
the filter will be processed by both software and hardware.

If only hardware offloaded filters exist, fl_classify() will return
without doing anything.

A following userspace patch will be sent once kernel patch is accepted.

Example:

tc filter add dev enp0s9 protocol ip prio 20 parent ffff: \
flower \
ip_proto 6 \
indev enp0s9 \
skip_sw \
action skbedit mark 0x1234

Signed-off-by: Amir Vadai 
---
 net/sched/cls_flower.c | 31 ++-
 1 file changed, 22 insertions(+), 9 deletions(-)

diff --git a/net/sched/cls_flower.c b/net/sched/cls_flower.c
index 730aaca..d737492 100644
--- a/net/sched/cls_flower.c
+++ b/net/sched/cls_flower.c
@@ -66,6 +66,7 @@ struct cls_fl_filter {
struct fl_flow_key key;
struct list_head list;
u32 handle;
+   u32 flags;
struct rcu_head rcu;
 };
 
@@ -123,6 +124,9 @@ static int fl_classify(struct sk_buff *skb, const struct 
tcf_proto *tp,
struct fl_flow_key skb_key;
struct fl_flow_key skb_mkey;
 
+   if (!atomic_read(&head->ht.nelems))
+   return -1;
+
fl_clear_masked_range(&skb_key, &head->mask);
skb_key.indev_ifindex = skb->skb_iif;
/* skb_flow_dissect() does not set n_proto in case an unknown protocol,
@@ -136,7 +140,7 @@ static int fl_classify(struct sk_buff *skb, const struct 
tcf_proto *tp,
f = rhashtable_lookup_fast(&head->ht,
   fl_key_get_start(&skb_mkey, &head->mask),
   head->ht_params);
-   if (f) {
+   if (f && !(f->flags & TCA_CLS_FLAGS_SKIP_SW)) {
*res = f->res;
return tcf_exts_exec(skb, &f->exts, res);
}
@@ -524,7 +528,6 @@ static int fl_change(struct net *net, struct sk_buff 
*in_skb,
struct cls_fl_filter *fnew;
struct nlattr *tb[TCA_FLOWER_MAX + 1];
struct fl_flow_mask mask = {};
-   u32 flags = 0;
int err;
 
if (!tca[TCA_OPTIONS])
@@ -552,8 +555,14 @@ static int fl_change(struct net *net, struct sk_buff 
*in_skb,
}
fnew->handle = handle;
 
-   if (tb[TCA_FLOWER_FLAGS])
-   flags = nla_get_u32(tb[TCA_FLOWER_FLAGS]);
+   if (tb[TCA_FLOWER_FLAGS]) {
+   fnew->flags = nla_get_u32(tb[TCA_FLOWER_FLAGS]);
+
+   if (!tc_flags_valid(fnew->flags)) {
+   err = -EINVAL;
+   goto errout;
+   }
+   }
 
err = fl_set_parms(net, tp, fnew, &mask, base, tb, tca[TCA_RATE], ovr);
if (err)
@@ -563,10 +572,12 @@ static int fl_change(struct net *net, struct sk_buff 
*in_skb,
if (err)
goto errout;
 
-   err = rhashtable_insert_fast(&head->ht, &fnew->ht_node,
-head->ht_params);
-   if (err)
-   goto errout;
+   if (!(fnew->flags & TCA_CLS_FLAGS_SKIP_SW)) {
+   err = rhashtable_insert_fast(&head->ht, &fnew->ht_node,
+head->ht_params);
+   if (err)
+   goto errout;
+   }
 
fl_hw_replace_filter(tp,
 &head->dissector,
@@ -574,7 +585,7 @@ static int fl_change(struct net *net, struct sk_buff 
*in_skb,
 &fnew->key,
 &fnew->exts,
 (unsigned long)fnew,
-flags);
+fnew->flags);
 
if (fold) {
rhashtable_remove_fast(&head->ht, &fold->ht_node,
@@ -734,6 +745,8 @@ static int fl_dump(struct net *net, struct tcf_proto *tp, 
unsigned long fh,
  sizeof(key->tp.dst
goto nla_put_failure;
 
+   nla_put_u32(skb, TCA_FLOWER_FLAGS, f->flags);
+
if (tcf_exts_dump(skb, &f->exts))
goto nla_put_failure;
 
-- 
2.8.3



Re: [PATCH net-next] net/sched: cls_flower: Introduce support in SKIP SW flag

2016-06-13 Thread Amir Vadai
On Mon, Jun 13, 2016 at 11:58:12AM +0300, Amir Vadai wrote:
> From: Amir Vadai 
> 
> In order to make a filter processed only by hardware, skip_sw flag
> should be supplied. This is an addition to the already existing skip_hw
> flag (filter will be processed by software only). If no flag is
> specified, filter will be processed by both software and hardware.
> 
> If only hardware offloaded filters exist, fl_classify() will return
> without doing anything.
> 
> A following userspace patch will be sent once kernel patch is accepted.
> 
> Example:
> 
> tc filter add dev enp0s9 protocol ip prio 20 parent : \
>   flower \
>   ip_proto 6 \
>   indev enp0s9 \
>   skip_sw \
>   action skbedit mark 0x1234
> 
> Signed-off-by: Amir Vadai 
> ---

Please ignore this mail - wrong patch sent.

Amir


[PATCH net-next] net/sched: flower: Return error when hw can't offload and skip_sw is set

2016-06-13 Thread Amir Vadai
From: Amir Vadai 

When skip_sw is set and the hardware fails to apply the filter (or the
filter cannot be offloaded at all), return an error to the user. This
makes the error propagation logic similar to the one currently used in
the u32 classifier.
Also, change the code to use the tc_skip_sw() utility function.

Signed-off-by: Amir Vadai 
---
 net/sched/cls_flower.c | 42 +-
 1 file changed, 25 insertions(+), 17 deletions(-)

diff --git a/net/sched/cls_flower.c b/net/sched/cls_flower.c
index 1ea6f76..5060801 100644
--- a/net/sched/cls_flower.c
+++ b/net/sched/cls_flower.c
@@ -140,7 +140,7 @@ static int fl_classify(struct sk_buff *skb, const struct 
tcf_proto *tp,
f = rhashtable_lookup_fast(&head->ht,
   fl_key_get_start(&skb_mkey, &head->mask),
   head->ht_params);
-   if (f && !(f->flags & TCA_CLS_FLAGS_SKIP_SW)) {
+   if (f && !tc_skip_sw(f->flags)) {
*res = f->res;
return tcf_exts_exec(skb, &f->exts, res);
}
@@ -187,19 +187,20 @@ static void fl_hw_destroy_filter(struct tcf_proto *tp, 
unsigned long cookie)
dev->netdev_ops->ndo_setup_tc(dev, tp->q->handle, tp->protocol, &tc);
 }
 
-static void fl_hw_replace_filter(struct tcf_proto *tp,
-struct flow_dissector *dissector,
-struct fl_flow_key *mask,
-struct fl_flow_key *key,
-struct tcf_exts *actions,
-unsigned long cookie, u32 flags)
+static int fl_hw_replace_filter(struct tcf_proto *tp,
+   struct flow_dissector *dissector,
+   struct fl_flow_key *mask,
+   struct fl_flow_key *key,
+   struct tcf_exts *actions,
+   unsigned long cookie, u32 flags)
 {
struct net_device *dev = tp->q->dev_queue->dev;
struct tc_cls_flower_offload offload = {0};
struct tc_to_netdev tc;
+   int err;
 
if (!tc_should_offload(dev, tp, flags))
-   return;
+   return tc_skip_sw(flags) ? -EINVAL : 0;
 
offload.command = TC_CLSFLOWER_REPLACE;
offload.cookie = cookie;
@@ -211,7 +212,12 @@ static void fl_hw_replace_filter(struct tcf_proto *tp,
tc.type = TC_SETUP_CLSFLOWER;
tc.cls_flower = &offload;
 
-   dev->netdev_ops->ndo_setup_tc(dev, tp->q->handle, tp->protocol, &tc);
+   err = dev->netdev_ops->ndo_setup_tc(dev, tp->q->handle, tp->protocol, 
&tc);
+
+   if (tc_skip_sw(flags))
+   return err;
+
+   return 0;
 }
 
 static void fl_hw_update_stats(struct tcf_proto *tp, struct cls_fl_filter *f)
@@ -572,20 +578,22 @@ static int fl_change(struct net *net, struct sk_buff 
*in_skb,
if (err)
goto errout;
 
-   if (!(fnew->flags & TCA_CLS_FLAGS_SKIP_SW)) {
+   if (!tc_skip_sw(fnew->flags)) {
err = rhashtable_insert_fast(&head->ht, &fnew->ht_node,
 head->ht_params);
if (err)
goto errout;
}
 
-   fl_hw_replace_filter(tp,
-&head->dissector,
-&mask.key,
-&fnew->key,
-&fnew->exts,
-(unsigned long)fnew,
-fnew->flags);
+   err = fl_hw_replace_filter(tp,
+  &head->dissector,
+  &mask.key,
+  &fnew->key,
+  &fnew->exts,
+  (unsigned long)fnew,
+  fnew->flags);
+   if (err)
+   goto errout;
 
if (fold) {
rhashtable_remove_fast(&head->ht, &fold->ht_node,
-- 
2.8.3



[PATCH net-next 2/8] net/sched: act_gact: Update statistics when offloaded to hardware

2016-05-13 Thread Amir Vadai
From: Amir Vadai 

Implement the stats_update callback that will be called by NIC drivers
for hardware offloaded filters.

Signed-off-by: Amir Vadai 
---
 net/sched/act_gact.c | 15 +++
 1 file changed, 15 insertions(+)

diff --git a/net/sched/act_gact.c b/net/sched/act_gact.c
index 1a6e09f..ec5cc84 100644
--- a/net/sched/act_gact.c
+++ b/net/sched/act_gact.c
@@ -148,6 +148,20 @@ static int tcf_gact(struct sk_buff *skb, const struct 
tc_action *a,
return action;
 }
 
+static void tcf_gact_stats_update(struct tc_action *a, u64 bytes, u32 packets,
+ u64 lastuse)
+{
+   struct tcf_gact *gact = a->priv;
+   int action = READ_ONCE(gact->tcf_action);
+   struct tcf_t *tm = &gact->tcf_tm;
+
+   _bstats_cpu_update(this_cpu_ptr(gact->common.cpu_bstats), bytes, 
packets);
+   if (action == TC_ACT_SHOT)
+   this_cpu_ptr(gact->common.cpu_qstats)->drops += packets;
+
+   tm->lastuse = lastuse;
+}
+
 static int tcf_gact_dump(struct sk_buff *skb, struct tc_action *a, int bind, 
int ref)
 {
unsigned char *b = skb_tail_pointer(skb);
@@ -207,6 +221,7 @@ static struct tc_action_ops act_gact_ops = {
.type   =   TCA_ACT_GACT,
.owner  =   THIS_MODULE,
.act=   tcf_gact,
+   .stats_update   =   tcf_gact_stats_update,
.dump   =   tcf_gact_dump,
.init   =   tcf_gact_init,
.walk   =   tcf_gact_walker,
-- 
2.8.0



[PATCH net-next 3/8] net/sched: cls_flower: Hardware offloaded filters statistics support

2016-05-13 Thread Amir Vadai
From: Amir Vadai 

Introduce a new ndo_setup_tc() command, TC_CLSFLOWER_STATS, for hardware
offloaded filters. It calls the NIC driver and makes it update the filter's
statistics. This is done before dumping the filter and its statistics.

Signed-off-by: Amir Vadai 
---
 include/net/pkt_cls.h  |  1 +
 net/sched/cls_flower.c | 21 +
 2 files changed, 22 insertions(+)

diff --git a/include/net/pkt_cls.h b/include/net/pkt_cls.h
index caa5e18..cc561a8 100644
--- a/include/net/pkt_cls.h
+++ b/include/net/pkt_cls.h
@@ -412,6 +412,7 @@ static inline bool tc_should_offload(struct net_device 
*dev, u32 flags)
 enum tc_fl_command {
TC_CLSFLOWER_REPLACE,
TC_CLSFLOWER_DESTROY,
+   TC_CLSFLOWER_STATS,
 };
 
 struct tc_cls_flower_offload {
diff --git a/net/sched/cls_flower.c b/net/sched/cls_flower.c
index 2181ffc..730aaca 100644
--- a/net/sched/cls_flower.c
+++ b/net/sched/cls_flower.c
@@ -210,6 +210,25 @@ static void fl_hw_replace_filter(struct tcf_proto *tp,
dev->netdev_ops->ndo_setup_tc(dev, tp->q->handle, tp->protocol, &tc);
 }
 
+static void fl_hw_update_stats(struct tcf_proto *tp, struct cls_fl_filter *f)
+{
+   struct net_device *dev = tp->q->dev_queue->dev;
+   struct tc_cls_flower_offload offload = {0};
+   struct tc_to_netdev tc;
+
+   if (!tc_should_offload(dev, 0))
+   return;
+
+   offload.command = TC_CLSFLOWER_STATS;
+   offload.cookie = (unsigned long)f;
+   offload.exts = &f->exts;
+
+   tc.type = TC_SETUP_CLSFLOWER;
+   tc.cls_flower = &offload;
+
+   dev->netdev_ops->ndo_setup_tc(dev, tp->q->handle, tp->protocol, &tc);
+}
+
 static bool fl_destroy(struct tcf_proto *tp, bool force)
 {
struct cls_fl_head *head = rtnl_dereference(tp->root);
@@ -662,6 +681,8 @@ static int fl_dump(struct net *net, struct tcf_proto *tp, 
unsigned long fh,
goto nla_put_failure;
}
 
+   fl_hw_update_stats(tp, f);
+
if (fl_dump_key_val(skb, key->eth.dst, TCA_FLOWER_KEY_ETH_DST,
mask->eth.dst, TCA_FLOWER_KEY_ETH_DST_MASK,
sizeof(key->eth.dst)) ||
-- 
2.8.0



[PATCH net-next 1/8] net/sched: Enable netdev drivers to update statistics of offloaded actions

2016-05-13 Thread Amir Vadai
From: Amir Vadai 

Introduce the stats_update callback. A netdev driver can call it for
offloaded actions to update the basic statistics (packets, bytes and last
use). Since bstats_update() and bstats_cpu_update() take an skb as an
argument to get the counters, add _bstats_update() and
_bstats_cpu_update(), which take bytes and packets as arguments instead.

Signed-off-by: Amir Vadai 
---
 include/net/act_api.h | 12 
 include/net/sch_generic.h | 20 ++--
 2 files changed, 30 insertions(+), 2 deletions(-)

diff --git a/include/net/act_api.h b/include/net/act_api.h
index 03e322b..2cd9e9b 100644
--- a/include/net/act_api.h
+++ b/include/net/act_api.h
@@ -106,6 +106,7 @@ struct tc_action_ops {
int bind);
int (*walk)(struct net *, struct sk_buff *,
struct netlink_callback *, int, struct tc_action *);
+   void(*stats_update)(struct tc_action *, u64, u32, u64);
 };
 
 struct tc_action_net {
@@ -178,10 +179,21 @@ int tcf_action_copy_stats(struct sk_buff *, struct 
tc_action *, int);
 
 #define tc_for_each_action(_a, _exts) \
list_for_each_entry(a, &(_exts)->actions, list)
+
+static inline void tcf_action_stats_update(struct tc_action *a, u64 bytes,
+  u64 packets, u64 lastuse)
+{
+   if (!a->ops->stats_update)
+   return;
+
+   a->ops->stats_update(a, bytes, packets, lastuse);
+}
+
 #else /* CONFIG_NET_CLS_ACT */
 
 #define tc_no_actions(_exts) true
 #define tc_for_each_action(_a, _exts) while (0)
+#define tcf_action_stats_update(a, bytes, packets, lastuse)
 
 #endif /* CONFIG_NET_CLS_ACT */
 #endif
diff --git a/include/net/sch_generic.h b/include/net/sch_generic.h
index 46e55f0..a1fd76c 100644
--- a/include/net/sch_generic.h
+++ b/include/net/sch_generic.h
@@ -527,11 +527,27 @@ static inline bool qdisc_is_percpu_stats(const struct 
Qdisc *q)
return q->flags & TCQ_F_CPUSTATS;
 }
 
+static inline void _bstats_update(struct gnet_stats_basic_packed *bstats,
+ __u64 bytes, __u32 packets)
+{
+   bstats->bytes += bytes;
+   bstats->packets += packets;
+}
+
 static inline void bstats_update(struct gnet_stats_basic_packed *bstats,
 const struct sk_buff *skb)
 {
-   bstats->bytes += qdisc_pkt_len(skb);
-   bstats->packets += skb_is_gso(skb) ? skb_shinfo(skb)->gso_segs : 1;
+   _bstats_update(bstats,
+  qdisc_pkt_len(skb),
+  skb_is_gso(skb) ? skb_shinfo(skb)->gso_segs : 1);
+}
+
+static inline void _bstats_cpu_update(struct gnet_stats_basic_cpu *bstats,
+ __u64 bytes, __u32 packets)
+{
+   u64_stats_update_begin(&bstats->syncp);
+   _bstats_update(&bstats->bstats, bytes, packets);
+   u64_stats_update_end(&bstats->syncp);
 }
 
 static inline void bstats_cpu_update(struct gnet_stats_basic_cpu *bstats,
-- 
2.8.0



[PATCH net-next 0/8] sched,mlx5: Offloaded TC flower filter statistics

2016-05-13 Thread Amir Vadai
Hi Dave,

This patchset introduces counters support for offloaded cls_flower filters.
When the user calls 'tc show -s ..', fl_dump is called.
Before fl_dump() returns the statistics, it calls the NIC driver (using a new
ndo_setup_tc() command - TC_CLSFLOWER_STATS) to read the hardware counters and
update the statistics accordingly. A new TC action op was added (stats_update())
to be used by the NIC driver to update the statistics.

Patchset was applied and tested over commit ed7cbbc ("udp: Resolve NULL pointer
dereference over flow-based vxlan device")

Thanks,
Amir

Amir Vadai (8):
  net/sched: Enable netdev drivers to update statistics of offloaded
actions
  net/sched: act_gact: Update statistics when offloaded to hardware
  net/sched: cls_flower: Hardware offloaded filters statistics support
  net/mlx5_core: Use a macro in mlx5_command_str()
  net/mlx5_core: Firmware commands to support flow counters
  net/mlx5_core: Introduce flow steering destination of type counter
  net/mlx5_core: Flow counters infrastructure
  net/mlx5e: Hardware offloaded flower filter statistics support

 drivers/net/ethernet/mellanox/mlx5/core/Makefile   |   2 +-
 drivers/net/ethernet/mellanox/mlx5/core/cmd.c  | 309 +
 drivers/net/ethernet/mellanox/mlx5/core/en_main.c  |   2 +
 drivers/net/ethernet/mellanox/mlx5/core/en_tc.c|  71 -
 drivers/net/ethernet/mellanox/mlx5/core/en_tc.h|   3 +
 drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.c   | 102 ++-
 drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.h   |   6 +
 drivers/net/ethernet/mellanox/mlx5/core/fs_core.c  |  59 +++-
 drivers/net/ethernet/mellanox/mlx5/core/fs_core.h  |  26 ++
 .../net/ethernet/mellanox/mlx5/core/fs_counters.c  | 226 +++
 include/linux/mlx5/driver.h|  14 +
 include/linux/mlx5/fs.h|   7 +
 include/linux/mlx5/mlx5_ifc.h  | 101 ++-
 include/net/act_api.h  |  12 +
 include/net/pkt_cls.h  |   1 +
 include/net/sch_generic.h  |  20 +-
 net/sched/act_gact.c   |  15 +
 net/sched/cls_flower.c |  21 ++
 18 files changed, 802 insertions(+), 195 deletions(-)
 create mode 100644 drivers/net/ethernet/mellanox/mlx5/core/fs_counters.c

-- 
2.8.0



[PATCH net-next 6/8] net/mlx5_core: Introduce flow steering destination of type counter

2016-05-13 Thread Amir Vadai
From: Amir Vadai 

When adding a flow steering rule with a counter, the caller needs to
supply a destination of type MLX5_FLOW_DESTINATION_TYPE_COUNTER, with a
pointer to a struct mlx5_fc.
Also, the MLX5_FLOW_CONTEXT_ACTION_COUNT bit should be set in the action.

Signed-off-by: Amir Vadai 
---
 drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.c  | 36 +---
 drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.h  |  1 +
 drivers/net/ethernet/mellanox/mlx5/core/fs_core.c | 52 +--
 drivers/net/ethernet/mellanox/mlx5/core/fs_core.h | 23 ++
 include/linux/mlx5/fs.h   |  2 +
 include/linux/mlx5/mlx5_ifc.h |  2 +
 6 files changed, 106 insertions(+), 10 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.c 
b/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.c
index ccb63a0..a5bb6b6 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.c
@@ -241,17 +241,20 @@ static int mlx5_cmd_set_fte(struct mlx5_core_dev *dev,
MLX5_SET(flow_context, in_flow_context, group_id, group_id);
MLX5_SET(flow_context, in_flow_context, flow_tag, fte->flow_tag);
MLX5_SET(flow_context, in_flow_context, action, fte->action);
-   MLX5_SET(flow_context, in_flow_context, destination_list_size,
-fte->dests_size);
in_match_value = MLX5_ADDR_OF(flow_context, in_flow_context,
  match_value);
memcpy(in_match_value, &fte->val, MLX5_ST_SZ_BYTES(fte_match_param));
 
+   in_dests = MLX5_ADDR_OF(flow_context, in_flow_context, destination);
if (fte->action & MLX5_FLOW_CONTEXT_ACTION_FWD_DEST) {
-   in_dests = MLX5_ADDR_OF(flow_context, in_flow_context, 
destination);
+   int list_size = 0;
+
list_for_each_entry(dst, &fte->node.children, node.list) {
unsigned int id;
 
+   if (dst->dest_attr.type == 
MLX5_FLOW_DESTINATION_TYPE_COUNTER)
+   continue;
+
MLX5_SET(dest_format_struct, in_dests, destination_type,
 dst->dest_attr.type);
if (dst->dest_attr.type ==
@@ -262,8 +265,31 @@ static int mlx5_cmd_set_fte(struct mlx5_core_dev *dev,
}
MLX5_SET(dest_format_struct, in_dests, destination_id, 
id);
in_dests += MLX5_ST_SZ_BYTES(dest_format_struct);
+   list_size++;
+   }
+
+   MLX5_SET(flow_context, in_flow_context, destination_list_size,
+list_size);
+   }
+
+   if (fte->action & MLX5_FLOW_CONTEXT_ACTION_COUNT) {
+   int list_size = 0;
+
+   list_for_each_entry(dst, &fte->node.children, node.list) {
+   if (dst->dest_attr.type !=
+   MLX5_FLOW_DESTINATION_TYPE_COUNTER)
+   continue;
+
+   MLX5_SET(flow_counter_list, in_dests, flow_counter_id,
+dst->dest_attr.counter->id);
+   in_dests += MLX5_ST_SZ_BYTES(dest_format_struct);
+   list_size++;
}
+
+   MLX5_SET(flow_context, in_flow_context, flow_counter_list_size,
+list_size);
}
+
memset(out, 0, sizeof(out));
err = mlx5_cmd_exec_check_status(dev, in, inlen, out,
 sizeof(out));
@@ -283,18 +309,16 @@ int mlx5_cmd_create_fte(struct mlx5_core_dev *dev,
 int mlx5_cmd_update_fte(struct mlx5_core_dev *dev,
struct mlx5_flow_table *ft,
unsigned group_id,
+   int modify_mask,
struct fs_fte *fte)
 {
int opmod;
-   int modify_mask;
int atomic_mod_cap = MLX5_CAP_FLOWTABLE(dev,

flow_table_properties_nic_receive.
flow_modify_en);
if (!atomic_mod_cap)
return -ENOTSUPP;
opmod = 1;
-   modify_mask = 1 <<
-   MLX5_SET_FTE_MODIFY_ENABLE_MASK_DESTINATION_LIST;
 
return  mlx5_cmd_set_fte(dev, opmod, modify_mask, ft, group_id, fte);
 }
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.h 
b/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.h
index 18c111a..fc4f7b8 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.h
@@ -62,6 +62,7 @@ int mlx5_cmd_create_fte(struct mlx5_core_dev *dev,
 int mlx5_cmd_update_fte(struct mlx5_core_dev *dev,
struct mlx5_flow_table *ft,
unsigned group_id,
+   

[PATCH net-next 8/8] net/mlx5e: Hardware offloaded flower filter statistics support

2016-05-13 Thread Amir Vadai
From: Amir Vadai 

Introduce support for updating the statistics of offloaded TC flower
classifiers. Currently only the DROP action is supported.

Signed-off-by: Amir Vadai 
---
 drivers/net/ethernet/mellanox/mlx5/core/en_main.c |  2 +
 drivers/net/ethernet/mellanox/mlx5/core/en_tc.c   | 71 ---
 drivers/net/ethernet/mellanox/mlx5/core/en_tc.h   |  3 +
 3 files changed, 69 insertions(+), 7 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c 
b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
index 0804070..fd43929 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
@@ -2154,6 +2154,8 @@ static int mlx5e_ndo_setup_tc(struct net_device *dev, u32 
handle,
return mlx5e_configure_flower(priv, proto, 
tc->cls_flower);
case TC_CLSFLOWER_DESTROY:
return mlx5e_delete_flower(priv, tc->cls_flower);
+   case TC_CLSFLOWER_STATS:
+   return mlx5e_stats_flower(priv, tc->cls_flower);
}
default:
return -EOPNOTSUPP;
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c 
b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c
index ef017c0..704c3d3 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c
@@ -53,13 +53,24 @@ static struct mlx5_flow_rule *mlx5e_tc_add_flow(struct 
mlx5e_priv *priv,
u32 *match_c, u32 *match_v,
u32 action, u32 flow_tag)
 {
-   struct mlx5_flow_destination dest = {
-   .type = MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE,
-   {.ft = priv->fs.vlan.ft.t},
-   };
+   struct mlx5_core_dev *dev = priv->mdev;
+   struct mlx5_flow_destination dest = { 0 };
+   struct mlx5_fc *counter = NULL;
struct mlx5_flow_rule *rule;
bool table_created = false;
 
+   if (action & MLX5_FLOW_CONTEXT_ACTION_FWD_DEST) {
+   dest.type = MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE;
+   dest.ft = priv->fs.vlan.ft.t;
+   } else {
+   counter = mlx5_fc_create(dev, true);
+   if (IS_ERR(counter))
+   return ERR_CAST(counter);
+
+   dest.type = MLX5_FLOW_DESTINATION_TYPE_COUNTER;
+   dest.counter = counter;
+   }
+
if (IS_ERR_OR_NULL(priv->fs.tc.t)) {
priv->fs.tc.t =
mlx5_create_auto_grouped_flow_table(priv->fs.ns,
@@ -70,7 +81,8 @@ static struct mlx5_flow_rule *mlx5e_tc_add_flow(struct 
mlx5e_priv *priv,
if (IS_ERR(priv->fs.tc.t)) {
netdev_err(priv->netdev,
   "Failed to create tc offload table\n");
-   return ERR_CAST(priv->fs.tc.t);
+   rule = ERR_CAST(priv->fs.tc.t);
+   goto err_create_ft;
}
 
table_created = true;
@@ -79,12 +91,20 @@ static struct mlx5_flow_rule *mlx5e_tc_add_flow(struct 
mlx5e_priv *priv,
rule = mlx5_add_flow_rule(priv->fs.tc.t, MLX5_MATCH_OUTER_HEADERS,
  match_c, match_v,
  action, flow_tag,
- action & MLX5_FLOW_CONTEXT_ACTION_FWD_DEST ? 
&dest : NULL);
+ &dest);
+
+   if (IS_ERR(rule))
+   goto err_add_rule;
+
+   return rule;
 
-   if (IS_ERR(rule) && table_created) {
+err_add_rule:
+   if (table_created) {
mlx5_destroy_flow_table(priv->fs.tc.t);
priv->fs.tc.t = NULL;
}
+err_create_ft:
+   mlx5_fc_destroy(dev, counter);
 
return rule;
 }
@@ -92,8 +112,14 @@ static struct mlx5_flow_rule *mlx5e_tc_add_flow(struct 
mlx5e_priv *priv,
 static void mlx5e_tc_del_flow(struct mlx5e_priv *priv,
  struct mlx5_flow_rule *rule)
 {
+   struct mlx5_fc *counter = NULL;
+
+   counter = mlx5_flow_rule_counter(rule);
+
mlx5_del_flow_rule(rule);
 
+   mlx5_fc_destroy(priv->mdev, counter);
+
if (!mlx5e_tc_num_filters(priv)) {
mlx5_destroy_flow_table(priv->fs.tc.t);
priv->fs.tc.t = NULL;
@@ -286,6 +312,9 @@ static int parse_tc_actions(struct mlx5e_priv *priv, struct 
tcf_exts *exts,
 
if (is_tcf_gact_shot(a)) {
*action |= MLX5_FLOW_CONTEXT_ACTION_DROP;
+   if (MLX5_CAP_FLOWTABLE(priv->mdev,
+  
flow_table_properties_nic_receive.flow_counter))
+   *action |= MLX5_FLOW_CONTEXT_ACTION_COUNT;
continue;
}
 
@@ 

[PATCH net-next 4/8] net/mlx5_core: Use a macro in mlx5_command_str()

2016-05-13 Thread Amir Vadai
From: Amir Vadai 

Use a macro to generate each case instead of writing out the opcode name
twice (once as the case label and once as the returned string).

Signed-off-by: Amir Vadai 
---
 drivers/net/ethernet/mellanox/mlx5/core/cmd.c | 303 +++---
 1 file changed, 132 insertions(+), 171 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/cmd.c 
b/drivers/net/ethernet/mellanox/mlx5/core/cmd.c
index eb926e1..63cac84 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/cmd.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/cmd.c
@@ -406,178 +406,139 @@ static int mlx5_internal_err_ret_value(struct 
mlx5_core_dev *dev, u16 op,
 
 const char *mlx5_command_str(int command)
 {
-   switch (command) {
-   case MLX5_CMD_OP_QUERY_HCA_VPORT_CONTEXT:
-   return "QUERY_HCA_VPORT_CONTEXT";
-
-   case MLX5_CMD_OP_MODIFY_HCA_VPORT_CONTEXT:
-   return "MODIFY_HCA_VPORT_CONTEXT";
-
-   case MLX5_CMD_OP_QUERY_HCA_CAP:
-   return "QUERY_HCA_CAP";
-
-   case MLX5_CMD_OP_SET_HCA_CAP:
-   return "SET_HCA_CAP";
-
-   case MLX5_CMD_OP_QUERY_ADAPTER:
-   return "QUERY_ADAPTER";
-
-   case MLX5_CMD_OP_INIT_HCA:
-   return "INIT_HCA";
-
-   case MLX5_CMD_OP_TEARDOWN_HCA:
-   return "TEARDOWN_HCA";
-
-   case MLX5_CMD_OP_ENABLE_HCA:
-   return "MLX5_CMD_OP_ENABLE_HCA";
-
-   case MLX5_CMD_OP_DISABLE_HCA:
-   return "MLX5_CMD_OP_DISABLE_HCA";
-
-   case MLX5_CMD_OP_QUERY_PAGES:
-   return "QUERY_PAGES";
-
-   case MLX5_CMD_OP_MANAGE_PAGES:
-   return "MANAGE_PAGES";
-
-   case MLX5_CMD_OP_CREATE_MKEY:
-   return "CREATE_MKEY";
-
-   case MLX5_CMD_OP_QUERY_MKEY:
-   return "QUERY_MKEY";
-
-   case MLX5_CMD_OP_DESTROY_MKEY:
-   return "DESTROY_MKEY";
-
-   case MLX5_CMD_OP_QUERY_SPECIAL_CONTEXTS:
-   return "QUERY_SPECIAL_CONTEXTS";
-
-   case MLX5_CMD_OP_CREATE_EQ:
-   return "CREATE_EQ";
-
-   case MLX5_CMD_OP_DESTROY_EQ:
-   return "DESTROY_EQ";
-
-   case MLX5_CMD_OP_QUERY_EQ:
-   return "QUERY_EQ";
-
-   case MLX5_CMD_OP_CREATE_CQ:
-   return "CREATE_CQ";
-
-   case MLX5_CMD_OP_DESTROY_CQ:
-   return "DESTROY_CQ";
-
-   case MLX5_CMD_OP_QUERY_CQ:
-   return "QUERY_CQ";
-
-   case MLX5_CMD_OP_MODIFY_CQ:
-   return "MODIFY_CQ";
-
-   case MLX5_CMD_OP_CREATE_QP:
-   return "CREATE_QP";
-
-   case MLX5_CMD_OP_DESTROY_QP:
-   return "DESTROY_QP";
-
-   case MLX5_CMD_OP_RST2INIT_QP:
-   return "RST2INIT_QP";
-
-   case MLX5_CMD_OP_INIT2RTR_QP:
-   return "INIT2RTR_QP";
-
-   case MLX5_CMD_OP_RTR2RTS_QP:
-   return "RTR2RTS_QP";
-
-   case MLX5_CMD_OP_RTS2RTS_QP:
-   return "RTS2RTS_QP";
-
-   case MLX5_CMD_OP_SQERR2RTS_QP:
-   return "SQERR2RTS_QP";
-
-   case MLX5_CMD_OP_2ERR_QP:
-   return "2ERR_QP";
-
-   case MLX5_CMD_OP_2RST_QP:
-   return "2RST_QP";
-
-   case MLX5_CMD_OP_QUERY_QP:
-   return "QUERY_QP";
-
-   case MLX5_CMD_OP_MAD_IFC:
-   return "MAD_IFC";
-
-   case MLX5_CMD_OP_INIT2INIT_QP:
-   return "INIT2INIT_QP";
-
-   case MLX5_CMD_OP_CREATE_PSV:
-   return "CREATE_PSV";
-
-   case MLX5_CMD_OP_DESTROY_PSV:
-   return "DESTROY_PSV";
-
-   case MLX5_CMD_OP_CREATE_SRQ:
-   return "CREATE_SRQ";
-
-   case MLX5_CMD_OP_DESTROY_SRQ:
-   return "DESTROY_SRQ";
-
-   case MLX5_CMD_OP_QUERY_SRQ:
-   return "QUERY_SRQ";
-
-   case MLX5_CMD_OP_ARM_RQ:
-   return "ARM_RQ";
-
-   case MLX5_CMD_OP_CREATE_XRC_SRQ:
-   return "CREATE_XRC_SRQ";
-
-   case MLX5_CMD_OP_DESTROY_XRC_SRQ:
-   return "DESTROY_XRC_SRQ";
-
-   case MLX5_CMD_OP_QUERY_XRC_SRQ:
-   return "QUERY_XRC_SRQ";
-
-   case MLX5_CMD_OP_ARM_XRC_SRQ:
-   return "ARM_XRC_SRQ";
-
-   case MLX5_CMD_OP_ALLOC_PD:
-   return "ALLOC_PD";
-
-   case MLX5_CMD_OP_DEALLOC_PD:
-   return "DEALLOC_PD";
-
-   case MLX5_CMD_OP_ALLOC_UAR:
-   return "ALLOC_UAR";
-
-   case MLX5_CMD_OP_DEALLOC_UAR:
-   return "DEALLOC_UAR";
-
-   case MLX5_CMD_OP_ATTACH_TO_MCG:
-   return "ATTACH_TO_MCG";

[PATCH net-next 5/8] net/mlx5_core: Firmware commands to support flow counters

2016-05-13 Thread Amir Vadai
From: Amir Vadai 

Getting packet/byte statistics on flows is done through flow counters.
Implement the firmware commands to alloc, free and query flow counters.

Signed-off-by: Amir Vadai 
---
 drivers/net/ethernet/mellanox/mlx5/core/cmd.c|  6 ++
 drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.c | 66 
 drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.h |  5 ++
 include/linux/mlx5/mlx5_ifc.h| 99 +++-
 4 files changed, 173 insertions(+), 3 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/cmd.c 
b/drivers/net/ethernet/mellanox/mlx5/core/cmd.c
index 63cac84..dcd2df6 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/cmd.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/cmd.c
@@ -294,6 +294,7 @@ static int mlx5_internal_err_ret_value(struct mlx5_core_dev 
*dev, u16 op,
case MLX5_CMD_OP_DESTROY_FLOW_TABLE:
case MLX5_CMD_OP_DESTROY_FLOW_GROUP:
case MLX5_CMD_OP_DELETE_FLOW_TABLE_ENTRY:
+   case MLX5_CMD_OP_DEALLOC_FLOW_COUNTER:
return MLX5_CMD_STAT_OK;
 
case MLX5_CMD_OP_QUERY_HCA_CAP:
@@ -395,6 +396,8 @@ static int mlx5_internal_err_ret_value(struct mlx5_core_dev 
*dev, u16 op,
case MLX5_CMD_OP_QUERY_FLOW_GROUP:
case MLX5_CMD_OP_SET_FLOW_TABLE_ENTRY:
case MLX5_CMD_OP_QUERY_FLOW_TABLE_ENTRY:
+   case MLX5_CMD_OP_ALLOC_FLOW_COUNTER:
+   case MLX5_CMD_OP_QUERY_FLOW_COUNTER:
*status = MLX5_DRIVER_STATUS_ABORTED;
*synd = MLX5_DRIVER_SYND;
return -EIO;
@@ -539,6 +542,9 @@ const char *mlx5_command_str(int command)
MLX5_COMMAND_STR_CASE(SET_FLOW_TABLE_ENTRY);
MLX5_COMMAND_STR_CASE(QUERY_FLOW_TABLE_ENTRY);
MLX5_COMMAND_STR_CASE(DELETE_FLOW_TABLE_ENTRY);
+   MLX5_COMMAND_STR_CASE(ALLOC_FLOW_COUNTER);
+   MLX5_COMMAND_STR_CASE(DEALLOC_FLOW_COUNTER);
+   MLX5_COMMAND_STR_CASE(QUERY_FLOW_COUNTER);
default: return "unknown command opcode";
}
 }
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.c 
b/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.c
index 9797768..ccb63a0 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.c
@@ -323,3 +323,69 @@ int mlx5_cmd_delete_fte(struct mlx5_core_dev *dev,
 
return err;
 }
+
+int mlx5_cmd_fc_alloc(struct mlx5_core_dev *dev, u16 *id)
+{
+   u32 in[MLX5_ST_SZ_DW(alloc_flow_counter_in)];
+   u32 out[MLX5_ST_SZ_DW(alloc_flow_counter_out)];
+   int err;
+
+   memset(in, 0, sizeof(in));
+   memset(out, 0, sizeof(out));
+
+   MLX5_SET(alloc_flow_counter_in, in, opcode,
+MLX5_CMD_OP_ALLOC_FLOW_COUNTER);
+
+   err = mlx5_cmd_exec_check_status(dev, in, sizeof(in), out,
+sizeof(out));
+   if (err)
+   return err;
+
+   *id = MLX5_GET(alloc_flow_counter_out, out, flow_counter_id);
+
+   return 0;
+}
+
+int mlx5_cmd_fc_free(struct mlx5_core_dev *dev, u16 id)
+{
+   u32 in[MLX5_ST_SZ_DW(dealloc_flow_counter_in)];
+   u32 out[MLX5_ST_SZ_DW(dealloc_flow_counter_out)];
+
+   memset(in, 0, sizeof(in));
+   memset(out, 0, sizeof(out));
+
+   MLX5_SET(dealloc_flow_counter_in, in, opcode,
+MLX5_CMD_OP_DEALLOC_FLOW_COUNTER);
+   MLX5_SET(dealloc_flow_counter_in, in, flow_counter_id, id);
+
+   return mlx5_cmd_exec_check_status(dev, in, sizeof(in), out,
+ sizeof(out));
+}
+
+int mlx5_cmd_fc_query(struct mlx5_core_dev *dev, u16 id,
+ u64 *packets, u64 *bytes)
+{
+   u32 out[MLX5_ST_SZ_BYTES(query_flow_counter_out) +
+   MLX5_ST_SZ_BYTES(traffic_counter)];
+   u32 in[MLX5_ST_SZ_DW(query_flow_counter_in)];
+   void *stats;
+   int err = 0;
+
+   memset(in, 0, sizeof(in));
+   memset(out, 0, sizeof(out));
+
+   MLX5_SET(query_flow_counter_in, in, opcode,
+MLX5_CMD_OP_QUERY_FLOW_COUNTER);
+   MLX5_SET(query_flow_counter_in, in, op_mod, 0);
+   MLX5_SET(query_flow_counter_in, in, flow_counter_id, id);
+
+   err = mlx5_cmd_exec_check_status(dev, in, sizeof(in), out, sizeof(out));
+   if (err)
+   return err;
+
+   stats = MLX5_ADDR_OF(query_flow_counter_out, out, flow_statistics);
+   *packets = MLX5_GET64(traffic_counter, stats, packets);
+   *bytes = MLX5_GET64(traffic_counter, stats, octets);
+
+   return 0;
+}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.h 
b/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.h
index c97b4a0..18c111a 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.h
@@ -70,4 +70,9 @@ int mlx5_cmd_delete_fte(struct mlx5_core_dev *dev,
 
 int mlx5_cmd_update_root_ft(struct mlx5_core_dev *dev,
struct mlx5_flow_table *ft);
+
+int mlx5_cmd_fc_al

[PATCH net-next 7/8] net/mlx5_core: Flow counters infrastructure

2016-05-13 Thread Amir Vadai
From: Amir Vadai 

If a counter has the aging flag set when created, it is added to a list
of counters that will be queried periodically from a workqueue. The query
result and last-use timestamp are cached.
Adding and deleting counters must be very efficient, since thousands of
such operations might be issued per second.
There is only a single reference to counters without aging, therefore
no need for locks.
But, counters with aging enabled are stored in a list. In order to make
code as lockless as possible, all the list manipulation and access to
hardware is done from a single context - the periodic counters query
thread.

The hardware supports multiple counters per FTE, however currently we
are using one counter for each FTE.

Signed-off-by: Amir Vadai 
---
 drivers/net/ethernet/mellanox/mlx5/core/Makefile   |   2 +-
 drivers/net/ethernet/mellanox/mlx5/core/fs_core.c  |   7 +-
 drivers/net/ethernet/mellanox/mlx5/core/fs_core.h  |   3 +
 .../net/ethernet/mellanox/mlx5/core/fs_counters.c  | 226 +
 include/linux/mlx5/driver.h|  14 ++
 include/linux/mlx5/fs.h|   5 +
 6 files changed, 255 insertions(+), 2 deletions(-)
 create mode 100644 drivers/net/ethernet/mellanox/mlx5/core/fs_counters.c

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/Makefile 
b/drivers/net/ethernet/mellanox/mlx5/core/Makefile
index b531d4f..9ea7b58 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/Makefile
+++ b/drivers/net/ethernet/mellanox/mlx5/core/Makefile
@@ -2,7 +2,7 @@ obj-$(CONFIG_MLX5_CORE) += mlx5_core.o
 
 mlx5_core-y := main.o cmd.o debugfs.o fw.o eq.o uar.o pagealloc.o \
health.o mcg.o cq.o srq.o alloc.o qp.o port.o mr.o pd.o   \
-   mad.o transobj.o vport.o sriov.o fs_cmd.o fs_core.o
+   mad.o transobj.o vport.o sriov.o fs_cmd.o fs_core.o 
fs_counters.o
 
 mlx5_core-$(CONFIG_MLX5_CORE_EN) += wq.o eswitch.o \
en_main.o en_fs.o en_ethtool.o en_tx.o en_rx.o \
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c 
b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c
index 9420def..8b5f0b2 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c
@@ -1771,6 +1771,7 @@ void mlx5_cleanup_fs(struct mlx5_core_dev *dev)
cleanup_single_prio_root_ns(dev, dev->priv.fdb_root_ns);
cleanup_single_prio_root_ns(dev, dev->priv.esw_egress_root_ns);
cleanup_single_prio_root_ns(dev, dev->priv.esw_ingress_root_ns);
+   mlx5_cleanup_fc_stats(dev);
 }
 
 static int init_fdb_root_ns(struct mlx5_core_dev *dev)
@@ -1827,10 +1828,14 @@ int mlx5_init_fs(struct mlx5_core_dev *dev)
 {
int err = 0;
 
+   err = mlx5_init_fc_stats(dev);
+   if (err)
+   return err;
+
if (MLX5_CAP_GEN(dev, nic_flow_table)) {
err = init_root_ns(dev);
if (err)
-   return err;
+   goto err;
}
if (MLX5_CAP_GEN(dev, eswitch_flow_table)) {
err = init_fdb_root_ns(dev);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.h 
b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.h
index 1989048..aa41a73 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.h
@@ -169,6 +169,9 @@ struct mlx5_flow_root_namespace {
struct mutexchain_lock;
 };
 
+int mlx5_init_fc_stats(struct mlx5_core_dev *dev);
+void mlx5_cleanup_fc_stats(struct mlx5_core_dev *dev);
+
 int mlx5_init_fs(struct mlx5_core_dev *dev);
 void mlx5_cleanup_fs(struct mlx5_core_dev *dev);
 
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_counters.c 
b/drivers/net/ethernet/mellanox/mlx5/core/fs_counters.c
new file mode 100644
index 000..164dc37
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_counters.c
@@ -0,0 +1,226 @@
+/*
+ * Copyright (c) 2016, Mellanox Technologies. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses.  You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ *  - Redistributions of source code must retain the above
+ *copyright notice, this list of conditions and the following
+ *disclaimer.
+ *
+ *  - Redistributions in binary form must reproduce the above
+ *copyright notice, this list of conditions and the following
+ *disclaimer in the documentation and/or other materials
+ *provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY K

Re: [PATCH] mlx5: avoid unused variable warning

2016-05-18 Thread Amir Vadai"
On Wed, May 18, 2016 at 04:21:07PM +0200, Arnd Bergmann wrote:
> When CONFIG_NET_CLS_ACT is disabled, we get a new warning in the mlx5
> ethernet driver because the tc_for_each_action() loop never references
> the iterator:
> 
> mellanox/mlx5/core/en_tc.c: In function 'mlx5e_stats_flower':
> mellanox/mlx5/core/en_tc.c:431:20: error: unused variable 'a' 
> [-Werror=unused-variable]
>   struct tc_action *a;
> 
> This changes the dummy tc_for_each_action() macro by adding a
> cast to void, letting the compiler know that the variable is
> intentionally declared but not used here. I could not come up
> with a nicer workaround, but this seems to do the trick.
> 
> Signed-off-by: Arnd Bergmann 
> Fixes: aad7e08d39bd ("net/mlx5e: Hardware offloaded flower filter statistics 
> support")
> Fixes: 00175aec941e ("net/sched: Macro instead of CONFIG_NET_CLS_ACT ifdef")
> ---
Acked-By: Amir Vadai 

Thanks Arnd.


Re: [net-next PATCH 7/7] net: ixgbe: add support for tc_u32 offload

2016-02-03 Thread Amir Vadai"
On Wed, Feb 03, 2016 at 01:29:59AM -0800, John Fastabend wrote:
> This adds initial support for offloading the u32 tc classifier. This
> initial implementation only implements a few base matches and actions
> to illustrate the use of the infrastructure patches.
> 
> However it is an interesting subset because it handles the u32 next
> hdr logic to correctly map tcp packets from ip headers using the ihl
> and protocol fields. After this is accepted we can extend the match
> and action fields easily by updating the model header file.
> 
> Also only the drop action is supported initially.
> 
> Here is a short test script,
> 
>  #tc qdisc add dev eth4 ingress
>  #tc filter add dev eth4 parent : protocol ip \
>   u32 ht 800: order 1 \
>   match ip dst 15.0.0.1/32 match ip src 15.0.0.2/32 action drop
> 
> <-- hardware has dst/src ip match rule installed -->
> 
>  #tc filter del dev eth4 parent : prio 49152
>  #tc filter add dev eth4 parent : protocol ip prio 99 \
>   handle 1: u32 divisor 1
>  #tc filter add dev eth4 protocol ip parent : prio 99 \
>   u32 ht 800: order 1 link 1: \
>   offset at 0 mask 0f00 shift 6 plus 0 eat match ip protocol 6 ff
>  #tc filter add dev eth4 parent : protocol ip \
>   u32 ht 1: order 3 match tcp src 23  action drop
> 
> <-- hardware has tcp src port rule installed -->
> 
>  #tc qdisc del dev eth4 parent :
> 
> <-- hardware cleaned up -->
> 
> Signed-off-by: John Fastabend 
> ---
>  drivers/net/ethernet/intel/ixgbe/ixgbe.h |3 
>  drivers/net/ethernet/intel/ixgbe/ixgbe_ethtool.c |6 -
>  drivers/net/ethernet/intel/ixgbe/ixgbe_main.c|  196 
> ++
>  3 files changed, 198 insertions(+), 7 deletions(-)
> 

What are you doing w.r.t. priorities? Are the filters processed in
order of their priorities?

[...]

>  
> -static int ixgbe_update_ethtool_fdir_entry(struct ixgbe_adapter *adapter,
> -struct ixgbe_fdir_filter *input,
> -u16 sw_idx)
> +int ixgbe_update_ethtool_fdir_entry(struct ixgbe_adapter *adapter,
> + struct ixgbe_fdir_filter *input,
> + u16 sw_idx)
>  {
>   struct ixgbe_hw *hw = &adapter->hw;
>   struct hlist_node *node2;
> diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c 
> b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c
> index 03e236c..a1a91bf 100644
> --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c
> +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c
> @@ -51,6 +51,7 @@
>  #include 
>  #include 
>  #include 
> +#include 
>  
>  #ifdef CONFIG_OF
>  #include 
> @@ -8200,10 +8201,197 @@ int ixgbe_setup_tc(struct net_device *dev, u8 tc)
>   return 0;
>  }
>  
> +#include 
> +#include "ixgbe_model.h"
Did you leave those #include's in the middle of the file on purpose?

[...]



Re: [net-next PATCH 0/7] tc offload for cls_u32 on ixgbe

2016-02-03 Thread Amir Vadai"
On Wed, Feb 03, 2016 at 01:27:32AM -0800, John Fastabend wrote:
> This extends the setup_tc framework so it can support more than just
> the mqprio offload and push other classifiers and qdiscs into the
> hardware. The series here targets the u32 classifier and ixgbe
> driver. I worked out the u32 classifier because it is protocol
> oblivious and aligns with multiple hardware devices I have access
> to. I did an initial implementation on ixgbe because (a) I have one
> in my box (b) its a stable driver and (c) it is relatively simple
> compared to the other devices I have here but still has enough
> flexibility to exercise the features of cls_u32.
> 
> I intentionally limited the scope of this series to the basic
> feature set. Specifically this uses a 'big hammer' feature bit
> to do the offload or not. If the bit is set you get offloaded rules
> if it is not then rules will not be offloaded. If we can agree on
> this patch series there are some more patches on my queue we can
> talk about to make the offload decision per rule using flags similar
> to how we do l2 mac updates. Additionally the error strategy can
> be improved to be hard aborting, log and continue, etc. I think
> these are nice to have improvements but shouldn't block this series.
> 
> Also by adding get_parse_graph and set_parse_graph attributes as
> in my previous flow_api work we can build programmable devices
> and programmatically learn when rules can or can not be loaded
> into the hardware. Again future work.
> 
> Any comments/feedback appreciated.
> 
> Thanks,
> John
> 
> ---
> 
> John Fastabend (7):
>   net: rework ndo tc op to consume additional qdisc handle parameter
>   net: rework setup_tc ndo op to consume general tc operand
>   net: sched: add cls_u32 offload hooks for netdevs
>   net: add tc offload feature flag
>   net: tc: helper functions to query action types
>   net: ixgbe: add minimal parser details for ixgbe
>   net: ixgbe: add support for tc_u32 offload
> 

Hi John,

Nice work :)

I will add mlx5 support, and see if we can live with u32. If not - I will
add flower support too.

Amir


Re: [net-next PATCH 7/7] net: ixgbe: add support for tc_u32 offload

2016-02-03 Thread Amir Vadai"
On Wed, Feb 03, 2016 at 01:29:59AM -0800, John Fastabend wrote:
> This adds initial support for offloading the u32 tc classifier. This
> initial implementation only implements a few base matches and actions
> to illustrate the use of the infrastructure patches.
> 
> However it is an interesting subset because it handles the u32 next
> hdr logic to correctly map tcp packets from ip headers using the ihl
> and protocol fields. After this is accepted we can extend the match
> and action fields easily by updating the model header file.
> 
> Also only the drop action is supported initially.
> 
> Here is a short test script,
> 
>  #tc qdisc add dev eth4 ingress
>  #tc filter add dev eth4 parent : protocol ip \
>   u32 ht 800: order 1 \
>   match ip dst 15.0.0.1/32 match ip src 15.0.0.2/32 action drop
> 
> <-- hardware has dst/src ip match rule installed -->
> 
>  #tc filter del dev eth4 parent : prio 49152
>  #tc filter add dev eth4 parent : protocol ip prio 99 \
>   handle 1: u32 divisor 1
>  #tc filter add dev eth4 protocol ip parent : prio 99 \
>   u32 ht 800: order 1 link 1: \
>   offset at 0 mask 0f00 shift 6 plus 0 eat match ip protocol 6 ff
>  #tc filter add dev eth4 parent : protocol ip \
>   u32 ht 1: order 3 match tcp src 23  action drop
> 
> <-- hardware has tcp src port rule installed -->
> 
>  #tc qdisc del dev eth4 parent :
> 
> <-- hardware cleaned up -->
> 
> Signed-off-by: John Fastabend 
> ---
>  drivers/net/ethernet/intel/ixgbe/ixgbe.h |3 
>  drivers/net/ethernet/intel/ixgbe/ixgbe_ethtool.c |6 -
>  drivers/net/ethernet/intel/ixgbe/ixgbe_main.c|  196 
> ++
>  3 files changed, 198 insertions(+), 7 deletions(-)
> 
> diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe.h 
> b/drivers/net/ethernet/intel/ixgbe/ixgbe.h
> index 4b9156c..09c2d9b 100644
> --- a/drivers/net/ethernet/intel/ixgbe/ixgbe.h
> +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe.h

[...]

> @@ -8277,6 +8465,7 @@ static int ixgbe_set_features(struct net_device *netdev,
>*/
>   switch (features & NETIF_F_NTUPLE) {
>   case NETIF_F_NTUPLE:
> + case NETIF_F_HW_TC:
>   /* turn off ATR, enable perfect filters and reset */
>   if (!(adapter->flags & IXGBE_FLAG_FDIR_PERFECT_CAPABLE))
>   need_reset = true;

I think you have a bug here. I don't see how the NETIF_F_HW_TC case will
happen after masking 'features' out.


Re: [net-next PATCH 7/7] net: ixgbe: add support for tc_u32 offload

2016-02-04 Thread Amir Vadai"
On Thu, Feb 04, 2016 at 12:23:02AM -0800, Fastabend, John R wrote:
> On 2/3/2016 11:30 PM, Amir Vadai" wrote:
> > On Wed, Feb 03, 2016 at 01:29:59AM -0800, John Fastabend wrote:
> >> This adds initial support for offloading the u32 tc classifier. This
> >> initial implementation only implements a few base matches and actions
> >> to illustrate the use of the infrastructure patches.
> >>
> >> However it is an interesting subset because it handles the u32 next
> >> hdr logic to correctly map tcp packets from ip headers using the ihl
> >> and protocol fields. After this is accepted we can extend the match
> >> and action fields easily by updating the model header file.
> >>
> >> Also only the drop action is supported initially.
> >>
> >> Here is a short test script,
> >>
> >>  #tc qdisc add dev eth4 ingress
> >>  #tc filter add dev eth4 parent : protocol ip \
> >>u32 ht 800: order 1 \
> >>match ip dst 15.0.0.1/32 match ip src 15.0.0.2/32 action drop
> >>
> >> <-- hardware has dst/src ip match rule installed -->
> >>
> >>  #tc filter del dev eth4 parent : prio 49152
> >>  #tc filter add dev eth4 parent : protocol ip prio 99 \
> >>handle 1: u32 divisor 1
> >>  #tc filter add dev eth4 protocol ip parent : prio 99 \
> >>u32 ht 800: order 1 link 1: \
> >>offset at 0 mask 0f00 shift 6 plus 0 eat match ip protocol 6 ff
> >>  #tc filter add dev eth4 parent : protocol ip \
> >>u32 ht 1: order 3 match tcp src 23  action drop
> >>
> >> <-- hardware has tcp src port rule installed -->
> >>
> >>  #tc qdisc del dev eth4 parent :
> >>
> >> <-- hardware cleaned up -->
> >>
> >> Signed-off-by: John Fastabend 
> >> ---
> >>  drivers/net/ethernet/intel/ixgbe/ixgbe.h |3 
> >>  drivers/net/ethernet/intel/ixgbe/ixgbe_ethtool.c |6 -
> >>  drivers/net/ethernet/intel/ixgbe/ixgbe_main.c|  196 
> >> ++
> >>  3 files changed, 198 insertions(+), 7 deletions(-)
> >>
> >> diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe.h 
> >> b/drivers/net/ethernet/intel/ixgbe/ixgbe.h
> >> index 4b9156c..09c2d9b 100644
> >> --- a/drivers/net/ethernet/intel/ixgbe/ixgbe.h
> >> +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe.h
> > 
> > [...]
> > 
> >> @@ -8277,6 +8465,7 @@ static int ixgbe_set_features(struct net_device 
> >> *netdev,
> >> */
> >>switch (features & NETIF_F_NTUPLE) {
> >>case NETIF_F_NTUPLE:
> >> +  case NETIF_F_HW_TC:
> >>/* turn off ATR, enable perfect filters and reset */
> >>if (!(adapter->flags & IXGBE_FLAG_FDIR_PERFECT_CAPABLE))
> >>need_reset = true;
> > 
> > I think you have a bug here. I don't see how the NETIF_F_HW_TC case will
> > happen after masking 'features' out.
> > 
> 
> Ah I should have annotated this in the commit msg. I turn the feature
> off by default to enable it the user needs to run
> 
>   # ethtool -K ethx hw-tc-offload on
> 
> this is just a habit of mine to leave new features off by default for
> a bit until I work out some of the kinks. For example I found a case
> today where if you build loops into your u32 graph the hardware tables
> can get out of sync with the software tables. This is sort of extreme
> corner case not sure if anyone would really use u32 but it is valid
> and the hardware should abort correctly.
Yeh - that is nice :) But I was just pointing out a small typo which I
think you have.
The new case will never happen. You compare: (features & NETIF_F_NTUPLE) == 
NETIF_F_HW_TC
Also the comment before the switch should be modified.

> 
> Thanks,
> John
> 


Re: [net-next PATCH 3/7] net: sched: add cls_u32 offload hooks for netdevs

2016-02-04 Thread Amir Vadai"
On Wed, Feb 03, 2016 at 01:28:37AM -0800, John Fastabend wrote:
> This patch allows netdev drivers to consume cls_u32 offloads via
> the ndo_setup_tc ndo op.
> 
> This works aligns with how network drivers have been doing qdisc
> offloads for mqprio.
> 
> Signed-off-by: John Fastabend 
> ---
>  include/linux/netdevice.h |6 +++-
>  include/net/pkt_cls.h |   33 
>  net/sched/cls_u32.c   |   73 
> -
>  3 files changed, 109 insertions(+), 3 deletions(-)
> 
> diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
> index 9090ff7..861ce67 100644
> --- a/include/linux/netdevice.h
> +++ b/include/linux/netdevice.h
> @@ -778,17 +778,21 @@ static inline bool netdev_phys_item_id_same(struct 
> netdev_phys_item_id *a,
>  typedef u16 (*select_queue_fallback_t)(struct net_device *dev,
>  struct sk_buff *skb);
>  
> -/* This structure holds attributes of qdisc and classifiers
> +/* These structures hold the attributes of qdisc and classifiers
>   * that are being passed to the netdevice through the setup_tc op.
>   */
>  enum {
>   TC_SETUP_MQPRIO,
> + TC_SETUP_CLSU32,
>  };
>  
> +struct tc_cls_u32_offload;
> +
>  struct tc_to_netdev {
>   unsigned int type;
>   union {
>   u8 tc;
> + struct tc_cls_u32_offload *cls_u32;
>   };
>  };
>  
> diff --git a/include/net/pkt_cls.h b/include/net/pkt_cls.h
> index bc49967..0bd12cd 100644
> --- a/include/net/pkt_cls.h
> +++ b/include/net/pkt_cls.h
> @@ -358,4 +358,37 @@ tcf_match_indev(struct sk_buff *skb, int ifindex)
>  }
>  #endif /* CONFIG_NET_CLS_IND */
>  
> +struct tc_cls_u32_knode {
> + struct tcf_exts *exts;
> + u8 fshift;
> + u32 handle;
> + u32 val;
> + u32 mask;
> + u32 link_handle;
> + struct tc_u32_sel *sel;
> +};
> +
> +struct tc_cls_u32_hnode {
> + u32 handle;
> + u32 prio;
> + unsigned int divisor;
> +};
> +
> +enum {
> + TC_CLSU32_NEW_KNODE,
TC_CLSU32_NEW_KNODE is never used

[...]



Re: [net-next PATCH v3 3/8] net: sched: add cls_u32 offload hooks for netdevs

2016-02-18 Thread Amir Vadai"
On Wed, Feb 17, 2016 at 03:07:23PM -0800, John Fastabend wrote:
> [...]
> 
> >>
> >>> +static void u32_replace_hw_hnode(struct tcf_proto *tp, struct
> >>> tc_u_hnode *h)
> >>> +{
> >>> +struct net_device *dev = tp->q->dev_queue->dev;
> >>> +struct tc_cls_u32_offload u32_offload = {0};
> >>> +struct tc_to_netdev offload;
> >>> +
> >>> +offload.type = TC_SETUP_CLSU32;
> >>> +offload.cls_u32 = &u32_offload;
> >>> +
> >>> +if (dev->netdev_ops->ndo_setup_tc) {
> >>> +offload.cls_u32->command = TC_CLSU32_NEW_HNODE;
> >>
> >> TC_CLSU32_REPLACE_HNODE?
> >>
> > 
> > Yep I made this change and will send out v4.
> > 
> > [...]
> > 
> >>
> 
> Actually thinking about this a bit more I wrote this thinking
> that there existed some hardware that actually cared if it was
> a new rule or an existing rule. For me it doesn't matter I do
> the same thing in the new/replace cases I just write into the
> slot on the hardware table and if it happens to have something
> in it well its overwritten e.g. "replaced". This works because
> the cls_u32 layer protects us from doing something unexpected.
> 
> I'm wondering (mostly asking the mlx folks) is there hardware
> out there that cares to make this distinction between new and
> replace? Otherwise I can just drop new and always use replace.
> Or vice versa which is the case in its current form.
I don't see a need for such a distinction in mlx hardware.

Thanks,
Amir.

> 
> Thanks,
> John
> 


Re: [net-next PATCH 3/4] net: sched: cls_u32 add bit to specify software only rules

2016-02-24 Thread Amir Vadai"
On Tue, Feb 23, 2016 at 11:03:21AM -0800, John Fastabend wrote:
> In the initial implementation the only way to stop a rule from being
> inserted into the hardware table was via the device feature flag.
> However this doesn't work well when working on an end host system
> where packets are expect to hit both the hardware and software
> datapaths.
> 
> For example we can imagine a rule that will match an IP address and
> increment a field. If we install this rule in both hardware and
> software we may increment the field twice. To date we have only
> added support for the drop action so we have been able to ignore
> these cases. But as we extend the action support we will hit this
> example plus more such cases. Arguably these are not even corner
> cases in many working systems these cases will be common.
> 
> To avoid forcing the driver to always abort (i.e. the above example)
> this patch adds a flag to add a rule in software only. A careful
> user can use this flag to build software and hardware datapaths
> that work together. One example we have found particularly useful
> is to use hardware resources to set the skb->mark on the skb when
> the match may be expensive to run in software but a mark lookup
> in a hash table is cheap. The idea here is hardware can do in one
> lookup what the u32 classifier may need to traverse multiple lists
> and hash tables to compute. The flag is only passed down on inserts
> on deletion to avoid stale references in hardware we always try
> to remove a rule if it exists.
> 
> Notice we do not add a hardware only case here. If you were to
> add a hardware only case then you are stuck with the problem
> of where to stick the software representation of that filter
> rule. If its stuck on the same filter list as the software only and
> software/hardware rules it then has to be walked over and ignored
> in the classify path. The overhead is not huge but is measurable.
> And with so much work being invested in speeding up rx/tx of
> pkt processing this is unacceptable IMO. The other option is to
> have a special hook just for hardware only resources. This is
> implemented in the next patch.
> 
> Signed-off-by: John Fastabend 

[...]

>  
> -static bool u32_should_offload(struct net_device *dev)
> +static bool u32_should_offload(struct net_device *dev, u32 flags)
>  {
>   if (!(dev->features & NETIF_F_HW_TC))
>   return false;
>  
> - return dev->netdev_ops->ndo_setup_tc;
> + if (flags & TCA_U32_FLAGS_SOFTWARE)
> + return false;
> +
> + if (!dev->netdev_ops->ndo_setup_tc)
> + return false;
> +
> + return true;
>  }
This function and flag should be a generic filter attribute - not just
u32.

Thanks,
Amir

[...]



[PATCH net-next 2/8] net/flow_dissector: Make dissector_uses_key() and skb_flow_dissector_target() public

2016-03-01 Thread Amir Vadai
Will be used in a following patch to query whether a key is being used, and
what its value is in the target object.

Signed-off-by: Amir Vadai 
---
 include/net/flow_dissector.h | 13 +
 net/core/flow_dissector.c| 13 -
 2 files changed, 13 insertions(+), 13 deletions(-)

diff --git a/include/net/flow_dissector.h b/include/net/flow_dissector.h
index 8c8548c..d3d60dc 100644
--- a/include/net/flow_dissector.h
+++ b/include/net/flow_dissector.h
@@ -184,4 +184,17 @@ static inline bool flow_keys_have_l4(struct flow_keys 
*keys)
 
 u32 flow_hash_from_keys(struct flow_keys *keys);
 
+static inline bool dissector_uses_key(const struct flow_dissector 
*flow_dissector,
+ enum flow_dissector_key_id key_id)
+{
+   return flow_dissector->used_keys & (1 << key_id);
+}
+
+static inline void *skb_flow_dissector_target(struct flow_dissector 
*flow_dissector,
+ enum flow_dissector_key_id key_id,
+ void *target_container)
+{
+   return ((char *)target_container) + flow_dissector->offset[key_id];
+}
+
 #endif
diff --git a/net/core/flow_dissector.c b/net/core/flow_dissector.c
index 7c7b873..a669dea 100644
--- a/net/core/flow_dissector.c
+++ b/net/core/flow_dissector.c
@@ -19,25 +19,12 @@
 #include 
 #include 
 
-static bool dissector_uses_key(const struct flow_dissector *flow_dissector,
-  enum flow_dissector_key_id key_id)
-{
-   return flow_dissector->used_keys & (1 << key_id);
-}
-
 static void dissector_set_key(struct flow_dissector *flow_dissector,
  enum flow_dissector_key_id key_id)
 {
flow_dissector->used_keys |= (1 << key_id);
 }
 
-static void *skb_flow_dissector_target(struct flow_dissector *flow_dissector,
-  enum flow_dissector_key_id key_id,
-  void *target_container)
-{
-   return ((char *) target_container) + flow_dissector->offset[key_id];
-}
-
 void skb_flow_dissector_init(struct flow_dissector *flow_dissector,
 const struct flow_dissector_key *key,
 unsigned int key_count)
-- 
2.7.0



[PATCH net-next 1/8] net/flower: Introduce hardware offload support

2016-03-01 Thread Amir Vadai
This patch is based on a patch made by John Fastabend.
It adds support for offloading cls_flower.
A filter that is offloaded successfully by hardware will not be added to
the hashtable and won't be processed by software.

Suggested-by: John Fastabend 
Signed-off-by: Amir Vadai 
---
 include/linux/netdevice.h|  2 ++
 include/net/pkt_cls.h| 14 +
 include/uapi/linux/pkt_cls.h |  2 ++
 net/sched/cls_flower.c   | 75 +---
 4 files changed, 88 insertions(+), 5 deletions(-)

diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index e52077f..0fd329a 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -785,6 +785,7 @@ typedef u16 (*select_queue_fallback_t)(struct net_device 
*dev,
 enum {
TC_SETUP_MQPRIO,
TC_SETUP_CLSU32,
+   TC_SETUP_CLSFLOWER,
 };
 
 struct tc_cls_u32_offload;
@@ -794,6 +795,7 @@ struct tc_to_netdev {
union {
u8 tc;
struct tc_cls_u32_offload *cls_u32;
+   struct tc_cls_flower_offload *cls_flower;
};
 };
 
diff --git a/include/net/pkt_cls.h b/include/net/pkt_cls.h
index bea14ee..beb2ee1 100644
--- a/include/net/pkt_cls.h
+++ b/include/net/pkt_cls.h
@@ -409,4 +409,18 @@ static inline bool tc_should_offload(struct net_device 
*dev, u32 flags)
return true;
 }
 
+enum {
+   TC_CLSFLOWER_REPLACE,
+   TC_CLSFLOWER_DESTROY,
+};
+
+struct tc_cls_flower_offload {
+   int command;
+   u64 cookie;
+   struct flow_dissector *dissector;
+   struct fl_flow_key *mask;
+   struct fl_flow_key *key;
+   struct tcf_exts *exts;
+};
+
 #endif
diff --git a/include/uapi/linux/pkt_cls.h b/include/uapi/linux/pkt_cls.h
index 9874f568..c43c5f7 100644
--- a/include/uapi/linux/pkt_cls.h
+++ b/include/uapi/linux/pkt_cls.h
@@ -417,6 +417,8 @@ enum {
TCA_FLOWER_KEY_TCP_DST, /* be16 */
TCA_FLOWER_KEY_UDP_SRC, /* be16 */
TCA_FLOWER_KEY_UDP_DST, /* be16 */
+
+   TCA_FLOWER_FLAGS,
__TCA_FLOWER_MAX,
 };
 
diff --git a/net/sched/cls_flower.c b/net/sched/cls_flower.c
index 95b0212..e599bea 100644
--- a/net/sched/cls_flower.c
+++ b/net/sched/cls_flower.c
@@ -165,6 +165,53 @@ static void fl_destroy_filter(struct rcu_head *head)
kfree(f);
 }
 
+static int fl_hw_destroy_filter(struct tcf_proto *tp, u64 cookie)
+{
+   struct net_device *dev = tp->q->dev_queue->dev;
+   struct tc_cls_flower_offload offload = {0};
+   struct tc_to_netdev tc;
+
+   if (!tc_should_offload(dev, 0))
+   return -ENOTSUPP;
+
+   offload.command = TC_CLSFLOWER_DESTROY;
+   offload.cookie = cookie;
+
+   tc.type = TC_SETUP_CLSFLOWER;
+   tc.cls_flower = &offload;
+
+   return dev->netdev_ops->ndo_setup_tc(dev, tp->q->handle, tp->protocol,
+&tc);
+}
+
+static int fl_hw_replace_filter(struct tcf_proto *tp,
+   struct flow_dissector *dissector,
+   struct fl_flow_key *mask,
+   struct fl_flow_key *key,
+   struct tcf_exts *actions,
+   u64 cookie, u32 flags)
+{
+   struct net_device *dev = tp->q->dev_queue->dev;
+   struct tc_cls_flower_offload offload = {0};
+   struct tc_to_netdev tc;
+
+   if (!tc_should_offload(dev, flags))
+   return -ENOTSUPP;
+
+   offload.command = TC_CLSFLOWER_REPLACE;
+   offload.cookie = cookie;
+   offload.dissector = dissector;
+   offload.mask = mask;
+   offload.key = key;
+   offload.exts = actions;
+
+   tc.type = TC_SETUP_CLSFLOWER;
+   tc.cls_flower = &offload;
+
+   return dev->netdev_ops->ndo_setup_tc(dev, tp->q->handle, tp->protocol,
+&tc);
+}
+
 static bool fl_destroy(struct tcf_proto *tp, bool force)
 {
struct cls_fl_head *head = rtnl_dereference(tp->root);
@@ -174,6 +221,7 @@ static bool fl_destroy(struct tcf_proto *tp, bool force)
return false;
 
list_for_each_entry_safe(f, next, &head->filters, list) {
+   fl_hw_destroy_filter(tp, (u64)f);
list_del_rcu(&f->list);
call_rcu(&f->rcu, fl_destroy_filter);
}
@@ -459,6 +507,7 @@ static int fl_change(struct net *net, struct sk_buff 
*in_skb,
struct cls_fl_filter *fnew;
struct nlattr *tb[TCA_FLOWER_MAX + 1];
struct fl_flow_mask mask = {};
+   u32 flags = 0;
int err;
 
if (!tca[TCA_OPTIONS])
@@ -494,13 +543,28 @@ static int fl_change(struct net *net, struct sk_buff 
*in_skb,
if (err)
goto errout;
 
-   err = rhashtable_insert_fast(&head->ht, &fnew->ht_node,
-head-&

[PATCH net-next 0/8] cls_flower hardware offload support

2016-03-01 Thread Amir Vadai
Hi,

This patchset introduces cls_flower hardware offload support over the ConnectX-4
driver; other hardware vendors are welcome to use it too.

This patchset is based on John's infrastructure for tc offloading [2] to add
hardware offload support to the flower filter. It also extends the support to
an additional tc action - skbedit mark operation.
The NIC driver that was used is ConnectX-4. The feature is off by default and can be
turned on using ethtool.

Some commands to use this code:

export TC=../iproute2/tc/tc
export ETH=ens9

ethtool  -K ens9 hw-tc-offload on

# add an ingress qdisc
$TC qdisc add dev $ETH ingress

# Drop ICMP (ip_proto 1) packets
$TC filter add dev $ETH protocol ip prio 20 parent ffff: \
flower ip_proto 1 \
dst_mac 7c:fe:90:69:81:62 \
src_mac 7c:fe:90:69:81:56 \
dst_ip 11.11.11.11 \
src_ip 11.11.11.12 \
indev $ETH \
action drop

# Mark (with 0x1234) TCP (ip_proto 6) packets
$TC filter add dev $ETH protocol ip prio 30 parent ffff: \
flower ip_proto 6 \
indev $ETH \
action skbedit mark 0x1234

# A NOP software filter used to count marked packets using "tc show -s"
$TC filter add dev $ETH protocol ip prio 10 parent ffff: \
handle 0x1234 fw action pass

The code was tested and applied on top of commit f12d33f
("3c59x: Ensure to apply the expires time") + John's pending patches [3]

Main changes from the RFC [1]:
- API
  - Using ndo_setup_tc() instead of switchdev
- act_skbedit, act_gact
  - Actions are not serialized to NIC driver, instead using access functions.
- cls_flower
  - prevent double classification by software by not adding
    successfully offloaded filters to the hashtable
  - Fixed some bugs in original RFC with rule delete  
- mlx5
  - Adding flow table to kernel namespace instead of a new namespace
  - s/offload/tc/ in many places
  - no need for a special kconfig since switchdev is not used

Thanks,
Amir

[1] - http://permalink.gmane.org/gmane.linux.network/397064
[2] - http://permalink.gmane.org/gmane.linux.network/397045 
[3] - http://permalink.gmane.org/gmane.linux.network/401226

Amir Vadai (8):
  net/flower: Introduce hardware offload support
  net/flow_dissector: Make dissector_uses_key() and
skb_flow_dissector_target() public
  net/act_skbedit: Utility functions for mark action
  net/mlx5_core: Set flow steering dest only for forward rules
  net/mlx5e: Add a new priority for kernel flow tables
  net/mlx5e: Introduce tc offload support
  net/mlx5e: Support offload cls_flower with drop action
  net/mlx5e: Support offload cls_flower with sskbedit mark action

 drivers/net/ethernet/mellanox/mlx5/core/Makefile  |   2 +-
 drivers/net/ethernet/mellanox/mlx5/core/en.h  |   9 +
 drivers/net/ethernet/mellanox/mlx5/core/en_fs.c   |   4 +-
 drivers/net/ethernet/mellanox/mlx5/core/en_main.c |  40 ++
 drivers/net/ethernet/mellanox/mlx5/core/en_rx.c   |   3 +
 drivers/net/ethernet/mellanox/mlx5/core/en_tc.c   | 434 ++
 drivers/net/ethernet/mellanox/mlx5/core/en_tc.h   |  51 +++
 drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.c  |  29 +-
 drivers/net/ethernet/mellanox/mlx5/core/fs_core.c |  22 +-
 include/linux/netdevice.h |   2 +
 include/net/flow_dissector.h  |  13 +
 include/net/pkt_cls.h |  14 +
 include/net/tc_act/tc_skbedit.h   |  15 +
 include/uapi/linux/pkt_cls.h  |   2 +
 net/core/flow_dissector.c |  13 -
 net/sched/cls_flower.c|  75 +++-
 16 files changed, 686 insertions(+), 42 deletions(-)
 create mode 100644 drivers/net/ethernet/mellanox/mlx5/core/en_tc.c
 create mode 100644 drivers/net/ethernet/mellanox/mlx5/core/en_tc.h

-- 
2.7.0



[PATCH net-next 6/8] net/mlx5e: Introduce tc offload support

2016-03-01 Thread Amir Vadai
Extend ndo_setup_tc() to support ingress tc offloading. Will be used by
later patches to offload tc flower filter.

The feature is off by default and can be enabled by issuing:
 # ethtool  -K eth0 hw-tc-offload on

The offload flow table is dynamically created when the first filter is
added.
Rules are saved in a hash table that is maintained by the consumer (for
example, the flower offload in the next patch).
When the last filter is removed and no filters remain in the hash table, the
offload flow table is destroyed.

Signed-off-by: Amir Vadai 
Signed-off-by: Or Gerlitz 
---
 drivers/net/ethernet/mellanox/mlx5/core/Makefile  |   2 +-
 drivers/net/ethernet/mellanox/mlx5/core/en.h  |   9 ++
 drivers/net/ethernet/mellanox/mlx5/core/en_main.c |  31 +
 drivers/net/ethernet/mellanox/mlx5/core/en_tc.c   | 131 ++
 drivers/net/ethernet/mellanox/mlx5/core/en_tc.h   |  44 
 5 files changed, 216 insertions(+), 1 deletion(-)
 create mode 100644 drivers/net/ethernet/mellanox/mlx5/core/en_tc.c
 create mode 100644 drivers/net/ethernet/mellanox/mlx5/core/en_tc.h

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/Makefile 
b/drivers/net/ethernet/mellanox/mlx5/core/Makefile
index 11b592d..4fc45ee 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/Makefile
+++ b/drivers/net/ethernet/mellanox/mlx5/core/Makefile
@@ -6,6 +6,6 @@ mlx5_core-y :=  main.o cmd.o debugfs.o fw.o eq.o uar.o 
pagealloc.o \
 
 mlx5_core-$(CONFIG_MLX5_CORE_EN) += wq.o eswitch.o \
en_main.o en_fs.o en_ethtool.o en_tx.o en_rx.o \
-   en_txrx.o en_clock.o vxlan.o
+   en_txrx.o en_clock.o vxlan.o en_tc.o
 
 mlx5_core-$(CONFIG_MLX5_CORE_EN_DCB) +=  en_dcbnl.o
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en.h 
b/drivers/net/ethernet/mellanox/mlx5/core/en.h
index 1dca3dc..6571a25 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en.h
@@ -43,6 +43,7 @@
 #include 
 #include 
 #include 
+#include 
 #include "wq.h"
 #include "mlx5_core.h"
 
@@ -524,8 +525,16 @@ struct mlx5e_flow_table {
struct mlx5_flow_group  **g;
 };
 
+struct mlx5e_tc_flow_table {
+   struct mlx5_flow_table  *t;
+
+   struct rhashtable_paramsht_params;
+   struct rhashtable   ht;
+};
+
 struct mlx5e_flow_tables {
struct mlx5_flow_namespace  *ns;
+   struct mlx5e_tc_flow_table  tc;
struct mlx5e_flow_table vlan;
struct mlx5e_flow_table main;
 };
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c 
b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
index 0d45f35..cb02b4c 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
@@ -30,9 +30,12 @@
  * SOFTWARE.
  */
 
+#include 
+#include 
 #include 
 #include 
 #include "en.h"
+#include "en_tc.h"
 #include "eswitch.h"
 #include "vxlan.h"
 
@@ -1880,6 +1883,17 @@ static int mlx5e_setup_tc(struct net_device *netdev, u8 
tc)
 static int mlx5e_ndo_setup_tc(struct net_device *dev, u32 handle,
  __be16 proto, struct tc_to_netdev *tc)
 {
+   struct mlx5e_priv *priv = netdev_priv(dev);
+
+   if (TC_H_MAJ(handle) != TC_H_MAJ(TC_H_INGRESS))
+   goto mqprio;
+
+   switch (tc->type) {
+   default:
+   return -EINVAL;
+   }
+
+mqprio:
if (handle != TC_H_ROOT || tc->type != TC_SETUP_MQPRIO)
return -EINVAL;
 
@@ -1963,6 +1977,13 @@ static int mlx5e_set_features(struct net_device *netdev,
mlx5e_disable_vlan_filter(priv);
}
 
+   if ((changes & NETIF_F_HW_TC) && !(features & NETIF_F_HW_TC) &&
+   mlx5e_tc_num_filters(priv)) {
+   netdev_err(netdev,
+  "Active offloaded tc filters, can't turn 
hw_tc_offload off\n");
+   return -EINVAL;
+   }
+
return err;
 }
 
@@ -2361,10 +2382,18 @@ static void mlx5e_build_netdev(struct net_device 
*netdev)
if (!priv->params.lro_en)
netdev->features  &= ~NETIF_F_LRO;
 
+#define FT_CAP(f) MLX5_CAP_FLOWTABLE(mdev, flow_table_properties_nic_receive.f)
+   if (FT_CAP(flow_modify_en) &&
+   FT_CAP(modify_root) &&
+   FT_CAP(identified_miss_table_mode) &&
+   FT_CAP(flow_table_modify))
+   priv->netdev->hw_features  |= NETIF_F_HW_TC;
+
netdev->features |= NETIF_F_HIGHDMA;
 
netdev->priv_flags   |= IFF_UNICAST_FLT;
 
+   mlx5e_tc_init(priv);
mlx5e_set_netdev_dev_addr(netdev);
 }
 
@@ -2531,6 +2560,7 @@ err_unmap_free_uar:
mlx5_unmap_free_uar(mdev, &priv->cq_uar);
 
 err_free_netdev:
+   mlx5e_tc_cleanup(priv);
free_netdev(netdev);
 
return NULL;
@@ 

[PATCH net-next 8/8] net/mlx5e: Support offload cls_flower with sskbedit mark action

2016-03-01 Thread Amir Vadai
Introduce offloading of skbedit mark action.

For example, to mark all TCP (ip_proto 6) packets arriving at interface
ens9 with 0x1234:

 # tc qdisc add dev ens9 ingress
 # tc filter add dev ens9 protocol ip parent ffff: \
 flower ip_proto 6 \
 indev ens9 \
 action skbedit mark 0x1234

Signed-off-by: Amir Vadai 
Signed-off-by: Or Gerlitz 
---
 drivers/net/ethernet/mellanox/mlx5/core/en_rx.c |  3 +++
 drivers/net/ethernet/mellanox/mlx5/core/en_tc.c | 16 
 drivers/net/ethernet/mellanox/mlx5/core/en_tc.h |  2 ++
 3 files changed, 21 insertions(+)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c 
b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c
index 519a07f..f293afe 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c
@@ -35,6 +35,7 @@
 #include 
 #include 
 #include "en.h"
+#include "en_tc.h"
 
 static inline bool mlx5e_rx_hw_stamp(struct mlx5e_tstamp *tstamp)
 {
@@ -224,6 +225,8 @@ static inline void mlx5e_build_rx_skb(struct mlx5_cqe64 
*cqe,
if (cqe_has_vlan(cqe))
__vlan_hwaccel_put_tag(skb, htons(ETH_P_8021Q),
   be16_to_cpu(cqe->vlan_info));
+
+   skb->mark = be32_to_cpu(cqe->sop_drop_qpn) & MLX5E_TC_FLOW_ID_MASK;
 }
 
 int mlx5e_poll_rx_cq(struct mlx5e_cq *cq, int budget)
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c 
b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c
index 8fee983..22ab439 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c
@@ -33,6 +33,7 @@
 #include 
 #include 
 #include 
+#include 
 #include 
 #include 
 #include 
@@ -287,6 +288,21 @@ static int parse_tc_actions(struct mlx5e_priv *priv, 
struct tcf_exts *exts,
continue;
}
 
+   if (is_tcf_skbedit_mark(a)) {
+   u32 mark = tcf_skbedit_mark(a);
+
+   if (mark & ~MLX5E_TC_FLOW_ID_MASK) {
+   netdev_warn(priv->netdev,
+   "Bad flow mark - only 16 bit is 
supported: 0x%x\n",
+   mark);
+   return -EINVAL;
+   }
+
+   *flow_tag = mark;
+   *action |= MLX5_FLOW_CONTEXT_ACTION_FWD_DEST;
+   continue;
+   }
+
return -EINVAL;
}
 
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.h 
b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.h
index f1e7180..155e9bd 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.h
@@ -33,6 +33,8 @@
 #ifndef __MLX5_EN_TC_H__
 #define __MLX5_EN_TC_H__
 
+#define MLX5E_TC_FLOW_ID_MASK 0xffff
+
 void mlx5e_tc_init(struct mlx5e_priv *priv);
 void mlx5e_tc_cleanup(struct mlx5e_priv *priv);
 
-- 
2.7.0



[PATCH net-next 5/8] net/mlx5e: Add a new priority for kernel flow tables

2016-03-01 Thread Amir Vadai
Move the vlan and main flow tables to use priority 1. This will allow
the upcoming TC offload logic to use a higher priority (0) for the
offload steering table.

Signed-off-by: Amir Vadai 
Signed-off-by: Or Gerlitz 
---
 drivers/net/ethernet/mellanox/mlx5/core/en_fs.c   | 4 ++--
 drivers/net/ethernet/mellanox/mlx5/core/fs_core.c | 4 ++--
 2 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_fs.c 
b/drivers/net/ethernet/mellanox/mlx5/core/en_fs.c
index 80d81ab..d00a242 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_fs.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_fs.c
@@ -1041,7 +1041,7 @@ static int mlx5e_create_main_flow_table(struct mlx5e_priv 
*priv)
int err;
 
ft->num_groups = 0;
-   ft->t = mlx5_create_flow_table(priv->fts.ns, 0, MLX5E_MAIN_TABLE_SIZE);
+   ft->t = mlx5_create_flow_table(priv->fts.ns, 1, MLX5E_MAIN_TABLE_SIZE);
 
if (IS_ERR(ft->t)) {
err = PTR_ERR(ft->t);
@@ -1150,7 +1150,7 @@ static int mlx5e_create_vlan_flow_table(struct mlx5e_priv 
*priv)
int err;
 
ft->num_groups = 0;
-   ft->t = mlx5_create_flow_table(priv->fts.ns, 0, MLX5E_VLAN_TABLE_SIZE);
+   ft->t = mlx5_create_flow_table(priv->fts.ns, 1, MLX5E_VLAN_TABLE_SIZE);
 
if (IS_ERR(ft->t)) {
err = PTR_ERR(ft->t);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c 
b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c
index f0e67d2..e848d70 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c
@@ -73,8 +73,8 @@
 #define BY_PASS_MIN_LEVEL (KENREL_MIN_LEVEL + MLX5_BY_PASS_NUM_PRIOS +\
   LEFTOVERS_MAX_FT)
 
-#define KERNEL_MAX_FT 2
-#define KERNEL_NUM_PRIOS 1
+#define KERNEL_MAX_FT 3
+#define KERNEL_NUM_PRIOS 2
 #define KENREL_MIN_LEVEL 2
 
 struct node_caps {
-- 
2.7.0



[PATCH net-next 7/8] net/mlx5e: Support offload cls_flower with drop action

2016-03-01 Thread Amir Vadai
Parse tc_cls_flower_offload into device specific commands and program
the hardware to classify and act accordingly.

For example, to drop ICMP (ip_proto 1) packets with a specific smac, dmac,
src_ip and dst_ip, arriving at interface ens9:

 # tc qdisc add dev ens9 ingress

 # tc filter add dev ens9 protocol ip parent ffff: \
 flower ip_proto 1 \
 dst_mac 7c:fe:90:69:81:62 src_mac 7c:fe:90:69:81:56 \
 dst_ip 11.11.11.11 src_ip 11.11.11.12 indev ens9 \
 action drop

Signed-off-by: Amir Vadai 
Signed-off-by: Or Gerlitz 
---
 drivers/net/ethernet/mellanox/mlx5/core/en_main.c |   9 +
 drivers/net/ethernet/mellanox/mlx5/core/en_tc.c   | 287 ++
 drivers/net/ethernet/mellanox/mlx5/core/en_tc.h   |   5 +
 3 files changed, 301 insertions(+)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c 
b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
index cb02b4c..1d1da94 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
@@ -1889,6 +1889,15 @@ static int mlx5e_ndo_setup_tc(struct net_device *dev, 
u32 handle,
goto mqprio;
 
switch (tc->type) {
+   case TC_SETUP_CLSFLOWER:
+   switch (tc->cls_flower->command) {
+   case TC_CLSFLOWER_REPLACE:
+   return mlx5e_configure_flower(priv, proto, 
tc->cls_flower);
+   case TC_CLSFLOWER_DESTROY:
+   return mlx5e_delete_flower(priv, tc->cls_flower);
+   default:
+   return -EINVAL;
+   }
default:
return -EINVAL;
}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c 
b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c
index 7d1c0a3..8fee983 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c
@@ -30,6 +30,9 @@
  * SOFTWARE.
  */
 
+#include 
+#include 
+#include 
 #include 
 #include 
 #include 
@@ -94,6 +97,290 @@ static void mlx5e_tc_del_flow(struct mlx5e_priv *priv,
}
 }
 
+static int parse_cls_flower(struct mlx5e_priv *priv,
+   u32 *match_c, u32 *match_v,
+   struct tc_cls_flower_offload *f)
+{
+   void *headers_c = MLX5_ADDR_OF(fte_match_param, match_c, outer_headers);
+   void *headers_v = MLX5_ADDR_OF(fte_match_param, match_v, outer_headers);
+   u16 addr_type = 0;
+   u8 ip_proto = 0;
+
+   if (f->dissector->used_keys &
+   ~(BIT(FLOW_DISSECTOR_KEY_CONTROL) |
+ BIT(FLOW_DISSECTOR_KEY_BASIC) |
+ BIT(FLOW_DISSECTOR_KEY_ETH_ADDRS) |
+ BIT(FLOW_DISSECTOR_KEY_IPV4_ADDRS) |
+ BIT(FLOW_DISSECTOR_KEY_IPV6_ADDRS) |
+ BIT(FLOW_DISSECTOR_KEY_PORTS))) {
+   netdev_warn(priv->netdev, "Unsupported key used: 0x%x\n",
+   f->dissector->used_keys);
+   return -ENOTSUPP;
+   }
+
+   if (dissector_uses_key(f->dissector, FLOW_DISSECTOR_KEY_CONTROL)) {
+   struct flow_dissector_key_control *key =
+   skb_flow_dissector_target(f->dissector,
+ FLOW_DISSECTOR_KEY_BASIC,
+ f->key);
+   addr_type = key->addr_type;
+   }
+
+   if (dissector_uses_key(f->dissector, FLOW_DISSECTOR_KEY_BASIC)) {
+   struct flow_dissector_key_basic *key =
+   skb_flow_dissector_target(f->dissector,
+ FLOW_DISSECTOR_KEY_BASIC,
+ f->key);
+   struct flow_dissector_key_basic *mask =
+   skb_flow_dissector_target(f->dissector,
+ FLOW_DISSECTOR_KEY_BASIC,
+ f->mask);
+   ip_proto = key->ip_proto;
+
+   MLX5_SET(fte_match_set_lyr_2_4, headers_c, ethertype,
+ntohs(mask->n_proto));
+   MLX5_SET(fte_match_set_lyr_2_4, headers_v, ethertype,
+ntohs(key->n_proto));
+
+   MLX5_SET(fte_match_set_lyr_2_4, headers_c, ip_protocol,
+mask->ip_proto);
+   MLX5_SET(fte_match_set_lyr_2_4, headers_v, ip_protocol,
+key->ip_proto);
+   }
+
+   if (dissector_uses_key(f->dissector, FLOW_DISSECTOR_KEY_ETH_ADDRS)) {
+   struct flow_dissector_key_eth_addrs *key =
+   skb_flow_dissector_target(f->dissector,
+ FLOW_DISSECTOR_KEY_ETH_ADDRS,
+ f->key);
+   struct flow

[PATCH net-next 4/8] net/mlx5_core: Set flow steering dest only for forward rules

2016-03-01 Thread Amir Vadai
We need to handle flow table entry destinations only if the action
associated with the rule is forwarding (MLX5_FLOW_CONTEXT_ACTION_FWD_DEST).

Fixes: 26a8145390b3 ('net/mlx5_core: Introduce flow steering firmware commands')
Signed-off-by: Amir Vadai 
Signed-off-by: Maor Gottlieb 
---
 drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.c  | 29 +--
 drivers/net/ethernet/mellanox/mlx5/core/fs_core.c | 18 +-
 2 files changed, 28 insertions(+), 19 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.c 
b/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.c
index a9894d2..f46f1db 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.c
@@ -218,19 +218,22 @@ static int mlx5_cmd_set_fte(struct mlx5_core_dev *dev,
  match_value);
memcpy(in_match_value, &fte->val, MLX5_ST_SZ_BYTES(fte_match_param));
 
-   in_dests = MLX5_ADDR_OF(flow_context, in_flow_context, destination);
-   list_for_each_entry(dst, &fte->node.children, node.list) {
-   unsigned int id;
-
-   MLX5_SET(dest_format_struct, in_dests, destination_type,
-dst->dest_attr.type);
-   if (dst->dest_attr.type ==
-   MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE)
-   id = dst->dest_attr.ft->id;
-   else
-   id = dst->dest_attr.tir_num;
-   MLX5_SET(dest_format_struct, in_dests, destination_id, id);
-   in_dests += MLX5_ST_SZ_BYTES(dest_format_struct);
+   if (fte->action & MLX5_FLOW_CONTEXT_ACTION_FWD_DEST) {
+   in_dests = MLX5_ADDR_OF(flow_context, in_flow_context, 
destination);
+   list_for_each_entry(dst, &fte->node.children, node.list) {
+   unsigned int id;
+
+   MLX5_SET(dest_format_struct, in_dests, destination_type,
+dst->dest_attr.type);
+   if (dst->dest_attr.type ==
+   MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE) {
+   id = dst->dest_attr.ft->id;
+   } else {
+   id = dst->dest_attr.tir_num;
+   }
+   MLX5_SET(dest_format_struct, in_dests, destination_id, 
id);
+   in_dests += MLX5_ST_SZ_BYTES(dest_format_struct);
+   }
}
memset(out, 0, sizeof(out));
err = mlx5_cmd_exec_check_status(dev, in, inlen, out,
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c 
b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c
index 6f68dba..f0e67d2 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c
@@ -360,8 +360,8 @@ static void del_rule(struct fs_node *node)
memcpy(match_value, fte->val, sizeof(fte->val));
fs_get_obj(ft, fg->node.parent);
list_del(&rule->node.list);
-   fte->dests_size--;
-   if (fte->dests_size) {
+   if ((fte->action & MLX5_FLOW_CONTEXT_ACTION_FWD_DEST) &&
+   --fte->dests_size) {
err = mlx5_cmd_update_fte(dev, ft,
  fg->id, fte);
if (err)
@@ -763,7 +763,8 @@ static struct mlx5_flow_rule *alloc_rule(struct 
mlx5_flow_destination *dest)
return NULL;
 
rule->node.type = FS_TYPE_FLOW_DEST;
-   memcpy(&rule->dest_attr, dest, sizeof(*dest));
+   if (dest)
+   memcpy(&rule->dest_attr, dest, sizeof(*dest));
 
return rule;
 }
@@ -785,8 +786,9 @@ static struct mlx5_flow_rule *add_rule_fte(struct fs_fte 
*fte,
/* Add dest to dests list- added as first element after the head */
tree_init_node(&rule->node, 1, del_rule);
list_add_tail(&rule->node.list, &fte->node.children);
-   fte->dests_size++;
-   if (fte->dests_size == 1)
+   if (dest)
+   fte->dests_size++;
+   if (fte->dests_size == 1 || !dest)
err = mlx5_cmd_create_fte(get_dev(&ft->node),
  ft, fg->id, fte);
else
@@ -802,7 +804,8 @@ static struct mlx5_flow_rule *add_rule_fte(struct fs_fte 
*fte,
 free_rule:
list_del(&rule->node.list);
kfree(rule);
-   fte->dests_size--;
+   if (dest)
+   fte->dests_size--;
return ERR_PTR(err);
 }
 
@@ -996,6 +999,9 @@ mlx5_add_flow_rule(struct mlx5_flow_table *ft,
struct mlx5_flow_group *g;
struct mlx5_flow_rule *rule;
 
+   if ((action & MLX5_FLOW_CONTEXT_ACTION_FWD_DEST) && !dest)
+   return ERR_PTR(-EINVAL);
+
nested_lock_ref_node(&ft->node, FS_MUTEX_GRANDPARENT);
fs_for_each_fg(g, ft)
if (compare_match_criteria(g->mask.match_criteria_enable,
-- 
2.7.0



[PATCH net-next 3/8] net/act_skbedit: Utility functions for mark action

2016-03-01 Thread Amir Vadai
Enable device drivers to query whether an action is a mark action and what
value to use for marking.

Signed-off-by: Amir Vadai 
---
 include/net/tc_act/tc_skbedit.h | 15 +++
 1 file changed, 15 insertions(+)

diff --git a/include/net/tc_act/tc_skbedit.h b/include/net/tc_act/tc_skbedit.h
index 0df9a0d..ad27d69 100644
--- a/include/net/tc_act/tc_skbedit.h
+++ b/include/net/tc_act/tc_skbedit.h
@@ -20,6 +20,7 @@
 #define __NET_TC_SKBEDIT_H
 
 #include 
+#include 
 
 struct tcf_skbedit {
struct tcf_common   common;
@@ -32,4 +33,18 @@ struct tcf_skbedit {
 #define to_skbedit(a) \
container_of(a->priv, struct tcf_skbedit, common)
 
+#ifdef CONFIG_NET_CLS_ACT
+static inline bool is_tcf_skbedit_mark(const struct tc_action *a)
+{
+   if (!a->ops || a->ops->type != TCA_ACT_SKBEDIT)
+   return false;
+
+   return to_skbedit(a)->flags == SKBEDIT_F_MARK;
+}
+
+static inline u32 tcf_skbedit_mark(const struct tc_action *a)
+{
+   return to_skbedit(a)->mark;
+}
+#endif
 #endif /* __NET_TC_SKBEDIT_H */
-- 
2.7.0



Re: [PATCH net-next 1/8] net/flower: Introduce hardware offload support

2016-03-01 Thread Amir Vadai
On Tue, Mar 01, 2016 at 03:47:19PM +0100, Jiri Pirko wrote:
> Tue, Mar 01, 2016 at 03:24:43PM CET, a...@vadai.me wrote:
> >This patch is based on a patch made by John Fastabend.
> >It adds support for offloading cls_flower.
> >A filter that is offloaded successfuly by hardware, will not be added to
> >the hashtable and won't be processed by software.
> 
> That is wrong. User should explitly specify to not include rule into sw
> by SKIP_KERNEL flag (does not exist now, with John's recent patch we'll
> have only SKIP_HW). Please add that in this patchset.
Why? If a rule is offloaded, why would the user want to reprocess it in
software?
If the user uses SKIP_HW, it will be processed by SW. Otherwise, the user
would want it to be processed by HW, or to fall back to SW. I don't
understand in which case the user would like to have it done twice.

> 

[..]


Re: [PATCH net-next 1/8] net/flower: Introduce hardware offload support

2016-03-01 Thread Amir Vadai
On Tue, Mar 01, 2016 at 03:53:44PM +0100, Jiri Pirko wrote:
> Tue, Mar 01, 2016 at 03:24:43PM CET, a...@vadai.me wrote:
> >This patch is based on a patch made by John Fastabend.
> >It adds support for offloading cls_flower.
> >A filter that is offloaded successfuly by hardware, will not be added to
> >the hashtable and won't be processed by software.
> >
> 
> 
> 
> >+enum {
> >+TC_CLSFLOWER_REPLACE,
> >+TC_CLSFLOWER_DESTROY,
> >+};
> 
> Name this enum
Right,
Thanks

> 
> >+
> >+struct tc_cls_flower_offload {
> >+int command;
> 
> ^^^ and use it here
> 
> >+u64 cookie;
> >+struct flow_dissector *dissector;
> >+struct fl_flow_key *mask;
> >+struct fl_flow_key *key;
> >+struct tcf_exts *exts;
> >+};


Re: [PATCH net-next 7/8] net/mlx5e: Support offload cls_flower with drop action

2016-03-01 Thread Amir Vadai
On Tue, Mar 01, 2016 at 03:55:48PM +0100, Jiri Pirko wrote:
> Tue, Mar 01, 2016 at 03:24:49PM CET, a...@vadai.me wrote:
> >Parse tc_cls_flower_offload into device specific commands and program
> >the hardware to classify and act accordingly.
> >
> >For example, to drop ICMP (ip_proto 1) packets from specific smac, dmac,
> >src_ip, src_ip, arriving to interface ens9:
> >
> > # tc qdisc add dev ens9 ingress
> >
> > # tc filter add dev ens9 protocol ip parent : \
> > flower ip_proto 1 \
> > dst_mac 7c:fe:90:69:81:62 src_mac 7c:fe:90:69:81:56 \
> > dst_ip 11.11.11.11 src_ip 11.11.11.12 indev ens9 \
> > action drop
> >
> >Signed-off-by: Amir Vadai 
> >Signed-off-by: Or Gerlitz 
> >---
> > drivers/net/ethernet/mellanox/mlx5/core/en_main.c |   9 +
> > drivers/net/ethernet/mellanox/mlx5/core/en_tc.c   | 287 
> > ++
> > drivers/net/ethernet/mellanox/mlx5/core/en_tc.h   |   5 +
> > 3 files changed, 301 insertions(+)
> >
> >diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c 
> >b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
> >index cb02b4c..1d1da94 100644
> >--- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
> >+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
> >@@ -1889,6 +1889,15 @@ static int mlx5e_ndo_setup_tc(struct net_device *dev, 
> >u32 handle,
> > goto mqprio;
> > 
> > switch (tc->type) {
> >+case TC_SETUP_CLSFLOWER:
> >+switch (tc->cls_flower->command) {
> >+case TC_CLSFLOWER_REPLACE:
> >+return mlx5e_configure_flower(priv, proto, 
> >tc->cls_flower);
> >+case TC_CLSFLOWER_DESTROY:
> >+return mlx5e_delete_flower(priv, tc->cls_flower);
> >+default:
> >+return -EINVAL;
> 
> No need to default case here is you change "command" to enum as I
> suggested it the reply to patch 1.
ack


Re: [PATCH net-next 8/8] net/mlx5e: Support offload cls_flower with sskbedit mark action

2016-03-01 Thread Amir Vadai
On Tue, Mar 01, 2016 at 03:58:10PM +0100, Jiri Pirko wrote:
> Tue, Mar 01, 2016 at 03:24:50PM CET, a...@vadai.me wrote:
> >Introduce offloading of skbedit mark action.
> >
> >For example, to mark with 0x1234, all TCP (ip_proto 6) packets arriving
> >to interface ens9:
> >
> > # tc qdisc add dev ens9 ingress
> > # tc filter add dev ens9 protocol ip parent : \
> > flower ip_proto 6 \
> > indev ens9 \
> > action skbedit mark 0x1234
> >
> 
> 
> 
> >@@ -287,6 +288,21 @@ static int parse_tc_actions(struct mlx5e_priv *priv, 
> >struct tcf_exts *exts,
> > continue;
> > }
> > 
> >+if (is_tcf_skbedit_mark(a)) {
> >+u32 mark = tcf_skbedit_mark(a);
> >+
> >+if (mark & ~MLX5E_TC_FLOW_ID_MASK) {
> >+netdev_warn(priv->netdev,
> >+"Bad flow mark - only 16 bit is 
> >supported: 0x%x\n",
> 
> You can start printk string on the first line.
ack


Re: [PATCH net-next 6/8] net/mlx5e: Introduce tc offload support

2016-03-01 Thread Amir Vadai
On Tue, Mar 01, 2016 at 03:52:08PM +0100, Jiri Pirko wrote:
> Tue, Mar 01, 2016 at 03:24:48PM CET, a...@vadai.me wrote:
> >Extend ndo_setup_tc() to support ingress tc offloading. Will be used by
> >later patches to offload tc flower filter.
> >
> >Feature is off by default and could be enabled by issuing:
> > # ethtool  -K eth0 hw-tc-offload on
> >
> >Offloads flow table is dynamically created when first filter is
> >added.
> >Rules are saved in a hash table that is maintained by the consumer (for
> >example - the flower offload in the next patch).
> >When last filter is removed and no filters exist in the hash table, the
> >offload flow table is destroyed.
> 
> 
>   
> >@@ -1880,6 +1883,17 @@ static int mlx5e_setup_tc(struct net_device *netdev, 
> >u8 tc)
> > static int mlx5e_ndo_setup_tc(struct net_device *dev, u32 handle,
> >   __be16 proto, struct tc_to_netdev *tc)
> > {
> >+struct mlx5e_priv *priv = netdev_priv(dev);
> >+
> >+if (TC_H_MAJ(handle) != TC_H_MAJ(TC_H_INGRESS))
> >+goto mqprio;
> >+
> >+switch (tc->type) {
> >+default:
> >+return -EINVAL;
> 
> -EOPNOTSUPP would be better here perhaps?
> 
> 
> >+}
> >+
> >+mqprio:
> > if (handle != TC_H_ROOT || tc->type != TC_SETUP_MQPRIO)
> > return -EINVAL;
> > 
> >@@ -1963,6 +1977,13 @@ static int mlx5e_set_features(struct net_device 
> >*netdev,
> > mlx5e_disable_vlan_filter(priv);
> > }
> > 
> >+if ((changes & NETIF_F_HW_TC) && !(features & NETIF_F_HW_TC) &&
> >+mlx5e_tc_num_filters(priv)) {
> >+netdev_err(netdev,
> >+   "Active offloaded tc filters, can't turn 
> >hw_tc_offload off\n");
> >+return -EINVAL;
> 
> This should not fail I believe. Just disable it in hw. I would even toss
> away the rules if necessary.
It depends on the answer regarding your comment on the previous patch.
If we have the rule in both SW and HW, and remove it from the HW, it is
ok (although currently I don't understand why anyone would want it in both
places).
If the rule is processed by HW only, turning off this flag will
disable the offloaded rules, which might be misleading, so I preferred not
to allow it and to print a message instead.


> 


Re: [PATCH net-next 6/8] net/mlx5e: Introduce tc offload support

2016-03-01 Thread Amir Vadai
On Tue, Mar 01, 2016 at 05:59:59PM +0200, Saeed Mahameed wrote:
> On Tue, Mar 1, 2016 at 4:24 PM, Amir Vadai  wrote:
> > +#define FT_CAP(f) MLX5_CAP_FLOWTABLE(mdev, 
> > flow_table_properties_nic_receive.f)
> > +   if (FT_CAP(flow_modify_en) &&
> > +   FT_CAP(modify_root) &&
> > +   FT_CAP(identified_miss_table_mode) &&
> > +   FT_CAP(flow_table_modify))
> > +   priv->netdev->hw_features  |= NETIF_F_HW_TC;
> > +
> > netdev->features |= NETIF_F_HIGHDMA;
> >
> > netdev->priv_flags   |= IFF_UNICAST_FLT;
> >
> > +   mlx5e_tc_init(priv);
> 
> This is not the place for this, We usually do internal data structure
> initialization  after we create all HW resources in
> mlx5e_create_netdev
> Please see mlx5e_vxlan_init as example, and you already call
> mlx5e_tc_cleanup inside mlx5e_destroy_netdev, please move the
> mlx5e_tc_init
> to mlx5e_create_netdev after HW resources creation,
ack

> 
> 
> > @@ -2558,6 +2588,7 @@ static void mlx5e_destroy_netdev(struct mlx5_core_dev 
> > *mdev, void *vpriv)
> > mlx5_core_dealloc_transport_domain(priv->mdev, priv->tdn);
> > mlx5_core_dealloc_pd(priv->mdev, priv->pdn);
> > mlx5_unmap_free_uar(priv->mdev, &priv->cq_uar);
> > +   mlx5e_tc_cleanup(priv);
> 
> I would suggest to move  mlx5e_tc_init to be right after
> mlx5e_vxlan_init and mlx5e_tc_cleanup before mlx5e_vxlan_cleanup.
ok

> 
> > +struct mlx5_flow_rule *mlx5e_tc_add_flow(struct mlx5e_priv *priv,
> > +u32 *match_c, u32 *match_v,
> > +u32 action, u32 flow_tag)
> > +{
> > +   struct mlx5_flow_destination dest = {
> > +   .type = MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE,
> > +   {.ft = priv->fts.vlan.t},
> > +   };
> > +   struct mlx5_flow_rule *rule;
> > +   bool table_created = false;
> > +
> > +   if (IS_ERR_OR_NULL(priv->fts.tc.t)) {
> > +   priv->fts.tc.t =
> > +   mlx5_create_auto_grouped_flow_table(priv->fts.ns, 0,
> > +   
> > MLX5E_TC_FLOW_TABLE_NUM_ENTRIES,
> > +   
> > MLX5E_TC_FLOW_TABLE_NUM_GROUPS);
> > +   if (IS_ERR(priv->fts.tc.t)) {
> > +   netdev_err(priv->netdev,
> > +  "Failed to create tc offload table\n");
> > +   return ERR_CAST(priv->fts.tc.t);
> 
> Here priv->fts.tc.t will be invalid pointer and in your code you treat
> it as NULL in case of failure.
> 
> > +   }
> > +
> > +   table_created = true;
> > +   }
> > +
> > +   rule = mlx5_add_flow_rule(priv->fts.tc.t, MLX5_MATCH_OUTER_HEADERS,
> > + match_c, match_v,
> > + action, flow_tag,
> > + action & 
> > MLX5_FLOW_CONTEXT_ACTION_FWD_DEST ? &dest : NULL);
> > +
> > +   if (IS_ERR(rule) && table_created) {
> > +   mlx5_destroy_flow_table(priv->fts.tc.t);
> > +   priv->fts.tc.t = NULL;
> > +   }
> > +
> > +   return rule;
> > +}
> > +
> 
> > +void mlx5e_tc_cleanup(struct mlx5e_priv *priv)
> > +{
> > +   struct mlx5e_tc_flow_table *tc = &priv->fts.tc;
> > +
> > +   rhashtable_free_and_destroy(&tc->ht, _mlx5e_tc_del_flow, priv);
> > +
> > +   if (priv->fts.tc.t) {
> 
> priv->fts.tc.t will be invalid pointer and this test will pass in case
>  mlx5_create_auto_grouped_flow_table had failed
Yeh - should have used !IS_ERR_OR_NULL() or set it to NULL on failure
above.

Thanks,
Amir


> 
> > +   mlx5_destroy_flow_table(priv->fts.tc.t);
> > +   priv->fts.tc.t = NULL;
> > +   }
> > +}


Re: [PATCH net-next 6/8] net/mlx5e: Introduce tc offload support

2016-03-02 Thread Amir Vadai
On Tue, Mar 01, 2016 at 09:13:25AM -0800, John Fastabend wrote:
> On 16-03-01 09:00 AM, Amir Vadai wrote:
> > On Tue, Mar 01, 2016 at 03:52:08PM +0100, Jiri Pirko wrote:
> >> Tue, Mar 01, 2016 at 03:24:48PM CET, a...@vadai.me wrote:
> >>> Extend ndo_setup_tc() to support ingress tc offloading. Will be used by
> >>> later patches to offload tc flower filter.
> >>>
> >>> Feature is off by default and could be enabled by issuing:
> >>> # ethtool  -K eth0 hw-tc-offload on
> >>>
> >>> Offloads flow table is dynamically created when first filter is
> >>> added.
> >>> Rules are saved in a hash table that is maintained by the consumer (for
> >>> example - the flower offload in the next patch).
> >>> When last filter is removed and no filters exist in the hash table, the
> >>> offload flow table is destroyed.
> >>
> >>  
> >>
> >>> @@ -1880,6 +1883,17 @@ static int mlx5e_setup_tc(struct net_device 
> >>> *netdev, u8 tc)
> >>> static int mlx5e_ndo_setup_tc(struct net_device *dev, u32 handle,
> >>> __be16 proto, struct tc_to_netdev *tc)
> >>> {
> >>> + struct mlx5e_priv *priv = netdev_priv(dev);
> >>> +
> >>> + if (TC_H_MAJ(handle) != TC_H_MAJ(TC_H_INGRESS))
> >>> + goto mqprio;
> >>> +
> >>> + switch (tc->type) {
> >>> + default:
> >>> + return -EINVAL;
> >>
> >> -EOPNOTSUPP would be better here perhaps?
> >>
> >>
> >>> + }
> >>> +
> >>> +mqprio:
> >>>   if (handle != TC_H_ROOT || tc->type != TC_SETUP_MQPRIO)
> >>>   return -EINVAL;
> >>>
> >>> @@ -1963,6 +1977,13 @@ static int mlx5e_set_features(struct net_device 
> >>> *netdev,
> >>>   mlx5e_disable_vlan_filter(priv);
> >>>   }
> >>>
> >>> + if ((changes & NETIF_F_HW_TC) && !(features & NETIF_F_HW_TC) &&
> >>> + mlx5e_tc_num_filters(priv)) {
> >>> + netdev_err(netdev,
> >>> +"Active offloaded tc filters, can't turn 
> >>> hw_tc_offload off\n");
> >>> + return -EINVAL;
> >>
> >> This should not fail I believe. Just disable it in hw. I would even toss
> >> away the rules if necessary.
> > It depends on the answer regarding your comment on the previous patch.
> > If we have the rule in both SW and HW, and remove it from the HW it is
> > ok (although, currently I don't understand why would anyone want in both
> > places).
> > If the rule is processed by HW only - turning off this flag, will
> > disable the offloaded rules - it might be misleading, so I prefered not
> > to allow it and print a message.
> 
> When we get the HW only mode we will need to also flush the hardware
> representation in software as well as the hardware state.

Yep, I do think that just failing the operation is the best approach.
Will make the design simpler, and from the user point of view, less
surprises.

Jiri?


[PATCH net-next V1 01/10] net/flower: Introduce hardware offload support

2016-03-03 Thread Amir Vadai
This patch is based on a patch made by John Fastabend.
It adds support for offloading cls_flower.
when NETIF_F_HW_TC is on:
  flags = 0   => Rule will be processed twice - by hardware, and if
 still relevant, by software.
  flags = SKIP_HW => Rule will be processed by software only

If the hardware fails or is not capable of applying the rule, the operation will fail.

Suggested-by: John Fastabend 
Signed-off-by: Amir Vadai 
---
 include/linux/netdevice.h|  2 ++
 include/net/pkt_cls.h| 14 +
 include/uapi/linux/pkt_cls.h |  2 ++
 net/sched/cls_flower.c   | 71 +++-
 4 files changed, 88 insertions(+), 1 deletion(-)

diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index efe7cec..12db9d6 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -785,6 +785,7 @@ typedef u16 (*select_queue_fallback_t)(struct net_device 
*dev,
 enum {
TC_SETUP_MQPRIO,
TC_SETUP_CLSU32,
+   TC_SETUP_CLSFLOWER,
 };
 
 struct tc_cls_u32_offload;
@@ -794,6 +795,7 @@ struct tc_to_netdev {
union {
u8 tc;
struct tc_cls_u32_offload *cls_u32;
+   struct tc_cls_flower_offload *cls_flower;
};
 };
 
diff --git a/include/net/pkt_cls.h b/include/net/pkt_cls.h
index bea14ee..5b4e8f0 100644
--- a/include/net/pkt_cls.h
+++ b/include/net/pkt_cls.h
@@ -409,4 +409,18 @@ static inline bool tc_should_offload(struct net_device 
*dev, u32 flags)
return true;
 }
 
+enum tc_fl_command {
+   TC_CLSFLOWER_REPLACE,
+   TC_CLSFLOWER_DESTROY,
+};
+
+struct tc_cls_flower_offload {
+   enum tc_fl_command command;
+   u64 cookie;
+   struct flow_dissector *dissector;
+   struct fl_flow_key *mask;
+   struct fl_flow_key *key;
+   struct tcf_exts *exts;
+};
+
 #endif
diff --git a/include/uapi/linux/pkt_cls.h b/include/uapi/linux/pkt_cls.h
index 9874f568..c43c5f7 100644
--- a/include/uapi/linux/pkt_cls.h
+++ b/include/uapi/linux/pkt_cls.h
@@ -417,6 +417,8 @@ enum {
TCA_FLOWER_KEY_TCP_DST, /* be16 */
TCA_FLOWER_KEY_UDP_SRC, /* be16 */
TCA_FLOWER_KEY_UDP_DST, /* be16 */
+
+   TCA_FLOWER_FLAGS,
__TCA_FLOWER_MAX,
 };
 
diff --git a/net/sched/cls_flower.c b/net/sched/cls_flower.c
index 95b0212..ed3cd5a 100644
--- a/net/sched/cls_flower.c
+++ b/net/sched/cls_flower.c
@@ -165,6 +165,52 @@ static void fl_destroy_filter(struct rcu_head *head)
kfree(f);
 }
 
+static void fl_hw_destroy_filter(struct tcf_proto *tp, u64 cookie)
+{
+   struct net_device *dev = tp->q->dev_queue->dev;
+   struct tc_cls_flower_offload offload = {0};
+   struct tc_to_netdev tc;
+
+   if (!tc_should_offload(dev, 0))
+   return;
+
+   offload.command = TC_CLSFLOWER_DESTROY;
+   offload.cookie = cookie;
+
+   tc.type = TC_SETUP_CLSFLOWER;
+   tc.cls_flower = &offload;
+
+   dev->netdev_ops->ndo_setup_tc(dev, tp->q->handle, tp->protocol, &tc);
+}
+
+static int fl_hw_replace_filter(struct tcf_proto *tp,
+   struct flow_dissector *dissector,
+   struct fl_flow_key *mask,
+   struct fl_flow_key *key,
+   struct tcf_exts *actions,
+   u64 cookie, u32 flags)
+{
+   struct net_device *dev = tp->q->dev_queue->dev;
+   struct tc_cls_flower_offload offload = {0};
+   struct tc_to_netdev tc;
+
+   if (!tc_should_offload(dev, flags))
+   return 0;
+
+   offload.command = TC_CLSFLOWER_REPLACE;
+   offload.cookie = cookie;
+   offload.dissector = dissector;
+   offload.mask = mask;
+   offload.key = key;
+   offload.exts = actions;
+
+   tc.type = TC_SETUP_CLSFLOWER;
+   tc.cls_flower = &offload;
+
+   return dev->netdev_ops->ndo_setup_tc(dev, tp->q->handle, tp->protocol,
+&tc);
+}
+
 static bool fl_destroy(struct tcf_proto *tp, bool force)
 {
struct cls_fl_head *head = rtnl_dereference(tp->root);
@@ -174,6 +220,7 @@ static bool fl_destroy(struct tcf_proto *tp, bool force)
return false;
 
list_for_each_entry_safe(f, next, &head->filters, list) {
+   fl_hw_destroy_filter(tp, (u64)f);
list_del_rcu(&f->list);
call_rcu(&f->rcu, fl_destroy_filter);
}
@@ -454,11 +501,13 @@ static int fl_change(struct net *net, struct sk_buff 
*in_skb,
 u32 handle, struct nlattr **tca,
 unsigned long *arg, bool ovr)
 {
+   struct net_device *dev = tp->q->dev_queue->dev;
struct cls_fl_head *head = rtnl_dereference(tp->root);
struct cls_fl_filter *fold = (struct cls_fl_filter *) *arg;
struct cls_fl_filter *fne

[PATCH net-next V1 00/10] cls_flower hardware offload support

2016-03-03 Thread Amir Vadai
Hi,

Please see changes from V0 at the bottom.

This patchset introduces cls_flower hardware offload support over ConnectX-4
driver, more hardware vendors are welcome to use it too.

This patchset is based on John's infrastructure for tc offloading [2] to add
hardware offload support to the flower filter. It also extends the support to
an additional tc action - skbedit mark operation.
NIC driver that was used is ConnectX-4. Feature is off by default and could be
turned on using ethtool.

Some commands to use this code:

export TC=../iproute2/tc/tc
export ETH=ens9

ethtool  -K ens9 hw-tc-offload on

# add an ingress qdisc
$TC qdisc add dev $ETH ingress

# Drop ICMP (ip_proto 1) packets
$TC filter add dev $ETH protocol ip prio 20 parent ffff: \
flower ip_proto 1 \
dst_mac 7c:fe:90:69:81:62 \
src_mac 7c:fe:90:69:81:56 \
dst_ip 11.11.11.11 \
src_ip 11.11.11.12 \
indev $ETH \
action drop

# Mark (with 0x1234) TCP (ip_proto 6) packets
$TC filter add dev $ETH protocol ip prio 30 parent ffff: \
flower ip_proto 6 \
indev $ETH \
action skbedit mark 0x1234

# A NOP software filter used to count marked packets using "tc show -s"
$TC filter add dev $ETH protocol ip prio 10 parent ffff: \
handle 0x1234 fw action pass

The code was tested and applied on top of commit 3ebeac1 ("Merge branch
'cxgb4-next'")

Changes from V0:
- Use tc_no_actions and tc_for_each_action instead of ifdef CONFIG_NET_CLS_ACT
- Replace ENOTSUPP (and some EINVAL) with EOPNOTSUPP
- Name the flower command enum
- fl_hw_destroy_filter() to return void - nobody uses the return value
- mlx5e_tc_init() and mlx5e_tc_cleanup() to be called from the right places.
- When adding HW rule fails - fail the command
- Rules are added to be processed both by HW and SW unless SKIP_HW is given
- Adding patch 6/10 ("net/mlx5e: Relax ndo_setup_tc handle restriction")

Main changes from the RFC [1]:
- API
  - Using ndo_setup_tc() instead of switchdev
- act_skbedit, act_gact
  - Actions are not serialized to NIC driver, instead using access functions.
- cls_flower
  - prevent double classification by software by not adding
successfully offloaded filters to the hashtable
  - Fixed some bugs in original RFC with rule delete  
- mlx5
  - Adding flow table to kernel namespace instead of a new namespace
  - s/offload/tc/ in many places
  - no need for a special kconfig since switchdev is not used

Thanks,
Amir

[1] - http://permalink.gmane.org/gmane.linux.network/397064
[2] - http://permalink.gmane.org/gmane.linux.network/397045 
[3] - http://permalink.gmane.org/gmane.linux.network/401226

Amir Vadai (10):
  net/flower: Introduce hardware offload support
  net/flow_dissector: Make dissector_uses_key() and
skb_flow_dissector_target() public
  net/sched: Macro instead of CONFIG_NET_CLS_ACT ifdef
  net/act_skbedit: Utility functions for mark action
  net/mlx5_core: Set flow steering dest only for forward rules
  net/mlx5e: Relax ndo_setup_tc handle restriction
  net/mlx5e: Add a new priority for kernel flow tables
  net/mlx5e: Introduce tc offload support
  net/mlx5e: Support offload cls_flower with drop action
  net/mlx5e: Support offload cls_flower with skbedit mark action

 drivers/net/ethernet/mellanox/mlx5/core/Makefile  |   2 +-
 drivers/net/ethernet/mellanox/mlx5/core/en.h  |   9 +
 drivers/net/ethernet/mellanox/mlx5/core/en_fs.c   |   4 +-
 drivers/net/ethernet/mellanox/mlx5/core/en_main.c |  47 ++-
 drivers/net/ethernet/mellanox/mlx5/core/en_rx.c   |   3 +
 drivers/net/ethernet/mellanox/mlx5/core/en_tc.c   | 429 ++
 drivers/net/ethernet/mellanox/mlx5/core/en_tc.h   |  51 +++
 drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.c  |  29 +-
 drivers/net/ethernet/mellanox/mlx5/core/fs_core.c |  22 +-
 include/linux/netdevice.h |   2 +
 include/net/act_api.h |  21 +-
 include/net/flow_dissector.h  |  13 +
 include/net/pkt_cls.h |  14 +
 include/net/tc_act/tc_gact.h  |   4 +-
 include/net/tc_act/tc_skbedit.h   |  15 +
 include/uapi/linux/pkt_cls.h  |   2 +
 net/core/flow_dissector.c |  13 -
 net/sched/cls_flower.c|  71 +++-
 18 files changed, 704 insertions(+), 47 deletions(-)
 create mode 100644 drivers/net/ethernet/mellanox/mlx5/core/en_tc.c
 create mode 100644 drivers/net/ethernet/mellanox/mlx5/core/en_tc.h

-- 
2.7.0



[PATCH net-next V1 02/10] net/flow_dissector: Make dissector_uses_key() and skb_flow_dissector_target() public

2016-03-03 Thread Amir Vadai
Will be used in a following patch to query if a key is being used, and
what its value is in the target object.

Signed-off-by: Amir Vadai 
---
 include/net/flow_dissector.h | 13 +
 net/core/flow_dissector.c| 13 -
 2 files changed, 13 insertions(+), 13 deletions(-)

diff --git a/include/net/flow_dissector.h b/include/net/flow_dissector.h
index 8c8548c..d3d60dc 100644
--- a/include/net/flow_dissector.h
+++ b/include/net/flow_dissector.h
@@ -184,4 +184,17 @@ static inline bool flow_keys_have_l4(struct flow_keys 
*keys)
 
 u32 flow_hash_from_keys(struct flow_keys *keys);
 
+static inline bool dissector_uses_key(const struct flow_dissector 
*flow_dissector,
+ enum flow_dissector_key_id key_id)
+{
+   return flow_dissector->used_keys & (1 << key_id);
+}
+
+static inline void *skb_flow_dissector_target(struct flow_dissector 
*flow_dissector,
+ enum flow_dissector_key_id key_id,
+ void *target_container)
+{
+   return ((char *)target_container) + flow_dissector->offset[key_id];
+}
+
 #endif
diff --git a/net/core/flow_dissector.c b/net/core/flow_dissector.c
index 7c7b873..a669dea 100644
--- a/net/core/flow_dissector.c
+++ b/net/core/flow_dissector.c
@@ -19,25 +19,12 @@
 #include 
 #include 
 
-static bool dissector_uses_key(const struct flow_dissector *flow_dissector,
-  enum flow_dissector_key_id key_id)
-{
-   return flow_dissector->used_keys & (1 << key_id);
-}
-
 static void dissector_set_key(struct flow_dissector *flow_dissector,
  enum flow_dissector_key_id key_id)
 {
flow_dissector->used_keys |= (1 << key_id);
 }
 
-static void *skb_flow_dissector_target(struct flow_dissector *flow_dissector,
-  enum flow_dissector_key_id key_id,
-  void *target_container)
-{
-   return ((char *) target_container) + flow_dissector->offset[key_id];
-}
-
 void skb_flow_dissector_init(struct flow_dissector *flow_dissector,
 const struct flow_dissector_key *key,
 unsigned int key_count)
-- 
2.7.0



[PATCH net-next V1 05/10] net/mlx5_core: Set flow steering dest only for forward rules

2016-03-03 Thread Amir Vadai
We need to handle flow table entry destinations only if the action
associated with the rule is forwarding (MLX5_FLOW_CONTEXT_ACTION_FWD_DEST).

Fixes: 26a8145390b3 ('net/mlx5_core: Introduce flow steering firmware commands')
Signed-off-by: Amir Vadai 
Signed-off-by: Maor Gottlieb 
---
 drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.c  | 29 +--
 drivers/net/ethernet/mellanox/mlx5/core/fs_core.c | 18 +-
 2 files changed, 28 insertions(+), 19 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.c 
b/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.c
index a9894d2..f46f1db 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.c
@@ -218,19 +218,22 @@ static int mlx5_cmd_set_fte(struct mlx5_core_dev *dev,
  match_value);
memcpy(in_match_value, &fte->val, MLX5_ST_SZ_BYTES(fte_match_param));
 
-   in_dests = MLX5_ADDR_OF(flow_context, in_flow_context, destination);
-   list_for_each_entry(dst, &fte->node.children, node.list) {
-   unsigned int id;
-
-   MLX5_SET(dest_format_struct, in_dests, destination_type,
-dst->dest_attr.type);
-   if (dst->dest_attr.type ==
-   MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE)
-   id = dst->dest_attr.ft->id;
-   else
-   id = dst->dest_attr.tir_num;
-   MLX5_SET(dest_format_struct, in_dests, destination_id, id);
-   in_dests += MLX5_ST_SZ_BYTES(dest_format_struct);
+   if (fte->action & MLX5_FLOW_CONTEXT_ACTION_FWD_DEST) {
+   in_dests = MLX5_ADDR_OF(flow_context, in_flow_context, 
destination);
+   list_for_each_entry(dst, &fte->node.children, node.list) {
+   unsigned int id;
+
+   MLX5_SET(dest_format_struct, in_dests, destination_type,
+dst->dest_attr.type);
+   if (dst->dest_attr.type ==
+   MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE) {
+   id = dst->dest_attr.ft->id;
+   } else {
+   id = dst->dest_attr.tir_num;
+   }
+   MLX5_SET(dest_format_struct, in_dests, destination_id, 
id);
+   in_dests += MLX5_ST_SZ_BYTES(dest_format_struct);
+   }
}
memset(out, 0, sizeof(out));
err = mlx5_cmd_exec_check_status(dev, in, inlen, out,
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c 
b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c
index 6f68dba..f0e67d2 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c
@@ -360,8 +360,8 @@ static void del_rule(struct fs_node *node)
memcpy(match_value, fte->val, sizeof(fte->val));
fs_get_obj(ft, fg->node.parent);
list_del(&rule->node.list);
-   fte->dests_size--;
-   if (fte->dests_size) {
+   if ((fte->action & MLX5_FLOW_CONTEXT_ACTION_FWD_DEST) &&
+   --fte->dests_size) {
err = mlx5_cmd_update_fte(dev, ft,
  fg->id, fte);
if (err)
@@ -763,7 +763,8 @@ static struct mlx5_flow_rule *alloc_rule(struct 
mlx5_flow_destination *dest)
return NULL;
 
rule->node.type = FS_TYPE_FLOW_DEST;
-   memcpy(&rule->dest_attr, dest, sizeof(*dest));
+   if (dest)
+   memcpy(&rule->dest_attr, dest, sizeof(*dest));
 
return rule;
 }
@@ -785,8 +786,9 @@ static struct mlx5_flow_rule *add_rule_fte(struct fs_fte 
*fte,
/* Add dest to dests list- added as first element after the head */
tree_init_node(&rule->node, 1, del_rule);
list_add_tail(&rule->node.list, &fte->node.children);
-   fte->dests_size++;
-   if (fte->dests_size == 1)
+   if (dest)
+   fte->dests_size++;
+   if (fte->dests_size == 1 || !dest)
err = mlx5_cmd_create_fte(get_dev(&ft->node),
  ft, fg->id, fte);
else
@@ -802,7 +804,8 @@ static struct mlx5_flow_rule *add_rule_fte(struct fs_fte 
*fte,
 free_rule:
list_del(&rule->node.list);
kfree(rule);
-   fte->dests_size--;
+   if (dest)
+   fte->dests_size--;
return ERR_PTR(err);
 }
 
@@ -996,6 +999,9 @@ mlx5_add_flow_rule(struct mlx5_flow_table *ft,
struct mlx5_flow_group *g;
struct mlx5_flow_rule *rule;
 
+   if ((action & MLX5_FLOW_CONTEXT_ACTION_FWD_DEST) && !dest)
+   return ERR_PTR(-EINVAL);
+
nested_lock_ref_node(&ft->node, FS_MUTEX_GRANDPARENT);
fs_for_each_fg(g, ft)
if (compare_match_criteria(g->mask.match_criteria_enable,
-- 
2.7.0



[PATCH net-next V1 07/10] net/mlx5e: Add a new priority for kernel flow tables

2016-03-03 Thread Amir Vadai
Move the vlan and main flow tables to use priority 1. This will allow
the upcoming TC offload logic to use a higher priority (0) for the
offload steering table.

Signed-off-by: Amir Vadai 
Signed-off-by: Or Gerlitz 
---
 drivers/net/ethernet/mellanox/mlx5/core/en_fs.c   | 4 ++--
 drivers/net/ethernet/mellanox/mlx5/core/fs_core.c | 4 ++--
 2 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_fs.c 
b/drivers/net/ethernet/mellanox/mlx5/core/en_fs.c
index 80d81ab..d00a242 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_fs.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_fs.c
@@ -1041,7 +1041,7 @@ static int mlx5e_create_main_flow_table(struct mlx5e_priv 
*priv)
int err;
 
ft->num_groups = 0;
-   ft->t = mlx5_create_flow_table(priv->fts.ns, 0, MLX5E_MAIN_TABLE_SIZE);
+   ft->t = mlx5_create_flow_table(priv->fts.ns, 1, MLX5E_MAIN_TABLE_SIZE);
 
if (IS_ERR(ft->t)) {
err = PTR_ERR(ft->t);
@@ -1150,7 +1150,7 @@ static int mlx5e_create_vlan_flow_table(struct mlx5e_priv 
*priv)
int err;
 
ft->num_groups = 0;
-   ft->t = mlx5_create_flow_table(priv->fts.ns, 0, MLX5E_VLAN_TABLE_SIZE);
+   ft->t = mlx5_create_flow_table(priv->fts.ns, 1, MLX5E_VLAN_TABLE_SIZE);
 
if (IS_ERR(ft->t)) {
err = PTR_ERR(ft->t);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c 
b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c
index f0e67d2..e848d70 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c
@@ -73,8 +73,8 @@
 #define BY_PASS_MIN_LEVEL (KENREL_MIN_LEVEL + MLX5_BY_PASS_NUM_PRIOS +\
   LEFTOVERS_MAX_FT)
 
-#define KERNEL_MAX_FT 2
-#define KERNEL_NUM_PRIOS 1
+#define KERNEL_MAX_FT 3
+#define KERNEL_NUM_PRIOS 2
 #define KENREL_MIN_LEVEL 2
 
 struct node_caps {
-- 
2.7.0



[PATCH net-next V1 08/10] net/mlx5e: Introduce tc offload support

2016-03-03 Thread Amir Vadai
Extend ndo_setup_tc() to support ingress tc offloading. Will be used by
later patches to offload tc flower filter.

Feature is off by default and could be enabled by issuing:
 # ethtool  -K eth0 hw-tc-offload on

Offloads flow table is dynamically created when first filter is
added.
Rules are saved in a hash table that is maintained by the consumer (for
example - the flower offload in the next patch).
When last filter is removed and no filters exist in the hash table, the
offload flow table is destroyed.

Signed-off-by: Amir Vadai 
Signed-off-by: Or Gerlitz 
---
 drivers/net/ethernet/mellanox/mlx5/core/Makefile  |   2 +-
 drivers/net/ethernet/mellanox/mlx5/core/en.h  |   9 ++
 drivers/net/ethernet/mellanox/mlx5/core/en_main.c |  38 ++-
 drivers/net/ethernet/mellanox/mlx5/core/en_tc.c   | 131 ++
 drivers/net/ethernet/mellanox/mlx5/core/en_tc.h   |  44 
 5 files changed, 222 insertions(+), 2 deletions(-)
 create mode 100644 drivers/net/ethernet/mellanox/mlx5/core/en_tc.c
 create mode 100644 drivers/net/ethernet/mellanox/mlx5/core/en_tc.h

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/Makefile 
b/drivers/net/ethernet/mellanox/mlx5/core/Makefile
index 11b592d..4fc45ee 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/Makefile
+++ b/drivers/net/ethernet/mellanox/mlx5/core/Makefile
@@ -6,6 +6,6 @@ mlx5_core-y :=  main.o cmd.o debugfs.o fw.o eq.o uar.o 
pagealloc.o \
 
 mlx5_core-$(CONFIG_MLX5_CORE_EN) += wq.o eswitch.o \
en_main.o en_fs.o en_ethtool.o en_tx.o en_rx.o \
-   en_txrx.o en_clock.o vxlan.o
+   en_txrx.o en_clock.o vxlan.o en_tc.o
 
 mlx5_core-$(CONFIG_MLX5_CORE_EN_DCB) +=  en_dcbnl.o
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en.h 
b/drivers/net/ethernet/mellanox/mlx5/core/en.h
index 9c0e80e..36f3dba 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en.h
@@ -43,6 +43,7 @@
 #include 
 #include 
 #include 
+#include 
 #include "wq.h"
 #include "mlx5_core.h"
 
@@ -526,8 +527,16 @@ struct mlx5e_flow_table {
struct mlx5_flow_group  **g;
 };
 
+struct mlx5e_tc_flow_table {
+   struct mlx5_flow_table  *t;
+
+   struct rhashtable_paramsht_params;
+   struct rhashtable   ht;
+};
+
 struct mlx5e_flow_tables {
struct mlx5_flow_namespace  *ns;
+   struct mlx5e_tc_flow_table  tc;
struct mlx5e_flow_table vlan;
struct mlx5e_flow_table main;
 };
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c 
b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
index 5e3692f..011c4f6 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
@@ -30,9 +30,12 @@
  * SOFTWARE.
  */
 
+#include 
+#include 
 #include 
 #include 
 #include "en.h"
+#include "en_tc.h"
 #include "eswitch.h"
 #include "vxlan.h"
 
@@ -1883,6 +1886,17 @@ static int mlx5e_setup_tc(struct net_device *netdev, u8 
tc)
 static int mlx5e_ndo_setup_tc(struct net_device *dev, u32 handle,
  __be16 proto, struct tc_to_netdev *tc)
 {
+   struct mlx5e_priv *priv = netdev_priv(dev);
+
+   if (TC_H_MAJ(handle) != TC_H_MAJ(TC_H_INGRESS))
+   goto mqprio;
+
+   switch (tc->type) {
+   default:
+   return -EOPNOTSUPP;
+   }
+
+mqprio:
if (tc->type != TC_SETUP_MQPRIO)
return -EINVAL;
 
@@ -1966,6 +1980,13 @@ static int mlx5e_set_features(struct net_device *netdev,
mlx5e_disable_vlan_filter(priv);
}
 
+   if ((changes & NETIF_F_HW_TC) && !(features & NETIF_F_HW_TC) &&
+   mlx5e_tc_num_filters(priv)) {
+   netdev_err(netdev,
+  "Active offloaded tc filters, can't turn 
hw_tc_offload off\n");
+   return -EINVAL;
+   }
+
return err;
 }
 
@@ -2365,6 +2386,13 @@ static void mlx5e_build_netdev(struct net_device *netdev)
if (!priv->params.lro_en)
netdev->features  &= ~NETIF_F_LRO;
 
+#define FT_CAP(f) MLX5_CAP_FLOWTABLE(mdev, flow_table_properties_nic_receive.f)
+   if (FT_CAP(flow_modify_en) &&
+   FT_CAP(modify_root) &&
+   FT_CAP(identified_miss_table_mode) &&
+   FT_CAP(flow_table_modify))
+   priv->netdev->hw_features  |= NETIF_F_HW_TC;
+
netdev->features |= NETIF_F_HIGHDMA;
 
netdev->priv_flags   |= IFF_UNICAST_FLT;
@@ -2486,6 +2514,10 @@ static void *mlx5e_create_netdev(struct mlx5_core_dev 
*mdev)
 
mlx5e_vxlan_init(priv);
 
+   err = mlx5e_tc_init(priv);
+   if (err)
+   goto err_destroy_flow_tables;
+
 #ifdef CONFIG_MLX5_CORE_EN_DCB
mlx5e_dcbnl_ieee_setets_core(pri

[PATCH net-next V1 09/10] net/mlx5e: Support offload cls_flower with drop action

2016-03-03 Thread Amir Vadai
Parse tc_cls_flower_offload into device specific commands and program
the hardware to classify and act accordingly.

For example, to drop ICMP (ip_proto 1) packets from specific smac, dmac,
src_ip, dst_ip, arriving at interface ens9:

 # tc qdisc add dev ens9 ingress

 # tc filter add dev ens9 protocol ip parent ffff: \
 flower ip_proto 1 \
 dst_mac 7c:fe:90:69:81:62 src_mac 7c:fe:90:69:81:56 \
 dst_ip 11.11.11.11 src_ip 11.11.11.12 indev ens9 \
 action drop

Signed-off-by: Amir Vadai 
Signed-off-by: Or Gerlitz 
---
 drivers/net/ethernet/mellanox/mlx5/core/en_main.c |   7 +
 drivers/net/ethernet/mellanox/mlx5/core/en_tc.c   | 297 ++
 drivers/net/ethernet/mellanox/mlx5/core/en_tc.h   |   5 +
 3 files changed, 309 insertions(+)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c 
b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
index 011c4f6..9aa9103 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
@@ -1892,6 +1892,13 @@ static int mlx5e_ndo_setup_tc(struct net_device *dev, 
u32 handle,
goto mqprio;
 
switch (tc->type) {
+   case TC_SETUP_CLSFLOWER:
+   switch (tc->cls_flower->command) {
+   case TC_CLSFLOWER_REPLACE:
+   return mlx5e_configure_flower(priv, proto, 
tc->cls_flower);
+   case TC_CLSFLOWER_DESTROY:
+   return mlx5e_delete_flower(priv, tc->cls_flower);
+   }
default:
return -EOPNOTSUPP;
}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c 
b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c
index 9457173..3aea5da 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c
@@ -30,6 +30,9 @@
  * SOFTWARE.
  */
 
+#include 
+#include 
+#include 
 #include 
 #include 
 #include 
@@ -94,6 +97,300 @@ static void mlx5e_tc_del_flow(struct mlx5e_priv *priv,
}
 }
 
+static int parse_cls_flower(struct mlx5e_priv *priv,
+   u32 *match_c, u32 *match_v,
+   struct tc_cls_flower_offload *f)
+{
+   void *headers_c = MLX5_ADDR_OF(fte_match_param, match_c, outer_headers);
+   void *headers_v = MLX5_ADDR_OF(fte_match_param, match_v, outer_headers);
+   u16 addr_type = 0;
+   u8 ip_proto = 0;
+
+   if (f->dissector->used_keys &
+   ~(BIT(FLOW_DISSECTOR_KEY_CONTROL) |
+ BIT(FLOW_DISSECTOR_KEY_BASIC) |
+ BIT(FLOW_DISSECTOR_KEY_ETH_ADDRS) |
+ BIT(FLOW_DISSECTOR_KEY_IPV4_ADDRS) |
+ BIT(FLOW_DISSECTOR_KEY_IPV6_ADDRS) |
+ BIT(FLOW_DISSECTOR_KEY_PORTS))) {
+   netdev_warn(priv->netdev, "Unsupported key used: 0x%x\n",
+   f->dissector->used_keys);
+   return -EOPNOTSUPP;
+   }
+
+   if (dissector_uses_key(f->dissector, FLOW_DISSECTOR_KEY_CONTROL)) {
+   struct flow_dissector_key_control *key =
+   skb_flow_dissector_target(f->dissector,
+ FLOW_DISSECTOR_KEY_BASIC,
+ f->key);
+   addr_type = key->addr_type;
+   }
+
+   if (dissector_uses_key(f->dissector, FLOW_DISSECTOR_KEY_BASIC)) {
+   struct flow_dissector_key_basic *key =
+   skb_flow_dissector_target(f->dissector,
+ FLOW_DISSECTOR_KEY_BASIC,
+ f->key);
+   struct flow_dissector_key_basic *mask =
+   skb_flow_dissector_target(f->dissector,
+ FLOW_DISSECTOR_KEY_BASIC,
+ f->mask);
+   ip_proto = key->ip_proto;
+
+   MLX5_SET(fte_match_set_lyr_2_4, headers_c, ethertype,
+ntohs(mask->n_proto));
+   MLX5_SET(fte_match_set_lyr_2_4, headers_v, ethertype,
+ntohs(key->n_proto));
+
+   MLX5_SET(fte_match_set_lyr_2_4, headers_c, ip_protocol,
+mask->ip_proto);
+   MLX5_SET(fte_match_set_lyr_2_4, headers_v, ip_protocol,
+key->ip_proto);
+   }
+
+   if (dissector_uses_key(f->dissector, FLOW_DISSECTOR_KEY_ETH_ADDRS)) {
+   struct flow_dissector_key_eth_addrs *key =
+   skb_flow_dissector_target(f->dissector,
+ FLOW_DISSECTOR_KEY_ETH_ADDRS,
+ f->key);
+   struct flow_dissector_key_eth_addrs *mask =
+   skb_flow_dissector_targ

[PATCH net-next V1 03/10] net/sched: Macro instead of CONFIG_NET_CLS_ACT ifdef

2016-03-03 Thread Amir Vadai
Introduce the macros tc_no_actions and tc_for_each_action to make code
clearer.

CC: John Fastabend 
Suggested-by: Jiri Pirko 
Signed-off-by: Amir Vadai 
---
John hi,

I wanted to change ixgbe code to use this, but it is not a trivial change
there, so would prefer you do it.

 include/net/act_api.h| 21 -
 include/net/tc_act/tc_gact.h |  4 ++--
 2 files changed, 18 insertions(+), 7 deletions(-)

diff --git a/include/net/act_api.h b/include/net/act_api.h
index 342be6c..e30856f 100644
--- a/include/net/act_api.h
+++ b/include/net/act_api.h
@@ -78,11 +78,6 @@ static inline void tcf_lastuse_update(struct tcf_t *tm)
tm->lastuse = now;
 }
 
-#ifdef CONFIG_NET_CLS_ACT
-
-#define ACT_P_CREATED 1
-#define ACT_P_DELETED 1
-
 struct tc_action {
void*priv;
const struct tc_action_ops  *ops;
@@ -92,6 +87,11 @@ struct tc_action {
struct tcf_hashinfo *hinfo;
 };
 
+#ifdef CONFIG_NET_CLS_ACT
+
+#define ACT_P_CREATED 1
+#define ACT_P_DELETED 1
+
 struct tc_action_ops {
struct list_head head;
charkind[IFNAMSIZ];
@@ -171,5 +171,16 @@ int tcf_action_dump(struct sk_buff *skb, struct list_head 
*, int, int);
 int tcf_action_dump_old(struct sk_buff *skb, struct tc_action *a, int, int);
 int tcf_action_dump_1(struct sk_buff *skb, struct tc_action *a, int, int);
 int tcf_action_copy_stats(struct sk_buff *, struct tc_action *, int);
+
+#define tc_no_actions(_exts) \
+   (list_empty(&(_exts)->actions))
+
+#define tc_for_each_action(_a, _exts) \
+   list_for_each_entry(a, &(_exts)->actions, list)
+#else /* CONFIG_NET_CLS_ACT */
+
+#define tc_no_actions(_exts) false
+#define tc_for_each_action(_a, _exts) while (0)
+
 #endif /* CONFIG_NET_CLS_ACT */
 #endif
diff --git a/include/net/tc_act/tc_gact.h b/include/net/tc_act/tc_gact.h
index 04a3183..93c520b 100644
--- a/include/net/tc_act/tc_gact.h
+++ b/include/net/tc_act/tc_gact.h
@@ -16,9 +16,9 @@ struct tcf_gact {
 #define to_gact(a) \
container_of(a->priv, struct tcf_gact, common)
 
-#ifdef CONFIG_NET_CLS_ACT
 static inline bool is_tcf_gact_shot(const struct tc_action *a)
 {
+#ifdef CONFIG_NET_CLS_ACT
struct tcf_gact *gact;
 
if (a->ops && a->ops->type != TCA_ACT_GACT)
@@ -28,7 +28,7 @@ static inline bool is_tcf_gact_shot(const struct tc_action *a)
if (gact->tcf_action == TC_ACT_SHOT)
return true;
 
+#endif
return false;
 }
-#endif
 #endif /* __NET_TC_GACT_H */
-- 
2.7.0



[PATCH net-next V1 06/10] net/mlx5e: Relax ndo_setup_tc handle restriction

2016-03-03 Thread Amir Vadai
Restricting handle to TC_H_ROOT breaks the old instantiation of mqprio
to setup a hardware qdisc. This patch relaxes the test, to only check the
type.

Fixes: 08fb1da ("net/mlx5e: Support DCBNL IEEE ETS")
Signed-off-by: Amir Vadai 
---
 drivers/net/ethernet/mellanox/mlx5/core/en_main.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c 
b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
index 5063c0e..5e3692f 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
@@ -1883,7 +1883,7 @@ static int mlx5e_setup_tc(struct net_device *netdev, u8 
tc)
 static int mlx5e_ndo_setup_tc(struct net_device *dev, u32 handle,
  __be16 proto, struct tc_to_netdev *tc)
 {
-   if (handle != TC_H_ROOT || tc->type != TC_SETUP_MQPRIO)
+   if (tc->type != TC_SETUP_MQPRIO)
return -EINVAL;
 
return mlx5e_setup_tc(dev, tc->tc);
-- 
2.7.0



[PATCH net-next V1 10/10] net/mlx5e: Support offload cls_flower with skbedit mark action

2016-03-03 Thread Amir Vadai
Introduce offloading of skbedit mark action.

For example, to mark with 0x1234, all TCP (ip_proto 6) packets arriving
to interface ens9:

 # tc qdisc add dev ens9 ingress
 # tc filter add dev ens9 protocol ip parent ffff: \
 flower ip_proto 6 \
 indev ens9 \
 action skbedit mark 0x1234

Signed-off-by: Amir Vadai 
Signed-off-by: Or Gerlitz 
---
 drivers/net/ethernet/mellanox/mlx5/core/en_rx.c | 3 +++
 drivers/net/ethernet/mellanox/mlx5/core/en_tc.c | 1 +
 drivers/net/ethernet/mellanox/mlx5/core/en_tc.h | 2 ++
 3 files changed, 6 insertions(+)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c 
b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c
index 519a07f..f293afe 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c
@@ -35,6 +35,7 @@
 #include 
 #include 
 #include "en.h"
+#include "en_tc.h"
 
 static inline bool mlx5e_rx_hw_stamp(struct mlx5e_tstamp *tstamp)
 {
@@ -224,6 +225,8 @@ static inline void mlx5e_build_rx_skb(struct mlx5_cqe64 
*cqe,
if (cqe_has_vlan(cqe))
__vlan_hwaccel_put_tag(skb, htons(ETH_P_8021Q),
   be16_to_cpu(cqe->vlan_info));
+
+   skb->mark = be32_to_cpu(cqe->sop_drop_qpn) & MLX5E_TC_FLOW_ID_MASK;
 }
 
 int mlx5e_poll_rx_cq(struct mlx5e_cq *cq, int budget)
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c 
b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c
index 3aea5da..544c739 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c
@@ -33,6 +33,7 @@
 #include 
 #include 
 #include 
+#include 
 #include 
 #include 
 #include 
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.h 
b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.h
index 70642f4..d677428 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.h
@@ -33,6 +33,8 @@
 #ifndef __MLX5_EN_TC_H__
 #define __MLX5_EN_TC_H__
 
+#define MLX5E_TC_FLOW_ID_MASK 0xffff
+
 int mlx5e_tc_init(struct mlx5e_priv *priv);
 void mlx5e_tc_cleanup(struct mlx5e_priv *priv);
 
-- 
2.7.0



[PATCH net-next V1 04/10] net/act_skbedit: Utility functions for mark action

2016-03-03 Thread Amir Vadai
Enable device drivers to query whether an action is a mark action and
what value to use for marking.

Signed-off-by: Amir Vadai 
---
 include/net/tc_act/tc_skbedit.h | 15 +++
 1 file changed, 15 insertions(+)

diff --git a/include/net/tc_act/tc_skbedit.h b/include/net/tc_act/tc_skbedit.h
index 0df9a0d..4497460 100644
--- a/include/net/tc_act/tc_skbedit.h
+++ b/include/net/tc_act/tc_skbedit.h
@@ -20,6 +20,7 @@
 #define __NET_TC_SKBEDIT_H
 
 #include 
+#include 
 
 struct tcf_skbedit {
struct tcf_common   common;
@@ -32,4 +33,18 @@ struct tcf_skbedit {
 #define to_skbedit(a) \
container_of(a->priv, struct tcf_skbedit, common)
 
+static inline bool is_tcf_skbedit_mark(const struct tc_action *a)
+{
+#ifdef CONFIG_NET_CLS_ACT
+   if (a->ops && a->ops->type == TCA_ACT_SKBEDIT)
+   return to_skbedit(a)->flags == SKBEDIT_F_MARK;
+#endif
+   return false;
+}
+
+static inline u32 tcf_skbedit_mark(const struct tc_action *a)
+{
+   return to_skbedit(a)->mark;
+}
+
 #endif /* __NET_TC_SKBEDIT_H */
-- 
2.7.0



[PATCH net-next V2 01/10] net/flower: Introduce hardware offload support

2016-03-03 Thread Amir Vadai
This patch is based on a patch made by John Fastabend.
It adds support for offloading cls_flower.
When NETIF_F_HW_TC is on:
  flags = 0       => Rule will be processed twice - by hardware, and if
                     still relevant, by software.
  flags = SKIP_HW => Rule will be processed by software only.

If the hardware fails or is not capable of applying the rule, the operation
will fail.

Suggested-by: John Fastabend 
Signed-off-by: Amir Vadai 
---
 include/linux/netdevice.h|  2 ++
 include/net/pkt_cls.h| 14 +
 include/uapi/linux/pkt_cls.h |  2 ++
 net/sched/cls_flower.c   | 71 +++-
 4 files changed, 88 insertions(+), 1 deletion(-)

diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index efe7cec..12db9d6 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -785,6 +785,7 @@ typedef u16 (*select_queue_fallback_t)(struct net_device 
*dev,
 enum {
TC_SETUP_MQPRIO,
TC_SETUP_CLSU32,
+   TC_SETUP_CLSFLOWER,
 };
 
 struct tc_cls_u32_offload;
@@ -794,6 +795,7 @@ struct tc_to_netdev {
union {
u8 tc;
struct tc_cls_u32_offload *cls_u32;
+   struct tc_cls_flower_offload *cls_flower;
};
 };
 
diff --git a/include/net/pkt_cls.h b/include/net/pkt_cls.h
index bea14ee..5b4e8f0 100644
--- a/include/net/pkt_cls.h
+++ b/include/net/pkt_cls.h
@@ -409,4 +409,18 @@ static inline bool tc_should_offload(struct net_device 
*dev, u32 flags)
return true;
 }
 
+enum tc_fl_command {
+   TC_CLSFLOWER_REPLACE,
+   TC_CLSFLOWER_DESTROY,
+};
+
+struct tc_cls_flower_offload {
+   enum tc_fl_command command;
+   u64 cookie;
+   struct flow_dissector *dissector;
+   struct fl_flow_key *mask;
+   struct fl_flow_key *key;
+   struct tcf_exts *exts;
+};
+
 #endif
diff --git a/include/uapi/linux/pkt_cls.h b/include/uapi/linux/pkt_cls.h
index 9874f568..c43c5f7 100644
--- a/include/uapi/linux/pkt_cls.h
+++ b/include/uapi/linux/pkt_cls.h
@@ -417,6 +417,8 @@ enum {
TCA_FLOWER_KEY_TCP_DST, /* be16 */
TCA_FLOWER_KEY_UDP_SRC, /* be16 */
TCA_FLOWER_KEY_UDP_DST, /* be16 */
+
+   TCA_FLOWER_FLAGS,
__TCA_FLOWER_MAX,
 };
 
diff --git a/net/sched/cls_flower.c b/net/sched/cls_flower.c
index 95b0212..ed3cd5a 100644
--- a/net/sched/cls_flower.c
+++ b/net/sched/cls_flower.c
@@ -165,6 +165,52 @@ static void fl_destroy_filter(struct rcu_head *head)
kfree(f);
 }
 
+static void fl_hw_destroy_filter(struct tcf_proto *tp, u64 cookie)
+{
+   struct net_device *dev = tp->q->dev_queue->dev;
+   struct tc_cls_flower_offload offload = {0};
+   struct tc_to_netdev tc;
+
+   if (!tc_should_offload(dev, 0))
+   return;
+
+   offload.command = TC_CLSFLOWER_DESTROY;
+   offload.cookie = cookie;
+
+   tc.type = TC_SETUP_CLSFLOWER;
+   tc.cls_flower = &offload;
+
+   dev->netdev_ops->ndo_setup_tc(dev, tp->q->handle, tp->protocol, &tc);
+}
+
+static int fl_hw_replace_filter(struct tcf_proto *tp,
+   struct flow_dissector *dissector,
+   struct fl_flow_key *mask,
+   struct fl_flow_key *key,
+   struct tcf_exts *actions,
+   u64 cookie, u32 flags)
+{
+   struct net_device *dev = tp->q->dev_queue->dev;
+   struct tc_cls_flower_offload offload = {0};
+   struct tc_to_netdev tc;
+
+   if (!tc_should_offload(dev, flags))
+   return 0;
+
+   offload.command = TC_CLSFLOWER_REPLACE;
+   offload.cookie = cookie;
+   offload.dissector = dissector;
+   offload.mask = mask;
+   offload.key = key;
+   offload.exts = actions;
+
+   tc.type = TC_SETUP_CLSFLOWER;
+   tc.cls_flower = &offload;
+
+   return dev->netdev_ops->ndo_setup_tc(dev, tp->q->handle, tp->protocol,
+&tc);
+}
+
 static bool fl_destroy(struct tcf_proto *tp, bool force)
 {
struct cls_fl_head *head = rtnl_dereference(tp->root);
@@ -174,6 +220,7 @@ static bool fl_destroy(struct tcf_proto *tp, bool force)
return false;
 
list_for_each_entry_safe(f, next, &head->filters, list) {
+   fl_hw_destroy_filter(tp, (u64)f);
list_del_rcu(&f->list);
call_rcu(&f->rcu, fl_destroy_filter);
}
@@ -454,11 +501,13 @@ static int fl_change(struct net *net, struct sk_buff 
*in_skb,
 u32 handle, struct nlattr **tca,
 unsigned long *arg, bool ovr)
 {
+   struct net_device *dev = tp->q->dev_queue->dev;
struct cls_fl_head *head = rtnl_dereference(tp->root);
struct cls_fl_filter *fold = (struct cls_fl_filter *) *arg;
struct cls_fl_filter *fne

[PATCH net-next V2 02/10] net/flow_dissector: Make dissector_uses_key() and skb_flow_dissector_target() public

2016-03-03 Thread Amir Vadai
Will be used in a following patch to query whether a key is being used, and
what its value is in the target object.

Signed-off-by: Amir Vadai 
---
 include/net/flow_dissector.h | 13 +
 net/core/flow_dissector.c| 13 -
 2 files changed, 13 insertions(+), 13 deletions(-)

diff --git a/include/net/flow_dissector.h b/include/net/flow_dissector.h
index 8c8548c..d3d60dc 100644
--- a/include/net/flow_dissector.h
+++ b/include/net/flow_dissector.h
@@ -184,4 +184,17 @@ static inline bool flow_keys_have_l4(struct flow_keys 
*keys)
 
 u32 flow_hash_from_keys(struct flow_keys *keys);
 
+static inline bool dissector_uses_key(const struct flow_dissector 
*flow_dissector,
+ enum flow_dissector_key_id key_id)
+{
+   return flow_dissector->used_keys & (1 << key_id);
+}
+
+static inline void *skb_flow_dissector_target(struct flow_dissector 
*flow_dissector,
+ enum flow_dissector_key_id key_id,
+ void *target_container)
+{
+   return ((char *)target_container) + flow_dissector->offset[key_id];
+}
+
 #endif
diff --git a/net/core/flow_dissector.c b/net/core/flow_dissector.c
index 7c7b873..a669dea 100644
--- a/net/core/flow_dissector.c
+++ b/net/core/flow_dissector.c
@@ -19,25 +19,12 @@
 #include 
 #include 
 
-static bool dissector_uses_key(const struct flow_dissector *flow_dissector,
-  enum flow_dissector_key_id key_id)
-{
-   return flow_dissector->used_keys & (1 << key_id);
-}
-
 static void dissector_set_key(struct flow_dissector *flow_dissector,
  enum flow_dissector_key_id key_id)
 {
flow_dissector->used_keys |= (1 << key_id);
 }
 
-static void *skb_flow_dissector_target(struct flow_dissector *flow_dissector,
-  enum flow_dissector_key_id key_id,
-  void *target_container)
-{
-   return ((char *) target_container) + flow_dissector->offset[key_id];
-}
-
 void skb_flow_dissector_init(struct flow_dissector *flow_dissector,
 const struct flow_dissector_key *key,
 unsigned int key_count)
-- 
2.7.0



[PATCH net-next V2 07/10] net/mlx5e: Add a new priority for kernel flow tables

2016-03-03 Thread Amir Vadai
Move the vlan and main flow tables to use priority 1. This will allow
the upcoming TC offload logic to use a higher priority (0) for the
offload steering table.

Signed-off-by: Amir Vadai 
Signed-off-by: Or Gerlitz 
---
 drivers/net/ethernet/mellanox/mlx5/core/en_fs.c   | 4 ++--
 drivers/net/ethernet/mellanox/mlx5/core/fs_core.c | 4 ++--
 2 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_fs.c 
b/drivers/net/ethernet/mellanox/mlx5/core/en_fs.c
index 80d81ab..d00a242 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_fs.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_fs.c
@@ -1041,7 +1041,7 @@ static int mlx5e_create_main_flow_table(struct mlx5e_priv 
*priv)
int err;
 
ft->num_groups = 0;
-   ft->t = mlx5_create_flow_table(priv->fts.ns, 0, MLX5E_MAIN_TABLE_SIZE);
+   ft->t = mlx5_create_flow_table(priv->fts.ns, 1, MLX5E_MAIN_TABLE_SIZE);
 
if (IS_ERR(ft->t)) {
err = PTR_ERR(ft->t);
@@ -1150,7 +1150,7 @@ static int mlx5e_create_vlan_flow_table(struct mlx5e_priv 
*priv)
int err;
 
ft->num_groups = 0;
-   ft->t = mlx5_create_flow_table(priv->fts.ns, 0, MLX5E_VLAN_TABLE_SIZE);
+   ft->t = mlx5_create_flow_table(priv->fts.ns, 1, MLX5E_VLAN_TABLE_SIZE);
 
if (IS_ERR(ft->t)) {
err = PTR_ERR(ft->t);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c 
b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c
index f0e67d2..e848d70 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c
@@ -73,8 +73,8 @@
 #define BY_PASS_MIN_LEVEL (KENREL_MIN_LEVEL + MLX5_BY_PASS_NUM_PRIOS +\
   LEFTOVERS_MAX_FT)
 
-#define KERNEL_MAX_FT 2
-#define KERNEL_NUM_PRIOS 1
+#define KERNEL_MAX_FT 3
+#define KERNEL_NUM_PRIOS 2
 #define KENREL_MIN_LEVEL 2
 
 struct node_caps {
-- 
2.7.0



[PATCH net-next V2 08/10] net/mlx5e: Introduce tc offload support

2016-03-03 Thread Amir Vadai
Extend ndo_setup_tc() to support ingress tc offloading. Will be used by
later patches to offload tc flower filter.

Feature is off by default and could be enabled by issuing:
 # ethtool  -K eth0 hw-tc-offload on

The offload flow table is dynamically created when the first filter is
added.
Rules are saved in a hash table that is maintained by the consumer (for
example - the flower offload in the next patch).
When the last filter is removed and no filters exist in the hash table, the
offload flow table is destroyed.

Signed-off-by: Amir Vadai 
Signed-off-by: Or Gerlitz 
---
 drivers/net/ethernet/mellanox/mlx5/core/Makefile  |   2 +-
 drivers/net/ethernet/mellanox/mlx5/core/en.h  |   9 ++
 drivers/net/ethernet/mellanox/mlx5/core/en_main.c |  38 ++-
 drivers/net/ethernet/mellanox/mlx5/core/en_tc.c   | 131 ++
 drivers/net/ethernet/mellanox/mlx5/core/en_tc.h   |  44 
 5 files changed, 222 insertions(+), 2 deletions(-)
 create mode 100644 drivers/net/ethernet/mellanox/mlx5/core/en_tc.c
 create mode 100644 drivers/net/ethernet/mellanox/mlx5/core/en_tc.h

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/Makefile 
b/drivers/net/ethernet/mellanox/mlx5/core/Makefile
index 11b592d..4fc45ee 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/Makefile
+++ b/drivers/net/ethernet/mellanox/mlx5/core/Makefile
@@ -6,6 +6,6 @@ mlx5_core-y :=  main.o cmd.o debugfs.o fw.o eq.o uar.o 
pagealloc.o \
 
 mlx5_core-$(CONFIG_MLX5_CORE_EN) += wq.o eswitch.o \
en_main.o en_fs.o en_ethtool.o en_tx.o en_rx.o \
-   en_txrx.o en_clock.o vxlan.o
+   en_txrx.o en_clock.o vxlan.o en_tc.o
 
 mlx5_core-$(CONFIG_MLX5_CORE_EN_DCB) +=  en_dcbnl.o
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en.h 
b/drivers/net/ethernet/mellanox/mlx5/core/en.h
index 9c0e80e..36f3dba 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en.h
@@ -43,6 +43,7 @@
 #include 
 #include 
 #include 
+#include 
 #include "wq.h"
 #include "mlx5_core.h"
 
@@ -526,8 +527,16 @@ struct mlx5e_flow_table {
struct mlx5_flow_group  **g;
 };
 
+struct mlx5e_tc_flow_table {
+   struct mlx5_flow_table  *t;
+
+   struct rhashtable_paramsht_params;
+   struct rhashtable   ht;
+};
+
 struct mlx5e_flow_tables {
struct mlx5_flow_namespace  *ns;
+   struct mlx5e_tc_flow_table  tc;
struct mlx5e_flow_table vlan;
struct mlx5e_flow_table main;
 };
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c 
b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
index 5e3692f..011c4f6 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
@@ -30,9 +30,12 @@
  * SOFTWARE.
  */
 
+#include 
+#include 
 #include 
 #include 
 #include "en.h"
+#include "en_tc.h"
 #include "eswitch.h"
 #include "vxlan.h"
 
@@ -1883,6 +1886,17 @@ static int mlx5e_setup_tc(struct net_device *netdev, u8 
tc)
 static int mlx5e_ndo_setup_tc(struct net_device *dev, u32 handle,
  __be16 proto, struct tc_to_netdev *tc)
 {
+   struct mlx5e_priv *priv = netdev_priv(dev);
+
+   if (TC_H_MAJ(handle) != TC_H_MAJ(TC_H_INGRESS))
+   goto mqprio;
+
+   switch (tc->type) {
+   default:
+   return -EOPNOTSUPP;
+   }
+
+mqprio:
if (tc->type != TC_SETUP_MQPRIO)
return -EINVAL;
 
@@ -1966,6 +1980,13 @@ static int mlx5e_set_features(struct net_device *netdev,
mlx5e_disable_vlan_filter(priv);
}
 
+   if ((changes & NETIF_F_HW_TC) && !(features & NETIF_F_HW_TC) &&
+   mlx5e_tc_num_filters(priv)) {
+   netdev_err(netdev,
+  "Active offloaded tc filters, can't turn 
hw_tc_offload off\n");
+   return -EINVAL;
+   }
+
return err;
 }
 
@@ -2365,6 +2386,13 @@ static void mlx5e_build_netdev(struct net_device *netdev)
if (!priv->params.lro_en)
netdev->features  &= ~NETIF_F_LRO;
 
+#define FT_CAP(f) MLX5_CAP_FLOWTABLE(mdev, flow_table_properties_nic_receive.f)
+   if (FT_CAP(flow_modify_en) &&
+   FT_CAP(modify_root) &&
+   FT_CAP(identified_miss_table_mode) &&
+   FT_CAP(flow_table_modify))
+   priv->netdev->hw_features  |= NETIF_F_HW_TC;
+
netdev->features |= NETIF_F_HIGHDMA;
 
netdev->priv_flags   |= IFF_UNICAST_FLT;
@@ -2486,6 +2514,10 @@ static void *mlx5e_create_netdev(struct mlx5_core_dev 
*mdev)
 
mlx5e_vxlan_init(priv);
 
+   err = mlx5e_tc_init(priv);
+   if (err)
+   goto err_destroy_flow_tables;
+
 #ifdef CONFIG_MLX5_CORE_EN_DCB
mlx5e_dcbnl_ieee_setets_core(pri

[PATCH net-next V2 06/10] net/mlx5e: Relax ndo_setup_tc handle restriction

2016-03-03 Thread Amir Vadai
Restricting handle to TC_H_ROOT breaks the old instantiation of mqprio
to setup a hardware qdisc. This patch relaxes the test, to only check the
type.

Fixes: 08fb1da ("net/mlx5e: Support DCBNL IEEE ETS")
Signed-off-by: Amir Vadai 
---
 drivers/net/ethernet/mellanox/mlx5/core/en_main.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c 
b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
index 5063c0e..5e3692f 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
@@ -1883,7 +1883,7 @@ static int mlx5e_setup_tc(struct net_device *netdev, u8 
tc)
 static int mlx5e_ndo_setup_tc(struct net_device *dev, u32 handle,
  __be16 proto, struct tc_to_netdev *tc)
 {
-   if (handle != TC_H_ROOT || tc->type != TC_SETUP_MQPRIO)
+   if (tc->type != TC_SETUP_MQPRIO)
return -EINVAL;
 
return mlx5e_setup_tc(dev, tc->tc);
-- 
2.7.0



[PATCH net-next V2 05/10] net/mlx5_core: Set flow steering dest only for forward rules

2016-03-03 Thread Amir Vadai
We need to handle flow table entry destinations only if the action
associated with the rule is forwarding (MLX5_FLOW_CONTEXT_ACTION_FWD_DEST).

Fixes: 26a8145390b3 ('net/mlx5_core: Introduce flow steering firmware commands')
Signed-off-by: Amir Vadai 
Signed-off-by: Maor Gottlieb 
---
 drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.c  | 29 +--
 drivers/net/ethernet/mellanox/mlx5/core/fs_core.c | 18 +-
 2 files changed, 28 insertions(+), 19 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.c 
b/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.c
index a9894d2..f46f1db 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.c
@@ -218,19 +218,22 @@ static int mlx5_cmd_set_fte(struct mlx5_core_dev *dev,
  match_value);
memcpy(in_match_value, &fte->val, MLX5_ST_SZ_BYTES(fte_match_param));
 
-   in_dests = MLX5_ADDR_OF(flow_context, in_flow_context, destination);
-   list_for_each_entry(dst, &fte->node.children, node.list) {
-   unsigned int id;
-
-   MLX5_SET(dest_format_struct, in_dests, destination_type,
-dst->dest_attr.type);
-   if (dst->dest_attr.type ==
-   MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE)
-   id = dst->dest_attr.ft->id;
-   else
-   id = dst->dest_attr.tir_num;
-   MLX5_SET(dest_format_struct, in_dests, destination_id, id);
-   in_dests += MLX5_ST_SZ_BYTES(dest_format_struct);
+   if (fte->action & MLX5_FLOW_CONTEXT_ACTION_FWD_DEST) {
+   in_dests = MLX5_ADDR_OF(flow_context, in_flow_context, 
destination);
+   list_for_each_entry(dst, &fte->node.children, node.list) {
+   unsigned int id;
+
+   MLX5_SET(dest_format_struct, in_dests, destination_type,
+dst->dest_attr.type);
+   if (dst->dest_attr.type ==
+   MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE) {
+   id = dst->dest_attr.ft->id;
+   } else {
+   id = dst->dest_attr.tir_num;
+   }
+   MLX5_SET(dest_format_struct, in_dests, destination_id, 
id);
+   in_dests += MLX5_ST_SZ_BYTES(dest_format_struct);
+   }
}
memset(out, 0, sizeof(out));
err = mlx5_cmd_exec_check_status(dev, in, inlen, out,
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c 
b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c
index 6f68dba..f0e67d2 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c
@@ -360,8 +360,8 @@ static void del_rule(struct fs_node *node)
memcpy(match_value, fte->val, sizeof(fte->val));
fs_get_obj(ft, fg->node.parent);
list_del(&rule->node.list);
-   fte->dests_size--;
-   if (fte->dests_size) {
+   if ((fte->action & MLX5_FLOW_CONTEXT_ACTION_FWD_DEST) &&
+   --fte->dests_size) {
err = mlx5_cmd_update_fte(dev, ft,
  fg->id, fte);
if (err)
@@ -763,7 +763,8 @@ static struct mlx5_flow_rule *alloc_rule(struct 
mlx5_flow_destination *dest)
return NULL;
 
rule->node.type = FS_TYPE_FLOW_DEST;
-   memcpy(&rule->dest_attr, dest, sizeof(*dest));
+   if (dest)
+   memcpy(&rule->dest_attr, dest, sizeof(*dest));
 
return rule;
 }
@@ -785,8 +786,9 @@ static struct mlx5_flow_rule *add_rule_fte(struct fs_fte 
*fte,
/* Add dest to dests list- added as first element after the head */
tree_init_node(&rule->node, 1, del_rule);
list_add_tail(&rule->node.list, &fte->node.children);
-   fte->dests_size++;
-   if (fte->dests_size == 1)
+   if (dest)
+   fte->dests_size++;
+   if (fte->dests_size == 1 || !dest)
err = mlx5_cmd_create_fte(get_dev(&ft->node),
  ft, fg->id, fte);
else
@@ -802,7 +804,8 @@ static struct mlx5_flow_rule *add_rule_fte(struct fs_fte 
*fte,
 free_rule:
list_del(&rule->node.list);
kfree(rule);
-   fte->dests_size--;
+   if (dest)
+   fte->dests_size--;
return ERR_PTR(err);
 }
 
@@ -996,6 +999,9 @@ mlx5_add_flow_rule(struct mlx5_flow_table *ft,
struct mlx5_flow_group *g;
struct mlx5_flow_rule *rule;
 
+   if ((action & MLX5_FLOW_CONTEXT_ACTION_FWD_DEST) && !dest)
+   return ERR_PTR(-EINVAL);
+
nested_lock_ref_node(&ft->node, FS_MUTEX_GRANDPARENT);
fs_for_each_fg(g, ft)
if (compare_match_criteria(g->mask.match_criteria_enable,
-- 
2.7.0



[PATCH net-next V2 03/10] net/sched: Macro instead of CONFIG_NET_CLS_ACT ifdef

2016-03-03 Thread Amir Vadai
Introduce the macros tc_no_actions and tc_for_each_action to make code
clearer.

Suggested-by: Jiri Pirko 
Signed-off-by: Amir Vadai 
---
 include/net/act_api.h| 21 -
 include/net/tc_act/tc_gact.h |  4 ++--
 2 files changed, 18 insertions(+), 7 deletions(-)

diff --git a/include/net/act_api.h b/include/net/act_api.h
index 342be6c..2a19fe1 100644
--- a/include/net/act_api.h
+++ b/include/net/act_api.h
@@ -78,11 +78,6 @@ static inline void tcf_lastuse_update(struct tcf_t *tm)
tm->lastuse = now;
 }
 
-#ifdef CONFIG_NET_CLS_ACT
-
-#define ACT_P_CREATED 1
-#define ACT_P_DELETED 1
-
 struct tc_action {
void*priv;
const struct tc_action_ops  *ops;
@@ -92,6 +87,11 @@ struct tc_action {
struct tcf_hashinfo *hinfo;
 };
 
+#ifdef CONFIG_NET_CLS_ACT
+
+#define ACT_P_CREATED 1
+#define ACT_P_DELETED 1
+
 struct tc_action_ops {
struct list_head head;
charkind[IFNAMSIZ];
@@ -171,5 +171,16 @@ int tcf_action_dump(struct sk_buff *skb, struct list_head 
*, int, int);
 int tcf_action_dump_old(struct sk_buff *skb, struct tc_action *a, int, int);
 int tcf_action_dump_1(struct sk_buff *skb, struct tc_action *a, int, int);
 int tcf_action_copy_stats(struct sk_buff *, struct tc_action *, int);
+
+#define tc_no_actions(_exts) \
+   (list_empty(&(_exts)->actions))
+
+#define tc_for_each_action(_a, _exts) \
+   list_for_each_entry(a, &(_exts)->actions, list)
+#else /* CONFIG_NET_CLS_ACT */
+
+#define tc_no_actions(_exts) true
+#define tc_for_each_action(_a, _exts) while (0)
+
 #endif /* CONFIG_NET_CLS_ACT */
 #endif
diff --git a/include/net/tc_act/tc_gact.h b/include/net/tc_act/tc_gact.h
index 04a3183..93c520b 100644
--- a/include/net/tc_act/tc_gact.h
+++ b/include/net/tc_act/tc_gact.h
@@ -16,9 +16,9 @@ struct tcf_gact {
 #define to_gact(a) \
container_of(a->priv, struct tcf_gact, common)
 
-#ifdef CONFIG_NET_CLS_ACT
 static inline bool is_tcf_gact_shot(const struct tc_action *a)
 {
+#ifdef CONFIG_NET_CLS_ACT
struct tcf_gact *gact;
 
if (a->ops && a->ops->type != TCA_ACT_GACT)
@@ -28,7 +28,7 @@ static inline bool is_tcf_gact_shot(const struct tc_action *a)
if (gact->tcf_action == TC_ACT_SHOT)
return true;
 
+#endif
return false;
 }
-#endif
 #endif /* __NET_TC_GACT_H */
-- 
2.7.0



[PATCH net-next V2 00/10] cls_flower hardware offload support

2016-03-03 Thread Amir Vadai
Hi,

This patchset is identical to V1 but with a fixed return value of tc_no_actions
in patch 3/10 ("net/sched: Macro instead of CONFIG_NET_CLS_ACT ifdef").

Please see changes from V1 at the bottom.

This patchset introduces cls_flower hardware offload support over ConnectX-4
driver, more hardware vendors are welcome to use it too.

This patchset is based on John's infrastructure for tc offloading [2] to add
hardware offload support to the flower filter. It also extends the support to
an additional tc action - skbedit mark operation.
NIC driver that was used is ConnectX-4. Feature is off by default and could be
turned on using ethtool.

Some commands to use this code:

export TC=../iproute2/tc/tc
export ETH=ens9

ethtool  -K ens9 hw-tc-offload on

# add an ingress qdisc
$TC qdisc add dev $ETH ingress

# Drop ICMP (ip_proto 1) packets
$TC filter add dev $ETH protocol ip prio 20 parent ffff: \
flower ip_proto 1 \
dst_mac 7c:fe:90:69:81:62 \
src_mac 7c:fe:90:69:81:56 \
dst_ip 11.11.11.11 \
src_ip 11.11.11.12 \
indev $ETH \
action drop

# Mark (with 0x1234) TCP (ip_proto 6) packets
$TC filter add dev $ETH protocol ip prio 30 parent ffff: \
flower ip_proto 6 \
indev $ETH \
action skbedit mark 0x1234

# A NOP software filter used to count marked packets using "tc show -s"
$TC filter add dev $ETH protocol ip prio 10 parent ffff: \
handle 0x1234 fw action pass

The code was tested and applied on top of commit 3ebeac1 ("Merge branch
'cxgb4-next'")

Changes from V0:
- Use tc_no_actions and tc_for_each_action instead of ifdef CONFIG_NET_CLS_ACT
- Replace ENOTSUPP (and some EINVAL) with EOPNOTSUPP
- Name the flower command enum
- fl_hw_destroy_filter() to return void - nobody uses the return value
- mlx5e_tc_init() and mlx5e_tc_cleanup() to be called from the right places.
- When adding HW rule fails - fail the command
- Rules are added to be processed both by HW and SW unless SKIP_HW is given
- Adding patch 6/10 ("net/mlx5e: Relax ndo_setup_tc handle restriction")

Main changes from the RFC [1]:
- API
  - Using ndo_setup_tc() instead of switchdev
- act_skbedit, act_gact
  - Actions are not serialized to NIC driver, instead using access functions.
- cls_flower
  - prevent double classification by software by not adding
successfully offloaded filters to the hashtable
  - Fixed some bugs in original RFC with rule delete  
- mlx5
  - Adding flow table to kernel namespace instead of a new namespace
  - s/offload/tc/ in many places
  - no need for a special kconfig since switchdev is not used

Thanks,
Amir

[1] - http://permalink.gmane.org/gmane.linux.network/397064
[2] - http://permalink.gmane.org/gmane.linux.network/397045 
[3] - http://permalink.gmane.org/gmane.linux.network/401226

Amir Vadai (10):
  net/flower: Introduce hardware offload support
  net/flow_dissector: Make dissector_uses_key() and
skb_flow_dissector_target() public
  net/sched: Macro instead of CONFIG_NET_CLS_ACT ifdef
  net/act_skbedit: Utility functions for mark action
  net/mlx5_core: Set flow steering dest only for forward rules
  net/mlx5e: Relax ndo_setup_tc handle restriction
  net/mlx5e: Add a new priority for kernel flow tables
  net/mlx5e: Introduce tc offload support
  net/mlx5e: Support offload cls_flower with drop action
  net/mlx5e: Support offload cls_flower with skbedit mark action

 drivers/net/ethernet/mellanox/mlx5/core/Makefile  |   2 +-
 drivers/net/ethernet/mellanox/mlx5/core/en.h  |   9 +
 drivers/net/ethernet/mellanox/mlx5/core/en_fs.c   |   4 +-
 drivers/net/ethernet/mellanox/mlx5/core/en_main.c |  47 ++-
 drivers/net/ethernet/mellanox/mlx5/core/en_rx.c   |   3 +
 drivers/net/ethernet/mellanox/mlx5/core/en_tc.c   | 429 ++
 drivers/net/ethernet/mellanox/mlx5/core/en_tc.h   |  51 +++
 drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.c  |  29 +-
 drivers/net/ethernet/mellanox/mlx5/core/fs_core.c |  22 +-
 include/linux/netdevice.h |   2 +
 include/net/act_api.h |  21 +-
 include/net/flow_dissector.h  |  13 +
 include/net/pkt_cls.h |  14 +
 include/net/tc_act/tc_gact.h  |   4 +-
 include/net/tc_act/tc_skbedit.h   |  15 +
 include/uapi/linux/pkt_cls.h  |   2 +
 net/core/flow_dissector.c |  13 -
 net/sched/cls_flower.c|  71 +++-
 18 files changed, 704 insertions(+), 47 deletions(-)
 create mode 100644 drivers/net/ethernet/mellanox/mlx5/core/en_tc.c
 create mode 100644 drivers/net/ethernet/mellanox/mlx5/core/en_tc.h

-- 
2.7.0



[PATCH net-next V2 10/10] net/mlx5e: Support offload cls_flower with skbedit mark action

2016-03-03 Thread Amir Vadai
Introduce offloading of skbedit mark action.

For example, to mark with 0x1234, all TCP (ip_proto 6) packets arriving
to interface ens9:

 # tc qdisc add dev ens9 ingress
 # tc filter add dev ens9 protocol ip parent ffff: \
 flower ip_proto 6 \
 indev ens9 \
 action skbedit mark 0x1234

Signed-off-by: Amir Vadai 
Signed-off-by: Or Gerlitz 
---
 drivers/net/ethernet/mellanox/mlx5/core/en_rx.c | 3 +++
 drivers/net/ethernet/mellanox/mlx5/core/en_tc.c | 1 +
 drivers/net/ethernet/mellanox/mlx5/core/en_tc.h | 2 ++
 3 files changed, 6 insertions(+)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c 
b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c
index 519a07f..f293afe 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c
@@ -35,6 +35,7 @@
 #include 
 #include 
 #include "en.h"
+#include "en_tc.h"
 
 static inline bool mlx5e_rx_hw_stamp(struct mlx5e_tstamp *tstamp)
 {
@@ -224,6 +225,8 @@ static inline void mlx5e_build_rx_skb(struct mlx5_cqe64 
*cqe,
if (cqe_has_vlan(cqe))
__vlan_hwaccel_put_tag(skb, htons(ETH_P_8021Q),
   be16_to_cpu(cqe->vlan_info));
+
+   skb->mark = be32_to_cpu(cqe->sop_drop_qpn) & MLX5E_TC_FLOW_ID_MASK;
 }
 
 int mlx5e_poll_rx_cq(struct mlx5e_cq *cq, int budget)
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c 
b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c
index 3aea5da..544c739 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c
@@ -33,6 +33,7 @@
 #include 
 #include 
 #include 
+#include 
 #include 
 #include 
 #include 
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.h 
b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.h
index 70642f4..d677428 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.h
@@ -33,6 +33,8 @@
 #ifndef __MLX5_EN_TC_H__
 #define __MLX5_EN_TC_H__
 
+#define MLX5E_TC_FLOW_ID_MASK 0xffff
+
 int mlx5e_tc_init(struct mlx5e_priv *priv);
 void mlx5e_tc_cleanup(struct mlx5e_priv *priv);
 
-- 
2.7.0



[PATCH net-next V2 04/10] net/act_skbedit: Utility functions for mark action

2016-03-03 Thread Amir Vadai
Enable device drivers to query whether an action is a mark action and
what value to use for marking.

Signed-off-by: Amir Vadai 
---
 include/net/tc_act/tc_skbedit.h | 15 +++
 1 file changed, 15 insertions(+)

diff --git a/include/net/tc_act/tc_skbedit.h b/include/net/tc_act/tc_skbedit.h
index 0df9a0d..4497460 100644
--- a/include/net/tc_act/tc_skbedit.h
+++ b/include/net/tc_act/tc_skbedit.h
@@ -20,6 +20,7 @@
 #define __NET_TC_SKBEDIT_H
 
 #include 
+#include 
 
 struct tcf_skbedit {
struct tcf_common   common;
@@ -32,4 +33,18 @@ struct tcf_skbedit {
 #define to_skbedit(a) \
container_of(a->priv, struct tcf_skbedit, common)
 
+static inline bool is_tcf_skbedit_mark(const struct tc_action *a)
+{
+#ifdef CONFIG_NET_CLS_ACT
+   if (a->ops && a->ops->type == TCA_ACT_SKBEDIT)
+   return to_skbedit(a)->flags == SKBEDIT_F_MARK;
+#endif
+   return false;
+}
+
+static inline u32 tcf_skbedit_mark(const struct tc_action *a)
+{
+   return to_skbedit(a)->mark;
+}
+
 #endif /* __NET_TC_SKBEDIT_H */
-- 
2.7.0



[PATCH net-next V2 09/10] net/mlx5e: Support offload cls_flower with drop action

2016-03-03 Thread Amir Vadai
Parse tc_cls_flower_offload into device specific commands and program
the hardware to classify and act accordingly.

For example, to drop ICMP (ip_proto 1) packets from a specific smac, dmac,
src_ip and dst_ip, arriving to interface ens9:

 # tc qdisc add dev ens9 ingress

 # tc filter add dev ens9 protocol ip parent ffff: \
 flower ip_proto 1 \
 dst_mac 7c:fe:90:69:81:62 src_mac 7c:fe:90:69:81:56 \
 dst_ip 11.11.11.11 src_ip 11.11.11.12 indev ens9 \
 action drop

Signed-off-by: Amir Vadai 
Signed-off-by: Or Gerlitz 
---
 drivers/net/ethernet/mellanox/mlx5/core/en_main.c |   7 +
 drivers/net/ethernet/mellanox/mlx5/core/en_tc.c   | 297 ++
 drivers/net/ethernet/mellanox/mlx5/core/en_tc.h   |   5 +
 3 files changed, 309 insertions(+)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c 
b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
index 011c4f6..9aa9103 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
@@ -1892,6 +1892,13 @@ static int mlx5e_ndo_setup_tc(struct net_device *dev, 
u32 handle,
goto mqprio;
 
switch (tc->type) {
+   case TC_SETUP_CLSFLOWER:
+   switch (tc->cls_flower->command) {
+   case TC_CLSFLOWER_REPLACE:
+   return mlx5e_configure_flower(priv, proto, 
tc->cls_flower);
+   case TC_CLSFLOWER_DESTROY:
+   return mlx5e_delete_flower(priv, tc->cls_flower);
+   }
default:
return -EOPNOTSUPP;
}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c 
b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c
index 9457173..3aea5da 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c
@@ -30,6 +30,9 @@
  * SOFTWARE.
  */
 
+#include 
+#include 
+#include 
 #include 
 #include 
 #include 
@@ -94,6 +97,300 @@ static void mlx5e_tc_del_flow(struct mlx5e_priv *priv,
}
 }
 
+static int parse_cls_flower(struct mlx5e_priv *priv,
+   u32 *match_c, u32 *match_v,
+   struct tc_cls_flower_offload *f)
+{
+   void *headers_c = MLX5_ADDR_OF(fte_match_param, match_c, outer_headers);
+   void *headers_v = MLX5_ADDR_OF(fte_match_param, match_v, outer_headers);
+   u16 addr_type = 0;
+   u8 ip_proto = 0;
+
+   if (f->dissector->used_keys &
+   ~(BIT(FLOW_DISSECTOR_KEY_CONTROL) |
+ BIT(FLOW_DISSECTOR_KEY_BASIC) |
+ BIT(FLOW_DISSECTOR_KEY_ETH_ADDRS) |
+ BIT(FLOW_DISSECTOR_KEY_IPV4_ADDRS) |
+ BIT(FLOW_DISSECTOR_KEY_IPV6_ADDRS) |
+ BIT(FLOW_DISSECTOR_KEY_PORTS))) {
+   netdev_warn(priv->netdev, "Unsupported key used: 0x%x\n",
+   f->dissector->used_keys);
+   return -EOPNOTSUPP;
+   }
+
+   if (dissector_uses_key(f->dissector, FLOW_DISSECTOR_KEY_CONTROL)) {
+   struct flow_dissector_key_control *key =
+   skb_flow_dissector_target(f->dissector,
+ FLOW_DISSECTOR_KEY_BASIC,
+ f->key);
+   addr_type = key->addr_type;
+   }
+
+   if (dissector_uses_key(f->dissector, FLOW_DISSECTOR_KEY_BASIC)) {
+   struct flow_dissector_key_basic *key =
+   skb_flow_dissector_target(f->dissector,
+ FLOW_DISSECTOR_KEY_BASIC,
+ f->key);
+   struct flow_dissector_key_basic *mask =
+   skb_flow_dissector_target(f->dissector,
+ FLOW_DISSECTOR_KEY_BASIC,
+ f->mask);
+   ip_proto = key->ip_proto;
+
+   MLX5_SET(fte_match_set_lyr_2_4, headers_c, ethertype,
+ntohs(mask->n_proto));
+   MLX5_SET(fte_match_set_lyr_2_4, headers_v, ethertype,
+ntohs(key->n_proto));
+
+   MLX5_SET(fte_match_set_lyr_2_4, headers_c, ip_protocol,
+mask->ip_proto);
+   MLX5_SET(fte_match_set_lyr_2_4, headers_v, ip_protocol,
+key->ip_proto);
+   }
+
+   if (dissector_uses_key(f->dissector, FLOW_DISSECTOR_KEY_ETH_ADDRS)) {
+   struct flow_dissector_key_eth_addrs *key =
+   skb_flow_dissector_target(f->dissector,
+ FLOW_DISSECTOR_KEY_ETH_ADDRS,
+ f->key);
+   struct flow_dissector_key_eth_addrs *mask =
+   skb_flow_dissector_targ

Re: [PATCH net-next V2 03/10] net/sched: Macro instead of CONFIG_NET_CLS_ACT ifdef

2016-03-03 Thread Amir Vadai
On Thu, Mar 03, 2016 at 09:45:28AM -0800, Cong Wang wrote:
> On Thu, Mar 3, 2016 at 6:55 AM, Amir Vadai  wrote:
> > Introduce the macros tc_no_actions and tc_for_each_action to make code
> > clearer.
> >
> > Suggested-by: Jiri Pirko 
> > Signed-off-by: Amir Vadai 
> > ---
> >  include/net/act_api.h| 21 -
> >  include/net/tc_act/tc_gact.h |  4 ++--
> >  2 files changed, 18 insertions(+), 7 deletions(-)
> >
> > diff --git a/include/net/act_api.h b/include/net/act_api.h
> > index 342be6c..2a19fe1 100644
> > --- a/include/net/act_api.h
> > +++ b/include/net/act_api.h
> > @@ -78,11 +78,6 @@ static inline void tcf_lastuse_update(struct tcf_t *tm)
> > tm->lastuse = now;
> >  }
> >
> > -#ifdef CONFIG_NET_CLS_ACT
> > -
> > -#define ACT_P_CREATED 1
> > -#define ACT_P_DELETED 1
> > -
> >  struct tc_action {
> > void*priv;
> > const struct tc_action_ops  *ops;
> > @@ -92,6 +87,11 @@ struct tc_action {
> > struct tcf_hashinfo *hinfo;
> >  };
> 
> You also expose struct tc_action out of CONFIG_NET_CLS_ACT,
> which you never mention in your changelog at all.
Yes - it was a mistake not to mention it in the changelog.

> 
> So why?
The struct will not be used, and without exposing it, the compiler will
complain on code like I have in patch 9/10 ("net/mlx5e: Support offload
cls_flower with drop action"):

static int parse_tc_actions(struct mlx5e_priv *priv, struct tcf_exts *exts,
u32 *action, u32 *flow_tag)
{
const struct tc_action *a;

if (tc_no_actions(exts))
return -EINVAL;

*flow_tag = MLX5_FS_DEFAULT_FLOW_TAG;
*action = 0;

tc_for_each_action(a, exts) {

[...]


Re: [PATCH net-next V2 01/10] net/flower: Introduce hardware offload support

2016-03-03 Thread Amir Vadai
On Thu, Mar 03, 2016 at 12:30:33PM -0500, David Miller wrote:
> From: Amir Vadai 
> Date: Thu,  3 Mar 2016 16:55:51 +0200
> 
> > @@ -454,11 +501,13 @@ static int fl_change(struct net *net, struct sk_buff 
> > *in_skb,
> >  u32 handle, struct nlattr **tca,
> >  unsigned long *arg, bool ovr)
> >  {
> > +   struct net_device *dev = tp->q->dev_queue->dev;
> > struct cls_fl_head *head = rtnl_dereference(tp->root);
> 
> This variable is not used.
> 
> And the compiler warns about this, and because of this I am pretty sure you
> aren't looking at the compiler output while testing your builds which is a
> big no-no.
My bad. I did a last minute change that left this variable and somehow
missed the warning (though I did compile and test it).
Will fix for v3


Re: [PATCH net-next V2 04/10] net/act_skbedit: Utility functions for mark action

2016-03-03 Thread Amir Vadai
On Thu, Mar 03, 2016 at 09:48:40AM -0800, Cong Wang wrote:
> On Thu, Mar 3, 2016 at 6:55 AM, Amir Vadai  wrote:
> > +static inline bool is_tcf_skbedit_mark(const struct tc_action *a)
> > +{
> > +#ifdef CONFIG_NET_CLS_ACT
> > +   if (a->ops && a->ops->type == TCA_ACT_SKBEDIT)
> > +   return to_skbedit(a)->flags == SKBEDIT_F_MARK;
> 
> 
> You mean to_skbedit(a)->flags & SKBEDIT_F_MARK ?
I will add a comment in v3 - it is on purpose. The function returns true
iff the action is mark only - other actions should not be offloaded.


Re: [PATCH net-next V2 03/10] net/sched: Macro instead of CONFIG_NET_CLS_ACT ifdef

2016-03-06 Thread Amir Vadai
On Fri, Mar 04, 2016 at 10:20:18AM -0800, Cong Wang wrote:
> On Thu, Mar 3, 2016 at 11:51 AM, Amir Vadai  wrote:
> > On Thu, Mar 03, 2016 at 09:45:28AM -0800, Cong Wang wrote:
> >>
> >> So why?
> > The struct will not be used, and without exposing it, the compiler will
> > complain on code like I have in patch 9/10 ("net/mlx5e: Support offload
> > cls_flower with drop action"):
> >
> > static int parse_tc_actions(struct mlx5e_priv *priv, struct tcf_exts *exts,
> > u32 *action, u32 *flow_tag)
> 
> Why not make this a nop when CONFIG_NET_CLS_ACT is not set?

In V0 I did make it a nop. Jiri suggested [1] that I replace
the ifdefs with the macros tc_for_each_action and is_tcf_gact_shot, and
I do think it looks more elegant.

Why do you think it is a problem to expose struct tc_action?

Thanks for your review,
Amir

[1] - https://patchwork.ozlabs.org/patch/590550/


Re: [PATCH net-next V2 01/10] net/flower: Introduce hardware offload support

2016-03-06 Thread Amir Vadai
On Fri, Mar 04, 2016 at 09:01:39AM -0800, John Fastabend wrote:
> On 16-03-03 06:55 AM, Amir Vadai wrote:
> > This patch is based on a patch made by John Fastabend.
> > It adds support for offloading cls_flower.
> > when NETIF_F_HW_TC is on:
> >   flags = 0   => Rule will be processed twice - by hardware, and if
> >  still relevant, by software.
> >   flags = SKIP_HW => Rull will be processed by software only
> > 
> > If hardare fail/not capabale to apply the rule, operation will fail.
> > 
> > Suggested-by: John Fastabend 
> > Signed-off-by: Amir Vadai 
> > ---
> 
> [...]
> 
> >  static bool fl_destroy(struct tcf_proto *tp, bool force)
> >  {
> > struct cls_fl_head *head = rtnl_dereference(tp->root);
> > @@ -174,6 +220,7 @@ static bool fl_destroy(struct tcf_proto *tp, bool force)
> > return false;
> >  
> > list_for_each_entry_safe(f, next, &head->filters, list) {
> > +   fl_hw_destroy_filter(tp, (u64)f);
> > list_del_rcu(&f->list);
> > call_rcu(&f->rcu, fl_destroy_filter);
> > }
> > @@ -454,11 +501,13 @@ static int fl_change(struct net *net, struct sk_buff 
> > *in_skb,
> >  u32 handle, struct nlattr **tca,
> >  unsigned long *arg, bool ovr)
> >  {
> > +   struct net_device *dev = tp->q->dev_queue->dev;
> > struct cls_fl_head *head = rtnl_dereference(tp->root);
> > struct cls_fl_filter *fold = (struct cls_fl_filter *) *arg;
> > struct cls_fl_filter *fnew;
> > struct nlattr *tb[TCA_FLOWER_MAX + 1];
> > struct fl_flow_mask mask = {};
> > +   u32 flags = 0;
> > int err;
> >  
> > if (!tca[TCA_OPTIONS])
> > @@ -486,6 +535,9 @@ static int fl_change(struct net *net, struct sk_buff 
> > *in_skb,
> > }
> > fnew->handle = handle;
> >  
> > +   if (tb[TCA_FLOWER_FLAGS])
> > +   flags = nla_get_u32(tb[TCA_FLOWER_FLAGS]);
> > +
> > err = fl_set_parms(net, tp, fnew, &mask, base, tb, tca[TCA_RATE], ovr);
> > if (err)
> > goto errout;
> > @@ -498,9 +550,22 @@ static int fl_change(struct net *net, struct sk_buff 
> > *in_skb,
> >  head->ht_params);
> > if (err)
> > goto errout;
> > -   if (fold)
> > +
> > +   err = fl_hw_replace_filter(tp,
> > +  &head->dissector,
> > +  &mask.key,
> > +  &fnew->key,
> > +  &fnew->exts,
> > +  (u64)fnew,
> > +  flags);
> > +   if (err)
> > +   goto err_hash_remove;
> > +
> 
> This behaviour is different than how I did u32 in the u32 case I just
> let the software case get loaded and do not throw any errors. The
> intent was if we required a HW entry we would explicitly state that
> with the SKIP_SW (to be implemented) flag. This error path seems
> to block the software filter when the hardware fails.
Makes sense.

> 
> I think it would be best to do the same as u32 here and use the error
> path only if SKIP_SW is set. Or if you really want an error path on
> SW/HW loads then use another bit in the flag to specify STRICT or
> something along those lines.
I will do the same as u32. I won't add this STRICT flag, because I don't
have any use case for this mode in which processing is done in both SW
and HW.

> 
> 
> > +   if (fold) {
> > rhashtable_remove_fast(&head->ht, &fold->ht_node,
> >head->ht_params);
> > +   fl_hw_destroy_filter(tp, (u64)fold);
> > +   }
> >  
> > *arg = (unsigned long) fnew;
> >  
> > @@ -514,6 +579,9 @@ static int fl_change(struct net *net, struct sk_buff 
> > *in_skb,
> >  
> > return 0;
> >  
> > +err_hash_remove:
> > +   rhashtable_remove_fast(&head->ht, &fnew->ht_node, head->ht_params);
> > +
> >  errout:
> > kfree(fnew);
> > return err;
> > @@ -527,6 +595,7 @@ static int fl_delete(struct tcf_proto *tp, unsigned 
> > long arg)
> > rhashtable_remove_fast(&head->ht, &f->ht_node,
> >head->ht_params);
> > list_del_rcu(&f->list);
> > +   fl_hw_destroy_filter(tp, (u64)f);
> > tcf_unbind_filter(tp, &f->res);
> > call_rcu(&f->rcu, fl_destroy_filter);
> > return 0;
> > 
> 


[PATCH net-next V3 04/10] net/act_skbedit: Utility functions for mark action

2016-03-08 Thread Amir Vadai
Enable device drivers to query whether an action is a mark action (and
only a mark action), and what value to use for marking.

Acked-by: Jiri Pirko 
Signed-off-by: Amir Vadai 
---
 include/net/tc_act/tc_skbedit.h | 16 
 1 file changed, 16 insertions(+)

diff --git a/include/net/tc_act/tc_skbedit.h b/include/net/tc_act/tc_skbedit.h
index 0df9a0d..b496d5a 100644
--- a/include/net/tc_act/tc_skbedit.h
+++ b/include/net/tc_act/tc_skbedit.h
@@ -20,6 +20,7 @@
 #define __NET_TC_SKBEDIT_H
 
 #include 
+#include 
 
 struct tcf_skbedit {
struct tcf_common   common;
@@ -32,4 +33,19 @@ struct tcf_skbedit {
 #define to_skbedit(a) \
container_of(a->priv, struct tcf_skbedit, common)
 
+/* Return true iff action is mark */
+static inline bool is_tcf_skbedit_mark(const struct tc_action *a)
+{
+#ifdef CONFIG_NET_CLS_ACT
+   if (a->ops && a->ops->type == TCA_ACT_SKBEDIT)
+   return to_skbedit(a)->flags == SKBEDIT_F_MARK;
+#endif
+   return false;
+}
+
+static inline u32 tcf_skbedit_mark(const struct tc_action *a)
+{
+   return to_skbedit(a)->mark;
+}
+
 #endif /* __NET_TC_SKBEDIT_H */
-- 
2.7.0



[PATCH net-next V3 03/10] net/sched: Macro instead of CONFIG_NET_CLS_ACT ifdef

2016-03-08 Thread Amir Vadai
Introduce the macros tc_no_actions and tc_for_each_action to make code
clearer.
Extracted struct tc_action out of the ifdef to make calls to
is_tcf_gact_shot() and similar functions valid, even when it is a nop.

Acked-by: Jiri Pirko 
Acked-by: John Fastabend 
Suggested-by: Jiri Pirko 
Signed-off-by: Amir Vadai 
---
 include/net/act_api.h| 21 -
 include/net/tc_act/tc_gact.h |  4 ++--
 2 files changed, 18 insertions(+), 7 deletions(-)

diff --git a/include/net/act_api.h b/include/net/act_api.h
index 342be6c..2a19fe1 100644
--- a/include/net/act_api.h
+++ b/include/net/act_api.h
@@ -78,11 +78,6 @@ static inline void tcf_lastuse_update(struct tcf_t *tm)
tm->lastuse = now;
 }
 
-#ifdef CONFIG_NET_CLS_ACT
-
-#define ACT_P_CREATED 1
-#define ACT_P_DELETED 1
-
 struct tc_action {
void*priv;
const struct tc_action_ops  *ops;
@@ -92,6 +87,11 @@ struct tc_action {
struct tcf_hashinfo *hinfo;
 };
 
+#ifdef CONFIG_NET_CLS_ACT
+
+#define ACT_P_CREATED 1
+#define ACT_P_DELETED 1
+
 struct tc_action_ops {
struct list_head head;
charkind[IFNAMSIZ];
@@ -171,5 +171,16 @@ int tcf_action_dump(struct sk_buff *skb, struct list_head 
*, int, int);
 int tcf_action_dump_old(struct sk_buff *skb, struct tc_action *a, int, int);
 int tcf_action_dump_1(struct sk_buff *skb, struct tc_action *a, int, int);
 int tcf_action_copy_stats(struct sk_buff *, struct tc_action *, int);
+
+#define tc_no_actions(_exts) \
+   (list_empty(&(_exts)->actions))
+
+#define tc_for_each_action(_a, _exts) \
+   list_for_each_entry(a, &(_exts)->actions, list)
+#else /* CONFIG_NET_CLS_ACT */
+
+#define tc_no_actions(_exts) true
+#define tc_for_each_action(_a, _exts) while (0)
+
 #endif /* CONFIG_NET_CLS_ACT */
 #endif
diff --git a/include/net/tc_act/tc_gact.h b/include/net/tc_act/tc_gact.h
index 04a3183..93c520b 100644
--- a/include/net/tc_act/tc_gact.h
+++ b/include/net/tc_act/tc_gact.h
@@ -16,9 +16,9 @@ struct tcf_gact {
 #define to_gact(a) \
container_of(a->priv, struct tcf_gact, common)
 
-#ifdef CONFIG_NET_CLS_ACT
 static inline bool is_tcf_gact_shot(const struct tc_action *a)
 {
+#ifdef CONFIG_NET_CLS_ACT
struct tcf_gact *gact;
 
if (a->ops && a->ops->type != TCA_ACT_GACT)
@@ -28,7 +28,7 @@ static inline bool is_tcf_gact_shot(const struct tc_action *a)
if (gact->tcf_action == TC_ACT_SHOT)
return true;
 
+#endif
return false;
 }
-#endif
 #endif /* __NET_TC_GACT_H */
-- 
2.7.0



[PATCH net-next V3 06/10] net/mlx5e: Relax ndo_setup_tc handle restriction

2016-03-08 Thread Amir Vadai
Restricting handle to TC_H_ROOT breaks the old instantiation of mqprio
to setup a hardware qdisc. This patch relaxes the test, to only check the
type.

Fixes: 08fb1da ("net/mlx5e: Support DCBNL IEEE ETS")
Signed-off-by: Amir Vadai 
---
 drivers/net/ethernet/mellanox/mlx5/core/en_main.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c 
b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
index 5063c0e..5e3692f 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
@@ -1883,7 +1883,7 @@ static int mlx5e_setup_tc(struct net_device *netdev, u8 
tc)
 static int mlx5e_ndo_setup_tc(struct net_device *dev, u32 handle,
  __be16 proto, struct tc_to_netdev *tc)
 {
-   if (handle != TC_H_ROOT || tc->type != TC_SETUP_MQPRIO)
+   if (tc->type != TC_SETUP_MQPRIO)
return -EINVAL;
 
return mlx5e_setup_tc(dev, tc->tc);
-- 
2.7.0



[PATCH net-next V3 10/10] net/mlx5e: Support offload cls_flower with skbedit mark action

2016-03-08 Thread Amir Vadai
Introduce offloading of skbedit mark action.

For example, to mark with 0x1234, all TCP (ip_proto 6) packets arriving
to interface ens9:

 # tc qdisc add dev ens9 ingress
 # tc filter add dev ens9 protocol ip parent ffff: \
 flower ip_proto 6 \
 indev ens9 \
 action skbedit mark 0x1234

Signed-off-by: Amir Vadai 
Signed-off-by: Or Gerlitz 
---
 drivers/net/ethernet/mellanox/mlx5/core/en_rx.c | 3 +++
 drivers/net/ethernet/mellanox/mlx5/core/en_tc.c | 1 +
 drivers/net/ethernet/mellanox/mlx5/core/en_tc.h | 2 ++
 3 files changed, 6 insertions(+)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c 
b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c
index 519a07f..f293afe 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c
@@ -35,6 +35,7 @@
 #include 
 #include 
 #include "en.h"
+#include "en_tc.h"
 
 static inline bool mlx5e_rx_hw_stamp(struct mlx5e_tstamp *tstamp)
 {
@@ -224,6 +225,8 @@ static inline void mlx5e_build_rx_skb(struct mlx5_cqe64 
*cqe,
if (cqe_has_vlan(cqe))
__vlan_hwaccel_put_tag(skb, htons(ETH_P_8021Q),
   be16_to_cpu(cqe->vlan_info));
+
+   skb->mark = be32_to_cpu(cqe->sop_drop_qpn) & MLX5E_TC_FLOW_ID_MASK;
 }
 
 int mlx5e_poll_rx_cq(struct mlx5e_cq *cq, int budget)
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c 
b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c
index 3ed4d96..b3de09f 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c
@@ -33,6 +33,7 @@
 #include 
 #include 
 #include 
+#include 
 #include 
 #include 
 #include 
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.h 
b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.h
index 70642f4..d677428 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.h
@@ -33,6 +33,8 @@
 #ifndef __MLX5_EN_TC_H__
 #define __MLX5_EN_TC_H__
 
+#define MLX5E_TC_FLOW_ID_MASK 0xffff
+
 int mlx5e_tc_init(struct mlx5e_priv *priv);
 void mlx5e_tc_cleanup(struct mlx5e_priv *priv);
 
-- 
2.7.0



[PATCH net-next V3 09/10] net/mlx5e: Support offload cls_flower with drop action

2016-03-08 Thread Amir Vadai
Parse tc_cls_flower_offload into device specific commands and program
the hardware to classify and act accordingly.

For example, to drop ICMP (ip_proto 1) packets from specific smac, dmac,
src_ip, dst_ip, arriving at interface ens9:

 # tc qdisc add dev ens9 ingress

 # tc filter add dev ens9 protocol ip parent ffff: \
 flower ip_proto 1 \
 dst_mac 7c:fe:90:69:81:62 src_mac 7c:fe:90:69:81:56 \
 dst_ip 11.11.11.11 src_ip 11.11.11.12 indev ens9 \
 action drop

Signed-off-by: Amir Vadai 
Signed-off-by: Or Gerlitz 
---
 drivers/net/ethernet/mellanox/mlx5/core/en_main.c |   7 +
 drivers/net/ethernet/mellanox/mlx5/core/en_tc.c   | 297 ++
 drivers/net/ethernet/mellanox/mlx5/core/en_tc.h   |   5 +
 3 files changed, 309 insertions(+)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c 
b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
index 011c4f6..9aa9103 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
@@ -1892,6 +1892,13 @@ static int mlx5e_ndo_setup_tc(struct net_device *dev, 
u32 handle,
goto mqprio;
 
switch (tc->type) {
+   case TC_SETUP_CLSFLOWER:
+   switch (tc->cls_flower->command) {
+   case TC_CLSFLOWER_REPLACE:
+   return mlx5e_configure_flower(priv, proto, 
tc->cls_flower);
+   case TC_CLSFLOWER_DESTROY:
+   return mlx5e_delete_flower(priv, tc->cls_flower);
+   }
default:
return -EOPNOTSUPP;
}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c 
b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c
index 1dc715d..3ed4d96 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c
@@ -30,6 +30,9 @@
  * SOFTWARE.
  */
 
+#include 
+#include 
+#include 
 #include 
 #include 
 #include 
@@ -94,6 +97,300 @@ static void mlx5e_tc_del_flow(struct mlx5e_priv *priv,
}
 }
 
+static int parse_cls_flower(struct mlx5e_priv *priv,
+   u32 *match_c, u32 *match_v,
+   struct tc_cls_flower_offload *f)
+{
+   void *headers_c = MLX5_ADDR_OF(fte_match_param, match_c, outer_headers);
+   void *headers_v = MLX5_ADDR_OF(fte_match_param, match_v, outer_headers);
+   u16 addr_type = 0;
+   u8 ip_proto = 0;
+
+   if (f->dissector->used_keys &
+   ~(BIT(FLOW_DISSECTOR_KEY_CONTROL) |
+ BIT(FLOW_DISSECTOR_KEY_BASIC) |
+ BIT(FLOW_DISSECTOR_KEY_ETH_ADDRS) |
+ BIT(FLOW_DISSECTOR_KEY_IPV4_ADDRS) |
+ BIT(FLOW_DISSECTOR_KEY_IPV6_ADDRS) |
+ BIT(FLOW_DISSECTOR_KEY_PORTS))) {
+   netdev_warn(priv->netdev, "Unsupported key used: 0x%x\n",
+   f->dissector->used_keys);
+   return -EOPNOTSUPP;
+   }
+
+   if (dissector_uses_key(f->dissector, FLOW_DISSECTOR_KEY_CONTROL)) {
+   struct flow_dissector_key_control *key =
+   skb_flow_dissector_target(f->dissector,
+ FLOW_DISSECTOR_KEY_BASIC,
+ f->key);
+   addr_type = key->addr_type;
+   }
+
+   if (dissector_uses_key(f->dissector, FLOW_DISSECTOR_KEY_BASIC)) {
+   struct flow_dissector_key_basic *key =
+   skb_flow_dissector_target(f->dissector,
+ FLOW_DISSECTOR_KEY_BASIC,
+ f->key);
+   struct flow_dissector_key_basic *mask =
+   skb_flow_dissector_target(f->dissector,
+ FLOW_DISSECTOR_KEY_BASIC,
+ f->mask);
+   ip_proto = key->ip_proto;
+
+   MLX5_SET(fte_match_set_lyr_2_4, headers_c, ethertype,
+ntohs(mask->n_proto));
+   MLX5_SET(fte_match_set_lyr_2_4, headers_v, ethertype,
+ntohs(key->n_proto));
+
+   MLX5_SET(fte_match_set_lyr_2_4, headers_c, ip_protocol,
+mask->ip_proto);
+   MLX5_SET(fte_match_set_lyr_2_4, headers_v, ip_protocol,
+key->ip_proto);
+   }
+
+   if (dissector_uses_key(f->dissector, FLOW_DISSECTOR_KEY_ETH_ADDRS)) {
+   struct flow_dissector_key_eth_addrs *key =
+   skb_flow_dissector_target(f->dissector,
+ FLOW_DISSECTOR_KEY_ETH_ADDRS,
+ f->key);
+   struct flow_dissector_key_eth_addrs *mask =
+   skb_flow_dissector_targ

[PATCH net-next V3 02/10] net/flow_dissector: Make dissector_uses_key() and skb_flow_dissector_target() public

2016-03-08 Thread Amir Vadai
Will be used in a following patch to query whether a key is being used, and
what its value is in the target object.

Acked-by: John Fastabend 
Acked-by: Jiri Pirko 
Signed-off-by: Amir Vadai 
---
 include/net/flow_dissector.h | 13 +
 net/core/flow_dissector.c| 13 -
 2 files changed, 13 insertions(+), 13 deletions(-)

diff --git a/include/net/flow_dissector.h b/include/net/flow_dissector.h
index 8c8548c..d3d60dc 100644
--- a/include/net/flow_dissector.h
+++ b/include/net/flow_dissector.h
@@ -184,4 +184,17 @@ static inline bool flow_keys_have_l4(struct flow_keys 
*keys)
 
 u32 flow_hash_from_keys(struct flow_keys *keys);
 
+static inline bool dissector_uses_key(const struct flow_dissector 
*flow_dissector,
+ enum flow_dissector_key_id key_id)
+{
+   return flow_dissector->used_keys & (1 << key_id);
+}
+
+static inline void *skb_flow_dissector_target(struct flow_dissector 
*flow_dissector,
+ enum flow_dissector_key_id key_id,
+ void *target_container)
+{
+   return ((char *)target_container) + flow_dissector->offset[key_id];
+}
+
 #endif
diff --git a/net/core/flow_dissector.c b/net/core/flow_dissector.c
index 7c7b873..a669dea 100644
--- a/net/core/flow_dissector.c
+++ b/net/core/flow_dissector.c
@@ -19,25 +19,12 @@
 #include 
 #include 
 
-static bool dissector_uses_key(const struct flow_dissector *flow_dissector,
-  enum flow_dissector_key_id key_id)
-{
-   return flow_dissector->used_keys & (1 << key_id);
-}
-
 static void dissector_set_key(struct flow_dissector *flow_dissector,
  enum flow_dissector_key_id key_id)
 {
flow_dissector->used_keys |= (1 << key_id);
 }
 
-static void *skb_flow_dissector_target(struct flow_dissector *flow_dissector,
-  enum flow_dissector_key_id key_id,
-  void *target_container)
-{
-   return ((char *) target_container) + flow_dissector->offset[key_id];
-}
-
 void skb_flow_dissector_init(struct flow_dissector *flow_dissector,
 const struct flow_dissector_key *key,
 unsigned int key_count)
-- 
2.7.0



[PATCH net-next V3 05/10] net/mlx5_core: Set flow steering dest only for forward rules

2016-03-08 Thread Amir Vadai
We need to handle flow table entry destinations only if the action
associated with the rule is forwarding (MLX5_FLOW_CONTEXT_ACTION_FWD_DEST).

Fixes: 26a8145390b3 ('net/mlx5_core: Introduce flow steering firmware commands')
Signed-off-by: Amir Vadai 
Signed-off-by: Maor Gottlieb 
---
 drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.c  | 29 +--
 drivers/net/ethernet/mellanox/mlx5/core/fs_core.c | 18 +-
 2 files changed, 28 insertions(+), 19 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.c 
b/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.c
index a9894d2..f46f1db 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.c
@@ -218,19 +218,22 @@ static int mlx5_cmd_set_fte(struct mlx5_core_dev *dev,
  match_value);
memcpy(in_match_value, &fte->val, MLX5_ST_SZ_BYTES(fte_match_param));
 
-   in_dests = MLX5_ADDR_OF(flow_context, in_flow_context, destination);
-   list_for_each_entry(dst, &fte->node.children, node.list) {
-   unsigned int id;
-
-   MLX5_SET(dest_format_struct, in_dests, destination_type,
-dst->dest_attr.type);
-   if (dst->dest_attr.type ==
-   MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE)
-   id = dst->dest_attr.ft->id;
-   else
-   id = dst->dest_attr.tir_num;
-   MLX5_SET(dest_format_struct, in_dests, destination_id, id);
-   in_dests += MLX5_ST_SZ_BYTES(dest_format_struct);
+   if (fte->action & MLX5_FLOW_CONTEXT_ACTION_FWD_DEST) {
+   in_dests = MLX5_ADDR_OF(flow_context, in_flow_context, 
destination);
+   list_for_each_entry(dst, &fte->node.children, node.list) {
+   unsigned int id;
+
+   MLX5_SET(dest_format_struct, in_dests, destination_type,
+dst->dest_attr.type);
+   if (dst->dest_attr.type ==
+   MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE) {
+   id = dst->dest_attr.ft->id;
+   } else {
+   id = dst->dest_attr.tir_num;
+   }
+   MLX5_SET(dest_format_struct, in_dests, destination_id, 
id);
+   in_dests += MLX5_ST_SZ_BYTES(dest_format_struct);
+   }
}
memset(out, 0, sizeof(out));
err = mlx5_cmd_exec_check_status(dev, in, inlen, out,
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c 
b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c
index 6f68dba..f0e67d2 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c
@@ -360,8 +360,8 @@ static void del_rule(struct fs_node *node)
memcpy(match_value, fte->val, sizeof(fte->val));
fs_get_obj(ft, fg->node.parent);
list_del(&rule->node.list);
-   fte->dests_size--;
-   if (fte->dests_size) {
+   if ((fte->action & MLX5_FLOW_CONTEXT_ACTION_FWD_DEST) &&
+   --fte->dests_size) {
err = mlx5_cmd_update_fte(dev, ft,
  fg->id, fte);
if (err)
@@ -763,7 +763,8 @@ static struct mlx5_flow_rule *alloc_rule(struct 
mlx5_flow_destination *dest)
return NULL;
 
rule->node.type = FS_TYPE_FLOW_DEST;
-   memcpy(&rule->dest_attr, dest, sizeof(*dest));
+   if (dest)
+   memcpy(&rule->dest_attr, dest, sizeof(*dest));
 
return rule;
 }
@@ -785,8 +786,9 @@ static struct mlx5_flow_rule *add_rule_fte(struct fs_fte 
*fte,
/* Add dest to dests list- added as first element after the head */
tree_init_node(&rule->node, 1, del_rule);
list_add_tail(&rule->node.list, &fte->node.children);
-   fte->dests_size++;
-   if (fte->dests_size == 1)
+   if (dest)
+   fte->dests_size++;
+   if (fte->dests_size == 1 || !dest)
err = mlx5_cmd_create_fte(get_dev(&ft->node),
  ft, fg->id, fte);
else
@@ -802,7 +804,8 @@ static struct mlx5_flow_rule *add_rule_fte(struct fs_fte 
*fte,
 free_rule:
list_del(&rule->node.list);
kfree(rule);
-   fte->dests_size--;
+   if (dest)
+   fte->dests_size--;
return ERR_PTR(err);
 }
 
@@ -996,6 +999,9 @@ mlx5_add_flow_rule(struct mlx5_flow_table *ft,
struct mlx5_flow_group *g;
struct mlx5_flow_rule *rule;
 
+   if ((action & MLX5_FLOW_CONTEXT_ACTION_FWD_DEST) && !dest)
+   return ERR_PTR(-EINVAL);
+
nested_lock_ref_node(&ft->node, FS_MUTEX_GRANDPARENT);
fs_for_each_fg(g, ft)
if (compare_match_criteria(g->mask.match_criteria_enable,
-- 
2.7.0



[PATCH net-next V3 00/10] cls_flower hardware offload support

2016-03-08 Thread Amir Vadai
Hi,

Please see changes from V2 at the bottom.

This patchset introduces cls_flower hardware offload support over ConnectX-4
driver, more hardware vendors are welcome to use it too.

This patchset is based on John's infrastructure for tc offloading [2] to add
hardware offload support to the flower filter. It also extends the support to
an additional tc action - skbedit mark operation.
The NIC driver used is ConnectX-4. The feature is off by default and can be
turned on using ethtool.

Some commands to use this code:

export TC=../iproute2/tc/tc
export ETH=ens9

ethtool  -K ens9 hw-tc-offload on

# add an ingress qdisc
$TC qdisc add dev $ETH ingress

# Drop ICMP (ip_proto 1) packets
$TC filter add dev $ETH protocol ip prio 20 parent ffff: \
flower ip_proto 1 \
dst_mac 7c:fe:90:69:81:62 \
src_mac 7c:fe:90:69:81:56 \
dst_ip 11.11.11.11 \
src_ip 11.11.11.12 \
indev $ETH \
action drop

# Mark (with 0x1234) TCP (ip_proto 6) packets
$TC filter add dev $ETH protocol ip prio 30 parent ffff: \
flower ip_proto 6 \
indev $ETH \
action skbedit mark 0x1234

# A NOP software filter used to count marked packets using "tc show -s"
$TC filter add dev $ETH protocol ip prio 10 parent ffff: \
handle 0x1234 fw action pass

The code was tested and applied on top of commit 3ebeac1 ("Merge branch
'cxgb4-next'")

Changes from V2:
- patch 1/10 ("net/flower: Introduce hardware offload support")
  - Remove unused variable [Dave]
  - Don't fail command when HW can't offload filter [John]
- patch 3/10 ("net/sched: Macro instead of CONFIG_NET_CLS_ACT ifdef")
  - Mention in changelog that struct tc_action is now exposed out of the ifdef.
- patch 4/10 ("net/act_skbedit: Utility functions for mark action")
  - Document clearly that is_tcf_skbedit_mark() is returning true if and only
if the only action is mark [Dave]
- patch 8/10 ("net/mlx5e: Introduce tc offload support")
  - make mlx5e_tc_add_flow() static

Changes from V1:
- patch 3/10 ("net/sched: Macro instead of CONFIG_NET_CLS_ACT ifdef")
  - fixed return value of tc_no_actions

Changes from V0:
- Use tc_no_actions and tc_for_each_action instead of ifdef CONFIG_NET_CLS_ACT
- Replace ENOTSUPP (and some EINVAL) with EOPNOTSUPP
- Name the flower command enum
- fl_hw_destroy_filter() to return void - nobody uses the return value
- mlx5e_tc_init() and mlx5e_tc_cleanup() to be called from the right places.
- When adding HW rule fails - fail the command
- Rules are added to be processed both by HW and SW unless SKIP_HW is given
- Adding patch 6/10 ("net/mlx5e: Relax ndo_setup_tc handle restriction")

Main changes from the RFC [1]:
- API
  - Using ndo_setup_tc() instead of switchdev
- act_skbedit, act_gact
  - Actions are not serialized to NIC driver, instead using access functions.
- cls_flower
  - prevent double classification by software by not adding
successfully offloaded filters to the hashtable
  - Fixed some bugs in original RFC with rule delete  
- mlx5
  - Adding flow table to kernel namespace instead of a new namespace
  - s/offload/tc/ in many places
  - no need for a special kconfig since switchdev is not used

Thanks,
Amir

[1] - http://permalink.gmane.org/gmane.linux.network/397064
[2] - http://permalink.gmane.org/gmane.linux.network/397045 
[3] - http://permalink.gmane.org/gmane.linux.network/401226

Amir Vadai (10):
  net/flower: Introduce hardware offload support
  net/flow_dissector: Make dissector_uses_key() and
skb_flow_dissector_target() public
  net/sched: Macro instead of CONFIG_NET_CLS_ACT ifdef
  net/act_skbedit: Utility functions for mark action
  net/mlx5_core: Set flow steering dest only for forward rules
  net/mlx5e: Relax ndo_setup_tc handle restriction
  net/mlx5e: Add a new priority for kernel flow tables
  net/mlx5e: Introduce tc offload support
  net/mlx5e: Support offload cls_flower with drop action
  net/mlx5e: Support offload cls_flower with skbedit mark action

 drivers/net/ethernet/mellanox/mlx5/core/Makefile  |   2 +-
 drivers/net/ethernet/mellanox/mlx5/core/en.h  |   9 +
 drivers/net/ethernet/mellanox/mlx5/core/en_fs.c   |   4 +-
 drivers/net/ethernet/mellanox/mlx5/core/en_main.c |  47 ++-
 drivers/net/ethernet/mellanox/mlx5/core/en_rx.c   |   3 +
 drivers/net/ethernet/mellanox/mlx5/core/en_tc.c   | 429 ++
 drivers/net/ethernet/mellanox/mlx5/core/en_tc.h   |  51 +++
 drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.c  |  29 +-
 drivers/net/ethernet/mellanox/mlx5/core/fs_core.c |  22 +-
 include/linux/netdevice.h |   2 +
 include/net/act_api.h |  21 +-
 include/net/flow_dissector.h  |  13 +
 include/net/pkt_cls.h |  14 +
 include/net/tc_act/tc_gact.h  |   4 +-
 include/net/tc_act/tc_skbedit.h   

[PATCH net-next V3 07/10] net/mlx5e: Add a new priority for kernel flow tables

2016-03-08 Thread Amir Vadai
Move the vlan and main flow tables to use priority 1. This will allow
the upcoming TC offload logic to use a higher priority (0) for the
offload steering table.

Signed-off-by: Amir Vadai 
Signed-off-by: Or Gerlitz 
---
 drivers/net/ethernet/mellanox/mlx5/core/en_fs.c   | 4 ++--
 drivers/net/ethernet/mellanox/mlx5/core/fs_core.c | 4 ++--
 2 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_fs.c 
b/drivers/net/ethernet/mellanox/mlx5/core/en_fs.c
index 80d81ab..d00a242 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_fs.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_fs.c
@@ -1041,7 +1041,7 @@ static int mlx5e_create_main_flow_table(struct mlx5e_priv 
*priv)
int err;
 
ft->num_groups = 0;
-   ft->t = mlx5_create_flow_table(priv->fts.ns, 0, MLX5E_MAIN_TABLE_SIZE);
+   ft->t = mlx5_create_flow_table(priv->fts.ns, 1, MLX5E_MAIN_TABLE_SIZE);
 
if (IS_ERR(ft->t)) {
err = PTR_ERR(ft->t);
@@ -1150,7 +1150,7 @@ static int mlx5e_create_vlan_flow_table(struct mlx5e_priv 
*priv)
int err;
 
ft->num_groups = 0;
-   ft->t = mlx5_create_flow_table(priv->fts.ns, 0, MLX5E_VLAN_TABLE_SIZE);
+   ft->t = mlx5_create_flow_table(priv->fts.ns, 1, MLX5E_VLAN_TABLE_SIZE);
 
if (IS_ERR(ft->t)) {
err = PTR_ERR(ft->t);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c 
b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c
index f0e67d2..e848d70 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c
@@ -73,8 +73,8 @@
 #define BY_PASS_MIN_LEVEL (KENREL_MIN_LEVEL + MLX5_BY_PASS_NUM_PRIOS +\
   LEFTOVERS_MAX_FT)
 
-#define KERNEL_MAX_FT 2
-#define KERNEL_NUM_PRIOS 1
+#define KERNEL_MAX_FT 3
+#define KERNEL_NUM_PRIOS 2
 #define KENREL_MIN_LEVEL 2
 
 struct node_caps {
-- 
2.7.0



[PATCH net-next V3 01/10] net/flower: Introduce hardware offload support

2016-03-08 Thread Amir Vadai
This patch is based on a patch made by John Fastabend.
It adds support for offloading cls_flower.
when NETIF_F_HW_TC is on:
  flags = 0   => Rule will be processed twice - by hardware, and if
 still relevant, by software.
  flags = SKIP_HW => Rule will be processed by software only

If the hardware fails or is not capable of applying the rule, the
operation will NOT fail. The filter will be processed by SW only.

Acked-by: Jiri Pirko 
Suggested-by: John Fastabend 
Signed-off-by: Amir Vadai 
---
 include/linux/netdevice.h|  2 ++
 include/net/pkt_cls.h| 14 ++
 include/uapi/linux/pkt_cls.h |  2 ++
 net/sched/cls_flower.c   | 64 +++-
 4 files changed, 81 insertions(+), 1 deletion(-)

diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index efe7cec..12db9d6 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -785,6 +785,7 @@ typedef u16 (*select_queue_fallback_t)(struct net_device 
*dev,
 enum {
TC_SETUP_MQPRIO,
TC_SETUP_CLSU32,
+   TC_SETUP_CLSFLOWER,
 };
 
 struct tc_cls_u32_offload;
@@ -794,6 +795,7 @@ struct tc_to_netdev {
union {
u8 tc;
struct tc_cls_u32_offload *cls_u32;
+   struct tc_cls_flower_offload *cls_flower;
};
 };
 
diff --git a/include/net/pkt_cls.h b/include/net/pkt_cls.h
index bea14ee..5b4e8f0 100644
--- a/include/net/pkt_cls.h
+++ b/include/net/pkt_cls.h
@@ -409,4 +409,18 @@ static inline bool tc_should_offload(struct net_device 
*dev, u32 flags)
return true;
 }
 
+enum tc_fl_command {
+   TC_CLSFLOWER_REPLACE,
+   TC_CLSFLOWER_DESTROY,
+};
+
+struct tc_cls_flower_offload {
+   enum tc_fl_command command;
+   u64 cookie;
+   struct flow_dissector *dissector;
+   struct fl_flow_key *mask;
+   struct fl_flow_key *key;
+   struct tcf_exts *exts;
+};
+
 #endif
diff --git a/include/uapi/linux/pkt_cls.h b/include/uapi/linux/pkt_cls.h
index 9874f568..c43c5f7 100644
--- a/include/uapi/linux/pkt_cls.h
+++ b/include/uapi/linux/pkt_cls.h
@@ -417,6 +417,8 @@ enum {
TCA_FLOWER_KEY_TCP_DST, /* be16 */
TCA_FLOWER_KEY_UDP_SRC, /* be16 */
TCA_FLOWER_KEY_UDP_DST, /* be16 */
+
+   TCA_FLOWER_FLAGS,
__TCA_FLOWER_MAX,
 };
 
diff --git a/net/sched/cls_flower.c b/net/sched/cls_flower.c
index 95b0212..25d8766 100644
--- a/net/sched/cls_flower.c
+++ b/net/sched/cls_flower.c
@@ -165,6 +165,51 @@ static void fl_destroy_filter(struct rcu_head *head)
kfree(f);
 }
 
+static void fl_hw_destroy_filter(struct tcf_proto *tp, u64 cookie)
+{
+   struct net_device *dev = tp->q->dev_queue->dev;
+   struct tc_cls_flower_offload offload = {0};
+   struct tc_to_netdev tc;
+
+   if (!tc_should_offload(dev, 0))
+   return;
+
+   offload.command = TC_CLSFLOWER_DESTROY;
+   offload.cookie = cookie;
+
+   tc.type = TC_SETUP_CLSFLOWER;
+   tc.cls_flower = &offload;
+
+   dev->netdev_ops->ndo_setup_tc(dev, tp->q->handle, tp->protocol, &tc);
+}
+
+static void fl_hw_replace_filter(struct tcf_proto *tp,
+struct flow_dissector *dissector,
+struct fl_flow_key *mask,
+struct fl_flow_key *key,
+struct tcf_exts *actions,
+u64 cookie, u32 flags)
+{
+   struct net_device *dev = tp->q->dev_queue->dev;
+   struct tc_cls_flower_offload offload = {0};
+   struct tc_to_netdev tc;
+
+   if (!tc_should_offload(dev, flags))
+   return;
+
+   offload.command = TC_CLSFLOWER_REPLACE;
+   offload.cookie = cookie;
+   offload.dissector = dissector;
+   offload.mask = mask;
+   offload.key = key;
+   offload.exts = actions;
+
+   tc.type = TC_SETUP_CLSFLOWER;
+   tc.cls_flower = &offload;
+
+   dev->netdev_ops->ndo_setup_tc(dev, tp->q->handle, tp->protocol, &tc);
+}
+
 static bool fl_destroy(struct tcf_proto *tp, bool force)
 {
struct cls_fl_head *head = rtnl_dereference(tp->root);
@@ -174,6 +219,7 @@ static bool fl_destroy(struct tcf_proto *tp, bool force)
return false;
 
list_for_each_entry_safe(f, next, &head->filters, list) {
+   fl_hw_destroy_filter(tp, (u64)f);
list_del_rcu(&f->list);
call_rcu(&f->rcu, fl_destroy_filter);
}
@@ -459,6 +505,7 @@ static int fl_change(struct net *net, struct sk_buff 
*in_skb,
struct cls_fl_filter *fnew;
struct nlattr *tb[TCA_FLOWER_MAX + 1];
struct fl_flow_mask mask = {};
+   u32 flags = 0;
int err;
 
if (!tca[TCA_OPTIONS])
@@ -486,6 +533,9 @@ static int fl_change(struct net *net, struct sk_buff 
*in_skb,
}
fnew->handle = ha

[PATCH net-next V3 08/10] net/mlx5e: Introduce tc offload support

2016-03-08 Thread Amir Vadai
Extend ndo_setup_tc() to support ingress tc offloading. Will be used by
later patches to offload tc flower filter.

Feature is off by default and could be enabled by issuing:
 # ethtool  -K eth0 hw-tc-offload on

The offload flow table is dynamically created when the first filter is
added.
Rules are saved in a hash table that is maintained by the consumer (for
example - the flower offload in the next patch).
When last filter is removed and no filters exist in the hash table, the
offload flow table is destroyed.

Signed-off-by: Amir Vadai 
Signed-off-by: Or Gerlitz 
---
 drivers/net/ethernet/mellanox/mlx5/core/Makefile  |   2 +-
 drivers/net/ethernet/mellanox/mlx5/core/en.h  |   9 ++
 drivers/net/ethernet/mellanox/mlx5/core/en_main.c |  38 ++-
 drivers/net/ethernet/mellanox/mlx5/core/en_tc.c   | 131 ++
 drivers/net/ethernet/mellanox/mlx5/core/en_tc.h   |  44 
 5 files changed, 222 insertions(+), 2 deletions(-)
 create mode 100644 drivers/net/ethernet/mellanox/mlx5/core/en_tc.c
 create mode 100644 drivers/net/ethernet/mellanox/mlx5/core/en_tc.h

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/Makefile 
b/drivers/net/ethernet/mellanox/mlx5/core/Makefile
index 11b592d..4fc45ee 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/Makefile
+++ b/drivers/net/ethernet/mellanox/mlx5/core/Makefile
@@ -6,6 +6,6 @@ mlx5_core-y :=  main.o cmd.o debugfs.o fw.o eq.o uar.o 
pagealloc.o \
 
 mlx5_core-$(CONFIG_MLX5_CORE_EN) += wq.o eswitch.o \
en_main.o en_fs.o en_ethtool.o en_tx.o en_rx.o \
-   en_txrx.o en_clock.o vxlan.o
+   en_txrx.o en_clock.o vxlan.o en_tc.o
 
 mlx5_core-$(CONFIG_MLX5_CORE_EN_DCB) +=  en_dcbnl.o
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en.h 
b/drivers/net/ethernet/mellanox/mlx5/core/en.h
index 9c0e80e..36f3dba 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en.h
@@ -43,6 +43,7 @@
 #include 
 #include 
 #include 
+#include 
 #include "wq.h"
 #include "mlx5_core.h"
 
@@ -526,8 +527,16 @@ struct mlx5e_flow_table {
struct mlx5_flow_group  **g;
 };
 
+struct mlx5e_tc_flow_table {
+   struct mlx5_flow_table  *t;
+
+   struct rhashtable_paramsht_params;
+   struct rhashtable   ht;
+};
+
 struct mlx5e_flow_tables {
struct mlx5_flow_namespace  *ns;
+   struct mlx5e_tc_flow_table  tc;
struct mlx5e_flow_table vlan;
struct mlx5e_flow_table main;
 };
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c 
b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
index 5e3692f..011c4f6 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
@@ -30,9 +30,12 @@
  * SOFTWARE.
  */
 
+#include 
+#include 
 #include 
 #include 
 #include "en.h"
+#include "en_tc.h"
 #include "eswitch.h"
 #include "vxlan.h"
 
@@ -1883,6 +1886,17 @@ static int mlx5e_setup_tc(struct net_device *netdev, u8 
tc)
 static int mlx5e_ndo_setup_tc(struct net_device *dev, u32 handle,
  __be16 proto, struct tc_to_netdev *tc)
 {
+   struct mlx5e_priv *priv = netdev_priv(dev);
+
+   if (TC_H_MAJ(handle) != TC_H_MAJ(TC_H_INGRESS))
+   goto mqprio;
+
+   switch (tc->type) {
+   default:
+   return -EOPNOTSUPP;
+   }
+
+mqprio:
if (tc->type != TC_SETUP_MQPRIO)
return -EINVAL;
 
@@ -1966,6 +1980,13 @@ static int mlx5e_set_features(struct net_device *netdev,
mlx5e_disable_vlan_filter(priv);
}
 
+   if ((changes & NETIF_F_HW_TC) && !(features & NETIF_F_HW_TC) &&
+   mlx5e_tc_num_filters(priv)) {
+   netdev_err(netdev,
+  "Active offloaded tc filters, can't turn 
hw_tc_offload off\n");
+   return -EINVAL;
+   }
+
return err;
 }
 
@@ -2365,6 +2386,13 @@ static void mlx5e_build_netdev(struct net_device *netdev)
if (!priv->params.lro_en)
netdev->features  &= ~NETIF_F_LRO;
 
+#define FT_CAP(f) MLX5_CAP_FLOWTABLE(mdev, flow_table_properties_nic_receive.f)
+   if (FT_CAP(flow_modify_en) &&
+   FT_CAP(modify_root) &&
+   FT_CAP(identified_miss_table_mode) &&
+   FT_CAP(flow_table_modify))
+   priv->netdev->hw_features  |= NETIF_F_HW_TC;
+
netdev->features |= NETIF_F_HIGHDMA;
 
netdev->priv_flags   |= IFF_UNICAST_FLT;
@@ -2486,6 +2514,10 @@ static void *mlx5e_create_netdev(struct mlx5_core_dev 
*mdev)
 
mlx5e_vxlan_init(priv);
 
+   err = mlx5e_tc_init(priv);
+   if (err)
+   goto err_destroy_flow_tables;
+
 #ifdef CONFIG_MLX5_CORE_EN_DCB
mlx5e_dcbnl_ieee_setets_core(pri

Best userspace tool to set maxrate for an interface

2016-03-10 Thread Amir Vadai
Hi John,

I would like to have a standard userspace tool to set the max ratelimit
of an interface.
If you remember, we added [1] a dcb netlink attribute
(DCB_ATTR_IEEE_MAXRATE) under DCB_CMD_IEEE_SET.

The natural place for it is in lldpad/dcbtool. The maxrate setting is
not part of the 8021Qaz spec, therefore it is a bit different from the
other ieee attributes. So I'm not sure it is a good place.

Where do you think it should be added?

Thanks,
Amir

[1] - 08f10af ("net/dcb: Add an optional max rate attribute")


[PATCH net-next] net/flower: Fix pointer cast

2016-03-11 Thread Amir Vadai
Cast pointer to unsigned long instead of u64, to fix compilation warning
on 32 bit arch, spotted by 0day build.

Fixes: 5b33f48 ("net/flower: Introduce hardware offload support")
Signed-off-by: Amir Vadai 
---
 include/net/pkt_cls.h  |  2 +-
 net/sched/cls_flower.c | 12 ++--
 2 files changed, 7 insertions(+), 7 deletions(-)

diff --git a/include/net/pkt_cls.h b/include/net/pkt_cls.h
index 5b4e8f0..caa5e18 100644
--- a/include/net/pkt_cls.h
+++ b/include/net/pkt_cls.h
@@ -416,7 +416,7 @@ enum tc_fl_command {
 
 struct tc_cls_flower_offload {
enum tc_fl_command command;
-   u64 cookie;
+   unsigned long cookie;
struct flow_dissector *dissector;
struct fl_flow_key *mask;
struct fl_flow_key *key;
diff --git a/net/sched/cls_flower.c b/net/sched/cls_flower.c
index 25d8766..2181ffc 100644
--- a/net/sched/cls_flower.c
+++ b/net/sched/cls_flower.c
@@ -165,7 +165,7 @@ static void fl_destroy_filter(struct rcu_head *head)
kfree(f);
 }
 
-static void fl_hw_destroy_filter(struct tcf_proto *tp, u64 cookie)
+static void fl_hw_destroy_filter(struct tcf_proto *tp, unsigned long cookie)
 {
struct net_device *dev = tp->q->dev_queue->dev;
struct tc_cls_flower_offload offload = {0};
@@ -188,7 +188,7 @@ static void fl_hw_replace_filter(struct tcf_proto *tp,
 struct fl_flow_key *mask,
 struct fl_flow_key *key,
 struct tcf_exts *actions,
-u64 cookie, u32 flags)
+unsigned long cookie, u32 flags)
 {
struct net_device *dev = tp->q->dev_queue->dev;
struct tc_cls_flower_offload offload = {0};
@@ -219,7 +219,7 @@ static bool fl_destroy(struct tcf_proto *tp, bool force)
return false;
 
list_for_each_entry_safe(f, next, &head->filters, list) {
-   fl_hw_destroy_filter(tp, (u64)f);
+   fl_hw_destroy_filter(tp, (unsigned long)f);
list_del_rcu(&f->list);
call_rcu(&f->rcu, fl_destroy_filter);
}
@@ -554,13 +554,13 @@ static int fl_change(struct net *net, struct sk_buff 
*in_skb,
 &mask.key,
 &fnew->key,
 &fnew->exts,
-(u64)fnew,
+(unsigned long)fnew,
 flags);
 
if (fold) {
rhashtable_remove_fast(&head->ht, &fold->ht_node,
   head->ht_params);
-   fl_hw_destroy_filter(tp, (u64)fold);
+   fl_hw_destroy_filter(tp, (unsigned long)fold);
}
 
*arg = (unsigned long) fnew;
@@ -588,7 +588,7 @@ static int fl_delete(struct tcf_proto *tp, unsigned long 
arg)
rhashtable_remove_fast(&head->ht, &f->ht_node,
   head->ht_params);
list_del_rcu(&f->list);
-   fl_hw_destroy_filter(tp, (u64)f);
+   fl_hw_destroy_filter(tp, (unsigned long)f);
tcf_unbind_filter(tp, &f->res);
call_rcu(&f->rcu, fl_destroy_filter);
return 0;
-- 
2.7.0



[PATCH iproute2] tc: flower: Add skip_{hw|sw} support

2016-07-04 Thread Amir Vadai
From: Amir Vadai 

On devices that support TC flower offloads, these flags enable a filter to be
added only to HW or only to SW. skip_sw and skip_hw are mutually exclusive
flags. By default without any flags, the filter is added to both HW and SW,
but no error checks are done in case of failure to add to HW.
With skip_sw, failure to add to HW is treated as an error.

Here is a sample script that adds 2 filters, one with skip_sw and the other
with skip_hw flag.

   # add ingress qdisc
   tc qdisc add dev enp0s9 ingress

   # enable hw tc offload.
   ethtool -K enp0s9 hw-tc-offload on

   # add a flower filter with skip-sw flag.
   tc filter add dev enp0s9 protocol ip parent : flower \
   ip_proto 1 indev enp0s9 skip_sw \
   action drop

   # add a flower filter with skip-hw flag.
   tc filter add dev enp0s9 protocol ip parent : flower \
   ip_proto 3 indev enp0s9 skip_hw \
   action drop

Signed-off-by: Amir Vadai 
---
 man/man8/tc-flower.8 | 11 ++-
 tc/f_flower.c| 17 +
 2 files changed, 27 insertions(+), 1 deletion(-)

diff --git a/man/man8/tc-flower.8 b/man/man8/tc-flower.8
index df4d8e1..9ae10e6 100644
--- a/man/man8/tc-flower.8
+++ b/man/man8/tc-flower.8
@@ -18,7 +18,9 @@ flower \- flow based traffic control filter
 .ti -8
 .IR MATCH " := { "
 .B indev
-.IR ifname " | { "
+.IR ifname " | "
+.BR skip_sw " | " skip_hw
+.R " | { "
 .BR dst_mac " | " src_mac " } "
 .IR mac_address " | "
 .BR eth_type " { " ipv4 " | " ipv6 " | "
@@ -55,6 +57,13 @@ is the name of an interface which must exist at the time of
 .B tc
 invocation.
 .TP
+.BI skip_sw
+Do not process filter by software. If hardware has no offload support for this
+filter, or TC offload is not enabled for the interface, operation will fail.
+.TP
+.BI skip_hw
+Do not process filter by hardware.
+.TP
 .BI dst_mac " mac_address"
 .TQ
 .BI src_mac " mac_address"
diff --git a/tc/f_flower.c b/tc/f_flower.c
index fd2014b..7b46ceb 100644
--- a/tc/f_flower.c
+++ b/tc/f_flower.c
@@ -25,6 +25,7 @@
 static void explain(void)
 {
fprintf(stderr, "Usage: ... flower [ MATCH-LIST ]\n");
+   fprintf(stderr, "  [ skip_sw | skip_hw ]\n");
fprintf(stderr, "  [ action ACTION-SPEC ] [ classid 
CLASSID ]\n");
fprintf(stderr, "\n");
fprintf(stderr, "Where: MATCH-LIST := [ MATCH-LIST ] MATCH\n");
@@ -167,6 +168,7 @@ static int flower_parse_opt(struct filter_util *qu, char 
*handle,
struct rtattr *tail;
__be16 eth_type = TC_H_MIN(t->tcm_info);
__u8 ip_proto = 0xff;
+   __u32 flags = 0;
 
if (handle) {
ret = get_u32(&t->tcm_handle, handle, 0);
@@ -196,6 +198,10 @@ static int flower_parse_opt(struct filter_util *qu, char 
*handle,
return -1;
}
addattr_l(n, MAX_MSG, TCA_FLOWER_CLASSID, &handle, 4);
+   } else if (matches(*argv, "skip_hw") == 0) {
+   flags |= TCA_CLS_FLAGS_SKIP_HW;
+   } else if (matches(*argv, "skip_sw") == 0) {
+   flags |= TCA_CLS_FLAGS_SKIP_SW;
} else if (matches(*argv, "indev") == 0) {
char ifname[IFNAMSIZ];
 
@@ -294,6 +300,8 @@ static int flower_parse_opt(struct filter_util *qu, char 
*handle,
}
 
 parse_done:
+   addattr32(n, MAX_MSG, TCA_FLOWER_FLAGS, flags);
+
ret = addattr16(n, MAX_MSG, TCA_FLOWER_KEY_ETH_TYPE, eth_type);
if (ret) {
fprintf(stderr, "Illegal \"eth_type\"(0x%x)\n",
@@ -498,6 +506,15 @@ static int flower_print_opt(struct filter_util *qu, FILE 
*f,
  tb[TCA_FLOWER_KEY_TCP_SRC],
  tb[TCA_FLOWER_KEY_UDP_SRC]);
 
+   if (tb[TCA_FLOWER_FLAGS])  {
+   __u32 flags = rta_getattr_u32(tb[TCA_FLOWER_FLAGS]);
+
+   if (flags & TCA_CLS_FLAGS_SKIP_HW)
+   fprintf(f, "\n  skip_hw");
+   if (flags & TCA_CLS_FLAGS_SKIP_SW)
+   fprintf(f, "\n  skip_sw");
+   }
+
if (tb[TCA_FLOWER_ACT]) {
tc_print_action(f, tb[TCA_FLOWER_ACT]);
}
-- 
2.9.0



[RFC net-next 1/9] net/flow_dissector: Make dissector_uses_key() and skb_flow_dissector_target() public

2016-02-01 Thread Amir Vadai
Will be used in a following patch.

Signed-off-by: Amir Vadai 
---
 include/net/flow_dissector.h | 13 +
 net/core/flow_dissector.c| 13 -
 2 files changed, 13 insertions(+), 13 deletions(-)

diff --git a/include/net/flow_dissector.h b/include/net/flow_dissector.h
index 8c8548c..d3d60dc 100644
--- a/include/net/flow_dissector.h
+++ b/include/net/flow_dissector.h
@@ -184,4 +184,17 @@ static inline bool flow_keys_have_l4(struct flow_keys 
*keys)
 
 u32 flow_hash_from_keys(struct flow_keys *keys);
 
+static inline bool dissector_uses_key(const struct flow_dissector 
*flow_dissector,
+ enum flow_dissector_key_id key_id)
+{
+   return flow_dissector->used_keys & (1 << key_id);
+}
+
+static inline void *skb_flow_dissector_target(struct flow_dissector 
*flow_dissector,
+ enum flow_dissector_key_id key_id,
+ void *target_container)
+{
+   return ((char *)target_container) + flow_dissector->offset[key_id];
+}
+
 #endif
diff --git a/net/core/flow_dissector.c b/net/core/flow_dissector.c
index d79699c..db0aa1c 100644
--- a/net/core/flow_dissector.c
+++ b/net/core/flow_dissector.c
@@ -19,25 +19,12 @@
 #include 
 #include 
 
-static bool dissector_uses_key(const struct flow_dissector *flow_dissector,
-  enum flow_dissector_key_id key_id)
-{
-   return flow_dissector->used_keys & (1 << key_id);
-}
-
 static void dissector_set_key(struct flow_dissector *flow_dissector,
  enum flow_dissector_key_id key_id)
 {
flow_dissector->used_keys |= (1 << key_id);
 }
 
-static void *skb_flow_dissector_target(struct flow_dissector *flow_dissector,
-  enum flow_dissector_key_id key_id,
-  void *target_container)
-{
-   return ((char *) target_container) + flow_dissector->offset[key_id];
-}
-
 void skb_flow_dissector_init(struct flow_dissector *flow_dissector,
 const struct flow_dissector_key *key,
 unsigned int key_count)
-- 
2.7.0



[RFC net-next 0/9] TC filter HW offloads

2016-02-01 Thread Amir Vadai
Hi,

So... just before sending that, I noted John's series that
deals with tc and u32. One notable difference between the 
two approaches is that here we "normalize" the upper layer
way of describing matching and actions into a generic structure
(flow dissector, etc.), which should allow offloading on behalf of different
potential consumers (TC flower, a TC u32 subset, netfilter, etc.).
Another difference is that this series uses the switchdev
framework which would allow using the proposed HW offloading
mechanisms for physical and SRIOV embedded switches too that
make use of switchdev.

This patchset introduces an infrastructure to offload matching of flows and
some basic actions to hardware, currently using iproute2 / tc tool.

In this patchset, the classification is described using the flower filter, and
the supported actions are drop (using gact) and mark (using skbedit).

Flow classification is described using a flow dissector that is built by 
the tc filter. The filter also calls the actions to be serialized into the new
structure - switchdev_obj_port_flow_act.

The flow dissector and the serialized actions are passed using switchdev ops to
the HW driver, which parses it into hardware commands. We propose to use the
kernel flow-dissector to describe flows/ACLs in the switchdev framework which
by itself could be also used for HW offloading of other kernel networking
components.

An implementation for the above is provided using mlx5 driver and Mellanox 
ConnectX4 HW.

Some issues that will be addressed before making the final submission:
1. 'offload' should be a generic filter attribute and not flower filter
   specific.
2. Serialization of actions will be changed into a list instead of one big
   structure to describe all actions.

Few more matters to discuss 

1. Should HW offloading be done only under explicit admin directive?

2. switchdev is used today for physical switch HW and on an upcoming proposal
for SRIOV e-switch vport representors too. Here, we're doing that with a NIC, 
that can potentially serve as an uplink port for v-switch (e.g under 
Para-Virtual 
scheme).

Sample usage of the feature:

export TC=../iproute2/tc/tc
export ETH=ens9

ifconfig ens9 11.11.11.11/24 up

# add an ingress qdisc
$TC qdisc add dev $ETH ingress

# Drop ICMP (ip_proto 1) packets
$TC filter add dev $ETH protocol ip prio 20 parent : \
flower eth_type ip ip_proto 1 \
indev $ETH offload \
action drop

# Mark (with 0x1234) TCP (ip_proto 6) packets
$TC filter add dev $ETH protocol ip prio 30 parent : \
flower eth_type ip ip_proto 6 \
indev $ETH offload \
action skbedit mark 0x1234

# A NOP filter for packets that are marked (0x1234)
$TC filter add dev $ETH protocol ip prio 10 parent : \
handle 0x1234 fw action pass

# See that pings are blocked
# See that ssh is working (=TCP traffic)

# See NOP filter counters. If >0, HW marked and NOP filter caught it
$TC -s filter show dev $ETH parent :

This patchset depends on a small fix [1] that is currently under review in the
mailing list.  It was applied and tested on net-next commit 7a26019
("Merge git://git.kernel.org/pub/scm/linux/kernel/git/davem/net")

[1] Depends on "net/mlx5_core: Set flow steering dest only for forward rules"
- http://patchwork.ozlabs.org/patch/574055/   

Thanks,
Amir

Amir Vadai (9):
  net/flow_dissector: Make dissector_uses_key() and
skb_flow_dissector_target() public
  net/switchdev: Introduce hardware offload support
  net/act: Offload support by tc actions
  net/act_skbedit: Introduce hardware offload support
  net/act_gact: Introduce hardware offload support for drop
  net/cls_flower: Introduce hardware offloading
  net/mlx5_core: Go to next flow table support
  net/mlx5e: Introduce MLX5_FLOW_NAMESPACE_OFFLOADS
  net/mlx5e: Flow steering support through switchdev

 drivers/net/ethernet/mellanox/mlx5/core/Kconfig|   7 +
 drivers/net/ethernet/mellanox/mlx5/core/Makefile   |   3 +
 drivers/net/ethernet/mellanox/mlx5/core/en.h   |  10 +
 drivers/net/ethernet/mellanox/mlx5/core/en_fs.c|  10 +-
 drivers/net/ethernet/mellanox/mlx5/core/en_main.c  |   2 +
 drivers/net/ethernet/mellanox/mlx5/core/en_rx.c|   2 +
 .../net/ethernet/mellanox/mlx5/core/en_switchdev.c | 475 +
 .../net/ethernet/mellanox/mlx5/core/en_switchdev.h |  60 +++
 drivers/net/ethernet/mellanox/mlx5/core/fs_core.c  |  26 ++
 include/linux/mlx5/fs.h|   1 +
 include/net/act_api.h  |   3 +
 include/net/flow_dissector.h   |  13 +
 include/net/pkt_cls.h  |   2 +
 include/net/switchdev.h|  46 ++
 include/uapi/linux/pkt_cls.h   |   1 +
 net/core/flow_dissector.c  |  13 -
 net/sched/act_gact.c  

[RFC net-next 2/9] net/switchdev: Introduce hardware offload support

2016-02-01 Thread Amir Vadai
Extend the switchdev API with new operations: switchdev_port_flow_add()
and switchdev_port_flow_del().
It allows the user to add/del a hardware offloaded flow classification
and actions.
For every new flow object a cookie is supplied. This cookie will be
used later on to identify the flow when removed.

In order to make the API as flexible as possible, flow_dissector is
being used to describe the flow classifier.

Every new flow object consists of a flow_dissector+key+mask to
describe the classifier and a switchdev_obj_port_flow_act to describe
the actions and their attributes.

The object is passed to the lower layer driver to be pushed into the
hardware.

Signed-off-by: Amir Vadai 
---
 include/net/switchdev.h   | 46 ++
 net/switchdev/switchdev.c | 33 +
 2 files changed, 79 insertions(+)

diff --git a/include/net/switchdev.h b/include/net/switchdev.h
index d451122..c5a5681 100644
--- a/include/net/switchdev.h
+++ b/include/net/switchdev.h
@@ -15,6 +15,7 @@
 #include 
 #include 
 #include 
+#include 
 
 #define SWITCHDEV_F_NO_RECURSE BIT(0)
 #define SWITCHDEV_F_SKIP_EOPNOTSUPPBIT(1)
@@ -69,6 +70,7 @@ enum switchdev_obj_id {
SWITCHDEV_OBJ_ID_IPV4_FIB,
SWITCHDEV_OBJ_ID_PORT_FDB,
SWITCHDEV_OBJ_ID_PORT_MDB,
+   SWITCHDEV_OBJ_ID_PORT_FLOW,
 };
 
 struct switchdev_obj {
@@ -124,6 +126,30 @@ struct switchdev_obj_port_mdb {
 #define SWITCHDEV_OBJ_PORT_MDB(obj) \
container_of(obj, struct switchdev_obj_port_mdb, obj)
 
+/* SWITCHDEV_OBJ_ID_PORT_FLOW */
+enum switchdev_obj_port_flow_action {
+   SWITCHDEV_OBJ_PORT_FLOW_ACT_DROP = 0,
+   SWITCHDEV_OBJ_PORT_FLOW_ACT_MARK = 1,
+};
+
+struct switchdev_obj_port_flow_act {
+   u32 actions; /* Bitmap of requested actions */
+   u32 mark; /* Value for mark action - if requested */
+};
+
+struct switchdev_obj_port_flow {
+   struct switchdev_obj obj;
+
+   unsigned long cookie;
+   struct flow_dissector *dissector; /* Dissector for mask and keys */
+   void *mask; /* Flow keys mask */
+   void *key;  /* Flow keys */
+   struct switchdev_obj_port_flow_act *actions;
+};
+
+#define SWITCHDEV_OBJ_PORT_FLOW(obj) \
+   container_of(obj, struct switchdev_obj_port_flow, obj)
+
 void switchdev_trans_item_enqueue(struct switchdev_trans *trans,
  void *data, void (*destructor)(void const *),
  struct switchdev_trans_item *tritem);
@@ -223,6 +249,12 @@ void switchdev_port_fwd_mark_set(struct net_device *dev,
 struct net_device *group_dev,
 bool joining);
 
+int switchdev_port_flow_add(struct net_device *dev,
+   struct flow_dissector *dissector,
+   void *mask, void *key,
+   struct switchdev_obj_port_flow_act *actions,
+   unsigned long cookie);
+int switchdev_port_flow_del(struct net_device *dev, unsigned long cookie);
 #else
 
 static inline void switchdev_deferred_process(void)
@@ -347,6 +379,20 @@ static inline void switchdev_port_fwd_mark_set(struct 
net_device *dev,
 {
 }
 
+static inline int switchdev_port_flow_add(struct net_device *dev,
+ struct flow_dissector *dissector,
+ void *mask, void *key,
+ struct switchdev_obj_port_flow_act 
*actions,
+ unsigned long cookie)
+{
+   return -EOPNOTSUPP;
+}
+
+static inline int switchdev_port_flow_del(struct net_device *dev,
+ unsigned long cookie)
+{
+   return -EOPNOTSUPP;
+}
 #endif
 
 #endif /* _LINUX_SWITCHDEV_H_ */
diff --git a/net/switchdev/switchdev.c b/net/switchdev/switchdev.c
index ebc661d..67b4678 100644
--- a/net/switchdev/switchdev.c
+++ b/net/switchdev/switchdev.c
@@ -1383,3 +1383,36 @@ void switchdev_port_fwd_mark_set(struct net_device *dev,
dev->offload_fwd_mark = mark;
 }
 EXPORT_SYMBOL_GPL(switchdev_port_fwd_mark_set);
+
+/* Must not be deferred, since deffering does shallow copy, which will not
+ * copy mask and key content
+ */
+int switchdev_port_flow_add(struct net_device *dev,
+   struct flow_dissector *dissector,
+   void *mask, void *key,
+   struct switchdev_obj_port_flow_act *actions,
+   unsigned long cookie)
+{
+   struct switchdev_obj_port_flow flow = {
+   .obj.id = SWITCHDEV_OBJ_ID_PORT_FLOW,
+   .cookie = cookie,
+   .dissector = dissector,
+   .mask = mask,
+   .key = key,
+   .actions = actions,
+   };
+
+   return switchdev_port_obj_add(dev, &flow.obj);
+}
+EXPORT_SYMBOL_GPL(switchdev_port_flow_add);

[RFC net-next 3/9] net/act: Offload support by tc actions

2016-02-01 Thread Amir Vadai
In order to support hardware offloading, an action should implement the
new offload_init() callback.
During filter initialization, offload_init() will be called to add
the action description to the actions object that will be used by the
filter to configure the hardware.

Signed-off-by: Amir Vadai 
---
 include/net/act_api.h |  3 +++
 include/net/pkt_cls.h |  2 ++
 net/sched/cls_api.c   | 27 +++
 3 files changed, 32 insertions(+)

diff --git a/include/net/act_api.h b/include/net/act_api.h
index 9d446f13..fcabe93 100644
--- a/include/net/act_api.h
+++ b/include/net/act_api.h
@@ -7,6 +7,7 @@
 
 #include 
 #include 
+#include 
 
 struct tcf_common {
struct hlist_node   tcfc_head;
@@ -108,6 +109,8 @@ struct tc_action_ops {
struct nlattr *est, struct tc_action *act, int ovr,
int bind);
int (*walk)(struct sk_buff *, struct netlink_callback *, int, 
struct tc_action *);
+   int (*offload_init)(struct tc_action *,
+   struct switchdev_obj_port_flow_act *);
 };
 
 int tcf_hash_search(struct tc_action *a, u32 index);
diff --git a/include/net/pkt_cls.h b/include/net/pkt_cls.h
index bc49967..7eb8ee9 100644
--- a/include/net/pkt_cls.h
+++ b/include/net/pkt_cls.h
@@ -130,6 +130,8 @@ tcf_exts_exec(struct sk_buff *skb, struct tcf_exts *exts,
return 0;
 }
 
+int tcf_exts_offload_init(struct tcf_exts *e,
+ struct switchdev_obj_port_flow_act *actions);
 int tcf_exts_validate(struct net *net, struct tcf_proto *tp,
  struct nlattr **tb, struct nlattr *rate_tlv,
  struct tcf_exts *exts, bool ovr);
diff --git a/net/sched/cls_api.c b/net/sched/cls_api.c
index a75864d..d675c31 100644
--- a/net/sched/cls_api.c
+++ b/net/sched/cls_api.c
@@ -29,6 +29,7 @@
 #include 
 #include 
 #include 
+#include 
 
 /* The list of all installed classifier types */
 static LIST_HEAD(tcf_proto_base);
@@ -551,6 +552,32 @@ int tcf_exts_validate(struct net *net, struct tcf_proto 
*tp, struct nlattr **tb,
 }
 EXPORT_SYMBOL(tcf_exts_validate);
 
+int tcf_exts_offload_init(struct tcf_exts *e,
+ struct switchdev_obj_port_flow_act *actions)
+{
+#ifdef CONFIG_NET_CLS_ACT
+   struct tc_action *act;
+   int err = 0;
+
+   list_for_each_entry(act, &e->actions, list) {
+   if (!act->ops->offload_init) {
+   pr_err("Action %s doesn't have offload support\n",
+  act->ops->kind);
+   err = -EINVAL;
+   break;
+   }
+   err = act->ops->offload_init(act, actions);
+   if (err)
+   break;
+   }
+
+   return err;
+#else
+   return -EOPNOTSUPP;
+#endif
+}
+EXPORT_SYMBOL(tcf_exts_offload_init);
+
 void tcf_exts_change(struct tcf_proto *tp, struct tcf_exts *dst,
 struct tcf_exts *src)
 {
-- 
2.7.0



[RFC net-next 5/9] net/act_gact: Introduce hardware offload support for drop

2016-02-01 Thread Amir Vadai
Enable hardware offloaded packet dropping when filter is marked with
'offload' attribute.

Signed-off-by: Amir Vadai 
---
 net/sched/act_gact.c | 17 +
 1 file changed, 17 insertions(+)

diff --git a/net/sched/act_gact.c b/net/sched/act_gact.c
index 5c1b051..b639b18 100644
--- a/net/sched/act_gact.c
+++ b/net/sched/act_gact.c
@@ -20,6 +20,7 @@
 #include 
 #include 
 #include 
+#include 
 #include 
 #include 
 
@@ -183,6 +184,21 @@ nla_put_failure:
return -1;
 }
 
+static int tcf_gact_offload_init(struct tc_action *a,
+struct switchdev_obj_port_flow_act *obj)
+{
+   struct tcf_gact *gact = a->priv;
+
+   if (gact->tcf_action == TC_ACT_SHOT) {
+   obj->actions |= BIT(SWITCHDEV_OBJ_PORT_FLOW_ACT_DROP);
+
+   return 0;
+   }
+
+   pr_err("Only 'drop' is supported for offloaded gact\n");
+   return -ENOTSUPP;
+}
+
 static struct tc_action_ops act_gact_ops = {
.kind   =   "gact",
.type   =   TCA_ACT_GACT,
@@ -190,6 +206,7 @@ static struct tc_action_ops act_gact_ops = {
.act=   tcf_gact,
.dump   =   tcf_gact_dump,
.init   =   tcf_gact_init,
+   .offload_init   =   tcf_gact_offload_init,
 };
 
 MODULE_AUTHOR("Jamal Hadi Salim(2002-4)");
-- 
2.7.0



[RFC net-next 6/9] net/cls_flower: Introduce hardware offloading

2016-02-01 Thread Amir Vadai
During initialization, tcf_exts_offload_init() is called to initialize
the list of actions description. Later on, the classifier description
is prepared and sent to the switchdev using switchdev_port_flow_add().

When offloaded, fl_classify() is a NOP - already done in hardware.

Signed-off-by: Amir Vadai 
---
 include/uapi/linux/pkt_cls.h |  1 +
 net/sched/cls_flower.c   | 54 ++--
 2 files changed, 53 insertions(+), 2 deletions(-)

diff --git a/include/uapi/linux/pkt_cls.h b/include/uapi/linux/pkt_cls.h
index 4398737..c18e82d 100644
--- a/include/uapi/linux/pkt_cls.h
+++ b/include/uapi/linux/pkt_cls.h
@@ -416,6 +416,7 @@ enum {
TCA_FLOWER_KEY_TCP_DST, /* be16 */
TCA_FLOWER_KEY_UDP_SRC, /* be16 */
TCA_FLOWER_KEY_UDP_DST, /* be16 */
+   TCA_FLOWER_OFFLOAD, /* flag */
__TCA_FLOWER_MAX,
 };
 
diff --git a/net/sched/cls_flower.c b/net/sched/cls_flower.c
index 95b0212..e36d408 100644
--- a/net/sched/cls_flower.c
+++ b/net/sched/cls_flower.c
@@ -22,6 +22,7 @@
 #include 
 #include 
 #include 
+#include 
 
 struct fl_flow_key {
int indev_ifindex;
@@ -56,6 +57,7 @@ struct cls_fl_head {
struct list_head filters;
struct rhashtable_params ht_params;
struct rcu_head rcu;
+   bool offload;
 };
 
 struct cls_fl_filter {
@@ -67,6 +69,7 @@ struct cls_fl_filter {
struct list_head list;
u32 handle;
struct rcu_head rcu;
+   struct net_device *indev;
 };
 
 static unsigned short int fl_mask_range(const struct fl_flow_mask *mask)
@@ -123,6 +126,9 @@ static int fl_classify(struct sk_buff *skb, const struct 
tcf_proto *tp,
struct fl_flow_key skb_key;
struct fl_flow_key skb_mkey;
 
+   if (head->offload)
+   return -1;
+
fl_clear_masked_range(&skb_key, &head->mask);
skb_key.indev_ifindex = skb->skb_iif;
/* skb_flow_dissect() does not set n_proto in case an unknown protocol,
@@ -174,6 +180,9 @@ static bool fl_destroy(struct tcf_proto *tp, bool force)
return false;
 
list_for_each_entry_safe(f, next, &head->filters, list) {
+   if (head->offload)
+   switchdev_port_flow_del(f->indev, (unsigned long)f);
+
list_del_rcu(&f->list);
call_rcu(&f->rcu, fl_destroy_filter);
}
@@ -396,9 +405,11 @@ static int fl_check_assign_mask(struct cls_fl_head *head,
 }
 
 static int fl_set_parms(struct net *net, struct tcf_proto *tp,
+   struct cls_fl_head *head,
struct cls_fl_filter *f, struct fl_flow_mask *mask,
unsigned long base, struct nlattr **tb,
-   struct nlattr *est, bool ovr)
+   struct nlattr *est, bool ovr,
+   struct switchdev_obj_port_flow_act *actions)
 {
struct tcf_exts e;
int err;
@@ -413,6 +424,8 @@ static int fl_set_parms(struct net *net, struct tcf_proto 
*tp,
tcf_bind_filter(tp, &f->res, base);
}
 
+   head->offload = nla_get_flag(tb[TCA_FLOWER_OFFLOAD]);
+
err = fl_set_key(net, tb, &f->key, &mask->key);
if (err)
goto errout;
@@ -420,6 +433,24 @@ static int fl_set_parms(struct net *net, struct tcf_proto 
*tp,
fl_mask_update_range(mask);
fl_set_masked_key(&f->mkey, &f->key, mask);
 
+   if (head->offload) {
+   if (!f->key.indev_ifindex) {
+   pr_err("indev must be set when using offloaded 
filter\n");
+   err = -EINVAL;
+   goto errout;
+   }
+
+   f->indev = __dev_get_by_index(net, f->key.indev_ifindex);
+   if (!f->indev) {
+   err = -EINVAL;
+   goto errout;
+   }
+
+   err = tcf_exts_offload_init(&e, actions);
+   if (err)
+   goto errout;
+   }
+
tcf_exts_change(tp, &f->exts, &e);
 
return 0;
@@ -459,6 +490,7 @@ static int fl_change(struct net *net, struct sk_buff 
*in_skb,
struct cls_fl_filter *fnew;
struct nlattr *tb[TCA_FLOWER_MAX + 1];
struct fl_flow_mask mask = {};
+   struct switchdev_obj_port_flow_act actions = {};
int err;
 
if (!tca[TCA_OPTIONS])
@@ -486,7 +518,8 @@ static int fl_change(struct net *net, struct sk_buff 
*in_skb,
}
fnew->handle = handle;
 
-   err = fl_set_parms(net, tp, fnew, &mask, base, tb, tca[TCA_RATE], ovr);
+   err = fl_set_parms(net, tp, head, fnew, &mask, base, tb,
+  tca[TCA_RATE], ovr, &actions);
if (err)
goto errout;
 
@@ -494,6 +527,17 @@ static int fl_change(s

[RFC net-next 4/9] net/act_skbedit: Introduce hardware offload support

2016-02-01 Thread Amir Vadai
Currently only 'mark' operation is supported when hardware offload is
requested.

Signed-off-by: Amir Vadai 
---
 net/sched/act_skbedit.c | 18 ++
 1 file changed, 18 insertions(+)

diff --git a/net/sched/act_skbedit.c b/net/sched/act_skbedit.c
index 6751b5f..3113dfc 100644
--- a/net/sched/act_skbedit.c
+++ b/net/sched/act_skbedit.c
@@ -23,6 +23,7 @@
 #include 
 #include 
 #include 
+#include 
 
 #include 
 #include 
@@ -173,6 +174,22 @@ nla_put_failure:
return -1;
 }
 
+static int tcf_skbedit_offload_init(struct tc_action *a,
+   struct switchdev_obj_port_flow_act *obj)
+{
+   struct tcf_skbedit *d = a->priv;
+
+   if (d->flags == SKBEDIT_F_MARK) {
+   obj->actions |= BIT(SWITCHDEV_OBJ_PORT_FLOW_ACT_MARK);
+   obj->mark = d->mark;
+
+   return 0;
+   }
+
+   pr_err("Only 'mark' is supported for offloaded skbedit\n");
+   return -ENOTSUPP;
+}
+
 static struct tc_action_ops act_skbedit_ops = {
.kind   =   "skbedit",
.type   =   TCA_ACT_SKBEDIT,
@@ -180,6 +197,7 @@ static struct tc_action_ops act_skbedit_ops = {
.act=   tcf_skbedit,
.dump   =   tcf_skbedit_dump,
.init   =   tcf_skbedit_init,
+   .offload_init   =   tcf_skbedit_offload_init,
 };
 
 MODULE_AUTHOR("Alexander Duyck, ");
-- 
2.7.0



[RFC net-next 7/9] net/mlx5_core: Go to next flow table support

2016-02-01 Thread Amir Vadai
When the destination is NULL, continue processing the packet in the
following table.
This will be used by the offloads table to process the traffic before any
other table (without it needing to know which table is next).

Signed-off-by: Amir Vadai 
---
 drivers/net/ethernet/mellanox/mlx5/core/fs_core.c | 19 +++
 1 file changed, 19 insertions(+)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c 
b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c
index 6f68dba..fb3717a 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c
@@ -993,9 +993,27 @@ mlx5_add_flow_rule(struct mlx5_flow_table *ft,
   u32 flow_tag,
   struct mlx5_flow_destination *dest)
 {
+   struct mlx5_flow_destination *my_dest = NULL;
struct mlx5_flow_group *g;
struct mlx5_flow_rule *rule;
 
+   if (!dest) {
+   struct mlx5_flow_table *next_ft;
+   struct fs_prio *prio;
+
+   fs_get_obj(prio, ft->node.parent);
+   next_ft = find_next_chained_ft(prio);
+   if (!next_ft) {
+   pr_warn("There is no next flow table\n");
+   return ERR_PTR(-EINVAL);
+   }
+   my_dest = kzalloc(sizeof(*my_dest), GFP_KERNEL);
+   if (!my_dest)
+   return ERR_PTR(-ENOMEM);
+   my_dest->type = MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE;
+   my_dest->ft = next_ft;
+   dest = my_dest;
+   }
nested_lock_ref_node(&ft->node, FS_MUTEX_GRANDPARENT);
fs_for_each_fg(g, ft)
if (compare_match_criteria(g->mask.match_criteria_enable,
@@ -1012,6 +1030,7 @@ mlx5_add_flow_rule(struct mlx5_flow_table *ft,
   match_value, action, flow_tag, dest);
 unlock:
unlock_ref_node(&ft->node);
+   kfree(my_dest);
return rule;
 }
 EXPORT_SYMBOL(mlx5_add_flow_rule);
-- 
2.7.0



[RFC net-next 9/9] net/mlx5e: Flow steering support through switchdev

2016-02-01 Thread Amir Vadai
Parse switchdev flow object into device specific commands and program
the hardware to classify and mark/drop the flow accordingly.

A new Kconfig option is introduced: MLX5_EN_SWITCHDEV. This option allows
the driver to be compiled when switchdev is not compiled.

Signed-off-by: Amir Vadai 
---
 drivers/net/ethernet/mellanox/mlx5/core/Kconfig|   7 +
 drivers/net/ethernet/mellanox/mlx5/core/Makefile   |   3 +
 drivers/net/ethernet/mellanox/mlx5/core/en.h   |  10 +
 drivers/net/ethernet/mellanox/mlx5/core/en_fs.c|  10 +-
 drivers/net/ethernet/mellanox/mlx5/core/en_main.c  |   2 +
 drivers/net/ethernet/mellanox/mlx5/core/en_rx.c|   2 +
 .../net/ethernet/mellanox/mlx5/core/en_switchdev.c | 475 +
 .../net/ethernet/mellanox/mlx5/core/en_switchdev.h |  60 +++
 8 files changed, 568 insertions(+), 1 deletion(-)
 create mode 100644 drivers/net/ethernet/mellanox/mlx5/core/en_switchdev.c
 create mode 100644 drivers/net/ethernet/mellanox/mlx5/core/en_switchdev.h

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/Kconfig 
b/drivers/net/ethernet/mellanox/mlx5/core/Kconfig
index c503ea0..61a9eed 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/Kconfig
+++ b/drivers/net/ethernet/mellanox/mlx5/core/Kconfig
@@ -19,3 +19,10 @@ config MLX5_CORE_EN
  Ethernet support in Mellanox Technologies ConnectX-4 NIC.
  Ethernet and Infiniband support in ConnectX-4 are currently mutually
  exclusive.
+
+config MLX5_EN_SWITCHDEV
+   bool "MLX5 EN switchdev support"
+   depends on MLX5_CORE_EN && NET_SWITCHDEV
+   default y
+   ---help---
+ Switchdev support in Mellanox Technologies ConnectX-4 NIC.
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/Makefile 
b/drivers/net/ethernet/mellanox/mlx5/core/Makefile
index 01c0256..b80143e 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/Makefile
+++ b/drivers/net/ethernet/mellanox/mlx5/core/Makefile
@@ -3,6 +3,9 @@ obj-$(CONFIG_MLX5_CORE) += mlx5_core.o
 mlx5_core-y := main.o cmd.o debugfs.o fw.o eq.o uar.o pagealloc.o \
health.o mcg.o cq.o srq.o alloc.o qp.o port.o mr.o pd.o   \
mad.o transobj.o vport.o sriov.o fs_cmd.o fs_core.o
+
+mlx5_core-$(CONFIG_MLX5_EN_SWITCHDEV) += en_switchdev.o
+
 mlx5_core-$(CONFIG_MLX5_CORE_EN) += wq.o eswitch.o \
en_main.o en_fs.o en_ethtool.o en_tx.o en_rx.o \
en_txrx.o en_clock.o
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en.h 
b/drivers/net/ethernet/mellanox/mlx5/core/en.h
index 9ea49a8..e61a67c 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en.h
@@ -39,6 +39,8 @@
 #include 
 #include 
 #include 
+#include 
+#include 
 #include "wq.h"
 #include "transobj.h"
 #include "mlx5_core.h"
@@ -497,8 +499,16 @@ struct mlx5e_flow_table {
struct mlx5_flow_group  **g;
 };
 
+struct mlx5e_offloads_flow_table {
+   struct mlx5_flow_table  *t;
+
+   struct rhashtable_paramsht_params;
+   struct rhashtable   ht;
+};
+
 struct mlx5e_flow_tables {
struct mlx5_flow_namespace  *ns;
+   struct mlx5e_offloads_flow_table  offloads;
struct mlx5e_flow_table vlan;
struct mlx5e_flow_table main;
 };
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_fs.c 
b/drivers/net/ethernet/mellanox/mlx5/core/en_fs.c
index 80d81ab..0fbe45c 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_fs.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_fs.c
@@ -36,6 +36,7 @@
 #include 
 #include 
 #include "en.h"
+#include "en_switchdev.h"
 
 #define MLX5_SET_CFG(p, f, v) MLX5_SET(create_flow_group_in, p, f, v)
 
@@ -1202,12 +1203,18 @@ int mlx5e_create_flow_tables(struct mlx5e_priv *priv)
if (err)
goto err_destroy_vlan_flow_table;
 
-   err = mlx5e_add_vlan_rule(priv, MLX5E_VLAN_RULE_TYPE_UNTAGGED, 0);
+   err = mlx5e_create_offloads_flow_table(priv);
if (err)
goto err_destroy_main_flow_table;
 
+   err = mlx5e_add_vlan_rule(priv, MLX5E_VLAN_RULE_TYPE_UNTAGGED, 0);
+   if (err)
+   goto err_destroy_offloads_flow_table;
+
return 0;
 
+err_destroy_offloads_flow_table:
+   mlx5e_destroy_offloads_flow_table(priv);
 err_destroy_main_flow_table:
mlx5e_destroy_main_flow_table(priv);
 err_destroy_vlan_flow_table:
@@ -1219,6 +1226,7 @@ err_destroy_vlan_flow_table:
 void mlx5e_destroy_flow_tables(struct mlx5e_priv *priv)
 {
mlx5e_del_vlan_rule(priv, MLX5E_VLAN_RULE_TYPE_UNTAGGED, 0);
+   mlx5e_destroy_offloads_flow_table(priv);
mlx5e_destroy_main_flow_table(priv);
mlx5e_destroy_vlan_flow_table(priv);
 }
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c 
b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
index 5c74a73..4bc9243 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_mai

[RFC net-next 8/9] net/mlx5e: Introduce MLX5_FLOW_NAMESPACE_OFFLOADS

2016-02-01 Thread Amir Vadai
A new namespace to be populated with flow steering rules that deal with
offloading rules (matching and/or actions) set for higher level entities
such as the TC subsystem.
This namespace is located after the bypass namespace and before the
kernel.
Therefore, it precedes the HW processing done for rules set for the
kernel NIC name-space.
This would allow conducting actions such as HW drop or HW setting of
flow tag which will later become skb->mark for packets, before matching
by the kernel name space rules used by the EN NIC.

Signed-off-by: Amir Vadai 
---
 drivers/net/ethernet/mellanox/mlx5/core/fs_core.c | 7 +++
 include/linux/mlx5/fs.h   | 1 +
 2 files changed, 8 insertions(+)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c 
b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c
index fb3717a..ffe1397 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c
@@ -77,6 +77,10 @@
 #define KERNEL_NUM_PRIOS 1
 #define KENREL_MIN_LEVEL 2
 
+#define OFFLOADS_MAX_FT 1
+#define OFFLOADS_NUM_PRIOS 1
+#define OFFLOADS_MIN_LEVEL (BY_PASS_MIN_LEVEL + 1)
+
 struct node_caps {
size_t  arr_sz;
long*caps;
@@ -100,6 +104,8 @@ static struct init_tree_node {
  
FS_CAP(flow_table_properties_nic_receive.identified_miss_table_mode),
  
FS_CAP(flow_table_properties_nic_receive.flow_table_modify)),
 ADD_NS(ADD_MULTIPLE_PRIO(MLX5_BY_PASS_NUM_PRIOS, 
BY_PASS_PRIO_MAX_FT))),
+   ADD_PRIO(0, OFFLOADS_MIN_LEVEL, 0, {},
+ADD_NS(ADD_MULTIPLE_PRIO(OFFLOADS_NUM_PRIOS, 
OFFLOADS_MAX_FT))),
ADD_PRIO(0, KENREL_MIN_LEVEL, 0, {},
 ADD_NS(ADD_MULTIPLE_PRIO(KERNEL_NUM_PRIOS, 
KERNEL_MAX_FT))),
ADD_PRIO(0, BY_PASS_MIN_LEVEL, 0,
@@ -1143,6 +1149,7 @@ struct mlx5_flow_namespace 
*mlx5_get_flow_namespace(struct mlx5_core_dev *dev,
 
switch (type) {
case MLX5_FLOW_NAMESPACE_BYPASS:
+   case MLX5_FLOW_NAMESPACE_OFFLOADS:
case MLX5_FLOW_NAMESPACE_KERNEL:
case MLX5_FLOW_NAMESPACE_LEFTOVERS:
prio = type;
diff --git a/include/linux/mlx5/fs.h b/include/linux/mlx5/fs.h
index 8230caa..40e79e2 100644
--- a/include/linux/mlx5/fs.h
+++ b/include/linux/mlx5/fs.h
@@ -50,6 +50,7 @@ static inline void build_leftovers_ft_param(int *priority,
 
 enum mlx5_flow_namespace_type {
MLX5_FLOW_NAMESPACE_BYPASS,
+   MLX5_FLOW_NAMESPACE_OFFLOADS,
MLX5_FLOW_NAMESPACE_KERNEL,
MLX5_FLOW_NAMESPACE_LEFTOVERS,
MLX5_FLOW_NAMESPACE_FDB,
-- 
2.7.0



Re: [RFC net-next 6/9] net/cls_flower: Introduce hardware offloading

2016-02-01 Thread Amir Vadai
On Mon, Feb 01, 2016 at 01:31:17AM -0800, John Fastabend wrote:
> On 16-02-01 12:34 AM, Amir Vadai wrote:
> > During initialization, tcf_exts_offload_init() is called to initialize
> > the list of actions description. later on, the classifier description
> > is prepared and sent to the switchdev using switchdev_port_flow_add().
> > 
> > When offloaded, fl_classify() is a NOP - already done in hardware.
> > 
> > Signed-off-by: Amir Vadai 
> > ---
> 
> You need to account for where the classifier is being loaded
> by passing the handle as I did in my patch set. Otherwise you may
> be offloading on egress/ingress or even some qdisc multiple layers
> down in the hierarchy.
Right. Will fix it.

> 
> .John
> 


Re: [RFC net-next 0/9] TC filter HW offloads

2016-02-01 Thread Amir Vadai
On Mon, Feb 01, 2016 at 01:21:36AM -0800, John Fastabend wrote:
> On 16-02-01 12:34 AM, Amir Vadai wrote:
> > Hi,
> > 
> > So... just before sending that, I noted John's series that
> > deals with tc and u32. One notable difference between the 
> > two approaches is that here we "normalize" the upper layer
> > way of describing matching and actions into a generic structure
> > (flow dissector, etc), which should allow to use offload different
> > potential consumer tools (TC flower, TC u32 subset), netfilter, etc).
> 
> Except its not really normalizing anything in this patchset
> right? For a "real" normalizing I would expect the netdev
> needs to advertise its parse graph and headers in a protocol
> oblivious way, along with the table setup and this middle
> layer needs to map the general software side onto the hardware
> side. I tried this and I came to the conclusion I would just
> push rules down at the hardware at least for now until I get
> enough hardware implementations to see if there really is any
> advantage in this sort of generic middle layer. My main concern
> is its slow and table layout, hardware architecture both try
> to fight you when doing this. It can be done I'm just not sure
> its worth it yet.
What I was trying to do is to find an extensible API to describe the
rules. And yes, like in your design, the device doesn't advertise its
capabilities, only whether it is capable of doing any offloading. The
consumer pushes the rules and the device returns success/fail.

Using u32 filter is nice since it is a very universal classifier (and
you did implement parsing it in a very elegant way), but I'm not sure I
like having in device drivers a specific code for different filters. So,
if another consumer, for example the flower filter or netfilter, would
want to use this api, it will need to speak the u32 language, or have
its own implementation in the device driver?

> 
> Also just as an aside flower can be emulated with u32 which can
> be emulated with bpf, I don't think the structures here are
> generic.
This is why I used flow dissector - because it is a very abstract way to
pass the classifications.
If it is not flexible enough, maybe we could split the current flow
dissector code into (1) a generic API to describe structures in an
abstract way (the offsets, bitmap, and structs), and (2) the code that
is used to dissect skb's. This way we could express stuff using (1) that
is not related to (2).

> 
> > Another difference is with this series uses the switchdev
> > framework which would allow using the proposed HW offloading
> > mechanisms for physical and SRIOV embedded switches too that
> > make use of switchdev.
> 
> But 'tc' infrastructure is useful even without SRIOV or any
> switching at all. I don't think it needs to go into switchdev.
> Even my vanilla 10G nic can drop/mark pkts coming onto the
> physical functions.
OK, we could work it out - as you suggested, in a similar way to what
fdb_add is doing.

> 
> > 
> > This patchset introduces an infrastructure to offload matching of flows and
> > some basic actions to hardware, currently using iproute2 / tc tool.
> > 
> > In this patchset, the classification is described using the flower filter, 
> > and
> > the supported actions are drop (using gact) and mark (using skbedit).
> > 
> 
> ditto I just didn't show the mark patch set on my side. I also would
> like to get pedit shortly.
> 
> > Flow classification is described using a flow dissector that is built by 
> > the tc filter. The filter also calls the actions to be serialized into the 
> > new
> > structure - switchdev_obj_port_flow_act.
> > 
> > The flow dissector and the serialized actions are passed using switchdev 
> > ops to
> > the HW driver, which parse it to hardware commands. We propose to use the
> > kernel flow-dissector to describe flows/ACLs in the switchdev framework 
> > which
> > by itself could be also used for HW offloading of other kernel networking
> > components.
> 
> I'm not sure I like this or at least I don't want to make this the
> exclusive mechanism. I think bpf/u32 are more flexible. In general
> I'm opposed to getting stuck talking about specific protocols I want
> this to be flexible so I don't need a new thing everytime folks add
> a new header/bit/field/etc. If you use flow-dissector to describe
> flows your limiting the hardware. Also I'm sure I'll want to match on
> fields that flow-dissector doesn't care about and really never should
> care about think HTTP for example.
I agree that we need a flexible way to express the classifiers. I'm not
sure that I see

Re: [PATCH net-next] net/mlx5_core: Set log_uar_page_sz for non 4K page size architecture

2015-08-06 Thread Amir Vadai
On 8/5/2015 7:05 PM, cls...@linux.vnet.ibm.com wrote:
> From: Carol L Soto 
> 
> failed to configure the page size for architectures with page size
> different than 4K.
> 
> Signed-off-by: Carol L Soto 
> ---

Please pull this patch into kernel 4.2

Fixes: 938fe83 ("net/mlx5_core: New device capabilities handling")
Acked-by: Amir Vadai 

--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH net-next 0/6] ConnectX-4 driver update 2015-07-23

2015-07-23 Thread Amir Vadai
Hi Dave,

This patchset introduces some performance enhancements to the ConnectX-4 driver.
1. Improve RSS distribution, and make the RSS function controllable using ethtool.
2. Allocate memory that is written by the NIC and read by the host CPU on the
   NUMA node local to the processing CPU
3. Support tx copybreak
4. Use a hardware feature called blueflame to save DMA reads when possible

Another patch by Achiad fixes some cosmetic issues in the driver.

Patchset was applied and tested on top of commit 045a0fa ("ip_tunnel: Call
ip_tunnel_core_init() from inet_init()")

Thanks,
Amir

Achiad Shochat (4):
  net/mlx5e: Support TX packet copy into WQE
  net/mlx5e: TX latency optimization to save DMA reads
  net/mlx5e: Cosmetics: use BIT() instead of "1 <<", and others
  net/mlx5e: Input IPSEC.SPI into the RX RSS hash function

Saeed Mahameed (2):
  net/mlx5e: Support ETH_RSS_HASH_XOR
  net/mlx5e: Allocate DMA coherent memory on reader NUMA node

 drivers/net/ethernet/mellanox/mlx5/core/alloc.c|  48 +++-
 drivers/net/ethernet/mellanox/mlx5/core/en.h   |  47 ++--
 .../net/ethernet/mellanox/mlx5/core/en_ethtool.c   |  92 
 .../ethernet/mellanox/mlx5/core/en_flow_table.c| 258 ++---
 drivers/net/ethernet/mellanox/mlx5/core/en_main.c  | 134 ---
 drivers/net/ethernet/mellanox/mlx5/core/en_tx.c|  34 ++-
 drivers/net/ethernet/mellanox/mlx5/core/main.c |  32 ++-
 drivers/net/ethernet/mellanox/mlx5/core/uar.c  |   6 +
 drivers/net/ethernet/mellanox/mlx5/core/wq.c   |  12 +-
 drivers/net/ethernet/mellanox/mlx5/core/wq.h   |   3 +-
 include/linux/mlx5/driver.h|  12 +-
 include/linux/mlx5/mlx5_ifc.h  |   6 +-
 12 files changed, 535 insertions(+), 149 deletions(-)

-- 
2.4.3.413.ga5fe668

--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH net-next 6/6] net/mlx5e: Input IPSEC.SPI into the RX RSS hash function

2015-07-23 Thread Amir Vadai
From: Achiad Shochat 

In addition to the source/destination IP which are already hashed.
Only for unicast traffic for now.

Signed-off-by: Achiad Shochat 
Signed-off-by: Amir Vadai 
---
 drivers/net/ethernet/mellanox/mlx5/core/en.h   |  4 +
 .../ethernet/mellanox/mlx5/core/en_flow_table.c| 92 +-
 drivers/net/ethernet/mellanox/mlx5/core/en_main.c  | 32 
 3 files changed, 127 insertions(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en.h 
b/drivers/net/ethernet/mellanox/mlx5/core/en.h
index 39294f2..b710e9b 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en.h
@@ -334,6 +334,10 @@ enum mlx5e_traffic_types {
MLX5E_TT_IPV6_TCP,
MLX5E_TT_IPV4_UDP,
MLX5E_TT_IPV6_UDP,
+   MLX5E_TT_IPV4_IPSEC_AH,
+   MLX5E_TT_IPV6_IPSEC_AH,
+   MLX5E_TT_IPV4_IPSEC_ESP,
+   MLX5E_TT_IPV6_IPSEC_ESP,
MLX5E_TT_IPV4,
MLX5E_TT_IPV6,
MLX5E_TT_ANY,
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_flow_table.c 
b/drivers/net/ethernet/mellanox/mlx5/core/en_flow_table.c
index cca34f6..70ec31b 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_flow_table.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_flow_table.c
@@ -105,6 +105,22 @@ static void mlx5e_del_eth_addr_from_flow_table(struct 
mlx5e_priv *priv,
 {
void *ft = priv->ft.main;
 
+   if (ai->tt_vec & BIT(MLX5E_TT_IPV6_IPSEC_ESP))
+   mlx5_del_flow_table_entry(ft,
+ ai->ft_ix[MLX5E_TT_IPV6_IPSEC_ESP]);
+
+   if (ai->tt_vec & BIT(MLX5E_TT_IPV4_IPSEC_ESP))
+   mlx5_del_flow_table_entry(ft,
+ ai->ft_ix[MLX5E_TT_IPV4_IPSEC_ESP]);
+
+   if (ai->tt_vec & BIT(MLX5E_TT_IPV6_IPSEC_AH))
+   mlx5_del_flow_table_entry(ft,
+ ai->ft_ix[MLX5E_TT_IPV6_IPSEC_AH]);
+
+   if (ai->tt_vec & BIT(MLX5E_TT_IPV4_IPSEC_AH))
+   mlx5_del_flow_table_entry(ft,
+ ai->ft_ix[MLX5E_TT_IPV4_IPSEC_AH]);
+
if (ai->tt_vec & BIT(MLX5E_TT_IPV6_TCP))
mlx5_del_flow_table_entry(ft, ai->ft_ix[MLX5E_TT_IPV6_TCP]);
 
@@ -160,6 +176,10 @@ static u32 mlx5e_get_tt_vec(struct mlx5e_eth_addr_info 
*ai, int type)
BIT(MLX5E_TT_IPV6_TCP)   |
BIT(MLX5E_TT_IPV4_UDP)   |
BIT(MLX5E_TT_IPV6_UDP)   |
+   BIT(MLX5E_TT_IPV4_IPSEC_AH)  |
+   BIT(MLX5E_TT_IPV6_IPSEC_AH)  |
+   BIT(MLX5E_TT_IPV4_IPSEC_ESP) |
+   BIT(MLX5E_TT_IPV6_IPSEC_ESP) |
BIT(MLX5E_TT_IPV4)   |
BIT(MLX5E_TT_IPV6)   |
BIT(MLX5E_TT_ANY)|
@@ -205,6 +225,10 @@ static u32 mlx5e_get_tt_vec(struct mlx5e_eth_addr_info 
*ai, int type)
BIT(MLX5E_TT_IPV6_TCP)   |
BIT(MLX5E_TT_IPV4_UDP)   |
BIT(MLX5E_TT_IPV6_UDP)   |
+   BIT(MLX5E_TT_IPV4_IPSEC_AH)  |
+   BIT(MLX5E_TT_IPV6_IPSEC_AH)  |
+   BIT(MLX5E_TT_IPV4_IPSEC_ESP) |
+   BIT(MLX5E_TT_IPV6_IPSEC_ESP) |
BIT(MLX5E_TT_IPV4)   |
BIT(MLX5E_TT_IPV6)   |
BIT(MLX5E_TT_ANY)|
@@ -377,6 +401,72 @@ static int __mlx5e_add_eth_addr_rule(struct mlx5e_priv 
*priv,
ai->tt_vec |= BIT(MLX5E_TT_IPV6_TCP);
}
 
+   MLX5_SET(fte_match_param, match_value, outer_headers.ip_protocol,
+IPPROTO_AH);
+
+   ft_ix = &ai->ft_ix[MLX5E_TT_IPV4_IPSEC_AH];
+   if (tt_vec & BIT(MLX5E_TT_IPV4_IPSEC_AH)) {
+   MLX5_SET(fte_match_param, match_value, outer_headers.ethertype,
+ETH_P_IP);
+   MLX5_SET(dest_format_struct, dest, destination_id,
+tirn[MLX5E_TT_IPV4_IPSEC_AH]);
+   err = mlx5_add_flow_table_entry(ft, match_criteria_enable,
+   match_criteria, flow_context,
+   ft_ix);
+   if (err)
+   goto err_del_ai;
+
+   ai->tt_vec |= BIT(MLX5E_TT_IPV4_IPSEC_AH);
+   }
+
+   ft_ix = &ai->ft_ix[MLX5E_TT_IPV6_IPSEC_AH];
+   if (tt_vec & BIT(MLX5E_TT_IPV6_IPSEC_AH)) {
+   MLX5_SET(fte_match_param, match_value, outer_headers.ethertype,
+ETH_P_IPV6);
+   MLX5_SET(dest_format_struct, dest, destination_id,
+

[PATCH net-next 2/6] net/mlx5e: Allocate DMA coherent memory on reader NUMA node

2015-07-23 Thread Amir Vadai
From: Saeed Mahameed 

By affinity hints and XPS, each mlx5e channel is assigned a CPU
core.

Channel DMA coherent memory that is written by the NIC and read
by SW (e.g CQ buffer) is allocated on the NUMA node of the CPU
core assigned for the channel.

Channel DMA coherent memory that is written by SW and read by the
NIC (e.g SQ/RQ buffer) is allocated on the NUMA node of the NIC.

Doorbell record (written by SW and read by the NIC) is an
exception since it is accessed by SW more frequently.

Signed-off-by: Saeed Mahameed 
Signed-off-by: Amir Vadai 
---
 drivers/net/ethernet/mellanox/mlx5/core/alloc.c   | 48 +++
 drivers/net/ethernet/mellanox/mlx5/core/en_main.c | 11 --
 drivers/net/ethernet/mellanox/mlx5/core/main.c|  6 ++-
 drivers/net/ethernet/mellanox/mlx5/core/wq.c  | 12 +++---
 drivers/net/ethernet/mellanox/mlx5/core/wq.h  |  3 +-
 include/linux/mlx5/driver.h   |  8 
 6 files changed, 70 insertions(+), 18 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/alloc.c 
b/drivers/net/ethernet/mellanox/mlx5/core/alloc.c
index 0715b49..6cb3830 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/alloc.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/alloc.c
@@ -45,15 +45,34 @@
  * register it in a memory region at HCA virtual address 0.
  */
 
-int mlx5_buf_alloc(struct mlx5_core_dev *dev, int size, struct mlx5_buf *buf)
+static void *mlx5_dma_zalloc_coherent_node(struct mlx5_core_dev *dev,
+  size_t size, dma_addr_t *dma_handle,
+  int node)
+{
+   struct mlx5_priv *priv = &dev->priv;
+   int original_node;
+   void *cpu_handle;
+
+   mutex_lock(&priv->alloc_mutex);
+   original_node = dev_to_node(&dev->pdev->dev);
+   set_dev_node(&dev->pdev->dev, node);
+   cpu_handle = dma_zalloc_coherent(&dev->pdev->dev, size,
+dma_handle, GFP_KERNEL);
+   set_dev_node(&dev->pdev->dev, original_node);
+   mutex_unlock(&priv->alloc_mutex);
+   return cpu_handle;
+}
+
+int mlx5_buf_alloc_node(struct mlx5_core_dev *dev, int size,
+   struct mlx5_buf *buf, int node)
 {
dma_addr_t t;
 
buf->size = size;
buf->npages   = 1;
buf->page_shift   = (u8)get_order(size) + PAGE_SHIFT;
-   buf->direct.buf   = dma_zalloc_coherent(&dev->pdev->dev,
-   size, &t, GFP_KERNEL);
+   buf->direct.buf   = mlx5_dma_zalloc_coherent_node(dev, size,
+ &t, node);
if (!buf->direct.buf)
return -ENOMEM;
 
@@ -66,6 +85,11 @@ int mlx5_buf_alloc(struct mlx5_core_dev *dev, int size, 
struct mlx5_buf *buf)
 
return 0;
 }
+
+int mlx5_buf_alloc(struct mlx5_core_dev *dev, int size, struct mlx5_buf *buf)
+{
+   return mlx5_buf_alloc_node(dev, size, buf, dev->priv.numa_node);
+}
 EXPORT_SYMBOL_GPL(mlx5_buf_alloc);
 
 void mlx5_buf_free(struct mlx5_core_dev *dev, struct mlx5_buf *buf)
@@ -75,7 +99,8 @@ void mlx5_buf_free(struct mlx5_core_dev *dev, struct mlx5_buf 
*buf)
 }
 EXPORT_SYMBOL_GPL(mlx5_buf_free);
 
-static struct mlx5_db_pgdir *mlx5_alloc_db_pgdir(struct device *dma_device)
+static struct mlx5_db_pgdir *mlx5_alloc_db_pgdir(struct mlx5_core_dev *dev,
+int node)
 {
struct mlx5_db_pgdir *pgdir;
 
@@ -84,8 +109,9 @@ static struct mlx5_db_pgdir *mlx5_alloc_db_pgdir(struct 
device *dma_device)
return NULL;
 
bitmap_fill(pgdir->bitmap, MLX5_DB_PER_PAGE);
-   pgdir->db_page = dma_alloc_coherent(dma_device, PAGE_SIZE,
-   &pgdir->db_dma, GFP_KERNEL);
+
+   pgdir->db_page = mlx5_dma_zalloc_coherent_node(dev, PAGE_SIZE,
+  &pgdir->db_dma, node);
if (!pgdir->db_page) {
kfree(pgdir);
return NULL;
@@ -118,7 +144,7 @@ static int mlx5_alloc_db_from_pgdir(struct mlx5_db_pgdir 
*pgdir,
return 0;
 }
 
-int mlx5_db_alloc(struct mlx5_core_dev *dev, struct mlx5_db *db)
+int mlx5_db_alloc_node(struct mlx5_core_dev *dev, struct mlx5_db *db, int node)
 {
struct mlx5_db_pgdir *pgdir;
int ret = 0;
@@ -129,7 +155,7 @@ int mlx5_db_alloc(struct mlx5_core_dev *dev, struct mlx5_db 
*db)
if (!mlx5_alloc_db_from_pgdir(pgdir, db))
goto out;
 
-   pgdir = mlx5_alloc_db_pgdir(&(dev->pdev->dev));
+   pgdir = mlx5_alloc_db_pgdir(dev, node);
if (!pgdir) {
ret = -ENOMEM;
goto out;
@@ -145,6 +171,12 @@ out:
 
return ret;
 }
+EXPORT_SYMBOL_GPL(mlx5_db_alloc_node);
+
+int mlx5_db_alloc(struct mlx5_co

[PATCH net-next 4/6] net/mlx5e: TX latency optimization to save DMA reads

2015-07-23 Thread Amir Vadai
From: Achiad Shochat 

A regular TX WQE execution involves two or more DMA reads -
one to fetch the WQE, and another one per WQE gather entry.

These DMA reads obviously increase the TX latency.
There are two mlx5 mechanisms to bypass these DMA reads:
1) Inline WQE
2) Blue Flame (BF)

An inline WQE contains a whole packet, thus saves the DMA read/s
of the regular WQE gather entry/s. Inline WQE support was already
added in the previous commit.

A BF WQE is written directly to the device I/O mapped memory, thus
enables saving the DMA read that fetches the WQE.

The BF WQE I/O write must be in cache line granularity, thus uses
the CPU write combining mechanism.
A BF WQE I/O write acts also as a TX doorbell for notifying the
device of new TX WQEs.
A BF WQE is written to the same I/O mapped address as the regular TX
doorbell, thus this address is being mapped twice - once by ioremap()
and once by io_mapping_map_wc().

While both mechanisms reduce the TX latency, they both consume more CPU
cycles than a regular WQE:
- A BF WQE must still be written to host memory, in addition to being
  written directly to the device I/O mapped memory.
- An inline WQE involves copying the SKB data into it.

To handle this tradeoff, we introduce here a heuristic algorithm that
strives to avoid using these two mechanisms in case the TX queue is
being back-pressured by the device, and limit their usage rate otherwise.

An inline WQE will always be "Blue Flamed" (written directly to the
device I/O mapped memory) while a BF WQE may not be inlined (may contain
gather entries).

Preliminary testing using netperf UDP_RR shows that the latency goes down
from 17.5us to 16.9us, while the message rate (tested with pktgen) stays
the same.

Signed-off-by: Achiad Shochat 
Signed-off-by: Amir Vadai 
---
 drivers/net/ethernet/mellanox/mlx5/core/en.h  | 24 +++--
 drivers/net/ethernet/mellanox/mlx5/core/en_main.c | 12 ++-
 drivers/net/ethernet/mellanox/mlx5/core/en_tx.c   | 26 ++-
 drivers/net/ethernet/mellanox/mlx5/core/main.c| 26 +--
 drivers/net/ethernet/mellanox/mlx5/core/uar.c |  6 ++
 include/linux/mlx5/driver.h   |  4 +++-
 6 files changed, 79 insertions(+), 19 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en.h 
b/drivers/net/ethernet/mellanox/mlx5/core/en.h
index d9dc506..b66edd2 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en.h
@@ -60,6 +60,7 @@
 
 #define MLX5E_TX_CQ_POLL_BUDGET128
 #define MLX5E_UPDATE_STATS_INTERVAL200 /* msecs */
+#define MLX5E_SQ_BF_BUDGET 16
 
 static const char vport_strings[][ETH_GSTRING_LEN] = {
/* vport statistics */
@@ -268,7 +269,9 @@ struct mlx5e_sq {
/* dirtied @xmit */
u16pc cacheline_aligned_in_smp;
u32dma_fifo_pc;
-   u32bf_offset;
+   u16bf_offset;
+   u16prev_cc;
+   u8 bf_budget;
struct mlx5e_sq_stats  stats;
 
struct mlx5e_cqcq;
@@ -281,9 +284,10 @@ struct mlx5e_sq {
struct mlx5_wq_cyc wq;
u32dma_fifo_mask;
void __iomem  *uar_map;
+   void __iomem  *uar_bf_map;
struct netdev_queue   *txq;
u32sqn;
-   u32bf_buf_size;
+   u16bf_buf_size;
u16max_inline;
u16edge;
struct device *pdev;
@@ -493,8 +497,10 @@ int mlx5e_update_priv_params(struct mlx5e_priv *priv,
 struct mlx5e_params *new_params);
 
 static inline void mlx5e_tx_notify_hw(struct mlx5e_sq *sq,
- struct mlx5e_tx_wqe *wqe)
+ struct mlx5e_tx_wqe *wqe, int bf_sz)
 {
+   u16 ofst = MLX5_BF_OFFSET + sq->bf_offset;
+
/* ensure wqe is visible to device before updating doorbell record */
dma_wmb();
 
@@ -505,9 +511,15 @@ static inline void mlx5e_tx_notify_hw(struct mlx5e_sq *sq,
 */
wmb();
 
-   mlx5_write64((__be32 *)&wqe->ctrl,
-sq->uar_map + MLX5_BF_OFFSET + sq->bf_offset,
-NULL);
+   if (bf_sz) {
+   __iowrite64_copy(sq->uar_bf_map + ofst, &wqe->ctrl, bf_sz);
+
+   /* flush the write-combining mapped buffer */
+   wmb();
+
+   } else {
+   mlx5_write64((__be32 *)&wqe->ctrl, sq->uar_map + ofst, NULL);
+   }
 
sq->bf_offset ^= sq->bf_buf_size;
 }
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c 
b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
inde

[PATCH net-next 5/6] net/mlx5e: Cosmetics: use BIT() instead of "1 <<", and others

2015-07-23 Thread Amir Vadai
From: Achiad Shochat 

No logical change in this commit.

Signed-off-by: Achiad Shochat 
Signed-off-by: Amir Vadai 
---
 drivers/net/ethernet/mellanox/mlx5/core/en.h   |  16 +-
 .../ethernet/mellanox/mlx5/core/en_flow_table.c| 166 +++--
 drivers/net/ethernet/mellanox/mlx5/core/en_main.c  |  20 +--
 3 files changed, 104 insertions(+), 98 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en.h 
b/drivers/net/ethernet/mellanox/mlx5/core/en.h
index b66edd2..39294f2 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en.h
@@ -330,14 +330,14 @@ struct mlx5e_channel {
 };
 
 enum mlx5e_traffic_types {
-   MLX5E_TT_IPV4_TCP = 0,
-   MLX5E_TT_IPV6_TCP = 1,
-   MLX5E_TT_IPV4_UDP = 2,
-   MLX5E_TT_IPV6_UDP = 3,
-   MLX5E_TT_IPV4 = 4,
-   MLX5E_TT_IPV6 = 5,
-   MLX5E_TT_ANY  = 6,
-   MLX5E_NUM_TT  = 7,
+   MLX5E_TT_IPV4_TCP,
+   MLX5E_TT_IPV6_TCP,
+   MLX5E_TT_IPV4_UDP,
+   MLX5E_TT_IPV6_UDP,
+   MLX5E_TT_IPV4,
+   MLX5E_TT_IPV6,
+   MLX5E_TT_ANY,
+   MLX5E_NUM_TT,
 };
 
 enum {
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_flow_table.c 
b/drivers/net/ethernet/mellanox/mlx5/core/en_flow_table.c
index 120db80..cca34f6 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_flow_table.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_flow_table.c
@@ -105,25 +105,25 @@ static void mlx5e_del_eth_addr_from_flow_table(struct 
mlx5e_priv *priv,
 {
void *ft = priv->ft.main;
 
-   if (ai->tt_vec & (1 << MLX5E_TT_IPV6_TCP))
+   if (ai->tt_vec & BIT(MLX5E_TT_IPV6_TCP))
mlx5_del_flow_table_entry(ft, ai->ft_ix[MLX5E_TT_IPV6_TCP]);
 
-   if (ai->tt_vec & (1 << MLX5E_TT_IPV4_TCP))
+   if (ai->tt_vec & BIT(MLX5E_TT_IPV4_TCP))
mlx5_del_flow_table_entry(ft, ai->ft_ix[MLX5E_TT_IPV4_TCP]);
 
-   if (ai->tt_vec & (1 << MLX5E_TT_IPV6_UDP))
+   if (ai->tt_vec & BIT(MLX5E_TT_IPV6_UDP))
mlx5_del_flow_table_entry(ft, ai->ft_ix[MLX5E_TT_IPV6_UDP]);
 
-   if (ai->tt_vec & (1 << MLX5E_TT_IPV4_UDP))
+   if (ai->tt_vec & BIT(MLX5E_TT_IPV4_UDP))
mlx5_del_flow_table_entry(ft, ai->ft_ix[MLX5E_TT_IPV4_UDP]);
 
-   if (ai->tt_vec & (1 << MLX5E_TT_IPV6))
+   if (ai->tt_vec & BIT(MLX5E_TT_IPV6))
mlx5_del_flow_table_entry(ft, ai->ft_ix[MLX5E_TT_IPV6]);
 
-   if (ai->tt_vec & (1 << MLX5E_TT_IPV4))
+   if (ai->tt_vec & BIT(MLX5E_TT_IPV4))
mlx5_del_flow_table_entry(ft, ai->ft_ix[MLX5E_TT_IPV4]);
 
-   if (ai->tt_vec & (1 << MLX5E_TT_ANY))
+   if (ai->tt_vec & BIT(MLX5E_TT_ANY))
mlx5_del_flow_table_entry(ft, ai->ft_ix[MLX5E_TT_ANY]);
 }
 
@@ -156,33 +156,33 @@ static u32 mlx5e_get_tt_vec(struct mlx5e_eth_addr_info 
*ai, int type)
switch (eth_addr_type) {
case MLX5E_UC:
ret =
-   (1 << MLX5E_TT_IPV4_TCP) |
-   (1 << MLX5E_TT_IPV6_TCP) |
-   (1 << MLX5E_TT_IPV4_UDP) |
-   (1 << MLX5E_TT_IPV6_UDP) |
-   (1 << MLX5E_TT_IPV4) |
-   (1 << MLX5E_TT_IPV6) |
-   (1 << MLX5E_TT_ANY)  |
+   BIT(MLX5E_TT_IPV4_TCP)   |
+   BIT(MLX5E_TT_IPV6_TCP)   |
+   BIT(MLX5E_TT_IPV4_UDP)   |
+   BIT(MLX5E_TT_IPV6_UDP)   |
+   BIT(MLX5E_TT_IPV4)   |
+   BIT(MLX5E_TT_IPV6)   |
+   BIT(MLX5E_TT_ANY)|
0;
break;
 
case MLX5E_MC_IPV4:
ret =
-   (1 << MLX5E_TT_IPV4_UDP) |
-   (1 << MLX5E_TT_IPV4) |
+   BIT(MLX5E_TT_IPV4_UDP)   |
+   BIT(MLX5E_TT_IPV4)   |
0;
break;
 
case MLX5E_MC_IPV6:
ret =
-   (1 << MLX5E_TT_IPV6_UDP) |
-   (1 << MLX5E_TT_IPV6) |
+   BIT(MLX5E_TT_IPV6_UDP)   |
+   BIT(MLX5E_TT_IPV6)   |
0;
break;
 
case MLX5E_MC_OTHER:
ret =
-   

[PATCH net-next 1/6] net/mlx5e: Support ETH_RSS_HASH_XOR

2015-07-23 Thread Amir Vadai
From: Saeed Mahameed 

The ConnectX-4 HW implements inverted XOR8.
To make it act as XOR we re-order the HW RSS indirection table.

Set XOR to be the default RSS hash function and add ethtool API to
control it.

Signed-off-by: Saeed Mahameed 
Signed-off-by: Amir Vadai 
---
 drivers/net/ethernet/mellanox/mlx5/core/en.h   |  1 +
 .../net/ethernet/mellanox/mlx5/core/en_ethtool.c   | 39 ++
 drivers/net/ethernet/mellanox/mlx5/core/en_main.c  | 46 +-
 include/linux/mlx5/mlx5_ifc.h  |  6 +--
 4 files changed, 79 insertions(+), 13 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en.h 
b/drivers/net/ethernet/mellanox/mlx5/core/en.h
index 3d23bd6..61d8433 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en.h
@@ -195,6 +195,7 @@ struct mlx5e_params {
u16 rx_hash_log_tbl_sz;
bool lro_en;
u32 lro_wqe_sz;
+   u8  rss_hfunc;
 };
 
 enum {
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c 
b/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c
index 3889384..cb28535 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c
@@ -662,6 +662,43 @@ out:
return err;
 }
 
+static int mlx5e_get_rxfh(struct net_device *netdev, u32 *indir, u8 *key,
+ u8 *hfunc)
+{
+   struct mlx5e_priv *priv = netdev_priv(netdev);
+
+   if (hfunc)
+   *hfunc = priv->params.rss_hfunc;
+
+   return 0;
+}
+
+static int mlx5e_set_rxfh(struct net_device *netdev, const u32 *indir,
+ const u8 *key, const u8 hfunc)
+{
+   struct mlx5e_priv *priv = netdev_priv(netdev);
+   int err = 0;
+
+   if (hfunc == ETH_RSS_HASH_NO_CHANGE)
+   return 0;
+
+   if ((hfunc != ETH_RSS_HASH_XOR) &&
+   (hfunc != ETH_RSS_HASH_TOP))
+   return -EINVAL;
+
+   mutex_lock(&priv->state_lock);
+
+   priv->params.rss_hfunc = hfunc;
+   if (test_bit(MLX5E_STATE_OPENED, &priv->state)) {
+   mlx5e_close_locked(priv->netdev);
+   err = mlx5e_open_locked(priv->netdev);
+   }
+
+   mutex_unlock(&priv->state_lock);
+
+   return err;
+}
+
 const struct ethtool_ops mlx5e_ethtool_ops = {
.get_drvinfo   = mlx5e_get_drvinfo,
.get_link  = ethtool_op_get_link,
@@ -676,4 +713,6 @@ const struct ethtool_ops mlx5e_ethtool_ops = {
.set_coalesce  = mlx5e_set_coalesce,
.get_settings  = mlx5e_get_settings,
.set_settings  = mlx5e_set_settings,
+   .get_rxfh  = mlx5e_get_rxfh,
+   .set_rxfh  = mlx5e_set_rxfh,
 };
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c 
b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
index 40206da..07d3627 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
@@ -1158,6 +1158,24 @@ static void mlx5e_close_tises(struct mlx5e_priv *priv)
mlx5e_close_tis(priv, tc);
 }
 
+static int mlx5e_rx_hash_fn(int hfunc)
+{
+   return (hfunc == ETH_RSS_HASH_TOP) ?
+  MLX5_RX_HASH_FN_TOEPLITZ :
+  MLX5_RX_HASH_FN_INVERTED_XOR8;
+}
+
+static int mlx5e_bits_invert(unsigned long a, int size)
+{
+   int inv = 0;
+   int i;
+
+   for (i = 0; i < size; i++)
+   inv |= (test_bit(size - i - 1, &a) ? 1 : 0) << i;
+
+   return inv;
+}
+
 static int mlx5e_open_rqt(struct mlx5e_priv *priv)
 {
struct mlx5_core_dev *mdev = priv->mdev;
@@ -1166,11 +1184,10 @@ static int mlx5e_open_rqt(struct mlx5e_priv *priv)
void *rqtc;
int inlen;
int err;
-   int sz;
+   int log_tbl_sz = priv->params.rx_hash_log_tbl_sz;
+   int sz = 1 << log_tbl_sz;
int i;
 
-   sz = 1 << priv->params.rx_hash_log_tbl_sz;
-
inlen = MLX5_ST_SZ_BYTES(create_rqt_in) + sizeof(u32) * sz;
in = mlx5_vzalloc(inlen);
if (!in)
@@ -1182,8 +1199,12 @@ static int mlx5e_open_rqt(struct mlx5e_priv *priv)
MLX5_SET(rqtc, rqtc, rqt_max_size, sz);
 
for (i = 0; i < sz; i++) {
-   int ix = i % priv->params.num_channels;
+   int ix = i;
+
+   if (priv->params.rss_hfunc == ETH_RSS_HASH_XOR)
+   ix = mlx5e_bits_invert(i, log_tbl_sz);
 
+   ix = ix % priv->params.num_channels;
MLX5_SET(rqtc, rqtc, rq_num[i], priv->channel[ix]->rq.rqn);
}
 
@@ -1254,12 +1275,16 @@ static void mlx5e_build_tir_ctx(struct mlx5e_priv 
*priv, u32 *tirc, int tt)
MLX5_SET(tirc, tirc, indirect_table,
 priv->rqtn);
MLX5_SET(tirc, tirc, rx_hash_fn,
-MLX5_TIRC_RX_HASH_FN_HASH_TOEPLITZ);

[PATCH net-next 3/6] net/mlx5e: Support TX packet copy into WQE

2015-07-23 Thread Amir Vadai
From: Achiad Shochat 

AKA inline WQE.
A TX latency optimization to save data gather DMA reads.
Controlled by ETHTOOL_TX_COPYBREAK.

Signed-off-by: Achiad Shochat 
Signed-off-by: Amir Vadai 
---
 drivers/net/ethernet/mellanox/mlx5/core/en.h   |  2 +
 .../net/ethernet/mellanox/mlx5/core/en_ethtool.c   | 53 ++
 drivers/net/ethernet/mellanox/mlx5/core/en_main.c  | 13 ++
 drivers/net/ethernet/mellanox/mlx5/core/en_tx.c| 10 +++-
 4 files changed, 77 insertions(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en.h 
b/drivers/net/ethernet/mellanox/mlx5/core/en.h
index 61d8433..d9dc506 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en.h
@@ -196,6 +196,7 @@ struct mlx5e_params {
bool lro_en;
u32 lro_wqe_sz;
u8  rss_hfunc;
+   u16 tx_max_inline;
 };
 
 enum {
@@ -520,3 +521,4 @@ static inline void mlx5e_cq_arm(struct mlx5e_cq *cq)
 }
 
 extern const struct ethtool_ops mlx5e_ethtool_ops;
+u16 mlx5e_get_max_inline_cap(struct mlx5_core_dev *mdev);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c 
b/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c
index cb28535..14fd82c 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c
@@ -699,6 +699,57 @@ static int mlx5e_set_rxfh(struct net_device *netdev, const 
u32 *indir,
return err;
 }
 
+static int mlx5e_get_tunable(struct net_device *dev,
+const struct ethtool_tunable *tuna,
+void *data)
+{
+   const struct mlx5e_priv *priv = netdev_priv(dev);
+   int err = 0;
+
+   switch (tuna->id) {
+   case ETHTOOL_TX_COPYBREAK:
+   *(u32 *)data = priv->params.tx_max_inline;
+   break;
+   default:
+   err = -EINVAL;
+   break;
+   }
+
+   return err;
+}
+
+static int mlx5e_set_tunable(struct net_device *dev,
+const struct ethtool_tunable *tuna,
+const void *data)
+{
+   struct mlx5e_priv *priv = netdev_priv(dev);
+   struct mlx5_core_dev *mdev = priv->mdev;
+   struct mlx5e_params new_params;
+   u32 val;
+   int err = 0;
+
+   switch (tuna->id) {
+   case ETHTOOL_TX_COPYBREAK:
+   val = *(u32 *)data;
+   if (val > mlx5e_get_max_inline_cap(mdev)) {
+   err = -EINVAL;
+   break;
+   }
+
+   mutex_lock(&priv->state_lock);
+   new_params = priv->params;
+   new_params.tx_max_inline = val;
+   err = mlx5e_update_priv_params(priv, &new_params);
+   mutex_unlock(&priv->state_lock);
+   break;
+   default:
+   err = -EINVAL;
+   break;
+   }
+
+   return err;
+}
+
 const struct ethtool_ops mlx5e_ethtool_ops = {
.get_drvinfo   = mlx5e_get_drvinfo,
.get_link  = ethtool_op_get_link,
@@ -715,4 +766,6 @@ const struct ethtool_ops mlx5e_ethtool_ops = {
.set_settings  = mlx5e_set_settings,
.get_rxfh  = mlx5e_get_rxfh,
.set_rxfh  = mlx5e_set_rxfh,
+   .get_tunable   = mlx5e_get_tunable,
+   .set_tunable   = mlx5e_set_tunable,
 };
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c 
b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
index 57cc896..c55fad4 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
@@ -41,6 +41,7 @@ struct mlx5e_rq_param {
 struct mlx5e_sq_param {
u32sqc[MLX5_ST_SZ_DW(sqc)];
struct mlx5_wq_param   wq;
+   u16max_inline;
 };
 
 struct mlx5e_cq_param {
@@ -514,6 +515,7 @@ static int mlx5e_create_sq(struct mlx5e_channel *c,
sq->wq.db   = &sq->wq.db[MLX5_SND_DBR];
sq->uar_map = sq->uar.map;
sq->bf_buf_size = (1 << MLX5_CAP_GEN(mdev, log_bf_reg_size)) / 2;
+   sq->max_inline  = param->max_inline;
 
err = mlx5e_alloc_sq_db(sq, cpu_to_node(c->cpu));
if (err)
@@ -1020,6 +1022,7 @@ static void mlx5e_build_sq_param(struct mlx5e_priv *priv,
MLX5_SET(wq, wq, pd,priv->pdn);
 
param->wq.buf_numa_node = dev_to_node(&priv->mdev->pdev->dev);
+   param->max_inline = priv->params.tx_max_inline;
 }
 
 static void mlx5e_build_common_cq_param(struct mlx5e_priv *priv,
@@ -1703,6 +1706,15 @@ static int mlx5e_check_required_hca_cap(struct 
mlx5_core_dev *mdev)
return 0;
 }
 
+u16 mlx5e_get_max_inline_cap(struct mlx5_core_dev *mdev)
+{
+   int bf_buf_size = (1 << MLX5_CAP_GEN(mdev, log_bf_reg_size)) / 2;
+
+   return bf_buf_size -

[PATCH net-next 4/4] net/mlx4_en: Add support for hardware accelerated 802.1ad vlan

2015-07-27 Thread Amir Vadai
From: Hadar Hen Zion 

To enable device support for accelerated 802.1ad vlan, the port
capability "packet has vlan enable" (phv_en) should be set.
Firmware won't work properly if phv_en is not set.

The user can enable "phv_en" port capability with the new ethtool
private flag phv-bit. The phv-bit private flag default value is OFF,
users who are interested in 802.1ad hardware acceleration should turn ON
the phv-bit private flag:
$ ethtool --set-priv-flags eth1 phv-bit on

Once the private flag is set, the device is ready for 802.1ad vlan
acceleration.

The user should also change the interface device features and turn on
"tx-vlan-stag-hw-insert" which is off by default:
$ ethtool -K eth1  tx-vlan-stag-hw-insert on

Setting the "phv-bit" private flag is available only for Physical
Functions (PFs). A Virtual Function (VF) will be able to use the feature
by setting the "tx-vlan-stag-hw-insert" ethtool device feature only if
the feature was enabled by the Hypervisor.

Signed-off-by: Hadar Hen Zion 
Signed-off-by: Amir Vadai 
---
 drivers/net/ethernet/mellanox/mlx4/en_ethtool.c | 16 +
 drivers/net/ethernet/mellanox/mlx4/en_netdev.c  | 46 +
 drivers/net/ethernet/mellanox/mlx4/en_rx.c  | 16 -
 drivers/net/ethernet/mellanox/mlx4/en_tx.c  | 13 ---
 drivers/net/ethernet/mellanox/mlx4/mlx4_en.h|  1 +
 include/linux/mlx4/cq.h |  1 +
 include/linux/mlx4/qp.h |  1 +
 7 files changed, 89 insertions(+), 5 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlx4/en_ethtool.c 
b/drivers/net/ethernet/mellanox/mlx4/en_ethtool.c
index 70f6553..f79d812 100644
--- a/drivers/net/ethernet/mellanox/mlx4/en_ethtool.c
+++ b/drivers/net/ethernet/mellanox/mlx4/en_ethtool.c
@@ -102,6 +102,7 @@ mlx4_en_get_drvinfo(struct net_device *dev, struct 
ethtool_drvinfo *drvinfo)
 
 static const char mlx4_en_priv_flags[][ETH_GSTRING_LEN] = {
"blueflame",
+   "phv-bit"
 };
 
 static const char main_strings[][ETH_GSTRING_LEN] = {
@@ -1797,9 +1798,13 @@ static int mlx4_en_get_ts_info(struct net_device *dev,
 static int mlx4_en_set_priv_flags(struct net_device *dev, u32 flags)
 {
struct mlx4_en_priv *priv = netdev_priv(dev);
+   struct mlx4_en_dev *mdev = priv->mdev;
bool bf_enabled_new = !!(flags & MLX4_EN_PRIV_FLAGS_BLUEFLAME);
bool bf_enabled_old = !!(priv->pflags & MLX4_EN_PRIV_FLAGS_BLUEFLAME);
+   bool phv_enabled_new = !!(flags & MLX4_EN_PRIV_FLAGS_PHV);
+   bool phv_enabled_old = !!(priv->pflags & MLX4_EN_PRIV_FLAGS_PHV);
int i;
+   int ret = 0;
 
if (bf_enabled_new != bf_enabled_old) {
if (bf_enabled_new) {
@@ -1825,6 +1830,17 @@ static int mlx4_en_set_priv_flags(struct net_device 
*dev, u32 flags)
bf_enabled_new ?  "Enabled" : "Disabled");
}
 
+   if (phv_enabled_new != phv_enabled_old) {
+   ret = set_phv_bit(mdev->dev, priv->port, (int)phv_enabled_new);
+   if (ret)
+   return ret;
+   else if (phv_enabled_new)
+   priv->pflags |= MLX4_EN_PRIV_FLAGS_PHV;
+   else
+   priv->pflags &= ~MLX4_EN_PRIV_FLAGS_PHV;
+   en_info(priv, "PHV bit %s\n",
+   phv_enabled_new ?  "Enabled" : "Disabled");
+   }
return 0;
 }
 
diff --git a/drivers/net/ethernet/mellanox/mlx4/en_netdev.c 
b/drivers/net/ethernet/mellanox/mlx4/en_netdev.c
index e0de2fd..4726122 100644
--- a/drivers/net/ethernet/mellanox/mlx4/en_netdev.c
+++ b/drivers/net/ethernet/mellanox/mlx4/en_netdev.c
@@ -2184,6 +2184,25 @@ static int mlx4_en_ioctl(struct net_device *dev, struct 
ifreq *ifr, int cmd)
}
 }
 
+static netdev_features_t mlx4_en_fix_features(struct net_device *netdev,
+ netdev_features_t features)
+{
+   struct mlx4_en_priv *en_priv = netdev_priv(netdev);
+   struct mlx4_en_dev *mdev = en_priv->mdev;
+
+   /* Since there is no support for separate RX C-TAG/S-TAG vlan accel
+* enable/disable make sure S-TAG flag is always in same state as
+* C-TAG.
+*/
+   if (features & NETIF_F_HW_VLAN_CTAG_RX &&
+   !(mdev->dev->caps.flags2 & MLX4_DEV_CAP_FLAG2_SKIP_OUTER_VLAN))
+   features |= NETIF_F_HW_VLAN_STAG_RX;
+   else
+   features &= ~NETIF_F_HW_VLAN_STAG_RX;
+
+   return features;
+}
+
 static int mlx4_en_set_features(struct net_device *netdev,
netdev_features_t features)
 {
@@ -2218,6 +2237,10 @@ static int mlx4_en_set_features(struct net_device 
*netdev,
en_info(priv, "Turn %s TX vlan strip offload\n",
   

  1   2   3   >