On 05/01/2017 02:57, Joe Stringer wrote:
On 25 December 2016 at 03:39, Paul Blakey<pa...@mellanox.com>  wrote:
Add tc flower interface that will be used to offload flows via tc
flower classifier. Depending on the flag used (skip_sw/hw) flower
will pass those to HW or handle them itself.

Signed-off-by: Shahar Klein<shah...@mellanox.com>
Signed-off-by: Paul Blakey<pa...@mellanox.com>
Reviewed-by: Roi Dayan<r...@mellanox.com>
---
Was Shahar also a co-author? Perhaps you should place the co-author tag as well.
Yes he is, We'll add that tag.
Another question: What happens if someone manually configures
additional flower (or non-flower?) filters on devices? Does OVS
complain constantly in the logs, or handle it ok? Overwrite the user
configuration?
For now additional flower are interpreted back to OVS rules and will be generated a new UFID.
Non flower filters are ignored by dump.

  lib/automake.mk |   2 +
  lib/tc.c        | 996 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++
  lib/tc.h        | 107 ++++++
  3 files changed, 1105 insertions(+)
  create mode 100644 lib/tc.c
  create mode 100644 lib/tc.h

diff --git a/lib/automake.mk b/lib/automake.mk
index 9345cee..bcc7813 100644
--- a/lib/automake.mk
+++ b/lib/automake.mk
@@ -351,6 +351,8 @@ if LINUX
  lib_libopenvswitch_la_SOURCES += \
         lib/dpif-netlink.c \
         lib/dpif-netlink.h \
+       lib/tc.h \
+       lib/tc.c \
         lib/if-notifier.c \
         lib/if-notifier.h \
         lib/netdev-linux.c \
diff --git a/lib/tc.c b/lib/tc.c
new file mode 100644
index 0000000..b5f6603
--- /dev/null
+++ b/lib/tc.c
@@ -0,0 +1,996 @@
+/*
+ * Copyright (c) 2016 Mellanox Technologies, Ltd.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ *http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <config.h>
+
+#include <errno.h>
+#include <linux/rtnetlink.h>
+#include <net/if.h>
+#include <linux/tc_act/tc_gact.h>
+#include <linux/tc_act/tc_mirred.h>
+#include <linux/tc_act/tc_vlan.h>
+#include <linux/tc_act/tc_tunnel_key.h>
I think that some of these cause userspace dependency on very new
kernel headers. Could we provide a copy of them in the OVS tree as per
discussion in cover letter?
Yes, thanks.
<snip>

+/* Returns tc handle 'major':'minor'. */
+static unsigned int
+tc_make_handle(unsigned int major, unsigned int minor)
+{
+    return TC_H_MAKE(major << 16, minor);
+}
+
+static struct tcmsg *
+tc_make_req(int ifindex, int type, unsigned int flags, struct ofpbuf *request)
+{
+    struct tcmsg *tcmsg;
+    struct nlmsghdr *nlmsghdr;
+
+    ofpbuf_init(request, 512);
+
+    nl_msg_reserve(request, NLMSG_HDRLEN + sizeof *tcmsg);
+    nlmsghdr = nl_msg_put_uninit(request, NLMSG_HDRLEN);
+    nlmsghdr->nlmsg_len = 0;
+    nlmsghdr->nlmsg_type = type;
+    nlmsghdr->nlmsg_flags = NLM_F_REQUEST | flags;
+    nlmsghdr->nlmsg_seq = 0;
+    nlmsghdr->nlmsg_pid = 0;
+
+    tcmsg = ofpbuf_put_zeros(request, sizeof *tcmsg);
+    tcmsg->tcm_family = AF_UNSPEC;
+    tcmsg->tcm_ifindex = ifindex;
+
+    return tcmsg;
+}
+
+static int
+tc_transact(struct ofpbuf *request, struct ofpbuf **replyp)
+{
+    int error = nl_transact(NETLINK_ROUTE, request, replyp);
+
+    ofpbuf_uninit(request);
+    return error;
+}
The above few functions are practically the same as the versions in
lib/netdev-linux.c. Could you separate these into a separate patch
which refactors these functions into lib/tc.c, exports them in
lib/tc.h and reuses them frome lib/netdev-linux.c ?
Sure.
+static int
+__nl_parse_flower_eth(struct nlattr **attrs, struct tc_flow *tc_flow)
Typically ovs uses foo__() for variations on functions; but in many of
these cases it seems that the underscores are unnecessary. You can
just name thin nl_parse_flower_eth. (We don't name things specially to
indicate that they're private in the file either).

+{
+    const struct eth_addr *eth = 0;
+
+    if (attrs[TCA_FLOWER_KEY_ETH_SRC_MASK]) {
+        eth = nl_attr_get_unspec(attrs[TCA_FLOWER_KEY_ETH_SRC], ETH_ALEN);
+        memcpy(&tc_flow->key.src_mac, eth, sizeof tc_flow->key.src_mac);
+
+        eth = nl_attr_get_unspec(attrs[TCA_FLOWER_KEY_ETH_SRC_MASK], ETH_ALEN);
+        memcpy(&tc_flow->mask.src_mac, eth, sizeof tc_flow->mask.src_mac);
+    }
+    if (attrs[TCA_FLOWER_KEY_ETH_DST_MASK]) {
+        eth = nl_attr_get_unspec(attrs[TCA_FLOWER_KEY_ETH_DST], ETH_ALEN);
+        memcpy(&tc_flow->key.dst_mac, eth, sizeof tc_flow->key.dst_mac);
+
+        eth = nl_attr_get_unspec(attrs[TCA_FLOWER_KEY_ETH_DST_MASK], ETH_ALEN);
+        memcpy(&tc_flow->mask.dst_mac, eth, sizeof tc_flow->mask.dst_mac);
+    }
+    return 0;
+}
A bunch of these functions only ever return 0. Why return anything?

+static int
+__nl_parse_single_action(struct nlattr *action, struct tc_flow *tc_flow)
+{
+    struct nlattr *act_options;
+    struct nlattr *act_stats;
+    const struct nlattr *stats_basic;
+    const char *act_kind;
+    struct nlattr *action_attrs[ARRAY_SIZE(act_policy)];
+    struct nlattr *stats_attrs[ARRAY_SIZE(stats_policy)];
+    struct ovs_flow_stats *stats = &tc_flow->stats;
+    const struct gnet_stats_basic *bs;
+
+    if (!nl_parse_nested(action, act_policy, action_attrs,
+                         ARRAY_SIZE(act_policy))) {
+        VLOG_ERR("failed to parse single action options");
+        return EPROTO;
+    }
+
+    act_kind = nl_attr_get_string(action_attrs[TCA_ACT_KIND]);
+    act_options = action_attrs[TCA_ACT_OPTIONS];
+
+    if (!strcmp(act_kind, "gact")) {
+        __nl_parse_act_drop(act_options, tc_flow);
+    } else if (!strcmp(act_kind, "mirred")) {
+        __nl_parse_act_mirred(act_options, tc_flow);
+    } else if (!strcmp(act_kind, "vlan")) {
+        __nl_parse_act_vlan(act_options, tc_flow);
+    } else if (!strcmp(act_kind, "tunnel_key")) {
+        __nl_parse_act_tunnel_key(act_options, tc_flow);
+    } else {
+        VLOG_ERR("unknown tc action, kind %s", act_kind);
I could see in degenerate cases, errors like this flooding the logs.
There are VLOG_*_RL(...) variations on these log functions that take a
ratelimiter.

+        return EINVAL;
+    }
+
+    act_stats = action_attrs[TCA_ACT_STATS];
+
+    if (!nl_parse_nested(act_stats, stats_policy, stats_attrs,
+                         ARRAY_SIZE(stats_policy))) {
+        VLOG_ERR("failed to parse action stats policy");
+        return EPROTO;
+    }
+
+    stats_basic = stats_attrs[TCA_STATS_BASIC];
+    bs = nl_attr_get_unspec(stats_basic, sizeof *bs);
+
+    stats->n_packets.lo = bs->packets;
+    stats->n_packets.hi = 0;
+    stats->n_bytes.hi = bs->bytes >> 32;
+    stats->n_bytes.lo = bs->bytes & 0x00000000FFFFFFFF;
Is it not enough to just set stats->n_bytes = bs->bytes ?

+
+    return 0;
+}
+
+static int
+__nl_parse_flower_actions(struct nlattr **attrs, struct tc_flow *tc_flow)
+{
+    const struct nlattr *actions = attrs[TCA_FLOWER_ACT];
+    static struct nl_policy actions_orders_policy[TCA_ACT_MAX_PRIO + 1] = { };
+    struct nlattr *actions_orders[ARRAY_SIZE(actions_orders_policy)];
+
+    for (int i = 0; i < TCA_ACT_MAX_PRIO + 1; i++) {
Perhaps reuse ARRAY_SIZE(...)?

+        actions_orders_policy[i].type = NL_A_NESTED;
+        actions_orders_policy[i].optional = true;
+    }
+
+    if (!nl_parse_nested(actions, actions_orders_policy, actions_orders,
+                         ARRAY_SIZE(actions_orders_policy))) {
+        VLOG_ERR("failed to parse flower order of actions");
+        return EPROTO;
+    }
+
+    for (int i = 1; i < TCA_ACT_MAX_PRIO + 1; i++) {
Same here.

Also, why start from offset 1?
Since there is no action with priority/index 0 (see act_api.c +:617).
+        if (actions_orders[i]) {
+            int err = __nl_parse_single_action(actions_orders[i], tc_flow);
+            if (err) {
+                return err;
+            }
+        }
+    }
+
+    return 0;
+}
+
+static int
+__nl_parse_flower_options(struct nlattr *nl_options, struct tc_flow *tc_flow)
+{
+    struct nlattr *attrs[ARRAY_SIZE(tca_flower_policy)];
+    int err = 0;
+
+    if (!nl_parse_nested(nl_options, tca_flower_policy,
+                         attrs, ARRAY_SIZE(tca_flower_policy))) {
+        VLOG_ERR("failed to parse flower classifier options");
+        return EPROTO;
+    }
+
+    err = __nl_parse_flower_eth(attrs, tc_flow);
+    err = err ? err : __nl_parse_flower_vlan(attrs, tc_flow);
+    err = err ? err : __nl_parse_flower_ip(attrs, tc_flow);
+    err = err ? err : __nl_parse_flower_tunnel(attrs, tc_flow);
+    err = err ? err : __nl_parse_flower_actions(attrs, tc_flow);
Most of these functions don't perform any error checking, so these
"err = err ? err : ..." contortions are (mostly?) unnecessary.

+int
+tc_dump_flower_start(int ifindex, struct nl_dump *dump)
+{
+    struct ofpbuf request;
+    struct tcmsg *tcmsg;
+
+    tcmsg = tc_make_req(ifindex, RTM_GETTFILTER, NLM_F_DUMP, &request);
+    tcmsg->tcm_parent = tc_make_handle(0xffff, 0);
+    tcmsg->tcm_info = tc_make_handle(0, 0);
+    tcmsg->tcm_handle = 0;
+
+    nl_dump_start(dump, NETLINK_ROUTE, &request);
+    ofpbuf_uninit(&request);
+
+    return 0;
+}
+
+int
+tc_flush_flower(int ifindex)
+{
+    struct ofpbuf request;
+    struct tcmsg *tcmsg;
+
+    tcmsg = tc_make_req(ifindex, RTM_DELTFILTER, NLM_F_ACK, &request);
+    tcmsg->tcm_parent = tc_make_handle(0xffff, 0);
+    tcmsg->tcm_info = tc_make_handle(0, 0);
+
+    return tc_transact(&request, 0);
+}
+
+int
+tc_del_flower(int ifindex, int handle, int prio)
+{
+    struct ofpbuf request;
+    struct tcmsg *tcmsg;
+    struct ofpbuf *reply;
+
+    tcmsg = tc_make_req(ifindex, RTM_DELTFILTER, NLM_F_ECHO, &request);
+    tcmsg->tcm_parent = tc_make_handle(0xffff, 0);
+    tcmsg->tcm_info = tc_make_handle(prio, 0);
+    tcmsg->tcm_handle = handle;
+
+    return tc_transact(&request, &reply);
+}
Are these functions very similar to other functions in
lib/netdev-linux.c ? Could they be reused?
Yes,  we will move them.
+static void
+__nl_msg_put_act_push_vlan(struct ofpbuf *request, uint16_t vid, uint8_t prio)
+{
+    size_t offset;
+
+    nl_msg_put_string(request, TCA_ACT_KIND, "vlan");
+    offset = nl_msg_start_nested(request, TCA_ACT_OPTIONS);
+    {
+        struct tc_vlan parm = { 0 };
+
+        parm.action = TC_ACT_PIPE;
+        parm.v_action = TCA_VLAN_ACT_PUSH;
Is there a reason not to initialize these on the declaration line like this?

struct tc_vlan parm = {
     .action = TC_ACT_PIPE,
     .v_action = TCA_VLAN_ACT_PUSH,
};

(Same for several other functions)

+static void
+__nl_msg_put_flower_acts(struct ofpbuf *request, struct tc_flow *tc_flow)
+{
+    size_t offset;
+    size_t act_offset;
+
+    offset = nl_msg_start_nested(request, TCA_FLOWER_ACT);
+    {
+        uint16_t act_index = 1;
+        bool done = false;
+
+        while (!done) {
+            act_offset = nl_msg_start_nested(request, act_index);
+            {
Why the extra brace/indentation?
To clearly see what is nested (especially when its nested multiple times) and not forget to finish the nested part.
We can change that if you'd like.
+                /* vlan push/pop can only be first, only one output */
+                if (tc_flow->set.set && act_index == 1) {
+                    __nl_msg_put_act_tunnel_key_set(request, tc_flow->set.id,
+                                                    tc_flow->set.ipv4_src,
+                                                    tc_flow->set.ipv4_dst,
+                                                    tc_flow->set.tp_dst);
+                } else if (tc_flow->tunnel.tunnel && act_index == 1) {
+                    __nl_msg_put_act_tunnel_key_release(request);
+                } else if (tc_flow->vlan_push_id && act_index == 1) {
+                    __nl_msg_put_act_push_vlan(request,
+                                               tc_flow->vlan_push_id,
+                                               tc_flow->vlan_push_prio);
+                } else if (tc_flow->vlan_pop && act_index == 1) {
+                    __nl_msg_put_act_pop_vlan(request);
All of the above seems to require act_index == 1, perhaps that could
be a single common if with the other checks nested underneath?

+int
+tc_replace_flower(struct tc_flow *tc_flow, uint16_t prio)
+{
+    struct ofpbuf request;
+    struct tcmsg *tcmsg;
+    struct ofpbuf *reply;
+    int error = 0;
+    size_t basic_offset;
+
+    tcmsg = tc_make_req(tc_flow->ifindex, RTM_NEWTFILTER,
+                        NLM_F_CREATE | NLM_F_ECHO, &request);
+    tcmsg->tcm_parent = tc_make_handle(0xffff, 0);
+    tcmsg->tcm_info = tc_make_handle((OVS_FORCE uint16_t) prio,
+                                     (OVS_FORCE uint16_t) 
tc_flow->key.eth_type);
+    tcmsg->tcm_handle = tc_flow->handle;
+
+    /* flower */
+    nl_msg_put_string(&request, TCA_KIND, "flower");
+    basic_offset = nl_msg_start_nested(&request, TCA_OPTIONS);
+    {
+        __nl_msg_put_flower_options(&request, tc_flow);
+    }
+    nl_msg_end_nested(&request, basic_offset);
+
+    error = tc_transact(&request, &reply);
+    if (!error) {
+        struct tcmsg *tc =
+            ofpbuf_at_assert(reply, NLMSG_HDRLEN, sizeof *tc);
+
+        tc_flow->prio = TC_H_MAJ(tc->tcm_info) >> 16;
+        tc_flow->handle = tc->tcm_handle;
+    }
+
+    return error;
+}
Again, maybe there's some common code to be shared.
This is flower specific.

diff --git a/lib/tc.h b/lib/tc.h
new file mode 100644
index 0000000..1b84f35
--- /dev/null
+++ b/lib/tc.h
@@ -0,0 +1,107 @@
+/*
+ * Copyright (c) 2016 Mellanox Technologies, Ltd.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ *http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef TC_H
+#define TC_H 1
+
+#include "odp-netlink.h"
+#include "netlink-socket.h"
+
+struct netdev;
Is this necessary here?

<snip>
About the rest, we will fix those.
Again, thanks for the review.

_______________________________________________
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev

Reply via email to