On Sun, Jul 20, 2014 at 7:30 PM, Joe Stringer <[email protected]> wrote:
> Converting the flow key and mask back into netlink format during each
> flow dump is fairly expensive. By caching the netlink versions of these
> the first time a flow is dumped, and copying the memory directly during
> subsequent dumps, we are able to support up to 15% more flows in the
> datapath.
>
> Perf of single revalidator thread before, many flows in datapath:
> 14.59% ovs-vswitchd [kernel.kallsyms] [k] memcpy
> 8.21% ovs-vswitchd [kernel.kallsyms] [k] memset
> 3.67% ovs-vswitchd [kernel.kallsyms] [k] __nla_reserve
> 3.55% ovs-vswitchd ovs-vswitchd [.] revalidate.isra.14
> 2.89% ovs-vswitchd [kernel.kallsyms] [k] _raw_spin_lock_bh
> ...
>
> Perf of single revalidator thread after:
> 8.70% ovs-vswitchd [kernel.kallsyms] [k] memcpy
> 5.09% ovs-vswitchd ovs-vswitchd [.] revalidate.isra.14
> 4.36% ovs-vswitchd [kernel.kallsyms] [k] ovs_nla_put_flow
> 4.27% ovs-vswitchd [kernel.kallsyms] [k] memset
> 3.41% ovs-vswitchd [kernel.kallsyms] [k] _raw_spin_lock_bh
> ...
>
> Signed-off-by: Joe Stringer <[email protected]>
> ---
> v4: Construct the cache during the first flow dump.
> v3: Add error checking to ovs_nla_copy_flow() and its users.
> Allow nl_match_cache to fail allocation in ovs_flow_alloc().
> v2: Copy mask from userspace rather than constructing from sw_flow.
> RFC: First post.
> ---
> datapath/datapath.c | 44 ++++++++++++++++++++++++++++++++++++++++++--
> datapath/flow.h | 1 +
> datapath/flow_table.c | 2 ++
> 3 files changed, 45 insertions(+), 2 deletions(-)
>
> diff --git a/datapath/datapath.c b/datapath/datapath.c
> index 065356f..b6161ce 100644
> --- a/datapath/datapath.c
> +++ b/datapath/datapath.c
> @@ -684,11 +684,16 @@ static void get_dp_stats(struct datapath *dp, struct ovs_dp_stats *stats,
> }
> }
>
> +static size_t ovs_nl_match_size(void)
> +{
> + return nla_total_size(key_attr_size()) /* OVS_FLOW_ATTR_KEY */
> + + nla_total_size(key_attr_size()); /* OVS_FLOW_ATTR_MASK */
> +}
> +
> static size_t ovs_flow_cmd_msg_size(const struct sw_flow_actions *acts)
> {
> return NLMSG_ALIGN(sizeof(struct ovs_header))
> - + nla_total_size(key_attr_size()) /* OVS_FLOW_ATTR_KEY */
> - + nla_total_size(key_attr_size()) /* OVS_FLOW_ATTR_MASK */
> + + ovs_nl_match_size()
> + nla_total_size(sizeof(struct ovs_flow_stats)) /* OVS_FLOW_ATTR_STATS */
> + nla_total_size(1) /* OVS_FLOW_ATTR_TCP_FLAGS */
> + nla_total_size(8) /* OVS_FLOW_ATTR_USED */
> @@ -703,6 +708,15 @@ static int ovs_flow_cmd_fill_match(struct datapath *dp,
> struct nlattr *nla;
> int err;
>
> + if (flow->nl_match_cache) {
> + if (skb_tailroom(skb) < flow->nl_match_cache->len)
> + return -EMSGSIZE;
> +
> + return skb_copy_bits(flow->nl_match_cache, 0,
> + skb_put(skb, flow->nl_match_cache->len),
> + flow->nl_match_cache->len);
> + }
> +
> /* Fill flow key. */
> nla = nla_nest_start(skb, OVS_FLOW_ATTR_KEY);
> if (!nla)
> @@ -826,6 +840,29 @@ error:
> return err;
> }
>
> +/* Must be called with RCU read lock. */
> +static void ovs_flow_fill_nl_cache(struct datapath *dp, struct sw_flow *flow)
> +{
> + struct sk_buff *nl_cache;
> + int retval;
> +
> + nl_cache = alloc_skb(ovs_nl_match_size(), GFP_KERNEL);
> + if (!nl_cache)
> + return;
> +
> + retval = ovs_flow_cmd_fill_match(dp, flow, nl_cache);
> + BUG_ON(retval < 0);
> +
> + ovs_lock();
> + if (likely(!flow->nl_match_cache)) {
> + flow->nl_match_cache = nl_cache;
> + nl_cache = NULL;
> + }
> + ovs_unlock();
> +
> + kfree_skb(nl_cache);
> +}
> +
A GFP_KERNEL allocation and ovs_lock() can both sleep, but the flow dump
is an RCU read-side operation that must not sleep while holding the RCU
read lock.
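
One way to avoid both sleeping calls might be to allocate the cache with
GFP_ATOMIC and publish the pointer with cmpxchg() instead of taking
ovs_lock(). Untested sketch, reusing the helpers from this patch:

static void ovs_flow_fill_nl_cache(struct datapath *dp, struct sw_flow *flow)
{
	struct sk_buff *nl_cache;

	/* GFP_ATOMIC so the allocation cannot sleep under rcu_read_lock(). */
	nl_cache = alloc_skb(ovs_nl_match_size(), GFP_ATOMIC);
	if (!nl_cache)
		return;	/* Best effort; the next dump simply retries. */

	if (ovs_flow_cmd_fill_match(dp, flow, nl_cache) < 0) {
		kfree_skb(nl_cache);
		return;
	}

	/* Lock-free publish; drop our copy if another dumper won the race. */
	if (cmpxchg(&flow->nl_match_cache, NULL, nl_cache))
		kfree_skb(nl_cache);
}

GFP_ATOMIC allocations fail more readily under memory pressure, but since
the cache is purely an optimization, a failed fill just means the dump
rebuilds the key and mask attributes as before.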
> /* May not be called with RCU read lock. */
> static struct sk_buff *ovs_flow_cmd_alloc_info(const struct sw_flow_actions *acts,
> struct genl_info *info,
> @@ -1269,6 +1306,9 @@ static int ovs_flow_cmd_dump(struct sk_buff *skb, struct netlink_callback *cb)
> if (!flow)
> break;
>
> + if (!flow->nl_match_cache)
> + ovs_flow_fill_nl_cache(dp, flow);
> +
> if (ovs_flow_cmd_fill_info(dp, flow, ovs_header->dp_ifindex, skb,
> NETLINK_CB(cb->skb).portid,
> cb->nlh->nlmsg_seq, NLM_F_MULTI,
> diff --git a/datapath/flow.h b/datapath/flow.h
> index f6afa48..f1bf289 100644
> --- a/datapath/flow.h
> +++ b/datapath/flow.h
> @@ -189,6 +189,7 @@ struct sw_flow {
> struct sw_flow_key unmasked_key;
> struct sw_flow_mask *mask;
> struct sw_flow_actions __rcu *sf_acts;
> + struct sk_buff *nl_match_cache;
> struct flow_stats __rcu *stats[]; /* One for each NUMA node. First one
> * is allocated at flow creation time,
> * the rest are allocated on demand
> diff --git a/datapath/flow_table.c b/datapath/flow_table.c
> index 9ab1020..e97d749 100644
> --- a/datapath/flow_table.c
> +++ b/datapath/flow_table.c
> @@ -92,6 +92,7 @@ struct sw_flow *ovs_flow_alloc(void)
>
> flow->sf_acts = NULL;
> flow->mask = NULL;
> + flow->nl_match_cache = NULL;
> flow->stats_last_writer = NUMA_NO_NODE;
>
> /* Initialize the default stat node. */
> @@ -146,6 +147,7 @@ static void flow_free(struct sw_flow *flow)
> {
> int node;
>
> + kfree_skb(flow->nl_match_cache);
> kfree((struct sw_flow_actions __force *)flow->sf_acts);
> for_each_node(node)
> if (flow->stats[node])
> --
> 1.7.10.4
>
_______________________________________________
dev mailing list
[email protected]
http://openvswitch.org/mailman/listinfo/dev