On Sun, Jul 20, 2014 at 7:30 PM, Joe Stringer <[email protected]> wrote:
> Converting the flow key and mask back into netlink format during each
> flow dump is fairly expensive. By caching the netlink versions of these
> the first time a flow is dumped, and copying the memory directly during
> subsequent dumps, we are able to support up to 15% more flows in the
> datapath.
>
> Perf of single revalidator thread before, many flows in datapath:
>  14.59%  ovs-vswitchd  [kernel.kallsyms]   [k] memcpy
>   8.21%  ovs-vswitchd  [kernel.kallsyms]   [k] memset
>   3.67%  ovs-vswitchd  [kernel.kallsyms]   [k] __nla_reserve
>   3.55%  ovs-vswitchd  ovs-vswitchd        [.] revalidate.isra.14
>   2.89%  ovs-vswitchd  [kernel.kallsyms]   [k] _raw_spin_lock_bh
> ...
>
> Perf of single revalidator thread after:
>  8.70%  ovs-vswitchd  [kernel.kallsyms]   [k] memcpy
>  5.09%  ovs-vswitchd  ovs-vswitchd        [.] revalidate.isra.14
>  4.36%  ovs-vswitchd  [kernel.kallsyms]   [k] ovs_nla_put_flow
>  4.27%  ovs-vswitchd  [kernel.kallsyms]   [k] memset
>  3.41%  ovs-vswitchd  [kernel.kallsyms]   [k] _raw_spin_lock_bh
> ...
>
> Signed-off-by: Joe Stringer <[email protected]>
> ---
> v4: Construct the cache during the first flow dump.
> v3: Add error checking to ovs_nla_copy_flow() and its users.
>     Allow nl_match_cache to fail allocation in ovs_flow_alloc().
> v2: Copy mask from userspace rather than constructing from sw_flow.
> RFC: First post.
> ---
>  datapath/datapath.c   |   44 ++++++++++++++++++++++++++++++++++++++++++--
>  datapath/flow.h       |    1 +
>  datapath/flow_table.c |    2 ++
>  3 files changed, 45 insertions(+), 2 deletions(-)
>
> diff --git a/datapath/datapath.c b/datapath/datapath.c
> index 065356f..b6161ce 100644
> --- a/datapath/datapath.c
> +++ b/datapath/datapath.c
> @@ -684,11 +684,16 @@ static void get_dp_stats(struct datapath *dp, struct ovs_dp_stats *stats,
>         }
>  }
>
> +static size_t ovs_nl_match_size(void)
> +{
> +       return nla_total_size(key_attr_size()) /* OVS_FLOW_ATTR_KEY */
> +               + nla_total_size(key_attr_size()); /* OVS_FLOW_ATTR_MASK */
> +}
> +
>  static size_t ovs_flow_cmd_msg_size(const struct sw_flow_actions *acts)
>  {
>         return NLMSG_ALIGN(sizeof(struct ovs_header))
> -               + nla_total_size(key_attr_size()) /* OVS_FLOW_ATTR_KEY */
> -               + nla_total_size(key_attr_size()) /* OVS_FLOW_ATTR_MASK */
> +               + ovs_nl_match_size()
>                 + nla_total_size(sizeof(struct ovs_flow_stats)) /* OVS_FLOW_ATTR_STATS */
>                 + nla_total_size(1) /* OVS_FLOW_ATTR_TCP_FLAGS */
>                 + nla_total_size(8) /* OVS_FLOW_ATTR_USED */
> @@ -703,6 +708,15 @@ static int ovs_flow_cmd_fill_match(struct datapath *dp,
>         struct nlattr *nla;
>         int err;
>
> +       if (flow->nl_match_cache) {
> +               if (skb_tailroom(skb) < flow->nl_match_cache->len)
> +                       return -EMSGSIZE;
> +
> +               return skb_copy_bits(flow->nl_match_cache, 0,
> +                                    skb_put(skb, flow->nl_match_cache->len),
> +                                    flow->nl_match_cache->len);
> +       }
> +
>         /* Fill flow key. */
>         nla = nla_nest_start(skb, OVS_FLOW_ATTR_KEY);
>         if (!nla)
> @@ -826,6 +840,29 @@ error:
>         return err;
>  }
>
> +/* Must be called with RCU read lock. */
> +static void ovs_flow_fill_nl_cache(struct datapath *dp, struct sw_flow *flow)
> +{
> +       struct sk_buff *nl_cache;
> +       int retval;
> +
> +       nl_cache = alloc_skb(ovs_nl_match_size(), GFP_KERNEL);
> +       if (!nl_cache)
> +               return;
> +
> +       retval = ovs_flow_cmd_fill_match(dp, flow, nl_cache);
> +       BUG_ON(retval < 0);
> +
> +       ovs_lock();
> +       if (likely(!flow->nl_match_cache)) {
> +               flow->nl_match_cache = nl_cache;
> +               nl_cache = NULL;
> +       }
> +       ovs_unlock();
> +
> +       kfree_skb(nl_cache);
> +}
> +
GFP_KERNEL allocation and taking the ovs mutex can both sleep, but the
flow dump runs as an RCU read-side operation, so it must not sleep
while holding the RCU read lock.
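
A rough, untested sketch of one way to avoid sleeping here: allocate
with GFP_ATOMIC and publish the skb with cmpxchg() instead of taking
ovs_lock(). The function and field names just follow this patch; the
approach itself is only a suggestion:

static void ovs_flow_fill_nl_cache(struct datapath *dp, struct sw_flow *flow)
{
        struct sk_buff *nl_cache;

        /* GFP_ATOMIC so the allocation cannot sleep under rcu_read_lock(). */
        nl_cache = alloc_skb(ovs_nl_match_size(), GFP_ATOMIC);
        if (!nl_cache)
                return;

        if (ovs_flow_cmd_fill_match(dp, flow, nl_cache) < 0) {
                kfree_skb(nl_cache);
                return;
        }

        /* Publish without the ovs mutex; if another dumper installed a
         * cache first, drop our copy. */
        if (cmpxchg(&flow->nl_match_cache, NULL, nl_cache) != NULL)
                kfree_skb(nl_cache);
}

Alternatively, the cache could be built where the flow is created or
modified, where we already hold ovs_lock() and are allowed to sleep,
rather than lazily during the dump.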

>  /* May not be called with RCU read lock. */
>  static struct sk_buff *ovs_flow_cmd_alloc_info(const struct sw_flow_actions *acts,
>                                                struct genl_info *info,
> @@ -1269,6 +1306,9 @@ static int ovs_flow_cmd_dump(struct sk_buff *skb, struct netlink_callback *cb)
>                 if (!flow)
>                         break;
>
> +               if (!flow->nl_match_cache)
> +                       ovs_flow_fill_nl_cache(dp, flow);
> +
>                 if (ovs_flow_cmd_fill_info(dp, flow, ovs_header->dp_ifindex, skb,
>                                            NETLINK_CB(cb->skb).portid,
>                                            cb->nlh->nlmsg_seq, NLM_F_MULTI,
> diff --git a/datapath/flow.h b/datapath/flow.h
> index f6afa48..f1bf289 100644
> --- a/datapath/flow.h
> +++ b/datapath/flow.h
> @@ -189,6 +189,7 @@ struct sw_flow {
>         struct sw_flow_key unmasked_key;
>         struct sw_flow_mask *mask;
>         struct sw_flow_actions __rcu *sf_acts;
> +       struct sk_buff *nl_match_cache;
>         struct flow_stats __rcu *stats[]; /* One for each NUMA node.  First one
>                                            * is allocated at flow creation time,
>                                            * the rest are allocated on demand
> diff --git a/datapath/flow_table.c b/datapath/flow_table.c
> index 9ab1020..e97d749 100644
> --- a/datapath/flow_table.c
> +++ b/datapath/flow_table.c
> @@ -92,6 +92,7 @@ struct sw_flow *ovs_flow_alloc(void)
>
>         flow->sf_acts = NULL;
>         flow->mask = NULL;
> +       flow->nl_match_cache = NULL;
>         flow->stats_last_writer = NUMA_NO_NODE;
>
>         /* Initialize the default stat node. */
> @@ -146,6 +147,7 @@ static void flow_free(struct sw_flow *flow)
>  {
>         int node;
>
> +       kfree_skb(flow->nl_match_cache);
>         kfree((struct sw_flow_actions __force *)flow->sf_acts);
>         for_each_node(node)
>                 if (flow->stats[node])
> --
> 1.7.10.4
>