On Mon, Apr 20, 2020 at 1:29 AM Pravin Shelar <pravin....@gmail.com> wrote:
>
> On Sat, Apr 18, 2020 at 10:25 AM <xiangxia.m....@gmail.com> wrote:
> >
> > From: Tonghao Zhang <xiangxia.m....@gmail.com>
> >
> > In the kernel datapath of Open vSwitch, there are only 1024
> > meter buckets in one dp. Installing more than 1024
> > (e.g. 8192) meters may lead to a performance drop.
> > But in some cases, for example when Open vSwitch is used as an
> > edge gateway, at least 200,000 meters are needed for
> > IP address bandwidth limitation.
> >
> > [Open vSwitch userspace datapath has this issue too.]
> >
> > To make meters more scalable, this patch expands the buckets
> > when necessary, so we can install more meters in the datapath.
> > With the new struct *dp_meter_instance*, it is easy to
> > expand the meter array by changing the *ti* pointer in the struct
> > *dp_meter_table*.
> >
> > Cc: Pravin B Shelar <pshe...@ovn.org>
> > Cc: Andy Zhou <az...@ovn.org>
> > Signed-off-by: Tonghao Zhang <xiangxia.m....@gmail.com>
> > ---
> >  net/openvswitch/datapath.h |   2 +-
> >  net/openvswitch/meter.c    | 200 +++++++++++++++++++++++++++++--------
> >  net/openvswitch/meter.h    |  15 ++-
> >  3 files changed, 169 insertions(+), 48 deletions(-)
> >
> > diff --git a/net/openvswitch/datapath.h b/net/openvswitch/datapath.h
> > index e239a46c2f94..785105578448 100644
> > --- a/net/openvswitch/datapath.h
> > +++ b/net/openvswitch/datapath.h
> > @@ -82,7 +82,7 @@ struct datapath {
> >         u32 max_headroom;
> >
> >         /* Switch meters. */
> > -       struct hlist_head *meters;
> > +       struct dp_meter_table *meters;
> Let's define it as part of this struct to avoid indirection.
>
> >  };
> >
> >  /**
> > diff --git a/net/openvswitch/meter.c b/net/openvswitch/meter.c
> > index 5010d1ddd4bd..494a0014ecd8 100644
> > --- a/net/openvswitch/meter.c
> > +++ b/net/openvswitch/meter.c
> > @@ -19,8 +19,6 @@
> >  #include "datapath.h"
> >  #include "meter.h"
> >
> > -#define METER_HASH_BUCKETS 1024
> > -
> >  static const struct nla_policy meter_policy[OVS_METER_ATTR_MAX + 1] = {
> >         [OVS_METER_ATTR_ID] = { .type = NLA_U32, },
> >         [OVS_METER_ATTR_KBPS] = { .type = NLA_FLAG },
> > @@ -39,6 +37,11 @@ static const struct nla_policy 
> > band_policy[OVS_BAND_ATTR_MAX + 1] = {
> >         [OVS_BAND_ATTR_STATS] = { .len = sizeof(struct ovs_flow_stats) },
> >  };
> >
> > +static u32 meter_hash(struct dp_meter_instance *ti, u32 id)
> > +{
> > +       return id % ti->n_meters;
> > +}
> > +
> >  static void ovs_meter_free(struct dp_meter *meter)
> >  {
> >         if (!meter)
> > @@ -47,40 +50,141 @@ static void ovs_meter_free(struct dp_meter *meter)
> >         kfree_rcu(meter, rcu);
> >  }
> >
> > -static struct hlist_head *meter_hash_bucket(const struct datapath *dp,
> > -                                           u32 meter_id)
> > -{
> > -       return &dp->meters[meter_id & (METER_HASH_BUCKETS - 1)];
> > -}
> > -
> >  /* Call with ovs_mutex or RCU read lock. */
> > -static struct dp_meter *lookup_meter(const struct datapath *dp,
> > +static struct dp_meter *lookup_meter(const struct dp_meter_table *tbl,
> >                                      u32 meter_id)
> >  {
> > +       struct dp_meter_instance *ti = rcu_dereference_ovsl(tbl->ti);
> > +       u32 hash = meter_hash(ti, meter_id);
> >         struct dp_meter *meter;
> > -       struct hlist_head *head;
> >
> > -       head = meter_hash_bucket(dp, meter_id);
> > -       hlist_for_each_entry_rcu(meter, head, dp_hash_node,
> > -                               lockdep_ovsl_is_held()) {
> > -               if (meter->id == meter_id)
> > -                       return meter;
> > -       }
> > +       meter = rcu_dereference_ovsl(ti->dp_meters[hash]);
> > +       if (meter && likely(meter->id == meter_id))
> > +               return meter;
> > +
> >         return NULL;
> >  }
> >
> > -static void attach_meter(struct datapath *dp, struct dp_meter *meter)
> > +static struct dp_meter_instance *dp_meter_instance_alloc(const u32 size)
> > +{
> > +       struct dp_meter_instance *ti;
> > +
> > +       ti = kvzalloc(sizeof(*ti) +
> > +                     sizeof(struct dp_meter *) * size,
> > +                     GFP_KERNEL);
> > +       if (!ti)
> > +               return NULL;
> Given this is a kernel-space array, we need to have a hard limit in place.
In patch 2, I limited the number of meters; should we add a hard limit here as well?
> > +
> > +       ti->n_meters = size;
> > +
> > +       return ti;
> > +}
> > +
> > +static void dp_meter_instance_free(struct dp_meter_instance *ti)
> > +{
> > +       kvfree(ti);
> > +}
> > +
> > +static void dp_meter_instance_free_rcu(struct rcu_head *rcu)
> > +{
> > +       struct dp_meter_instance *ti;
> > +
> > +       ti = container_of(rcu, struct dp_meter_instance, rcu);
> > +       kvfree(ti);
> > +}
> > +
> > +static int
> > +dp_meter_instance_realloc(struct dp_meter_table *tbl, u32 size)
> > +{
> > +       struct dp_meter_instance *ti = rcu_dereference_ovsl(tbl->ti);
> > +       int n_meters = min(size, ti->n_meters);
> > +       struct dp_meter_instance *new_ti;
> > +       int i;
> > +
> > +       new_ti = dp_meter_instance_alloc(size);
> > +       if (!new_ti)
> > +               return -ENOMEM;
> > +
> > +       for (i = 0; i < n_meters; i++)
> > +               new_ti->dp_meters[i] =
> > +                       rcu_dereference_ovsl(ti->dp_meters[i]);
> > +
> > +       rcu_assign_pointer(tbl->ti, new_ti);
> > +       call_rcu(&ti->rcu, dp_meter_instance_free_rcu);
> > +
> > +       return 0;
> > +}
> > +
> > +static void dp_meter_instance_insert(struct dp_meter_instance *ti,
> > +                                    struct dp_meter *meter)
> > +{
> > +       u32 hash;
> > +
> > +       hash = meter_hash(ti, meter->id);
> > +       rcu_assign_pointer(ti->dp_meters[hash], meter);
> > +}
> > +
> > +static void dp_meter_instance_remove(struct dp_meter_instance *ti,
> > +                                    struct dp_meter *meter)
> >  {
> > -       struct hlist_head *head = meter_hash_bucket(dp, meter->id);
> > +       u32 hash;
> >
> > -       hlist_add_head_rcu(&meter->dp_hash_node, head);
> > +       hash = meter_hash(ti, meter->id);
> > +       RCU_INIT_POINTER(ti->dp_meters[hash], NULL);
> >  }
> >
> > -static void detach_meter(struct dp_meter *meter)
> > +static int attach_meter(struct dp_meter_table *tbl, struct dp_meter *meter)
> >  {
> > +       struct dp_meter_instance *ti = rcu_dereference_ovsl(tbl->ti);
> > +       u32 hash = meter_hash(ti, meter->id);
> > +
> > +       /*
> > +        * In generally, slot selected should be empty, because
> > +        * OvS uses id-pool to fetch a available id.
> > +        */
> > +       if (unlikely(rcu_dereference_ovsl(ti->dp_meters[hash])))
> > +               return -EINVAL;
> we could return -EBUSY instead.
> > +
> > +       dp_meter_instance_insert(ti, meter);
> > +
> > +       /* That function is thread-safe. */
> > +       if (++tbl->count >= ti->n_meters)
> > +               if (dp_meter_instance_realloc(tbl, ti->n_meters * 2))
> > +                       goto expand_err;
> > +
> > +       return 0;
> > +
> > +expand_err:
> > +       dp_meter_instance_remove(ti, meter);
> > +       tbl->count--;
> > +       return -ENOMEM;
> > +}
> > +
> > +static void detach_meter(struct dp_meter_table *tbl, struct dp_meter 
> > *meter)
> > +{
> > +       struct dp_meter_instance *ti;
> > +
> >         ASSERT_OVSL();
> > -       if (meter)
> > -               hlist_del_rcu(&meter->dp_hash_node);
> > +       if (!meter)
> > +               return;
> > +
> > +       ti = rcu_dereference_ovsl(tbl->ti);
> > +       dp_meter_instance_remove(ti, meter);
> > +
> > +       tbl->count--;
> > +
> > +       /* Shrink the meter array if necessary. */
> > +       if (ti->n_meters > DP_METER_ARRAY_SIZE_MIN &&
> > +           tbl->count <= (ti->n_meters / 4)) {
> > +               int half_size = ti->n_meters / 2;
> > +               int i;
> > +
> Let's add a comment about this.
> > +               for (i = half_size; i < ti->n_meters; i++)
> > +                       if (rcu_dereference_ovsl(ti->dp_meters[i]))
> > +                               return;
> > +
> > +               dp_meter_instance_realloc(tbl, half_size);
> > +       }
> >  }
> >
> >  static struct sk_buff *
> > @@ -303,9 +407,13 @@ static int ovs_meter_cmd_set(struct sk_buff *skb, 
> > struct genl_info *info)
> >         meter_id = nla_get_u32(a[OVS_METER_ATTR_ID]);
> >
> >         /* Cannot fail after this. */
> > -       old_meter = lookup_meter(dp, meter_id);
> > -       detach_meter(old_meter);
> > -       attach_meter(dp, meter);
> > +       old_meter = lookup_meter(dp->meters, meter_id);
> In the new scheme this can fail due to a hash collision; let's check for NULL.
If old_meter is NULL, detach_meter will do nothing.
> > +       detach_meter(dp->meters, old_meter);
> > +
> > +       err = attach_meter(dp->meters, meter);
> > +       if (err)
> > +               goto exit_unlock;
> > +
> >         ovs_unlock();
> >
> >         /* Build response with the meter_id and stats from
> > @@ -365,7 +473,7 @@ static int ovs_meter_cmd_get(struct sk_buff *skb, 
> > struct genl_info *info)
> >         }
> >
> >         /* Locate meter, copy stats. */
> > -       meter = lookup_meter(dp, meter_id);
> > +       meter = lookup_meter(dp->meters, meter_id);
> >         if (!meter) {
> >                 err = -ENOENT;
> >                 goto exit_unlock;
> > @@ -416,13 +524,13 @@ static int ovs_meter_cmd_del(struct sk_buff *skb, 
> > struct genl_info *info)
> >                 goto exit_unlock;
> >         }
> >
> > -       old_meter = lookup_meter(dp, meter_id);
> > +       old_meter = lookup_meter(dp->meters, meter_id);
> >         if (old_meter) {
> >                 spin_lock_bh(&old_meter->lock);
> >                 err = ovs_meter_cmd_reply_stats(reply, meter_id, old_meter);
> >                 WARN_ON(err);
> >                 spin_unlock_bh(&old_meter->lock);
> > -               detach_meter(old_meter);
> > +               detach_meter(dp->meters, old_meter);
> >         }
> >         ovs_unlock();
> >         ovs_meter_free(old_meter);
> > @@ -452,7 +560,7 @@ bool ovs_meter_execute(struct datapath *dp, struct 
> > sk_buff *skb,
> >         int i, band_exceeded_max = -1;
> >         u32 band_exceeded_rate = 0;
> >
> > -       meter = lookup_meter(dp, meter_id);
> > +       meter = lookup_meter(dp->meters, meter_id);
> >         /* Do not drop the packet when there is no meter. */
> >         if (!meter)
> >                 return false;
> > @@ -570,32 +678,36 @@ struct genl_family dp_meter_genl_family 
> > __ro_after_init = {
> >
> >  int ovs_meters_init(struct datapath *dp)
> >  {
> > -       int i;
> > +       struct dp_meter_instance *ti;
> > +       struct dp_meter_table *tbl;
> > +
> > +       tbl = kmalloc(sizeof(*tbl), GFP_KERNEL);
> > +       if (!tbl)
> > +               return -ENOMEM;
> >
> > -       dp->meters = kmalloc_array(METER_HASH_BUCKETS,
> > -                                  sizeof(struct hlist_head), GFP_KERNEL);
> > +       tbl->count = 0;
> >
> > -       if (!dp->meters)
> > +       ti = dp_meter_instance_alloc(DP_METER_ARRAY_SIZE_MIN);
> > +       if (!ti) {
> > +               kfree(tbl);
> >                 return -ENOMEM;
> > +       }
> >
> > -       for (i = 0; i < METER_HASH_BUCKETS; i++)
> > -               INIT_HLIST_HEAD(&dp->meters[i]);
> > +       rcu_assign_pointer(tbl->ti, ti);
> > +       dp->meters = tbl;
> >
> >         return 0;
> >  }
> >
> >  void ovs_meters_exit(struct datapath *dp)
> >  {
> > +       struct dp_meter_table *tbl = dp->meters;
> > +       struct dp_meter_instance *ti = rcu_dereference_ovsl(tbl->ti);
> >         int i;
> >
> > -       for (i = 0; i < METER_HASH_BUCKETS; i++) {
> > -               struct hlist_head *head = &dp->meters[i];
> > -               struct dp_meter *meter;
> > -               struct hlist_node *n;
> > -
> > -               hlist_for_each_entry_safe(meter, n, head, dp_hash_node)
> > -                       kfree(meter);
> > -       }
> > +       for (i = 0; i < ti->n_meters; i++)
> > +               ovs_meter_free(ti->dp_meters[i]);
> >
> > -       kfree(dp->meters);
> > +       dp_meter_instance_free(ti);
> > +       kfree(tbl);
> >  }
> > diff --git a/net/openvswitch/meter.h b/net/openvswitch/meter.h
> > index f645913870bd..d91940383bbe 100644
> > --- a/net/openvswitch/meter.h
> > +++ b/net/openvswitch/meter.h
> > @@ -18,6 +18,7 @@
> >  struct datapath;
> >
> >  #define DP_MAX_BANDS           1
> > +#define DP_METER_ARRAY_SIZE_MIN        (1ULL << 10)
> >
> >  struct dp_meter_band {
> >         u32 type;
> > @@ -30,9 +31,6 @@ struct dp_meter_band {
> >  struct dp_meter {
> >         spinlock_t lock;    /* Per meter lock */
> >         struct rcu_head rcu;
> > -       struct hlist_node dp_hash_node; /*Element in datapath->meters
> > -                                        * hash table.
> > -                                        */
> >         u32 id;
> >         u16 kbps:1, keep_stats:1;
> >         u16 n_bands;
> > @@ -42,6 +40,17 @@ struct dp_meter {
> >         struct dp_meter_band bands[];
> >  };
> >
> > +struct dp_meter_instance {
> > +       struct rcu_head rcu;
> > +       u32 n_meters;
> > +       struct dp_meter __rcu *dp_meters[];
> > +};
> > +
> > +struct dp_meter_table {
> > +       struct dp_meter_instance __rcu *ti;
> > +       u32 count;
> > +};
> > +
> >  extern struct genl_family dp_meter_genl_family;
> >  int ovs_meters_init(struct datapath *dp);
> >  void ovs_meters_exit(struct datapath *dp);
> > --
> > 2.23.0
> >



-- 
Best regards, Tonghao
_______________________________________________
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev

Reply via email to