On Thu, Apr 30, 2020 at 07:00:36PM +0800, xiangxia.m....@gmail.com wrote:
> From: Tonghao Zhang <xiangxia.m....@gmail.com>
> 
> For now, ovs-vswitchd use the array of the dp_meter struct
> to store meter's data, and at most, there are only 65536
> (defined by MAX_METERS) meters that can be used. But in some
> case, for example, in the edge gateway, we should use 200,000,
> at least, meters for IP address bandwidth limitation.
> Every one IP address will use two meters for its rx and tx
> path[1]. In other way, ovs-vswitchd should support meter-offload
> (rte_mtr_xxx api introduced by dpdk.), but there are more than
> 65536 meters in the hardware, such as Mellanox ConnectX-6.
> 
> This patch use array to manage the meter, but it can ben expanded.
> 
> [1].
> $ in_port=p0,ip,ip_dst=1.1.1.x action=meter:n,output:p1
> $ in_port=p1,ip,ip_src=1.1.1.x action=meter:m,output:p0

I'm just curious why you need so many 'unique' meters?
can you share the meter id if their settings are the same?

ex:
$ in_port=p0,ip,ip_dst=1.1.1.x action=meter:X ,output:p1
$ in_port=p1,ip,ip_src=1.1.1.x action=meter:X, output:p0

if both flows have the same meter setup in X.

> 
> Cc: Ilya Maximets <i.maxim...@ovn.org>
> Cc: William Tu <u9012...@gmail.com>
> Cc: Jarno Rajahalme <ja...@ovn.org>
> Cc: Ben Pfaff <b...@ovn.org>
> Cc: Andy Zhou <az...@ovn.org>
> Signed-off-by: Tonghao Zhang <xiangxia.m....@gmail.com>
> ---

Thanks for working on both kernel and userspace datapath!
Also add Pravin in the loop.

>  lib/dpif-netdev.c | 320 
> ++++++++++++++++++++++++++++++++++++++++++------------
>  1 file changed, 251 insertions(+), 69 deletions(-)
> 
> diff --git a/lib/dpif-netdev.c b/lib/dpif-netdev.c
> index ef14e83b5f06..17c0241aa2e2 100644
> --- a/lib/dpif-netdev.c
> +++ b/lib/dpif-netdev.c
> @@ -98,9 +98,12 @@ DEFINE_STATIC_PER_THREAD_DATA(uint32_t, recirc_depth, 0)
>  
>  /* Configuration parameters. */
>  enum { MAX_FLOWS = 65536 };     /* Maximum number of flows in flow table. */
> -enum { MAX_METERS = 65536 };    /* Maximum number of meters. */
> -enum { MAX_BANDS = 8 };         /* Maximum number of bands / meter. */
> -enum { N_METER_LOCKS = 64 };    /* Maximum number of meters. */
> +
> +/* Maximum number of meters in the table. */
> +#define METER_ENTRY_MAX (200000ULL)
> +/* Maximum number of bands / meter. */
> +#define METER_BAND_MAX  (8)
> +#define DP_METER_ARRAY_SIZE_MIN (1ULL << 10)
>  
>  COVERAGE_DEFINE(datapath_drop_meter);
>  COVERAGE_DEFINE(datapath_drop_upcall_error);
> @@ -283,12 +286,25 @@ struct dp_meter {
>      uint16_t flags;
>      uint16_t n_bands;
>      uint32_t max_delta_t;
> +    uint32_t id;
> +    struct ovs_mutex lock;
>      uint64_t used;
>      uint64_t packet_count;
>      uint64_t byte_count;
>      struct dp_meter_band bands[];
>  };
>  
> +struct dp_meter_instance {
> +    uint32_t n_meters;
> +    OVSRCU_TYPE(struct dp_meter *) dp_meters[];
usually we add a comments here, saying
        /* Followed by
     *    struct dp_meter[n];
     * where n is the n_meters.
     */

> +};
> +
> +struct dp_meter_table {
> +    OVSRCU_TYPE(struct dp_meter_instance *) ti;
> +    uint32_t count;
> +    struct ovs_mutex lock;
> +};
> +
>  struct pmd_auto_lb {
>      bool auto_lb_requested;     /* Auto load balancing requested by user. */
>      bool is_enabled;            /* Current status of Auto load balancing. */
> @@ -329,8 +345,7 @@ struct dp_netdev {
>      atomic_uint32_t tx_flush_interval;
>  
>      /* Meters. */
> -    struct ovs_mutex meter_locks[N_METER_LOCKS];

Why removing the multiple locks and using only one lock?
Do you see any performance overhead when switching to sinlge lock?

> -    struct dp_meter *meters[MAX_METERS]; /* Meter bands. */
> +    struct dp_meter_table meter_tbl;
>  
>      /* Probability of EMC insertions is a factor of 'emc_insert_min'.*/
>      OVS_ALIGNED_VAR(CACHE_LINE_SIZE) atomic_uint32_t emc_insert_min;
> @@ -378,19 +393,6 @@ struct dp_netdev {
>      struct pmd_auto_lb pmd_alb;
>  };
>  
> -static void meter_lock(const struct dp_netdev *dp, uint32_t meter_id)
> -    OVS_ACQUIRES(dp->meter_locks[meter_id % N_METER_LOCKS])
> -{
> -    ovs_mutex_lock(&dp->meter_locks[meter_id % N_METER_LOCKS]);
> -}
> -
> -static void meter_unlock(const struct dp_netdev *dp, uint32_t meter_id)
> -    OVS_RELEASES(dp->meter_locks[meter_id % N_METER_LOCKS])
> -{
> -    ovs_mutex_unlock(&dp->meter_locks[meter_id % N_METER_LOCKS]);
> -}
> -
> -
>  static struct dp_netdev_port *dp_netdev_lookup_port(const struct dp_netdev 
> *dp,
>                                                      odp_port_t)
>      OVS_REQUIRES(dp->port_mutex);
> @@ -1523,6 +1525,9 @@ choose_port(struct dp_netdev *dp, const char *name)
>      return ODPP_NONE;
>  }
>  
> +static void dp_netdev_meter_init(struct dp_meter_table *tbl);
> +static void dp_netdev_meter_destroy(struct dp_meter_table *tbl);
Maybe move them up?

> +
>  static int
>  create_dp_netdev(const char *name, const struct dpif_class *class,
>                   struct dp_netdev **dpp)
> @@ -1556,9 +1561,7 @@ create_dp_netdev(const char *name, const struct 
> dpif_class *class,
>      dp->reconfigure_seq = seq_create();
>      dp->last_reconfigure_seq = seq_read(dp->reconfigure_seq);
>  
> -    for (int i = 0; i < N_METER_LOCKS; ++i) {
> -        ovs_mutex_init_adaptive(&dp->meter_locks[i]);
> -    }
> +    dp_netdev_meter_init(&dp->meter_tbl);
>  
>      /* Disable upcalls by default. */
>      dp_netdev_disable_upcall(dp);
> @@ -1647,16 +1650,6 @@ dp_netdev_destroy_upcall_lock(struct dp_netdev *dp)
>      fat_rwlock_destroy(&dp->upcall_rwlock);
>  }
>  
> -static void
> -dp_delete_meter(struct dp_netdev *dp, uint32_t meter_id)
> -    OVS_REQUIRES(dp->meter_locks[meter_id % N_METER_LOCKS])
> -{
> -    if (dp->meters[meter_id]) {
> -        free(dp->meters[meter_id]);
> -        dp->meters[meter_id] = NULL;
> -    }
> -}
> -
>  /* Requires dp_netdev_mutex so that we can't get a new reference to 'dp'
>   * through the 'dp_netdevs' shash while freeing 'dp'. */
>  static void
> @@ -1694,16 +1687,7 @@ dp_netdev_free(struct dp_netdev *dp)
>      /* Upcalls must be disabled at this point */
>      dp_netdev_destroy_upcall_lock(dp);
>  
> -    int i;
> -
> -    for (i = 0; i < MAX_METERS; ++i) {
> -        meter_lock(dp, i);
> -        dp_delete_meter(dp, i);
> -        meter_unlock(dp, i);
> -    }
> -    for (i = 0; i < N_METER_LOCKS; ++i) {
> -        ovs_mutex_destroy(&dp->meter_locks[i]);
> -    }
> +    dp_netdev_meter_destroy(&dp->meter_tbl);
>  
>      free(dp->pmd_cmask);
>      free(CONST_CAST(char *, dp->name));
> @@ -5713,14 +5697,197 @@ dp_netdev_disable_upcall(struct dp_netdev *dp)
>  
>  
>  /* Meters */
> +static uint32_t
> +meter_hash(struct dp_meter_instance *ti, uint32_t id)
> +{
> +    uint32_t n_meters = ti->n_meters;
> +
> +    return id % n_meters;
> +}
> +
> +static void
> +dp_meter_free(struct dp_meter *meter)
> +{
> +    ovs_mutex_destroy(&meter->lock);
> +    free(meter);
> +}
> +
> +static struct dp_meter_instance *
> +dp_meter_instance_alloc(const uint32_t size)
> +{
> +    struct dp_meter_instance *ti;
> +
> +    ti = xzalloc(sizeof(*ti) + sizeof(struct dp_meter *) * size);
> +    ti->n_meters = size;
> +
> +    return ti;
> +}
> +
> +static void
> +dp_meter_instance_realloc(struct dp_meter_table *tbl, const uint32_t size)
> +{
> +    struct dp_meter_instance *new_ti;
> +    struct dp_meter_instance *ti;
> +    int n_meters;
> +    int i;
> +
> +    new_ti = dp_meter_instance_alloc(size);
> +
> +    ti = ovsrcu_get(struct dp_meter_instance *, &tbl->ti);
> +    n_meters = MIN(size, ti->n_meters);
> +
> +    for (i = 0; i < n_meters; i++) {
> +        if (ovsrcu_get(struct dp_meter *, &ti->dp_meters[i])) {
> +            new_ti->dp_meters[i] = ti->dp_meters[i];
> +        }
> +    }
> +
> +    ovsrcu_set(&tbl->ti, new_ti);
> +    ovsrcu_postpone(free, ti);
> +}
> +
> +static void
> +dp_meter_instance_insert(struct dp_meter_instance *ti,
> +                         struct dp_meter *meter)
> +{
> +    uint32_t hash;
> +
> +    hash = meter_hash(ti, meter->id);
> +    ovsrcu_set(&ti->dp_meters[hash], meter);
> +}
> +
> +static void
> +dp_meter_instance_remove(struct dp_meter_instance *ti,
> +                         struct dp_meter *meter)
> +{
> +    uint32_t hash;
> +
> +    hash = meter_hash(ti, meter->id);
> +    ovsrcu_set(&ti->dp_meters[hash], NULL);
> +}
> +
> +static void
> +dp_netdev_meter_init(struct dp_meter_table *tbl)
> +{
> +    struct dp_meter_instance *ti;
> +
> +    ti = dp_meter_instance_alloc(DP_METER_ARRAY_SIZE_MIN);
> +    ovsrcu_set(&tbl->ti, ti);
> +
> +    ovs_mutex_init(&tbl->lock);
> +    tbl->count = 0;
> +}
> +
> +static void
> +dp_netdev_meter_destroy(struct dp_meter_table *tbl)
> +{
> +    struct dp_meter_instance *ti;
> +    int i;
> +
> +    ti = ovsrcu_get(struct dp_meter_instance *, &tbl->ti);
> +    for (i = 0; i < ti->n_meters; i++) {
> +        struct dp_meter *meter;
> +
> +        meter = ovsrcu_get(struct dp_meter *, &ti->dp_meters[i]);
> +        if (meter) {
> +            ovsrcu_postpone(dp_meter_free, meter);
> +        }
> +    }
> +
> +    ovsrcu_postpone(free, ti);
> +    ovs_mutex_destroy(&tbl->lock);
> +}
> +
> +static struct dp_meter *
> +dp_meter_lookup(struct dp_meter_table *meter_tbl, uint32_t meter_id)
> +{
> +    struct dp_meter_instance *ti;
> +    struct dp_meter *meter;
> +    uint32_t hash;
> +
> +    ti = ovsrcu_get(struct dp_meter_instance *, &meter_tbl->ti);
> +    hash = meter_hash(ti, meter_id);
> +
> +    meter = ovsrcu_get(struct dp_meter *, &ti->dp_meters[hash]);
> +    if (meter && meter->id == meter_id) {
> +        return meter;
> +    }
> +
> +    return NULL;
> +}
> +
> +static void
> +dp_meter_detach_free(struct dp_meter_table *meter_tbl, uint32_t meter_id)
> +                     OVS_REQUIRES(meter_tbl->lock)
> +{
> +    struct dp_meter_instance *ti;
> +    struct dp_meter *meter;
> +
> +    meter = dp_meter_lookup(meter_tbl, meter_id);
> +    if (!meter) {
> +        return;
> +    }
> +
> +    ti = ovsrcu_get(struct dp_meter_instance *, &meter_tbl->ti);
> +    dp_meter_instance_remove(ti, meter);
> +    ovsrcu_postpone(dp_meter_free, meter);
> +
> +    meter_tbl->count--;
> +    /* Shrink the meter array if necessary. */
> +    if (ti->n_meters > DP_METER_ARRAY_SIZE_MIN &&
> +        meter_tbl->count <= (ti->n_meters / 4)) {
> +        int half_size = ti->n_meters / 2;
> +        int i;
> +
> +        /* Avoid hash collision, don't move slots to other place.
> +         * Make sure there are no references of meters in array
> +         * which will be released.
> +         */
> +        for (i = half_size; i < ti->n_meters; i++) {
> +            if (ovsrcu_get(struct dp_meter *, &ti->dp_meters[i])) {
> +                return;
> +            }
> +        }
> +
> +        dp_meter_instance_realloc(meter_tbl, half_size);
> +    }
> +}
> +
> +static int
> +dp_meter_attach(struct dp_meter_table *meter_tbl, struct dp_meter *meter)
> +                OVS_REQUIRES(meter_tbl->lock)
> +{
> +    struct dp_meter_instance *ti;
> +    uint32_t hash;
> +
> +    ti = ovsrcu_get(struct dp_meter_instance *, &meter_tbl->ti);
> +    hash = meter_hash(ti, meter->id);
> +
> +    if (OVS_UNLIKELY(ovsrcu_get(struct dp_meter *,
> +                                &ti->dp_meters[hash]))) {
> +        VLOG_WARN("When installing meter to %u/%u slot, but it's used.\n",
> +                  hash, ti->n_meters);
how about
VLOG_WANR("Failed to attach meter id %u to slot %u/%u.\n"

> +        return EBUSY;
> +    }
> +
> +    dp_meter_instance_insert(ti, meter);
> +
> +    meter_tbl->count++;
> +    if (meter_tbl->count >= ti->n_meters) {
> +        dp_meter_instance_realloc(meter_tbl, ti->n_meters *2);
space missing between "*2"

> +    }
> +
> +    return 0;
> +}
> +
>  static void
>  dpif_netdev_meter_get_features(const struct dpif * dpif OVS_UNUSED,
>                                 struct ofputil_meter_features *features)
>  {
> -    features->max_meters = MAX_METERS;
> +    features->max_meters = METER_ENTRY_MAX;
>      features->band_types = DP_SUPPORTED_METER_BAND_TYPES;
>      features->capabilities = DP_SUPPORTED_METER_FLAGS_MASK;
> -    features->max_bands = MAX_BANDS;
> +    features->max_bands = METER_BAND_MAX;
>      features->max_color = 0;
>  }
>  
> @@ -5742,14 +5909,13 @@ dp_netdev_run_meter(struct dp_netdev *dp, struct 
> dp_packet_batch *packets_,
>      uint32_t exceeded_rate[NETDEV_MAX_BURST];
>      int exceeded_pkt = cnt; /* First packet that exceeded a band rate. */
>  
> -    if (meter_id >= MAX_METERS) {
> +    if (meter_id >= METER_ENTRY_MAX) {
>          return;
>      }
>  
> -    meter_lock(dp, meter_id);
> -    meter = dp->meters[meter_id];
> +    meter = dp_meter_lookup(&dp->meter_tbl, meter_id);
>      if (!meter) {
> -        goto out;
> +        return;
>      }
>  
>      /* Initialize as negative values. */
> @@ -5757,6 +5923,7 @@ dp_netdev_run_meter(struct dp_netdev *dp, struct 
> dp_packet_batch *packets_,
>      /* Initialize as zeroes. */
>      memset(exceeded_rate, 0, cnt * sizeof *exceeded_rate);
>  
> +    ovs_mutex_lock(&meter->lock);

do you want to use multiple lock, like the current design?

>      /* All packets will hit the meter at the same time. */
>      long_delta_t = now / 1000 - meter->used / 1000; /* msec */
>  
> @@ -5874,8 +6041,8 @@ dp_netdev_run_meter(struct dp_netdev *dp, struct 
> dp_packet_batch *packets_,
>              dp_packet_batch_refill(packets_, packet, j);
>          }
>      }
> - out:
> -    meter_unlock(dp, meter_id);
> +
> +    ovs_mutex_unlock(&meter->lock);
>  }
>  
>  /* Meter set/get/del processing is still single-threaded. */
> @@ -5884,11 +6051,12 @@ dpif_netdev_meter_set(struct dpif *dpif, 
> ofproto_meter_id meter_id,
>                        struct ofputil_meter_config *config)
>  {
>      struct dp_netdev *dp = get_dp_netdev(dpif);
> +    struct dp_meter_table *meter_tbl = &dp->meter_tbl;
>      uint32_t mid = meter_id.uint32;
>      struct dp_meter *meter;
> -    int i;
> +    int err, i;
>  
> -    if (mid >= MAX_METERS) {
> +    if (mid >= METER_ENTRY_MAX) {
>          return EFBIG; /* Meter_id out of range. */
>      }
>  
> @@ -5896,7 +6064,7 @@ dpif_netdev_meter_set(struct dpif *dpif, 
> ofproto_meter_id meter_id,
>          return EBADF; /* Unsupported flags set */
>      }
>  
> -    if (config->n_bands > MAX_BANDS) {
> +    if (config->n_bands > METER_BAND_MAX) {
>          return EINVAL;
>      }
>  
> @@ -5917,6 +6085,8 @@ dpif_netdev_meter_set(struct dpif *dpif, 
> ofproto_meter_id meter_id,
>      meter->n_bands = config->n_bands;
>      meter->max_delta_t = 0;
>      meter->used = time_usec();
> +    meter->id = mid;
> +    ovs_mutex_init(&meter->lock);
>  
>      /* set up bands */
>      for (i = 0; i < config->n_bands; ++i) {
> @@ -5942,12 +6112,22 @@ dpif_netdev_meter_set(struct dpif *dpif, 
> ofproto_meter_id meter_id,
>          }
>      }
>  
> -    meter_lock(dp, mid);
> -    dp_delete_meter(dp, mid); /* Free existing meter, if any */
> -    dp->meters[mid] = meter;
> -    meter_unlock(dp, mid);
> +    ovs_mutex_lock(&meter_tbl->lock);
> +
> +    dp_meter_detach_free(meter_tbl, mid); /* Free existing meter, if any */
> +    err = dp_meter_attach(meter_tbl, meter);
> +    if (err) {
> +        goto unlock_out;
> +    }
> +
> +    ovs_mutex_unlock(&meter_tbl->lock);
>  
>      return 0;
> +
> +unlock_out:
> +    ovs_mutex_unlock(&meter_tbl->lock);
> +    dp_meter_free(meter);
> +    return err;
>  }
>  
>  static int
> @@ -5955,23 +6135,24 @@ dpif_netdev_meter_get(const struct dpif *dpif,
>                        ofproto_meter_id meter_id_,
>                        struct ofputil_meter_stats *stats, uint16_t n_bands)
>  {
> -    const struct dp_netdev *dp = get_dp_netdev(dpif);
We can still keep the 'const' 

> +    struct dp_netdev *dp = get_dp_netdev(dpif);
>      uint32_t meter_id = meter_id_.uint32;
> -    int retval = 0;
> +    const struct dp_meter *meter;
>  
> -    if (meter_id >= MAX_METERS) {
> +    if (meter_id >= METER_ENTRY_MAX) {
>          return EFBIG;
>      }
>  
> -    meter_lock(dp, meter_id);
> -    const struct dp_meter *meter = dp->meters[meter_id];
> +    meter = dp_meter_lookup(&dp->meter_tbl, meter_id);
>      if (!meter) {
> -        retval = ENOENT;
> -        goto done;
> +        return ENOENT;
>      }
> +
>      if (stats) {
>          int i = 0;
>  
> +        ovs_mutex_lock(&meter->lock);
> +
remove extra newline
>          stats->packet_in_count = meter->packet_count;
>          stats->byte_in_count = meter->byte_count;
>  
> @@ -5980,12 +6161,12 @@ dpif_netdev_meter_get(const struct dpif *dpif,
>              stats->bands[i].byte_count = meter->bands[i].byte_count;
>          }
>  
> +        ovs_mutex_unlock(&meter->lock);
> +
>          stats->n_bands = i;
>      }
>  
> -done:
> -    meter_unlock(dp, meter_id);
> -    return retval;
> +    return 0;
>  }
>  
>  static int
> @@ -5994,15 +6175,16 @@ dpif_netdev_meter_del(struct dpif *dpif,
>                        struct ofputil_meter_stats *stats, uint16_t n_bands)
>  {
>      struct dp_netdev *dp = get_dp_netdev(dpif);
> +    struct dp_meter_table *meter_tbl = &dp->meter_tbl;
>      int error;
>  
>      error = dpif_netdev_meter_get(dpif, meter_id_, stats, n_bands);
>      if (!error) {
>          uint32_t meter_id = meter_id_.uint32;
>  
> -        meter_lock(dp, meter_id);
> -        dp_delete_meter(dp, meter_id);
> -        meter_unlock(dp, meter_id);
> +        ovs_mutex_lock(&meter_tbl->lock);
> +        dp_meter_detach_free(meter_tbl, meter_id);
> +        ovs_mutex_unlock(&meter_tbl->lock);
>      }
>      return error;
>  }
> -- 
> 1.8.3.1
> 
Thanks,
William
_______________________________________________
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev

Reply via email to