On Mon, Mar 16, 2020 at 12:06 PM Tonghao Zhang <xiangxia.m....@gmail.com> wrote:
>
> On Mon, Mar 16, 2020 at 11:07 AM Yanqin Wei <yanqin....@arm.com> wrote:
> >
> > Hi Xiangxia,
> >
> > The meter id is allocated by id_pool,  which can always return the lowest 
> > available id.
> Yes, I added 70000+ meters, and it works fine.
> > There is a light scalable data struct which supports direct address lookup. 
> > It can achieve several times performance improvement than cmap_find. And it 
> > has not copy memory when expanding.
> > https://patchwork.ozlabs.org/patch/1253447/
> >
> > I suggest using this light data struct for >65535 meter instance. Would you 
> > like to take a look at it?
> Yes, and I also wait Ilya to review, and offer a suggestion.
Hi maintainers,
Will you have a plan to help me to review those patches, thanks.

http://patchwork.ozlabs.org/patch/1254968/
http://patchwork.ozlabs.org/patch/1254969/
http://patchwork.ozlabs.org/patch/1255408/

> > Best Regards,
> > Wei Yanqin
> >
> > > -----Original Message-----
> > > From: dev <ovs-dev-boun...@openvswitch.org> On Behalf Of
> > > xiangxia.m....@gmail.com
> > > Sent: Sunday, March 15, 2020 2:43 PM
> > > To: d...@openvswitch.org
> > > Cc: Ilya Maximets <i.maxim...@ovn.org>
> > > Subject: [ovs-dev] [PATCH v2 1/2] dpif-netdev: Expand the meter capacity
> > > using cmap
> > >
> > > From: Tonghao Zhang <xiangxia.m....@gmail.com>
> > >
> > > For now, ovs-vswitchd use the array of the dp_meter struct to store 
> > > meter's
> > > data, and at most, there are only 65536 (defined by MAX_METERS) meters
> > > that can be used. But in some case, for example, in the edge gateway, we
> > > should use 200,000+, at least, meters for IP address bandwidth limitation.
> > > Every one IP address will use two meters for its rx and tx path[1]. In 
> > > other way,
> > > ovs-vswitchd should support meter-offload (rte_mtr_xxx api introduced by
> > > dpdk.), but there are more than
> > > 65536 meters in the hardware, such as Mellanox ConnectX-6.
> > >
> > > This patch use cmap to manage the meter, instead of the array.
> > >
> > > * Insertion performance, ovs-ofctl add-meter 1000+ meters,
> > >   the cmap takes abount 4000ms, as same as previous implementation.
> > > * Lookup performance in datapath, we add 1000+ meter which rate is
> > >   10G (the NIC cards are 10Gb, so netdev-datapath will not drop the
> > >   packets.), and a flow which only forwarding the packets from p0
> > >   to p1, with meter action[2]. On other server, the pktgen-dpdk
> > >   will generate 64B packets to p0.
> > >   The forwarding performance is 4,814,400 pps. Without this path,
> > >   4,935,584 pps. There are about 1% performance loss. For addressing
> > >   this issue, next patch add a meter cache.
> > >
> > > [1].
> > > $ in_port=p0,ip,ip_dst=1.1.1.x action=meter:n,output:p1
> > > $ in_port=p1,ip,ip_src=1.1.1.x action=meter:m,output:p0
> > >
> > > [2].
> > > $ in_port=p0 action=meter:100,output:p1
> > >
> > > Cc: Ben Pfaff <b...@ovn.org>
> > > Cc: Jarno Rajahalme <ja...@ovn.org>
> > > Cc: Ilya Maximets <i.maxim...@ovn.org>
> > > Cc: Andy Zhou <az...@ovn.org>
> > > Signed-off-by: Tonghao Zhang <xiangxia.m....@gmail.com>
> > > ---
> > >  lib/dpif-netdev.c | 199 
> > > +++++++++++++++++++++++++++++++++++-----------------
> > > --
> > >  1 file changed, 130 insertions(+), 69 deletions(-)
> > >
> > > diff --git a/lib/dpif-netdev.c b/lib/dpif-netdev.c index d393aab..5474d52
> > > 100644
> > > --- a/lib/dpif-netdev.c
> > > +++ b/lib/dpif-netdev.c
> > > @@ -98,9 +98,11 @@ DEFINE_STATIC_PER_THREAD_DATA(uint32_t,
> > > recirc_depth, 0)
> > >
> > >  /* Configuration parameters. */
> > >  enum { MAX_FLOWS = 65536 };     /* Maximum number of flows in flow table.
> > > */
> > > -enum { MAX_METERS = 65536 };    /* Maximum number of meters. */
> > > -enum { MAX_BANDS = 8 };         /* Maximum number of bands / meter. */
> > > -enum { N_METER_LOCKS = 64 };    /* Maximum number of meters. */
> > > +
> > > +/* Maximum number of meters in the table. */ #define METER_ENTRY_MAX
> > > (1
> > > +<< 19)
> > > +/* Maximum number of bands / meter. */
> > > +#define METER_BAND_MAX (8)
> > >
> > >  COVERAGE_DEFINE(datapath_drop_meter);
> > >  COVERAGE_DEFINE(datapath_drop_upcall_error);
> > > @@ -280,6 +282,9 @@ struct dp_meter_band {  };
> > >
> > >  struct dp_meter {
> > > +    struct cmap_node node;
> > > +    struct ovs_mutex lock;
> > > +    uint32_t id;
> > >      uint16_t flags;
> > >      uint16_t n_bands;
> > >      uint32_t max_delta_t;
> > > @@ -289,6 +294,12 @@ struct dp_meter {
> > >      struct dp_meter_band bands[];
> > >  };
> > >
> > > +struct dp_netdev_meter {
> > > +    struct cmap table OVS_GUARDED;
> > > +    struct ovs_mutex lock;  /* Used for meter table. */
> > > +    uint32_t hash_basis;
> > > +};
> > > +
> > >  struct pmd_auto_lb {
> > >      bool auto_lb_requested;     /* Auto load balancing requested by 
> > > user. */
> > >      bool is_enabled;            /* Current status of Auto load 
> > > balancing. */
> > > @@ -329,8 +340,7 @@ struct dp_netdev {
> > >      atomic_uint32_t tx_flush_interval;
> > >
> > >      /* Meters. */
> > > -    struct ovs_mutex meter_locks[N_METER_LOCKS];
> > > -    struct dp_meter *meters[MAX_METERS]; /* Meter bands. */
> > > +    struct dp_netdev_meter *meter;
> > >
> > >      /* Probability of EMC insertions is a factor of 'emc_insert_min'.*/
> > >      OVS_ALIGNED_VAR(CACHE_LINE_SIZE) atomic_uint32_t emc_insert_min;
> > > @@ -378,19 +388,6 @@ struct dp_netdev {
> > >      struct pmd_auto_lb pmd_alb;
> > >  };
> > >
> > > -static void meter_lock(const struct dp_netdev *dp, uint32_t meter_id)
> > > -    OVS_ACQUIRES(dp->meter_locks[meter_id % N_METER_LOCKS])
> > > -{
> > > -    ovs_mutex_lock(&dp->meter_locks[meter_id % N_METER_LOCKS]);
> > > -}
> > > -
> > > -static void meter_unlock(const struct dp_netdev *dp, uint32_t meter_id)
> > > -    OVS_RELEASES(dp->meter_locks[meter_id % N_METER_LOCKS])
> > > -{
> > > -    ovs_mutex_unlock(&dp->meter_locks[meter_id % N_METER_LOCKS]);
> > > -}
> > > -
> > > -
> > >  static struct dp_netdev_port *dp_netdev_lookup_port(const struct
> > > dp_netdev *dp,
> > >                                                      odp_port_t)
> > >      OVS_REQUIRES(dp->port_mutex);
> > > @@ -1523,6 +1520,84 @@ choose_port(struct dp_netdev *dp, const char
> > > *name)
> > >      return ODPP_NONE;
> > >  }
> > >
> > > +static uint32_t
> > > +dp_meter_hash(uint32_t meter_id, uint32_t basis) {
> > > +    return hash_bytes32(&meter_id, sizeof meter_id, basis); }
> > > +
> > > +static void
> > > +dp_netdev_meter_init(struct dp_netdev *dp) {
> > > +    struct dp_netdev_meter *dp_meter;
> > > +
> > > +    dp_meter = xmalloc(sizeof *dp_meter);
> > > +
> > > +    cmap_init(&dp_meter->table);
> > > +    ovs_mutex_init(&dp_meter->lock);
> > > +    dp_meter->hash_basis = random_uint32();
> > > +
> > > +    dp->meter = dp_meter;
> > > +}
> > > +
> > > +static void
> > > +dp_netdev_meter_destroy(struct dp_netdev *dp) {
> > > +    struct dp_netdev_meter *dp_meter = dp->meter;
> > > +    struct dp_meter *meter;
> > > +
> > > +    ovs_mutex_lock(&dp_meter->lock);
> > > +    CMAP_FOR_EACH (meter, node, &dp_meter->table) {
> > > +            cmap_remove(&dp_meter->table, &meter->node,
> > > +                        dp_meter_hash(meter->id,
> > > + dp_meter->hash_basis));
> > > +
> > > +            ovsrcu_postpone(free, meter);
> > > +    }
> > > +
> > > +    cmap_destroy(&dp_meter->table);
> > > +    ovs_mutex_unlock(&dp_meter->lock);
> > > +
> > > +    ovs_mutex_destroy(&dp_meter->lock);
> > > +    free(dp_meter);
> > > +}
> > > +
> > > +static struct dp_meter*
> > > +dp_meter_lookup(struct dp_netdev_meter *dp_meter, uint32_t meter_id) {
> > > +    uint32_t hash = dp_meter_hash(meter_id, dp_meter->hash_basis);
> > > +    struct dp_meter *meter;
> > > +
> > > +    CMAP_FOR_EACH_WITH_HASH (meter, node, hash, &dp_meter->table) {
> > > +        if (meter->id == meter_id) {
> > > +            return meter;
> > > +        }
> > > +    }
> > > +
> > > +    return NULL;
> > > +}
> > > +
> > > +static void
> > > +dp_meter_detach_free(struct dp_netdev_meter *dp_meter, uint32_t
> > > meter_id)
> > > +                     OVS_REQUIRES(dp_meter->lock) {
> > > +    struct dp_meter *meter;
> > > +
> > > +    meter = dp_meter_lookup(dp_meter, meter_id);
> > > +    if (meter) {
> > > +            cmap_remove(&dp_meter->table, &meter->node,
> > > +                        dp_meter_hash(meter_id, dp_meter->hash_basis));
> > > +            ovsrcu_postpone(free, meter);
> > > +    }
> > > +}
> > > +
> > > +static void
> > > +dp_meter_attach(struct dp_netdev_meter *dp_meter, struct dp_meter
> > > *meter)
> > > +                OVS_REQUIRES(dp_meter->lock) {
> > > +    cmap_insert(&dp_meter->table, &meter->node,
> > > +                dp_meter_hash(meter->id, dp_meter->hash_basis)); }
> > > +
> > >  static int
> > >  create_dp_netdev(const char *name, const struct dpif_class *class,
> > >                   struct dp_netdev **dpp) @@ -1556,9 +1631,8 @@
> > > create_dp_netdev(const char *name, const struct dpif_class *class,
> > >      dp->reconfigure_seq = seq_create();
> > >      dp->last_reconfigure_seq = seq_read(dp->reconfigure_seq);
> > >
> > > -    for (int i = 0; i < N_METER_LOCKS; ++i) {
> > > -        ovs_mutex_init_adaptive(&dp->meter_locks[i]);
> > > -    }
> > > +    /* Init meter resource. */
> > > +    dp_netdev_meter_init(dp);
> > >
> > >      /* Disable upcalls by default. */
> > >      dp_netdev_disable_upcall(dp);
> > > @@ -1647,16 +1721,6 @@ dp_netdev_destroy_upcall_lock(struct dp_netdev
> > > *dp)
> > >      fat_rwlock_destroy(&dp->upcall_rwlock);
> > >  }
> > >
> > > -static void
> > > -dp_delete_meter(struct dp_netdev *dp, uint32_t meter_id)
> > > -    OVS_REQUIRES(dp->meter_locks[meter_id % N_METER_LOCKS])
> > > -{
> > > -    if (dp->meters[meter_id]) {
> > > -        free(dp->meters[meter_id]);
> > > -        dp->meters[meter_id] = NULL;
> > > -    }
> > > -}
> > > -
> > >  /* Requires dp_netdev_mutex so that we can't get a new reference to 'dp'
> > >   * through the 'dp_netdevs' shash while freeing 'dp'. */  static void @@ 
> > > -
> > > 1694,16 +1758,7 @@ dp_netdev_free(struct dp_netdev *dp)
> > >      /* Upcalls must be disabled at this point */
> > >      dp_netdev_destroy_upcall_lock(dp);
> > >
> > > -    int i;
> > > -
> > > -    for (i = 0; i < MAX_METERS; ++i) {
> > > -        meter_lock(dp, i);
> > > -        dp_delete_meter(dp, i);
> > > -        meter_unlock(dp, i);
> > > -    }
> > > -    for (i = 0; i < N_METER_LOCKS; ++i) {
> > > -        ovs_mutex_destroy(&dp->meter_locks[i]);
> > > -    }
> > > +    dp_netdev_meter_destroy(dp);
> > >
> > >      free(dp->pmd_cmask);
> > >      free(CONST_CAST(char *, dp->name)); @@ -5708,10 +5763,10 @@ static
> > > void  dpif_netdev_meter_get_features(const struct dpif * dpif OVS_UNUSED,
> > >                                 struct ofputil_meter_features *features)  
> > > {
> > > -    features->max_meters = MAX_METERS;
> > > +    features->max_meters = METER_ENTRY_MAX;
> > >      features->band_types = DP_SUPPORTED_METER_BAND_TYPES;
> > >      features->capabilities = DP_SUPPORTED_METER_FLAGS_MASK;
> > > -    features->max_bands = MAX_BANDS;
> > > +    features->max_bands = METER_BAND_MAX;
> > >      features->max_color = 0;
> > >  }
> > >
> > > @@ -5732,14 +5787,13 @@ dp_netdev_run_meter(struct dp_netdev *dp,
> > > struct dp_packet_batch *packets_,
> > >      uint32_t exceeded_rate[NETDEV_MAX_BURST];
> > >      int exceeded_pkt = cnt; /* First packet that exceeded a band rate. */
> > >
> > > -    if (meter_id >= MAX_METERS) {
> > > +    if (meter_id >= METER_ENTRY_MAX) {
> > >          return;
> > >      }
> > >
> > > -    meter_lock(dp, meter_id);
> > > -    meter = dp->meters[meter_id];
> > > +    meter = dp_meter_lookup(dp->meter, meter_id);
> > >      if (!meter) {
> > > -        goto out;
> > > +        return;
> > >      }
> > >
> > >      /* Initialize as negative values. */ @@ -5747,6 +5801,7 @@
> > > dp_netdev_run_meter(struct dp_netdev *dp, struct dp_packet_batch
> > > *packets_,
> > >      /* Initialize as zeroes. */
> > >      memset(exceeded_rate, 0, cnt * sizeof *exceeded_rate);
> > >
> > > +    ovs_mutex_lock(&meter->lock);
> > >      /* All packets will hit the meter at the same time. */
> > >      long_delta_t = now / 1000 - meter->used / 1000; /* msec */
> > >
> > > @@ -5860,8 +5915,8 @@ dp_netdev_run_meter(struct dp_netdev *dp, struct
> > > dp_packet_batch *packets_,
> > >              dp_packet_batch_refill(packets_, packet, j);
> > >          }
> > >      }
> > > - out:
> > > -    meter_unlock(dp, meter_id);
> > > +
> > > +    ovs_mutex_unlock(&meter->lock);
> > >  }
> > >
> > >  /* Meter set/get/del processing is still single-threaded. */ @@ -5870,11
> > > +5925,12 @@ dpif_netdev_meter_set(struct dpif *dpif, ofproto_meter_id
> > > meter_id,
> > >                        struct ofputil_meter_config *config)  {
> > >      struct dp_netdev *dp = get_dp_netdev(dpif);
> > > +    struct dp_netdev_meter *dp_meter = dp->meter;
> > >      uint32_t mid = meter_id.uint32;
> > >      struct dp_meter *meter;
> > >      int i;
> > >
> > > -    if (mid >= MAX_METERS) {
> > > +    if (mid >= METER_ENTRY_MAX) {
> > >          return EFBIG; /* Meter_id out of range. */
> > >      }
> > >
> > > @@ -5882,7 +5938,7 @@ dpif_netdev_meter_set(struct dpif *dpif,
> > > ofproto_meter_id meter_id,
> > >          return EBADF; /* Unsupported flags set */
> > >      }
> > >
> > > -    if (config->n_bands > MAX_BANDS) {
> > > +    if (config->n_bands > METER_BAND_MAX) {
> > >          return EINVAL;
> > >      }
> > >
> > > @@ -5903,6 +5959,8 @@ dpif_netdev_meter_set(struct dpif *dpif,
> > > ofproto_meter_id meter_id,
> > >      meter->n_bands = config->n_bands;
> > >      meter->max_delta_t = 0;
> > >      meter->used = time_usec();
> > > +    meter->id = mid;
> > > +    ovs_mutex_init(&meter->lock);
> > >
> > >      /* set up bands */
> > >      for (i = 0; i < config->n_bands; ++i) { @@ -5928,10 +5986,12 @@
> > > dpif_netdev_meter_set(struct dpif *dpif, ofproto_meter_id meter_id,
> > >          }
> > >      }
> > >
> > > -    meter_lock(dp, mid);
> > > -    dp_delete_meter(dp, mid); /* Free existing meter, if any */
> > > -    dp->meters[mid] = meter;
> > > -    meter_unlock(dp, mid);
> > > +    ovs_mutex_lock(&dp_meter->lock);
> > > +
> > > +    dp_meter_detach_free(dp_meter, mid); /* Free existing meter, if any 
> > > */
> > > +    dp_meter_attach(dp_meter, meter);
> > > +
> > > +    ovs_mutex_unlock(&dp_meter->lock);
> > >
> > >      return 0;
> > >  }
> > > @@ -5941,22 +6001,23 @@ dpif_netdev_meter_get(const struct dpif *dpif,
> > >                        ofproto_meter_id meter_id_,
> > >                        struct ofputil_meter_stats *stats, uint16_t 
> > > n_bands)  {
> > > -    const struct dp_netdev *dp = get_dp_netdev(dpif);
> > > +    struct dp_netdev *dp = get_dp_netdev(dpif);
> > >      uint32_t meter_id = meter_id_.uint32;
> > > -    int retval = 0;
> > > +    const struct dp_meter *meter;
> > >
> > > -    if (meter_id >= MAX_METERS) {
> > > +    if (meter_id >= METER_ENTRY_MAX) {
> > >          return EFBIG;
> > >      }
> > >
> > > -    meter_lock(dp, meter_id);
> > > -    const struct dp_meter *meter = dp->meters[meter_id];
> > > +    meter = dp_meter_lookup(dp->meter, meter_id);
> > >      if (!meter) {
> > > -        retval = ENOENT;
> > > -        goto done;
> > > +        return ENOENT;
> > >      }
> > > +
> > >      if (stats) {
> > > -        int i = 0;
> > > +        int i;
> > > +
> > > +        ovs_mutex_lock(&meter->lock);
> > >
> > >          stats->packet_in_count = meter->packet_count;
> > >          stats->byte_in_count = meter->byte_count; @@ -5966,12 +6027,11 @@
> > > dpif_netdev_meter_get(const struct dpif *dpif,
> > >              stats->bands[i].byte_count = meter->bands[i].byte_count;
> > >          }
> > >
> > > +        ovs_mutex_unlock(&meter->lock);
> > >          stats->n_bands = i;
> > >      }
> > >
> > > -done:
> > > -    meter_unlock(dp, meter_id);
> > > -    return retval;
> > > +    return 0;
> > >  }
> > >
> > >  static int
> > > @@ -5980,15 +6040,16 @@ dpif_netdev_meter_del(struct dpif *dpif,
> > >                        struct ofputil_meter_stats *stats, uint16_t 
> > > n_bands)  {
> > >      struct dp_netdev *dp = get_dp_netdev(dpif);
> > > +    struct dp_netdev_meter *dp_meter = dp->meter;
> > >      int error;
> > >
> > >      error = dpif_netdev_meter_get(dpif, meter_id_, stats, n_bands);
> > >      if (!error) {
> > >          uint32_t meter_id = meter_id_.uint32;
> > >
> > > -        meter_lock(dp, meter_id);
> > > -        dp_delete_meter(dp, meter_id);
> > > -        meter_unlock(dp, meter_id);
> > > +        ovs_mutex_lock(&dp_meter->lock);
> > > +        dp_meter_detach_free(dp_meter, meter_id);
> > > +        ovs_mutex_unlock(&dp_meter->lock);
> > >      }
> > >      return error;
> > >  }
> > > --
> > > 1.8.3.1
> > >
> > > _______________________________________________
> > > dev mailing list
> > > d...@openvswitch.org
> > > https://mail.openvswitch.org/mailman/listinfo/ovs-dev
> > IMPORTANT NOTICE: The contents of this email and any attachments are 
> > confidential and may also be privileged. If you are not the intended 
> > recipient, please notify the sender immediately and do not disclose the 
> > contents to any other person, use it for any purpose, or store or copy the 
> > information in any medium. Thank you.
>
>
>
> --
> Thanks,
> Tonghao



-- 
Best regards, Tonghao
_______________________________________________
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev

Reply via email to