Thanks for the patch comments inline
On Thu, Aug 1, 2019 at 3:10 PM Yi-Hung Wei <yihung....@gmail.com> wrote: > This patch consumes the CT_Zone and CT_Timeout_Policy tables, maintains > the zone-based timeout policy in the vswitchd. Whenever there is a > database change, vswitchd will read the datapath, CT_Zone, and > CT_Timeout_Policy tables from ovsdb to detect if the is any timeout > policy changes. > > If a new timeout policy is added, it stores the information in > per datapath type internal datapath structure in struct dpif-backer, > and pushes down the conntrack timeout policy into the datapath via dpif > interface. > > If a timeout policy is no longer used, vswitchd may not be able to > remove it from datapath immediately since the datapath flow may still > reference that. Instead, we keep an timeout policy kill list, that > vswitchd will goes back to the list regularly and try to kill the > unused timeout policies. > > Signed-off-by: Yi-Hung Wei <yihung....@gmail.com> > --- > ofproto/ofproto-dpif.c | 266 > +++++++++++++++++++++++++++++++++++++++++++++ > ofproto/ofproto-dpif.h | 10 ++ > ofproto/ofproto-provider.h | 8 ++ > ofproto/ofproto.c | 20 ++++ > ofproto/ofproto.h | 4 + > vswitchd/bridge.c | 41 +++++++ > 6 files changed, 349 insertions(+) > > diff --git a/ofproto/ofproto-dpif.c b/ofproto/ofproto-dpif.c > index 751535249e21..6336494e0bc8 100644 > --- a/ofproto/ofproto-dpif.c > +++ b/ofproto/ofproto-dpif.c > @@ -156,6 +156,25 @@ struct ofport_dpif { > size_t n_qdscp; > }; > > +struct ct_timeout_policy { > + struct uuid uuid; > + unsigned int last_used_seqno; > + struct ct_dpif_timeout_policy cdtp; > + struct cmap_node node; /* Element in struct dpif_backer's > + * "ct_tps" cmap. */ > This looks like a layering violation should be in dpif-netlink or netlink-conntrack for kernel side > + struct ovs_list list_node; /* Element in struct dpif_backer's > + * "ct_tp_kill_list" list. */ > +}; > + > +struct ct_zone { > + uint16_t id; > + unsigned int last_used_seqno; > + struct uuid tp_uuid; /* uuid that identifies a timeout > policy in > + * struct dpif_backer's "ct_tps" > cmap. */ > + struct cmap_node node; /* Element in struct dpif_backer's > + * "ct_zones" cmap. */ > +}; > + > static odp_port_t ofp_port_to_odp_port(const struct ofproto_dpif *, > ofp_port_t); > > @@ -196,6 +215,8 @@ static struct hmap all_ofproto_dpifs_by_uuid = > > static bool ofproto_use_tnl_push_pop = true; > static void ofproto_unixctl_init(void); > +static void destroy_ct_zone_timeout_policy(struct dpif_backer *backer); > +static void init_ct_zone_timeout_policy(struct dpif_backer *backer); > > static inline struct ofproto_dpif * > ofproto_dpif_cast(const struct ofproto *ofproto) > @@ -683,6 +704,7 @@ close_dpif_backer(struct dpif_backer *backer, bool del) > } > dpif_close(backer->dpif); > id_pool_destroy(backer->meter_ids); > + destroy_ct_zone_timeout_policy(backer); > free(backer); > } > > @@ -694,6 +716,8 @@ struct odp_garbage { > > static void check_support(struct dpif_backer *backer); > > +#define MAX_TIMEOUT_POLICY_ID UINT32_MAX > + > static int > open_dpif_backer(const char *type, struct dpif_backer **backerp) > { > @@ -811,6 +835,8 @@ open_dpif_backer(const char *type, struct dpif_backer > **backerp) > backer->meter_ids = NULL; > } > > + init_ct_zone_timeout_policy(backer); > + > /* Make a pristine snapshot of 'support' into 'boottime_support'. > * 'boottime_support' can be checked to prevent 'support' to be > changed > * beyond the datapath capabilities. In case 'support' is changed by > @@ -5086,6 +5112,244 @@ ct_flush(const struct ofproto *ofproto_, const > uint16_t *zone) > ct_dpif_flush(ofproto->backer->dpif, zone, NULL); > } > > +static struct ct_zone * > +ct_zone_lookup(struct cmap *ct_zones, uint16_t zone_id) > +{ > + struct ct_zone *zone; > + > + CMAP_FOR_EACH_WITH_HASH (zone, node, hash_int(zone_id, 0), ct_zones) { > + if (zone->id == zone_id) { > + return zone; > + } > + } > + return NULL; > +} > + > +static struct ct_zone * > +ct_zone_alloc(uint16_t zone_id) > +{ > + struct ct_zone *zone; > + > + zone = xzalloc(sizeof *zone); > + zone->id = zone_id; > + > + return zone; > +} > + > +static void > +ct_zone_remove_and_destroy(struct dpif_backer *backer, struct ct_zone > *zone) > +{ > + cmap_remove(&backer->ct_zones, &zone->node, hash_int(zone->id, 0)); > + ovsrcu_postpone(free, zone); > +} > + > +static struct ct_timeout_policy * > +ct_timeout_policy_lookup(struct cmap *ct_tps, struct uuid *uuid) > +{ > + struct ct_timeout_policy *tp; > + > + CMAP_FOR_EACH_WITH_HASH (tp, node, uuid_hash(uuid), ct_tps) { > + if (uuid_equals(&tp->uuid, uuid)) { > + return tp; > + } > + } > + return NULL; > +} > + > +static struct ct_timeout_policy * > +ct_timeout_policy_alloc(struct ovsrec_ct_timeout_policy *tp_cfg, > + struct id_pool *tp_ids) > +{ > + struct ct_timeout_policy *tp; > + size_t i; > + > + tp = xzalloc(sizeof *tp); > + tp->uuid = tp_cfg->header_.uuid; > + for (i = 0; i < tp_cfg->n_timeouts; i++) { > + ct_dpif_set_timeout_policy_attr_by_name(&tp->cdtp, > + tp_cfg->key_timeouts[i], tp_cfg->value_timeouts[i]); > + } > + if (!id_pool_alloc_id(tp_ids, &tp->cdtp.id)) { > + VLOG_WARN_RL(&rl, "failed to allocate timeout policy id."); > + free(tp); > + return NULL; > + } > + > + return tp; > +} > + > +static void > +ct_timeout_policy_destroy(struct dpif_backer *backer, > + struct ct_timeout_policy *tp) > +{ > + id_pool_free_id(backer->timeout_policy_ids, tp->cdtp.id); > + ovsrcu_postpone(free, tp); > +} > + > +static bool > +ct_timeout_policy_update(struct ovsrec_ct_timeout_policy *tp_cfg, > + struct ct_timeout_policy *tp) > +{ > + size_t i; > + bool changed = false; > + > + for (i = 0; i < tp_cfg->n_timeouts; i++) { > + changed |= ct_dpif_set_timeout_policy_attr_by_name(&tp->cdtp, > + tp_cfg->key_timeouts[i], > tp_cfg->value_timeouts[i]); > + } > + return changed; > +} > + > +static void > +init_ct_zone_timeout_policy(struct dpif_backer *backer) > +{ > + cmap_init(&backer->ct_zones); > + cmap_init(&backer->ct_tps); > + backer->timeout_policy_ids = id_pool_create(0, MAX_TIMEOUT_POLICY_ID); > + ovs_list_init(&backer->ct_tp_kill_list); > + > + /* Remove existing timeout policy from datapath. */ > + struct ct_dpif_timeout_policy cdtp; > + struct ct_timeout_policy *tp; > + void *state; > + int err; > don't need 'err' > + > + err = ct_dpif_timeout_policy_dump_start(backer->dpif, &state); > + if (err) { > + return; > + } > + > + while (!(err = ct_dpif_timeout_policy_dump_next(backer->dpif, state, > + &cdtp))) { > + tp = xzalloc(sizeof *tp); > + tp->cdtp = cdtp; > + id_pool_add(backer->timeout_policy_ids, cdtp.id); > + ovs_list_insert(&backer->ct_tp_kill_list, &tp->list_node); > + } > + > + ct_dpif_timeout_policy_dump_done(backer->dpif, state); > +} > + > +static void > +destroy_ct_zone_timeout_policy(struct dpif_backer *backer) > +{ > + struct ct_timeout_policy *tp; > + struct ct_zone *zone; > + > + CMAP_FOR_EACH (zone, node, &backer->ct_zones) { > + ct_zone_remove_and_destroy(backer, zone); > + } > + > + CMAP_FOR_EACH (tp, node, &backer->ct_tps) { > + cmap_remove(&backer->ct_tps, &tp->node, uuid_hash(&tp->uuid)); > + ct_timeout_policy_destroy(backer, tp); > + } > + > + cmap_destroy(&backer->ct_zones); > + cmap_destroy(&backer->ct_tps); > + id_pool_destroy(backer->timeout_policy_ids); > +} > + > +static void > +ct_zone_timeout_policy_revalidate(struct dpif_backer *backer, > + unsigned int idl_seqno) > Looks like layering violation 'implementation' should be in dpif-netlink or netlink-conntrack for kernel side > +{ > + struct ct_timeout_policy *tp; > + struct ct_zone *zone; > + > + CMAP_FOR_EACH (zone, node, &backer->ct_zones) { > + if (zone->last_used_seqno != idl_seqno) { > + ct_zone_remove_and_destroy(backer, zone); > + } > + } > + > + CMAP_FOR_EACH (tp, node, &backer->ct_tps) { > + if (tp->last_used_seqno != idl_seqno) { > + /* Even timeout policy is not referenced by any ct_zone in the > + * database, vswitchd may not be able to delete it right away > + * since the datapath flow may still reference to the timeout > + * policy. Move the timeout policy to ct_tp_kill_list and try > to > + * delete it when possible. */ > + cmap_remove(&backer->ct_tps, &tp->node, uuid_hash(&tp->uuid)); > + ovs_list_push_back(&backer->ct_tp_kill_list, &tp->list_node); > + } > + } > +} > + > +static void > +ct_zone_timeout_policy_sweep(const struct ofproto *ofproto) > Looks like layering violation implementation should be in dpif-netlink or netlink-conntrack for kernel side > +{ > + struct dpif_backer *backer = ofproto_dpif_cast(ofproto)->backer; > + struct ct_timeout_policy *tp; > + int err; > + > + if (!ovs_list_is_empty(&backer->ct_tp_kill_list)) { > + LIST_FOR_EACH (tp, list_node, &backer->ct_tp_kill_list) { > + err = ct_dpif_del_timeout_policy(backer->dpif, tp->cdtp.id); > + if (!err) { > + ovs_list_remove(&tp->list_node); > + ct_timeout_policy_destroy(backer, tp); > + } else { > + VLOG_INFO_RL(&rl, "Failed to delete timeout policy id = " > + "%"PRIu32" %s", tp->cdtp.id, ovs_strerror(err)); > + } > + } > + } > +} > + > +static void > +ct_zone_timeout_policy_reconfig(const struct ofproto *ofproto, > + const struct ovsrec_datapath *dp_cfg, > + unsigned int idl_seqno) > +{ > + struct dpif_backer *backer = ofproto_dpif_cast(ofproto)->backer; > + struct ovsrec_ct_timeout_policy *tp_cfg; > + struct ovsrec_ct_zone *zone_cfg; > + struct ct_timeout_policy *tp; > + struct ct_zone *zone; > + uint16_t zone_id; > + bool new_zone; > + size_t i; > + > + for (i = 0; i < dp_cfg->n_ct_zones; i++) { > + /* Update ct_zone config. */ > + zone_cfg = dp_cfg->value_ct_zones[i]; > + zone_id = dp_cfg->key_ct_zones[i]; > + zone = ct_zone_lookup(&backer->ct_zones, zone_id); > + if (!zone) { > + new_zone = true; > + zone = ct_zone_alloc(zone_id); > + } else { > + new_zone = false; > + } > + zone->last_used_seqno = idl_seqno; > + > + /* Update timeout policy. */ > + tp_cfg = zone_cfg->timeout_policy; > + tp = ct_timeout_policy_lookup(&backer->ct_tps, > &tp_cfg->header_.uuid); > + if (!tp) { > + tp = ct_timeout_policy_alloc(tp_cfg, > backer->timeout_policy_ids); > + if (tp) { > + cmap_insert(&backer->ct_tps, &tp->node, > uuid_hash(&tp->uuid)); > + ct_dpif_set_timeout_policy(backer->dpif, &tp->cdtp); > + } > + } else { > + if (ct_timeout_policy_update(tp_cfg, tp)) { > + ct_dpif_set_timeout_policy(backer->dpif, &tp->cdtp); > + } > + } > + tp->last_used_seqno = idl_seqno; > + > + /* Link zone with new timeout policy. */ > + zone->tp_uuid = tp_cfg->header_.uuid; > + if (new_zone) { > + cmap_insert(&backer->ct_zones, &zone->node, hash_int(zone_id, > 0)); > + } > + } > + ct_zone_timeout_policy_revalidate(backer, idl_seqno); > + ct_zone_timeout_policy_sweep(ofproto); > +} > + > static bool > set_frag_handling(struct ofproto *ofproto_, > enum ofputil_frag_handling frag_handling) > @@ -6189,4 +6453,6 @@ const struct ofproto_class ofproto_dpif_class = { > get_datapath_version, /* get_datapath_version */ > type_set_config, > ct_flush, /* ct_flush */ > + ct_zone_timeout_policy_reconfig, > + ct_zone_timeout_policy_sweep, > }; > diff --git a/ofproto/ofproto-dpif.h b/ofproto/ofproto-dpif.h > index cd5321eb942c..170edf0e0e70 100644 > --- a/ofproto/ofproto-dpif.h > +++ b/ofproto/ofproto-dpif.h > @@ -245,6 +245,16 @@ struct dpif_backer { > /* Meter. */ > struct id_pool *meter_ids; /* Datapath meter allocation. */ > > + /* Connection tracking */ > Looks like layering violation should be in dpif-netlink or netlink-conntrack for kernel side userspace datapath could use uuid directly > + struct id_pool *timeout_policy_ids; /* Datapath timeout policy id > + * allocation. */ > > + struct cmap ct_zones; /* "struct ct_zone"s indexed > by > + * zone id. */ > + struct cmap ct_tps; /* "struct ct_timeout_policy"s > + * indexed by uuid. */ > Looks like layering violation should be in dpif-netlink or netlink-conntrack for kernel side + struct ovs_list ct_tp_kill_list; /* a list of timeout policy to > be > + * deleted. */ > > + > /* Version string of the datapath stored in OVSDB. */ > char *dp_version_string; > > diff --git a/ofproto/ofproto-provider.h b/ofproto/ofproto-provider.h > index 7907d4bfb416..41e07f0ee23e 100644 > --- a/ofproto/ofproto-provider.h > +++ b/ofproto/ofproto-provider.h > @@ -58,6 +58,7 @@ > #include "tun-metadata.h" > #include "versions.h" > #include "vl-mff-map.h" > +#include "vswitch-idl.h" > > struct match; > struct ofputil_flow_mod; > @@ -1872,6 +1873,13 @@ struct ofproto_class { > /* Flushes the connection tracking tables. If 'zone' is not NULL, > * only deletes connections in '*zone'. */ > void (*ct_flush)(const struct ofproto *, const uint16_t *zone); > + > + /* Reconfigures the zone-based conntrack tiemout policy based on the > + * ovsdb configuration. */ > + void (*ct_zone_timeout_policy_reconfig)(const struct ofproto > *ofproto_, > + const struct ovsrec_datapath *dp_cfg, unsigned int idl_seqno); > + /* Cleans up the to be deleted timeout policy in the pending kill > list. */ > + void (*ct_zone_timeout_policy_sweep)(const struct ofproto *ofproto_); > }; > > extern const struct ofproto_class ofproto_dpif_class; > diff --git a/ofproto/ofproto.c b/ofproto/ofproto.c > index 1d6fc00696f8..373b8a4eba0c 100644 > --- a/ofproto/ofproto.c > +++ b/ofproto/ofproto.c > @@ -935,6 +935,26 @@ ofproto_get_flow_restore_wait(void) > return flow_restore_wait; > } > > +/* Connection tracking configuration. */ > +void > +ofproto_ct_zone_timeout_policy_reconfig(const struct ofproto *ofproto, > + const struct ovsrec_datapath > *dp_cfg, > + unsigned int idl_seqno) > +{ > + if (ofproto->ofproto_class->ct_zone_timeout_policy_reconfig) { > + ofproto->ofproto_class->ct_zone_timeout_policy_reconfig( > + ofproto, dp_cfg, idl_seqno); > + } > +} > + > +void > +ofproto_ct_zone_timeout_policy_sweep(const struct ofproto *ofproto) > +{ > + if (ofproto->ofproto_class->ct_zone_timeout_policy_sweep) { > + ofproto->ofproto_class->ct_zone_timeout_policy_sweep(ofproto); > + } > +} > + > > /* Spanning Tree Protocol (STP) configuration. */ > > diff --git a/ofproto/ofproto.h b/ofproto/ofproto.h > index 6e4afffa17e0..2ae42374be36 100644 > --- a/ofproto/ofproto.h > +++ b/ofproto/ofproto.h > @@ -52,6 +52,7 @@ struct ovs_list; > struct lldp_status; > struct aa_settings; > struct aa_mapping_settings; > +struct ovsrec_datapath; > > /* Needed for the lock annotations. */ > extern struct ovs_mutex ofproto_mutex; > @@ -362,6 +363,9 @@ int ofproto_get_stp_status(struct ofproto *, struct > ofproto_stp_status *); > int ofproto_set_rstp(struct ofproto *, const struct ofproto_rstp_settings > *); > int ofproto_get_rstp_status(struct ofproto *, struct ofproto_rstp_status > *); > void ofproto_set_vlan_limit(int vlan_limit); > +void ofproto_ct_zone_timeout_policy_reconfig(const struct ofproto > *ofproto, > + const struct ovsrec_datapath *dp_cfg, unsigned int idl_seqno); > +void ofproto_ct_zone_timeout_policy_sweep(const struct ofproto *ofproto); > > /* Configuration of ports. */ > void ofproto_port_unregister(struct ofproto *, ofp_port_t ofp_port); > diff --git a/vswitchd/bridge.c b/vswitchd/bridge.c > index 2976771aeaba..39b7de7d8182 100644 > --- a/vswitchd/bridge.c > +++ b/vswitchd/bridge.c > @@ -220,6 +220,11 @@ static long long int stats_timer = LLONG_MIN; > #define AA_REFRESH_INTERVAL (1000) /* In milliseconds. */ > static long long int aa_refresh_timer = LLONG_MIN; > > +/* Each time this timer expires, the bridge tries to delete the timeout > + * policies in the kill list. */ > +#define CT_ZONE_TIMEOUT_POLICY_TIMER_INTERVAL (5000) /* In milliseconds. > */ > +static long long int ct_zone_timeout_policy_timer = LLONG_MIN; > + > /* Whenever system interfaces are added, removed or change state, the > bridge > * will be reconfigured. > */ > @@ -588,6 +593,40 @@ config_ofproto_types(const struct smap *other_config) > } > > static void > +reconfigure_conntrack_timeout_policy(const struct ovsrec_open_vswitch > *cfg) > +{ > + struct bridge *br; > + int i; > + > + for (i = 0; i < cfg->n_datapaths; i++) { > + const struct ovsrec_datapath *dp_cfg = cfg->value_datapaths[i]; > + char *key = cfg->key_datapaths[i]; > + > + HMAP_FOR_EACH (br, node, &all_bridges) { > + if (!strcmp(br->type, key)) { > + ofproto_ct_zone_timeout_policy_reconfig(br->ofproto, > dp_cfg, > + idl_seqno); > + } > + break; > + } > + } > +} > + > +static void > +run_ct_zone_timeout_policy_sweep(void) > +{ > + struct bridge *br; > + > + if (time_msec() >= ct_zone_timeout_policy_timer) { > + HMAP_FOR_EACH (br, node, &all_bridges) { > + ofproto_ct_zone_timeout_policy_sweep(br->ofproto); > + } > + ct_zone_timeout_policy_timer = time_msec() + > + > CT_ZONE_TIMEOUT_POLICY_TIMER_INTERVAL; > + } > +} > + > +static void > bridge_reconfigure(const struct ovsrec_open_vswitch *ovs_cfg) > { > struct sockaddr_in *managers; > @@ -669,6 +708,7 @@ bridge_reconfigure(const struct ovsrec_open_vswitch > *ovs_cfg) > } > > reconfigure_system_stats(ovs_cfg); > + reconfigure_conntrack_timeout_policy(ovs_cfg); > > /* Complete the configuration. */ > sflow_bridge_number = 0; > @@ -3082,6 +3122,7 @@ bridge_run(void) > run_stats_update(); > run_status_update(); > run_system_stats(); > + run_ct_zone_timeout_policy_sweep(); > } > > void > -- > 2.7.4 > > _______________________________________________ > dev mailing list > d...@openvswitch.org > https://mail.openvswitch.org/mailman/listinfo/ovs-dev > _______________________________________________ dev mailing list d...@openvswitch.org https://mail.openvswitch.org/mailman/listinfo/ovs-dev