On Mon, Oct 28, 2019 at 5:07 PM Mark Michelson <mmich...@redhat.com> wrote: > > On 10/28/19 10:37 AM, Dumitru Ceara wrote: > > On Mon, Oct 28, 2019 at 1:46 PM Mark Michelson <mmich...@redhat.com> wrote: > >> > >> On 10/28/19 7:46 AM, Dumitru Ceara wrote: > >>> On Fri, Oct 25, 2019 at 11:07 PM Mark Michelson <mmich...@redhat.com> > >>> wrote: > >>>> > >>>> There is a maximum number of resubmits that can occur during packet > >>>> processing. Deliberately creating a flow table that might cross this > >>>> threshold is irresponsible. > >>>> > >>>> This commit causes the ofctrl code to track the maximum width and depth > >>>> of conjunctions in the desired flow table. By multiplying them, we have > >>>> a possible worst case for number of resubmits required. If this number > >>>> exceeds a quarter of the maximum resubmits allowed, then we fall back to > >>>> forcing crossproducts. > >>>> > >>>> The resulting flow table can end up being a mixture of conjunction and > >>>> non-conjunction flows if the limit is exceeded. > >>>> > >>>> Signed-off-by: Mark Michelson <mmich...@redhat.com> > >>> > >>> Hi Mark, > >>> > >>> I've been testing the series on a setup with: > >>> - 100 logical routers connected to a logical gateway router > >>> - 100 logical switches (each connected to a logical router) > >>> - 200 VIFs (2 per logical switch) > >>> - 2 NAT rules on each router > >>> > >>> I bound all VIFs to OVS internal interfaces on the machine when > >>> ovn-northd is running. > >>> > >>> If I start ovn-controller on the same node, without your series I see > >>> ovn-controller taking more than 100s for a single flow computation > >>> run. > >>> > >>> If I apply your series, the maximum duration of a flow computation run > >>> in the same scenario drops to ~4s. > >>> > >>> This is really nice, however, I see some issues with flow removal (see > >>> below). > >> > >> Thanks for the test, Dumitru. > >> > >>> > >>>> --- > >>>> controller/lflow.c | 11 +++++++++ > >>>> controller/ofctrl.c | 69 > >>>> +++++++++++++++++++++++++++++++++++++++++++++++++++++ > >>>> controller/ofctrl.h | 6 +++++ > >>>> include/ovn/expr.h | 2 ++ > >>>> lib/expr.c | 6 +++++ > >>>> 5 files changed, 94 insertions(+) > >>>> > >>>> diff --git a/controller/lflow.c b/controller/lflow.c > >>>> index 34b7c36a6..318066465 100644 > >>>> --- a/controller/lflow.c > >>>> +++ b/controller/lflow.c > >>>> @@ -37,6 +37,13 @@ > >>>> VLOG_DEFINE_THIS_MODULE(lflow); > >>>> > >>>> COVERAGE_DEFINE(lflow_run); > >>>> + > >>>> +/* Limit maximum conjunctions to a quarter of the max > >>>> + * resubmits. Unfortunatley, max resubmits is not publicly > >>>> + * defined in a header file, so if max resubmits is changed > >>>> + * from 4096, we have to make sure to update this as well > >>>> + */ > >>>> +#define MAX_CONJUNCTIONS (4096 / 4) > >>>> > >>>> /* Symbol table. */ > >>>> > >>>> @@ -602,6 +609,10 @@ consider_logical_flow( > >>>> struct expr *prereqs; > >>>> char *error; > >>>> > >>>> + uint32_t conj_worst_case; > >>>> + conj_worst_case = flow_table->max_conj_width * > >>>> flow_table->max_conj_depth; > >>>> + expr_set_force_crossproduct(conj_worst_case >= MAX_CONJUNCTIONS); > >>>> + > >>>> error = ovnacts_parse_string(lflow->actions, &pp, &ovnacts, > >>>> &prereqs); > >>>> if (error) { > >>>> static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 1); > >>>> diff --git a/controller/ofctrl.c b/controller/ofctrl.c > >>>> index afb0036f1..2b2fde3c9 100644 > >>>> --- a/controller/ofctrl.c > >>>> +++ b/controller/ofctrl.c > >>>> @@ -686,6 +686,64 @@ ofctrl_check_and_add_flow(struct > >>>> ovn_desired_flow_table *flow_table, > >>>> f->uuid_hindex_node.hash); > >>>> } > >>>> > >>>> +static void > >>>> +get_conjunction_dimensions(const struct ovn_flow *f, uint32_t > >>>> *conj_width_p, > >>>> + uint32_t *conj_depth_p) > >>>> +{ > >>>> + struct ofpact *ofpact; > >>>> + uint32_t conj_width = 0; > >>>> + uint32_t conj_depth = 0; > >>>> + OFPACT_FOR_EACH (ofpact, f->ofpacts, f->ofpacts_len) { > >>>> + if (ofpact->type == OFPACT_CONJUNCTION) { > >>>> + struct ofpact_conjunction *conj = > >>>> ofpact_get_CONJUNCTION(ofpact); > >>>> + if (conj->n_clauses > conj_depth) { > >>>> + conj_depth = conj->n_clauses; > >>>> + } > >>>> + conj_width++; > >>>> + } > >>>> + } > >>>> + > >>>> + *conj_width_p = conj_width; > >>>> + *conj_depth_p = conj_depth; > >>>> +} > >>>> + > >>>> +static void > >>>> +adjust_conjunction_count(struct ovn_desired_flow_table *desired_flows, > >>>> + const struct ovn_flow *f, bool add) > >>>> +{ > >>>> + uint32_t conj_width; > >>>> + uint32_t conj_depth; > >>>> + > >>>> + get_conjunction_dimensions(f, &conj_width, &conj_depth); > >>>> + > >>>> + if (add) { > >>>> + if (conj_width > desired_flows->max_conj_width) { > >>>> + desired_flows->max_conj_width = conj_width; > >>>> + } > >>>> + if (conj_depth > desired_flows->max_conj_depth) { > >>>> + desired_flows->max_conj_depth = conj_depth; > >>>> + } > >>>> + } else if (desired_flows->max_conj_width <= conj_width || > >>>> + desired_flows->max_conj_depth <= conj_depth) { > >>>> + /* Removing either the widest or deepest conjunction. > >>>> + * Walk the table and recalculate maximums > >>>> + */ > >>>> + struct ovn_flow *iter; > >>>> + desired_flows->max_conj_width = 0; > >>>> + desired_flows->max_conj_depth = 0; > >>>> + HMAP_FOR_EACH (iter, match_hmap_node, > >>>> + &desired_flows->match_flow_table) { > >>>> + get_conjunction_dimensions(iter, &conj_width, &conj_depth); > >>>> + if (conj_width > desired_flows->max_conj_width) { > >>>> + desired_flows->max_conj_width = conj_width; > >>>> + } > >>>> + if (conj_depth > desired_flows->max_conj_depth) { > >>>> + desired_flows->max_conj_depth = conj_depth; > >>>> + } > >>>> + } > >>> > >>> I think this is quite expensive because we change single flow removal > >>> complexity from amortized O(1) to O(n). > >>> > >>> On the same setup as above if I continue testing and remove all > >>> logical switches from the NB database it will trigger all flows to be > >>> removed. > >>> > >>> In this scenario, with your series I see that two flow computation > >>> runs take up to 17s. > >>> On the other hand, if I do the deletion test without applying your > >>> third patch, flow computation runs never take longer than 5s. > >>> > >>> I agree that it's good to limit the number of conjunctive flows but I > >>> think we need to find a better way to track the max_conj_width/depth. > >>> I guess we could track max flow conj_width/depth by storing ovn_flows > >>> in a max-heap but that would change both flow add and remove > >>> operations' complexities to O(log(n)). > >>> > >>> Or do you think we can have a different heuristic for disabling > >>> conjunctive flows? > >> > >> Short version: we probably should just remove the limitation. > >> > >> Long version: > >> > >> My calculation method could stand to be improved. > >> > >> 1) The max_width * max_depth calculation assumes that the flow with the > >> max_width correlates with the conjunction with max depth. This may not > >> be the case. > >> 2) The max_width * max_depth calculation only accounts for one possible > >> conjunction's contribution to the resubmit count. If there are multiple > >> conjunctive match flows, we don't account for that. > >> 3) As you found, removing a flow requires an expensive traversal of the > >> desired flows table. There could be some ways to mitigate this, but no > >> matter what, the O(1) algorithm is toast. > >> 4) I more-or-less guessed what the upper bound should be before setting > >> force_crossproduct. > >> > >> So given all that, the proper things to do would be to either remove the > >> limitation checks or come up with a different calculation. > >> > >> When it comes to calculations, we can either perform a running > >> calculation (like I've done), or we can do a once-per-loop estimation > >> based on configuration. > > > > An alternative would be to do the current check periodically instead > > of at every flow addition/deletion. On the long run that would keep > > complexity to O(1) as we just walk the whole hashtable one more time > > on a timer. The disadvantage is that we might need to reinstall the > > existing conjunctive flows once we determine that the max_depth/width > > was reached. > > My main concern with doing a periodic recalculation is similar to what I > brought up with doing an all-at-once calculation on each loop. It's > difficult to create a hybrid flow table that has both conjunctions and > crossproducts. If you find that there are too many conjunctions during > the calculation, I'm not sure how you can install only a certain number > of conjunctions. I think it becomes all or nothing.
Actually, what I meant was that if on the periodic htable run we figure out that the conjunction limit was reached in either direction we could call expr_set_force_crossproduct(true/false), then trigger a full recompute of the incremental processing engine (engine_set_force_recompute(true)) and poll_immediate_wake(). We might need some hysteresis to avoid rapid changes between crossproduct and conjunction modes when we're close to the limit. Also some logs to inform the user that we switched to/from the "crossproduct mode" might be useful. Wouldn't that work? > > One idea I had is that we could only re-calculate on flow removal if the > current worst case for conjunctions is above our upper bound. If we > don't need to install crossproducts, and our worst case just got better, > then we don't need to spend the time recalculating. We obviously can > just use our previously calculated upper bound. The issue here is that > we might end up with a false positive for our worst case if we go a long > time without a full recalculation. So one way to avoid this might be to > recalculate on every, say, 10th removal no matter what. This counter > would reset each time we recalculate the worst case for conjunctions, > and it would reset each time the incremental engine does a full > recompute of the flow table. > > What do you think? > > > > > >> > >> The running calculation has the advantage of potentially being more > >> accurate. It also allows for us to enable force_crossproduct partway > >> through an ovn-controller loop. We can have some conjunctions, and some > >> crossproducts in the resulting flow table. The problem is that this > >> requires extra CPU time in order to keep track of the current > >> conjunction count. The amount can be better than what I've put forth, > >> but it's still going to be a downgrade from what it currently is. > >> > >> The one-time calculation might work by scanning the configured ACLs, > >> Address_Sets, and Port_Groups to make an educated guess at the worst > >> case for flow traversal. Unless we're willing to do some early parsing > >> of ACLs, this likely would be only a rough estimate for the worst case. > >> If we calculate that the number of potential conjunctions would exceed > >> our upper bound, then we likely would need to completely disable > >> conjunctions when calculating flows. > >> > >> The problem of the resubmit count is hypothetical. It's hard to know how > >> likely it is for real systems to exceed the max resubmit count. It's > >> also hard to estimate what the proper upper bound for conjunctions > >> should be. I think that it would take an extreme network hitting the > >> absolute worst cases to hit the resubmit limit. I think it's safe to > >> continue without the limitations. If we ever get reports about max > >> resubmit issues due to conjunctions, we can examine the individual > >> scenarios and try to formulate some good heuristics. > > > > This is also an option. And it's basically what OVN had before > > conjunctions were disabled. > > It's slightly different, though, since we risk more resubmits by having > multiple conjunction actions in the same flow. > > I'm leaning towards doing this for this patch series. It may be worth > submitting the limitation calculation as a separate patch series since > it'll be super nice to get conjunctions reintroduced. > > > > > Thanks, > > Dumitru > > > >> > >>> > >>> Thanks, > >>> Dumitru > >>> > >> > >> > >> > >>>> + } > >>>> +} > >>>> + > >>>> /* Removes a bundles of flows from the flow table. */ > >>>> void > >>>> ofctrl_remove_flows(struct ovn_desired_flow_table *flow_table, > >>>> @@ -697,6 +755,7 @@ ofctrl_remove_flows(struct ovn_desired_flow_table > >>>> *flow_table, > >>>> &flow_table->uuid_flow_table) { > >>>> if (uuid_equals(&f->sb_uuid, sb_uuid)) { > >>>> ovn_flow_log(f, "ofctrl_remove_flow"); > >>>> + adjust_conjunction_count(flow_table, f, false); > >>>> hmap_remove(&flow_table->match_flow_table, > >>>> &f->match_hmap_node); > >>>> hindex_remove(&flow_table->uuid_flow_table, > >>>> &f->uuid_hindex_node); > >>>> @@ -747,6 +806,12 @@ ofctrl_add_or_append_flow(struct > >>>> ovn_desired_flow_table *desired_flows, > >>>> existing->ofpacts = xmemdup(compound.data, compound.size); > >>>> existing->ofpacts_len = compound.size; > >>>> > >>>> + /* It's a bit of a cheat that we only call > >>>> adjust_conjunction_count in > >>>> + * this function and not in ofctrl_add(). However, we know that > >>>> + * conjunctions are only ever added with this function > >>>> + */ > >>>> + adjust_conjunction_count(desired_flows, existing, true); > >>>> + > >>>> ovn_flow_destroy(f); > >>>> } else { > >>>> hmap_insert(&desired_flows->match_flow_table, > >>>> &f->match_hmap_node, > >>>> @@ -862,6 +927,8 @@ ovn_desired_flow_table_init(struct > >>>> ovn_desired_flow_table *flow_table) > >>>> { > >>>> hmap_init(&flow_table->match_flow_table); > >>>> hindex_init(&flow_table->uuid_flow_table); > >>>> + flow_table->max_conj_width = 0; > >>>> + flow_table->max_conj_depth = 0; > >>>> } > >>>> > >>>> void > >>>> @@ -874,6 +941,8 @@ ovn_desired_flow_table_clear(struct > >>>> ovn_desired_flow_table *flow_table) > >>>> hindex_remove(&flow_table->uuid_flow_table, > >>>> &f->uuid_hindex_node); > >>>> ovn_flow_destroy(f); > >>>> } > >>>> + flow_table->max_conj_width = 0; > >>>> + flow_table->max_conj_depth = 0; > >>>> } > >>>> > >>>> void > >>>> diff --git a/controller/ofctrl.h b/controller/ofctrl.h > >>>> index 21d2ce648..c295c04ed 100644 > >>>> --- a/controller/ofctrl.h > >>>> +++ b/controller/ofctrl.h > >>>> @@ -37,6 +37,12 @@ struct ovn_desired_flow_table { > >>>> > >>>> /* SB uuid index for the nodes in match_flow_table.*/ > >>>> struct hindex uuid_flow_table; > >>>> + > >>>> + /* Highest known number of conjunction() actions in a single flow */ > >>>> + uint32_t max_conj_width; > >>>> + > >>>> + /* Largest number of clauses for a conjunction in the table */ > >>>> + uint32_t max_conj_depth; > >>>> }; > >>>> > >>>> /* Interface for OVN main loop. */ > >>>> diff --git a/include/ovn/expr.h b/include/ovn/expr.h > >>>> index 22f633e57..714e21add 100644 > >>>> --- a/include/ovn/expr.h > >>>> +++ b/include/ovn/expr.h > >>>> @@ -425,6 +425,8 @@ bool expr_evaluate(const struct expr *, const struct > >>>> flow *uflow, > >>>> bool (*lookup_port)(const void *aux, const char > >>>> *port_name, > >>>> unsigned int *portp), > >>>> const void *aux); > >>>> + > >>>> +void expr_set_force_crossproduct(bool should_force); > >>>> > >>>> /* Converting expressions to OpenFlow flows. */ > >>>> > >>>> diff --git a/lib/expr.c b/lib/expr.c > >>>> index 71de61543..4c164caed 100644 > >>>> --- a/lib/expr.c > >>>> +++ b/lib/expr.c > >>>> @@ -3278,6 +3278,12 @@ expr_evaluate(const struct expr *e, const struct > >>>> flow *uflow, > >>>> OVS_NOT_REACHED(); > >>>> } > >>>> } > >>>> + > >>>> +void > >>>> +expr_set_force_crossproduct(bool should_force) > >>>> +{ > >>>> + force_crossproduct = should_force; > >>>> +} > >>>> > >>>> /* Action parsing helper. */ > >>>> > >>>> -- > >>>> 2.14.5 > >>>> > >>>> _______________________________________________ > >>>> dev mailing list > >>>> d...@openvswitch.org > >>>> https://mail.openvswitch.org/mailman/listinfo/ovs-dev > >> > _______________________________________________ dev mailing list d...@openvswitch.org https://mail.openvswitch.org/mailman/listinfo/ovs-dev