When there are only one subtable in the megaflow cache, CD does not benefit. In such case, CD actually hurts the performance because of the extra CD lookup process.
This patch implements an adaptive turn on/off CD mechanism. The average iterated subtable count will be collected for every 5 seconds, and depending on this count, CD will be turned on or off during run-time. This patch depends on previous patches. CC: Darrell Ball <dball at vmware.com> CC: Jan Scheurich <jan.scheurich at ericsson.com> Signed-off-by: Yipeng Wang <yipeng1.wang at intel.com> Signed-off-by: Antonio Fischetti <antonio.fischetti at intel.com> Co-authored-by: Antonio Fischetti <antonio.fischetti at intel.com> --- lib/dpif-netdev.c | 160 +++++++++++++++++++++++++++++++++++------------------- 1 file changed, 105 insertions(+), 55 deletions(-) diff --git a/lib/dpif-netdev.c b/lib/dpif-netdev.c index 5245cb5..425dfc4 100644 --- a/lib/dpif-netdev.c +++ b/lib/dpif-netdev.c @@ -227,6 +227,9 @@ typedef uint16_t cd_sig_t; */ #define SUB_PTR_LEN 256 +/* Time in ms between the decisions of turning on or off CD. */ +#define DPCLS_CD_OPTIMIZATION_INTERVAL 5000 + /* The bucket struct for cuckoo distributor*/ struct cd_bucket { cd_sig_t sig[CD_ENTRIES]; /* 2-byte long signature. */ @@ -258,6 +261,7 @@ struct dpcls { struct cmap subtables_map; struct pvector subtables; struct cd_cache *cdtable; /* The cuckoo distributor. */ + uint8_t cd_on; /* Turn on or off CD during runtime. */ struct dpcls_subtable *sub_ptrs[SUB_PTR_LEN]; /* Subtable pointer array. */ }; @@ -643,6 +647,8 @@ struct dp_netdev_pmd_thread { struct cmap classifiers; /* Periodically sort subtable vectors according to hit frequencies */ long long int next_optimization; + /* Periodically decide if we should turn on/off CD */ + long long int next_cd_optimization; /* End of the next time interval for which processing cycles are stored for each polled rxq. */ long long int rxq_interval; @@ -2866,12 +2872,14 @@ dp_netdev_flow_add(struct dp_netdev_pmd_thread *pmd, cmap_insert(&pmd->flow_table, CONST_CAST(struct cmap_node *, &flow->node), dp_netdev_flow_hash(&flow->ufid)); /* Insert to CD here. */ - if (OVS_LIKELY(key != NULL)) { - struct dpcls_subtable *subtable = dpcls_find_subtable(cls, &mask); - int index = find_index_in_sub_ptrs(cls, subtable); + if (cls->cd_on) { + if (OVS_LIKELY(key != NULL)) { + struct dpcls_subtable *subtable = dpcls_find_subtable(cls, &mask); + int index = find_index_in_sub_ptrs(cls, subtable); - if (index != 0) { - cd_insert(cls->cdtable, key, index); + if (index != 0) { + cd_insert(cls->cdtable, key, index); + } } } @@ -6290,6 +6298,12 @@ struct dpcls_subtable { struct cmap rules; /* Contains "struct dpcls_rule"s. */ uint32_t hit_cnt; /* Number of match hits in subtable in current optimization interval. */ + uint32_t access_cnt; /* With CD implemented, hit_cnt should be subtable + * hits that miss in CD, so the ranking mechanism + * which is based on hit_cnt still works properly. + * We have the access_cnt as total access count to + * each subtable to consider if we should turn on + * or turn off CD. */ struct netdev_flow_key mask; /* Wildcards for fields (const). */ /* 'mask' must be the last field, additional space is allocated here. */ }; @@ -6309,6 +6323,7 @@ dpcls_init(struct dpcls *cls) VLOG_ERR("Create cuckoo distributor failed"); } cd_init(cls->cdtable); + cls->cd_on = 1; for (i = 0; i < SUB_PTR_LEN; i++) { cls->sub_ptrs[i] = 0; } @@ -6356,6 +6371,7 @@ dpcls_create_subtable(struct dpcls *cls, const struct netdev_flow_key *mask) - sizeof subtable->mask.mf + mask->len); cmap_init(&subtable->rules); subtable->hit_cnt = 0; + subtable->access_cnt = 0; netdev_flow_key_clone(&subtable->mask, mask); cmap_insert(&cls->subtables_map, &subtable->cmap_node, mask->hash); /* Add the new subtable at the end of the pvector (with no hits yet) */ @@ -6432,6 +6448,34 @@ dp_netdev_pmd_try_optimize(struct dp_netdev_pmd_thread *pmd, pmd->next_optimization = now + DPCLS_OPTIMIZATION_INTERVAL; } } + if (now > pmd->next_cd_optimization) { + CMAP_FOR_EACH (cls, node, &pmd->classifiers) { + struct pvector *pvec = &cls->subtables; + struct dpcls_subtable *subtable; + float avg_table_cnt = 0; + int cnt = 0; + uint32_t total = 0; + uint32_t sum = 0; + PVECTOR_FOR_EACH (subtable,pvec) { + sum += subtable->access_cnt * cnt; + total += subtable->access_cnt; + subtable->access_cnt = 0; + cnt++; + } + /* If total access is too small, we keep previous decision */ + if (total > cnt * 5) { + avg_table_cnt = (float)sum / total; + } else { + avg_table_cnt = -1; + } + if (avg_table_cnt >= 1) { + cls->cd_on = 1; + } else if (avg_table_cnt != -1) { + cls->cd_on = 0; + } + } + pmd->next_cd_optimization = now + DPCLS_CD_OPTIMIZATION_INTERVAL; + } } /* Insert 'rule' into 'cls'. */ @@ -6522,57 +6566,60 @@ dpcls_lookup(struct dpcls *cls, const struct netdev_flow_key keys[], * The cuckoo distributor lookup pass begin first before go to megaflow * cache. CD hit will return a subtable index to the subtable to lookup. */ - int i; - int cd_match = 0; - int data[MAP_BITS]; + if (cls->cd_on) { + int i; + int cd_match = 0; + int data[MAP_BITS]; - cd_lookup_bulk_pipe(cls, keys, cnt, &found_map, data); + cd_lookup_bulk_pipe(cls, keys, cnt, &found_map, data); - ULLONG_FOR_EACH_1(i, found_map) { - hashes[i] = netdev_flow_key_hash_in_mask(&keys[i], - &(cls->sub_ptrs[data[i]])->mask); - nodes[i] = cmap_find(&((cls->sub_ptrs[data[i]])->rules), - hashes[i]); - if (nodes[i] != NULL) { - struct dpcls_rule *rule; - CMAP_NODE_FOR_EACH (rule, cmap_node, nodes[i]) { - if (OVS_LIKELY(dpcls_rule_matches_key(rule, &keys[i]))) { - rules[i] = rule; - lookups_match += 1; - cd_match += 1; - goto scnext; + ULLONG_FOR_EACH_1(i, found_map) { + hashes[i] = netdev_flow_key_hash_in_mask(&keys[i], + &(cls->sub_ptrs[data[i]])->mask); + nodes[i] = cmap_find(&((cls->sub_ptrs[data[i]])->rules), + hashes[i]); + if (nodes[i] != NULL) { + struct dpcls_rule *rule; + CMAP_NODE_FOR_EACH (rule, cmap_node, nodes[i]) { + if (OVS_LIKELY(dpcls_rule_matches_key(rule, &keys[i]))) { + rules[i] = rule; + lookups_match += 1; + cls->sub_ptrs[data[i]]->access_cnt++; + cd_match += 1; + goto scnext; + } } + ULLONG_SET0(found_map, i); /* Did not match. */ + /* + * Here means key same in subtable but not same rule + * since CD find the correct subtable, + * we dont need to insert to CD. + */ + + scnext: + ; + } else if (nodes[i] == NULL) { + ULLONG_SET0(found_map, i); + /* Here means in CD but not in the target subtable. + * meaning that it matches to a same (but wrong) key in CD. + * we should insert it into CD later when we know + * which subtable it hits. + */ } - ULLONG_SET0(found_map, i); /* Did not match. */ - /* - * Here means key same in subtable but not same rule - * since CD find the correct subtable, - * we dont need to insert to CD. - */ - - scnext: - ; - } else if (nodes[i] == NULL) { - ULLONG_SET0(found_map, i); - /* Here means in CD but not in the target subtable. - * meaning that it matches to a same (but wrong) key in CD. - * we should insert it into CD later when we know - * which subtable it hits. - */ } - } - keys_map &= ~found_map; + keys_map &= ~found_map; - if (!keys_map) { - if (num_lookups_p) { - *num_lookups_p = lookups_match; - } - if (cd_hit) { - *cd_hit = cd_match; - } + if (!keys_map) { + if (num_lookups_p) { + *num_lookups_p = lookups_match; + } + if (cd_hit) { + *cd_hit = cd_match; + } - return true; /* All found in CD. */ + return true; /* All found in CD. */ + } } /* The Datapath classifier - aka dpcls - is composed of subtables. @@ -6608,6 +6655,7 @@ dpcls_lookup(struct dpcls *cls, const struct netdev_flow_key keys[], /* Even at 20 Mpps the 32-bit hit_cnt cannot wrap * within one second optimization interval. */ subtable->hit_cnt++; + subtable->access_cnt++; lookups_match += subtable_pos; goto next; } @@ -6622,13 +6670,15 @@ dpcls_lookup(struct dpcls *cls, const struct netdev_flow_key keys[], * If we find things here, it means it misses in CD. * We should insert into CD. */ - int index = find_index_in_sub_ptrs(cls, subtable); - /* - * If 0, means not in sub_ptrs, then no need to insert - * into CD. - */ - if (index != 0) { - cd_insert(cls->cdtable, &keys[i], index); + if (cls->cd_on) { + int index = find_index_in_sub_ptrs(cls, subtable); + /* + * If 0, means not in sub_ptrs, then no need to insert + * into CD. + */ + if (index != 0) { + cd_insert(cls->cdtable, &keys[i], index); + } } } keys_map &= ~found_map; /* Clear the found rules. */ -- 2.7.4 _______________________________________________ dev mailing list d...@openvswitch.org https://mail.openvswitch.org/mailman/listinfo/ovs-dev