When the EMC hit rate goes down, start shedding load from the EMC.
---
 lib/dpif-netdev.c | 107 ++++++++++++++++++++++++++++++++++++++++++++++++++++--
 1 file changed, 103 insertions(+), 4 deletions(-)
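Note on the mechanism: the RSS hash is stable per flow and roughly
uniform over [0, UINT32_MAX], so comparing it against the shed
threshold acts as a cheap per-flow lottery: a given flow either always
gets an EMC lookup/insert or always bypasses the EMC. A minimal
standalone sketch of that decision, assuming a uniform hash; the
helper name emc_should_skip is illustrative and does not appear in
the patch:

#include <stdbool.h>
#include <stdint.h>

/* With a roughly uniform 32-bit hash, a threshold T sheds about
 * T / 2^32 of all flows.  Each SHED_ADJ_QUANTUM step of 0x10000000
 * (2^28) therefore moves the shed fraction by 2^28 / 2^32 = 1/16 =
 * 6.25%; at SHED_THRESH_MAX (15 * 2^28) roughly 15/16 = 93.75% of
 * flows bypass the EMC. */
static inline bool
emc_should_skip(uint32_t rss_hash, uint32_t shed_threshold)
{
    /* Mirrors the patch's test: only hashes above the threshold are
     * offered to the EMC (lookup on the fast path, insert after a
     * megaflow hit or upcall). */
    return rss_hash <= shed_threshold;
}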
diff --git a/lib/dpif-netdev.c b/lib/dpif-netdev.c
index e3a5590..f77e79a 100644
--- a/lib/dpif-netdev.c
+++ b/lib/dpif-netdev.c
@@ -158,6 +158,13 @@ struct netdev_flow_key {
 #define DEFAULT_EM_FLOW_INSERT_MIN (UINT32_MAX /                     \
                                     DEFAULT_EM_FLOW_INSERT_INV_PROB)
 
+struct emc_shed_state {
+    unsigned long long last_hit_cnt;
+    unsigned long long last_miss_cnt;
+    unsigned long long last_skip_cnt;
+    uint32_t shed_threshold;
+};
+
 struct emc_entry {
     struct dp_netdev_flow *flow;
     struct netdev_flow_key key;   /* key.hash used for emc hash value. */
@@ -166,6 +173,7 @@ struct emc_entry {
 struct emc_cache {
     struct emc_entry entries[EM_FLOW_HASH_ENTRIES];
     int sweep_idx;                /* For emc_cache_slow_sweep(). */
+    struct emc_shed_state emc_shed_state;
 };
 
 /* Iterate in the exact match cache through every entry that might contain a
@@ -337,6 +345,7 @@ enum dp_stat_type {
     DP_STAT_LOST,               /* Packets not passed up to the client. */
     DP_STAT_LOOKUP_HIT,         /* Number of subtable lookups for flow table
                                    hits */
+    DP_STAT_EXACT_SKIPPED,      /* Packets where EMC lookup skipped */
     DP_N_STATS
 };
 
@@ -733,6 +742,10 @@ emc_cache_init(struct emc_cache *flow_cache)
     int i;
 
     flow_cache->sweep_idx = 0;
+    flow_cache->emc_shed_state.last_hit_cnt = 0;
+    flow_cache->emc_shed_state.last_miss_cnt = 0;
+    flow_cache->emc_shed_state.last_skip_cnt = 0;
+    flow_cache->emc_shed_state.shed_threshold = 0;
     for (i = 0; i < ARRAY_SIZE(flow_cache->entries); i++) {
         flow_cache->entries[i].flow = NULL;
         flow_cache->entries[i].key.hash = 0;
@@ -749,6 +762,10 @@ emc_cache_uninit(struct emc_cache *flow_cache)
     for (i = 0; i < ARRAY_SIZE(flow_cache->entries); i++) {
         emc_clear_entry(&flow_cache->entries[i]);
     }
+    flow_cache->emc_shed_state.last_hit_cnt = 0;
+    flow_cache->emc_shed_state.last_miss_cnt = 0;
+    flow_cache->emc_shed_state.last_skip_cnt = 0;
+    flow_cache->emc_shed_state.shed_threshold = 0;
 }
 
 /* Check and clear dead flow references slowly (one entry at each
@@ -839,11 +856,28 @@ pmd_info_show_stats(struct ds *reply,
     }
     ds_put_cstr(reply, ":\n");
 
+    /* XXX some items added here are for debug */
     ds_put_format(reply,
                   "\temc hits:%llu\n\tmegaflow hits:%llu\n"
+                  "\tshed thresh:0x%08X\n"
+                  "\temc skips:%llu\n"
+                  "\temc hit rate (nett) :%llu%%\n"
+                  "\temc hit rate (gross):%llu%%\n"
                   "\tavg. subtable lookups per hit:%.2f\n"
                   "\tmiss:%llu\n\tlost:%llu\n",
                   stats[DP_STAT_EXACT_HIT], stats[DP_STAT_MASKED_HIT],
+                  pmd->flow_cache.emc_shed_state.shed_threshold,
+                  stats[DP_STAT_EXACT_SKIPPED],
+                  (stats[DP_STAT_EXACT_HIT] + stats[DP_STAT_MASKED_HIT] -
+                   stats[DP_STAT_EXACT_SKIPPED])
+                  ? ((stats[DP_STAT_EXACT_HIT] * 100) /
+                     (stats[DP_STAT_EXACT_HIT] + stats[DP_STAT_MASKED_HIT] -
+                      stats[DP_STAT_EXACT_SKIPPED]))
+                  : 0,
+                  (stats[DP_STAT_EXACT_HIT] + stats[DP_STAT_MASKED_HIT])
+                  ? ((stats[DP_STAT_EXACT_HIT] * 100) /
+                     (stats[DP_STAT_EXACT_HIT] + stats[DP_STAT_MASKED_HIT]))
+                  : 0,
                   stats[DP_STAT_MASKED_HIT] > 0
                   ? (1.0*stats[DP_STAT_LOOKUP_HIT])/stats[DP_STAT_MASKED_HIT]
                   : 0,
@@ -1470,6 +1504,8 @@ dpif_netdev_get_stats(const struct dpif *dpif, struct dpif_dp_stats *stats)
         stats->n_hit += n;
         atomic_read_relaxed(&pmd->stats.n[DP_STAT_EXACT_HIT], &n);
         stats->n_hit += n;
+        atomic_read_relaxed(&pmd->stats.n[DP_STAT_EXACT_SKIPPED], &n);
+        stats->n_hit += n;
         atomic_read_relaxed(&pmd->stats.n[DP_STAT_MISS], &n);
         stats->n_missed += n;
         atomic_read_relaxed(&pmd->stats.n[DP_STAT_LOST], &n);
@@ -4849,6 +4885,54 @@ dp_netdev_queue_batches(struct dp_packet *pkt,
     packet_batch_per_flow_update(batch, pkt, mf);
 }
 
+#define SHED_ADJ_INTERVAL_PKTS (3e6)
+#define SHED_ADJ_QUANTUM (0x10000000U)
+#define SHED_THRESH_MAX (SHED_ADJ_QUANTUM +        \
+                         (SHED_ADJ_QUANTUM << 1) + \
+                         (SHED_ADJ_QUANTUM << 2) + \
+                         (SHED_ADJ_QUANTUM << 3))
+/* XXX use cost of EMC lookup & miss in cycles to replace hard bounds */
+#define SHED_HIT_RATE_LOWER_PC (50)
+#define SHED_HIT_RATE_UPPER_PC (70)
+
+
+static inline void
+adjust_emc_shedding(struct dp_netdev_pmd_thread *pmd)
+{
+    struct emc_cache *emc = &pmd->flow_cache;
+    unsigned long long emc_hit_cnt = pmd->stats.n[DP_STAT_EXACT_HIT] -
+                                     emc->emc_shed_state.last_hit_cnt;
+    unsigned long long emc_miss_cnt = pmd->stats.n[DP_STAT_MASKED_HIT] -
+                                      emc->emc_shed_state.last_miss_cnt;
+
+    if (emc_hit_cnt + emc_miss_cnt > SHED_ADJ_INTERVAL_PKTS) {
+        /* XXX protect against counter wrap around */
+        unsigned long long emc_skip_cnt = pmd->stats.n[DP_STAT_EXACT_SKIPPED] -
+                                          emc->emc_shed_state.last_skip_cnt;
+        unsigned long long emc_offered_cnt =
+            emc_hit_cnt + emc_miss_cnt - emc_skip_cnt;
+
+        unsigned int hit_rate_pc = (emc_hit_cnt * 100) / emc_offered_cnt;
+
+        emc->emc_shed_state.last_hit_cnt = pmd->stats.n[DP_STAT_EXACT_HIT];
+        emc->emc_shed_state.last_miss_cnt = pmd->stats.n[DP_STAT_MASKED_HIT];
+        emc->emc_shed_state.last_skip_cnt =
+            pmd->stats.n[DP_STAT_EXACT_SKIPPED];
+
+        /* As hit rate goes down shed thresh goes up (more is shed from EMC) */
+        /* XXX consider incrementing more if further out of bounds */
+        if (hit_rate_pc > SHED_HIT_RATE_UPPER_PC &&
+            emc->emc_shed_state.shed_threshold >= SHED_ADJ_QUANTUM) {
+            emc->emc_shed_state.shed_threshold -= SHED_ADJ_QUANTUM;
+        } else if (hit_rate_pc < SHED_HIT_RATE_LOWER_PC &&
+                   emc->emc_shed_state.shed_threshold < SHED_THRESH_MAX) {
+            emc->emc_shed_state.shed_threshold += SHED_ADJ_QUANTUM;
+        }
+    }
+}
+
+
+
 /* Try to process all ('cnt') the 'packets' using only the exact match cache
  * 'pmd->flow_cache'. If a flow is not found for a packet 'packets[i]', the
  * miniflow is copied into 'keys' and the packet pointer is moved at the
@@ -4869,7 +4953,7 @@ emc_processing(struct dp_netdev_pmd_thread *pmd,
 {
     struct emc_cache *flow_cache = &pmd->flow_cache;
     struct netdev_flow_key *key = &keys[0];
-    size_t n_missed = 0, n_dropped = 0;
+    size_t n_missed = 0, n_dropped = 0, n_skipped = 0;
     struct dp_packet *packet;
     const size_t size = dp_packet_batch_size(packets_);
     uint32_t cur_min;
@@ -4900,8 +4984,17 @@ emc_processing(struct dp_netdev_pmd_thread *pmd,
         key->len = 0; /* Not computed yet. */
         key->hash = dpif_netdev_packet_get_rss_hash(packet, &key->mf);
 
+        adjust_emc_shedding(pmd);
+
         /* If EMC is disabled skip emc_lookup */
-        flow = (cur_min == 0) ? NULL: emc_lookup(flow_cache, key);
+        if ((key->hash > flow_cache->emc_shed_state.shed_threshold) &&
+            cur_min) {
+            flow = emc_lookup(flow_cache, key);
+        } else {
+            flow = NULL;
+            n_skipped++;
+        }
+
         if (OVS_LIKELY(flow)) {
             dp_netdev_queue_batches(packet, flow, &key->mf, batches,
                                     n_batches);
@@ -4916,6 +5009,8 @@
         }
     }
 
+    dp_netdev_count_packet(pmd, DP_STAT_EXACT_SKIPPED,
+                           n_skipped);
     dp_netdev_count_packet(pmd, DP_STAT_EXACT_HIT,
                            size - n_dropped - n_missed);
 
@@ -4986,7 +5081,9 @@ handle_packet_upcall(struct dp_netdev_pmd_thread *pmd,
                                              add_actions->size);
         }
         ovs_mutex_unlock(&pmd->flow_mutex);
-        emc_probabilistic_insert(pmd, key, netdev_flow);
+        if (key->hash > pmd->flow_cache.emc_shed_state.shed_threshold) {
+            emc_probabilistic_insert(pmd, key, netdev_flow);
+        }
     }
 }
 
@@ -5079,7 +5176,9 @@ fast_path_processing(struct dp_netdev_pmd_thread *pmd,
 
         flow = dp_netdev_flow_cast(rules[i]);
 
-        emc_probabilistic_insert(pmd, &keys[i], flow);
+        if (keys[i].hash > pmd->flow_cache.emc_shed_state.shed_threshold) {
+            emc_probabilistic_insert(pmd, &keys[i], flow);
+        }
         dp_netdev_queue_batches(packet, flow, &keys[i].mf,
                                 batches, n_batches);
     }
-- 
2.7.4
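With the patch applied, the new counters and the current shed threshold
show up in the per-PMD stats (pmd_info_show_stats() above is reached via
"ovs-appctl dpif-netdev/pmd-stats-show"). An illustrative excerpt, with
made-up numbers, following the format string added by this patch:

	emc hits:8400
	megaflow hits:2600
	shed thresh:0x10000000
	emc skips:1000
	emc hit rate (nett) :84%
	emc hit rate (gross):76%

The nett rate excludes skipped packets from the denominator, so it
measures the hit rate among packets actually offered to the EMC
(8400 * 100 / (8400 + 2600 - 1000) = 84%), while the gross rate is taken
over all packets (8400 * 100 / 11000 = 76%, integer division). The
controller steers the nett rate into the 50-70% band, moving the
threshold by at most one SHED_ADJ_QUANTUM per adjustment interval.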