From: Fan Zhang <roy.fan.zh...@intel.com> This patch is to adding hash table operations for key signature computed on lookup ("do-sig") for LRU hash tables and Extendible buckets.
Signed-off-by: Fan Zhang <roy.fan.zhang at intel.com> --- lib/librte_table/rte_table_hash.h | 8 + lib/librte_table/rte_table_hash_key16.c | 358 +++++++++++++++++++++++++++++++- 2 files changed, 363 insertions(+), 3 deletions(-) diff --git a/lib/librte_table/rte_table_hash.h b/lib/librte_table/rte_table_hash.h index e2c60e1..9d17516 100644 --- a/lib/librte_table/rte_table_hash.h +++ b/lib/librte_table/rte_table_hash.h @@ -271,6 +271,10 @@ struct rte_table_hash_key16_lru_params { /** LRU hash table operations for pre-computed key signature */ extern struct rte_table_ops rte_table_hash_key16_lru_ops; +/** LRU hash table operations for key signature computed on lookup + ("do-sig") */ +extern struct rte_table_ops rte_table_hash_key16_lru_dosig_ops; + /** Extendible bucket hash table parameters */ struct rte_table_hash_key16_ext_params { /** Maximum number of entries (and keys) in the table */ @@ -301,6 +305,10 @@ struct rte_table_hash_key16_ext_params { /** Extendible bucket operations for pre-computed key signature */ extern struct rte_table_ops rte_table_hash_key16_ext_ops; +/** Extendible bucket hash table operations for key signature computed on + lookup ("do-sig") */ +extern struct rte_table_ops rte_table_hash_key16_ext_dosig_ops; + /** * 32-byte key hash tables * diff --git a/lib/librte_table/rte_table_hash_key16.c b/lib/librte_table/rte_table_hash_key16.c index ffd3249..427b534 100644 --- a/lib/librte_table/rte_table_hash_key16.c +++ b/lib/librte_table/rte_table_hash_key16.c @@ -620,6 +620,27 @@ rte_table_hash_entry_delete_key16_ext( rte_prefetch0((void *)(((uintptr_t) bucket1) + RTE_CACHE_LINE_SIZE));\ } +#define lookup1_stage1_dosig(mbuf1, bucket1, f) \ +{ \ + uint64_t *key; \ + uint64_t signature = 0; \ + uint32_t bucket_index; \ + uint64_t hash_key_buffer[2]; \ + \ + key = RTE_MBUF_METADATA_UINT64_PTR(mbuf1, f->key_offset);\ + \ + hash_key_buffer[0] = key[0] & f->key_mask[0]; \ + hash_key_buffer[1] = key[1] & f->key_mask[1]; \ + signature = f->f_hash(hash_key_buffer, \ + RTE_TABLE_HASH_KEY_SIZE, f->seed); \ + \ + bucket_index = signature & (f->n_buckets - 1); \ + bucket1 = (struct rte_bucket_4_16 *) \ + &f->memory[bucket_index * f->bucket_size]; \ + rte_prefetch0(bucket1); \ + rte_prefetch0((void *)(((uintptr_t) bucket1) + RTE_CACHE_LINE_SIZE));\ +} + #define lookup1_stage2_lru(pkt2_index, mbuf2, bucket2, \ pkts_mask_out, entries, f) \ { \ @@ -769,6 +790,36 @@ rte_table_hash_entry_delete_key16_ext( rte_prefetch0((void *)(((uintptr_t) bucket11) + RTE_CACHE_LINE_SIZE));\ } +#define lookup2_stage1_dosig(mbuf10, mbuf11, bucket10, bucket11, f) \ +{ \ + uint64_t *key10, *key11; \ + uint64_t hash_offset_buffer[2]; \ + uint64_t signature10, signature11; \ + uint32_t bucket10_index, bucket11_index; \ + \ + key10 = RTE_MBUF_METADATA_UINT64_PTR(mbuf10, f->key_offset);\ + hash_offset_buffer[0] = key10[0] & f->key_mask[0]; \ + hash_offset_buffer[1] = key10[1] & f->key_mask[1]; \ + signature10 = f->f_hash(hash_offset_buffer, \ + RTE_TABLE_HASH_KEY_SIZE, f->seed);\ + bucket10_index = signature10 & (f->n_buckets - 1); \ + bucket10 = (struct rte_bucket_4_16 *) \ + &f->memory[bucket10_index * f->bucket_size]; \ + rte_prefetch0(bucket10); \ + rte_prefetch0((void *)(((uintptr_t) bucket10) + RTE_CACHE_LINE_SIZE));\ + \ + key11 = RTE_MBUF_METADATA_UINT64_PTR(mbuf11, f->key_offset);\ + hash_offset_buffer[0] = key11[0] & f->key_mask[0]; \ + hash_offset_buffer[1] = key11[1] & f->key_mask[1]; \ + signature11 = f->f_hash(hash_offset_buffer, \ + RTE_TABLE_HASH_KEY_SIZE, f->seed);\ + bucket11_index = signature11 & (f->n_buckets - 1); \ + bucket11 = (struct rte_bucket_4_16 *) \ + &f->memory[bucket11_index * f->bucket_size]; \ + rte_prefetch0(bucket11); \ + rte_prefetch0((void *)(((uintptr_t) bucket11) + RTE_CACHE_LINE_SIZE));\ +} + #define lookup2_stage2_lru(pkt20_index, pkt21_index, mbuf20, mbuf21,\ bucket20, bucket21, pkts_mask_out, entries, f) \ { \ @@ -878,7 +929,8 @@ rte_table_hash_lookup_key16_lru( } *lookup_hit_mask = pkts_mask_out; - RTE_TABLE_HASH_KEY16_STATS_PKTS_LOOKUP_MISS(f, n_pkts_in - __builtin_popcountll(pkts_mask_out)); + RTE_TABLE_HASH_KEY16_STATS_PKTS_LOOKUP_MISS(f, + n_pkts_in - __builtin_popcountll(pkts_mask_out)); return 0; } @@ -968,11 +1020,141 @@ rte_table_hash_lookup_key16_lru( bucket20, bucket21, pkts_mask_out, entries, f); *lookup_hit_mask = pkts_mask_out; - RTE_TABLE_HASH_KEY16_STATS_PKTS_LOOKUP_MISS(f, n_pkts_in - __builtin_popcountll(pkts_mask_out)); + RTE_TABLE_HASH_KEY16_STATS_PKTS_LOOKUP_MISS(f, n_pkts_in - + __builtin_popcountll(pkts_mask_out)); return 0; } /* rte_table_hash_lookup_key16_lru() */ static int +rte_table_hash_lookup_key16_lru_dosig( + void *table, + struct rte_mbuf **pkts, + uint64_t pkts_mask, + uint64_t *lookup_hit_mask, + void **entries) +{ + struct rte_table_hash *f = (struct rte_table_hash *) table; + struct rte_bucket_4_16 *bucket10, *bucket11, *bucket20, *bucket21; + struct rte_mbuf *mbuf00, *mbuf01, *mbuf10, *mbuf11, *mbuf20, *mbuf21; + uint32_t pkt00_index, pkt01_index, pkt10_index; + uint32_t pkt11_index, pkt20_index, pkt21_index; + uint64_t pkts_mask_out = 0; + + __rte_unused uint32_t n_pkts_in = __builtin_popcountll(pkts_mask); + + RTE_TABLE_HASH_KEY16_STATS_PKTS_IN_ADD(f, n_pkts_in); + + /* Cannot run the pipeline with less than 5 packets */ + if (__builtin_popcountll(pkts_mask) < 5) { + for ( ; pkts_mask; ) { + struct rte_bucket_4_16 *bucket; + struct rte_mbuf *mbuf; + uint32_t pkt_index; + + lookup1_stage0(pkt_index, mbuf, pkts, pkts_mask); + lookup1_stage1_dosig(mbuf, bucket, f); + lookup1_stage2_lru(pkt_index, mbuf, bucket, + pkts_mask_out, entries, f); + } + + *lookup_hit_mask = pkts_mask_out; + RTE_TABLE_HASH_KEY16_STATS_PKTS_LOOKUP_MISS(f, n_pkts_in - + __builtin_popcountll(pkts_mask_out)); + return 0; + } + + /* + * Pipeline fill + * + */ + /* Pipeline stage 0 */ + lookup2_stage0(pkt00_index, pkt01_index, mbuf00, mbuf01, pkts, + pkts_mask); + + /* Pipeline feed */ + mbuf10 = mbuf00; + mbuf11 = mbuf01; + pkt10_index = pkt00_index; + pkt11_index = pkt01_index; + + /* Pipeline stage 0 */ + lookup2_stage0(pkt00_index, pkt01_index, mbuf00, mbuf01, pkts, + pkts_mask); + + /* Pipeline stage 1 */ + lookup2_stage1_dosig(mbuf10, mbuf11, bucket10, bucket11, f); + + /* + * Pipeline run + * + */ + for ( ; pkts_mask; ) { + /* Pipeline feed */ + bucket20 = bucket10; + bucket21 = bucket11; + mbuf20 = mbuf10; + mbuf21 = mbuf11; + mbuf10 = mbuf00; + mbuf11 = mbuf01; + pkt20_index = pkt10_index; + pkt21_index = pkt11_index; + pkt10_index = pkt00_index; + pkt11_index = pkt01_index; + + /* Pipeline stage 0 */ + lookup2_stage0_with_odd_support(pkt00_index, pkt01_index, + mbuf00, mbuf01, pkts, pkts_mask); + + /* Pipeline stage 1 */ + lookup2_stage1_dosig(mbuf10, mbuf11, bucket10, bucket11, f); + + /* Pipeline stage 2 */ + lookup2_stage2_lru(pkt20_index, pkt21_index, mbuf20, mbuf21, + bucket20, bucket21, pkts_mask_out, entries, f); + } + + /* + * Pipeline flush + * + */ + /* Pipeline feed */ + bucket20 = bucket10; + bucket21 = bucket11; + mbuf20 = mbuf10; + mbuf21 = mbuf11; + mbuf10 = mbuf00; + mbuf11 = mbuf01; + pkt20_index = pkt10_index; + pkt21_index = pkt11_index; + pkt10_index = pkt00_index; + pkt11_index = pkt01_index; + + /* Pipeline stage 1 */ + lookup2_stage1_dosig(mbuf10, mbuf11, bucket10, bucket11, f); + + /* Pipeline stage 2 */ + lookup2_stage2_lru(pkt20_index, pkt21_index, mbuf20, mbuf21, + bucket20, bucket21, pkts_mask_out, entries, f); + + /* Pipeline feed */ + bucket20 = bucket10; + bucket21 = bucket11; + mbuf20 = mbuf10; + mbuf21 = mbuf11; + pkt20_index = pkt10_index; + pkt21_index = pkt11_index; + + /* Pipeline stage 2 */ + lookup2_stage2_lru(pkt20_index, pkt21_index, mbuf20, mbuf21, + bucket20, bucket21, pkts_mask_out, entries, f); + + *lookup_hit_mask = pkts_mask_out; + RTE_TABLE_HASH_KEY16_STATS_PKTS_LOOKUP_MISS(f, n_pkts_in - + __builtin_popcountll(pkts_mask_out)); + return 0; +} /* rte_table_hash_lookup_key16_lru_dosig() */ + +static int rte_table_hash_lookup_key16_ext( void *table, struct rte_mbuf **pkts, @@ -1118,11 +1300,164 @@ grind_next_buckets: } *lookup_hit_mask = pkts_mask_out; - RTE_TABLE_HASH_KEY16_STATS_PKTS_LOOKUP_MISS(f, n_pkts_in - __builtin_popcountll(pkts_mask_out)); + RTE_TABLE_HASH_KEY16_STATS_PKTS_LOOKUP_MISS(f, n_pkts_in - + __builtin_popcountll(pkts_mask_out)); return 0; } /* rte_table_hash_lookup_key16_ext() */ static int +rte_table_hash_lookup_key16_ext_dosig( + void *table, + struct rte_mbuf **pkts, + uint64_t pkts_mask, + uint64_t *lookup_hit_mask, + void **entries) +{ + struct rte_table_hash *f = (struct rte_table_hash *) table; + struct rte_bucket_4_16 *bucket10, *bucket11, *bucket20, *bucket21; + struct rte_mbuf *mbuf00, *mbuf01, *mbuf10, *mbuf11, *mbuf20, *mbuf21; + uint32_t pkt00_index, pkt01_index, pkt10_index; + uint32_t pkt11_index, pkt20_index, pkt21_index; + uint64_t pkts_mask_out = 0, buckets_mask = 0; + struct rte_bucket_4_16 *buckets[RTE_PORT_IN_BURST_SIZE_MAX]; + uint64_t *keys[RTE_PORT_IN_BURST_SIZE_MAX]; + + __rte_unused uint32_t n_pkts_in = __builtin_popcountll(pkts_mask); + + RTE_TABLE_HASH_KEY16_STATS_PKTS_IN_ADD(f, n_pkts_in); + + /* Cannot run the pipeline with less than 5 packets */ + if (__builtin_popcountll(pkts_mask) < 5) { + for ( ; pkts_mask; ) { + struct rte_bucket_4_16 *bucket; + struct rte_mbuf *mbuf; + uint32_t pkt_index; + + lookup1_stage0(pkt_index, mbuf, pkts, pkts_mask); + lookup1_stage1_dosig(mbuf, bucket, f); + lookup1_stage2_ext(pkt_index, mbuf, bucket, + pkts_mask_out, entries, buckets_mask, + buckets, keys, f); + } + + goto grind_next_buckets; + } + + /* + * Pipeline fill + * + */ + /* Pipeline stage 0 */ + lookup2_stage0(pkt00_index, pkt01_index, mbuf00, mbuf01, pkts, + pkts_mask); + + /* Pipeline feed */ + mbuf10 = mbuf00; + mbuf11 = mbuf01; + pkt10_index = pkt00_index; + pkt11_index = pkt01_index; + + /* Pipeline stage 0 */ + lookup2_stage0(pkt00_index, pkt01_index, mbuf00, mbuf01, pkts, + pkts_mask); + + /* Pipeline stage 1 */ + lookup2_stage1_dosig(mbuf10, mbuf11, bucket10, bucket11, f); + + /* + * Pipeline run + * + */ + for ( ; pkts_mask; ) { + /* Pipeline feed */ + bucket20 = bucket10; + bucket21 = bucket11; + mbuf20 = mbuf10; + mbuf21 = mbuf11; + mbuf10 = mbuf00; + mbuf11 = mbuf01; + pkt20_index = pkt10_index; + pkt21_index = pkt11_index; + pkt10_index = pkt00_index; + pkt11_index = pkt01_index; + + /* Pipeline stage 0 */ + lookup2_stage0_with_odd_support(pkt00_index, pkt01_index, + mbuf00, mbuf01, pkts, pkts_mask); + + /* Pipeline stage 1 */ + lookup2_stage1_dosig(mbuf10, mbuf11, bucket10, bucket11, f); + + /* Pipeline stage 2 */ + lookup2_stage2_ext(pkt20_index, pkt21_index, mbuf20, mbuf21, + bucket20, bucket21, pkts_mask_out, entries, + buckets_mask, buckets, keys, f); + } + + /* + * Pipeline flush + * + */ + /* Pipeline feed */ + bucket20 = bucket10; + bucket21 = bucket11; + mbuf20 = mbuf10; + mbuf21 = mbuf11; + mbuf10 = mbuf00; + mbuf11 = mbuf01; + pkt20_index = pkt10_index; + pkt21_index = pkt11_index; + pkt10_index = pkt00_index; + pkt11_index = pkt01_index; + + /* Pipeline stage 1 */ + lookup2_stage1_dosig(mbuf10, mbuf11, bucket10, bucket11, f); + + /* Pipeline stage 2 */ + lookup2_stage2_ext(pkt20_index, pkt21_index, mbuf20, mbuf21, + bucket20, bucket21, pkts_mask_out, entries, + buckets_mask, buckets, keys, f); + + /* Pipeline feed */ + bucket20 = bucket10; + bucket21 = bucket11; + mbuf20 = mbuf10; + mbuf21 = mbuf11; + pkt20_index = pkt10_index; + pkt21_index = pkt11_index; + + /* Pipeline stage 2 */ + lookup2_stage2_ext(pkt20_index, pkt21_index, mbuf20, mbuf21, + bucket20, bucket21, pkts_mask_out, entries, + buckets_mask, buckets, keys, f); + +grind_next_buckets: + /* Grind next buckets */ + for ( ; buckets_mask; ) { + uint64_t buckets_mask_next = 0; + + for ( ; buckets_mask; ) { + uint64_t pkt_mask; + uint32_t pkt_index; + + pkt_index = __builtin_ctzll(buckets_mask); + pkt_mask = 1LLU << pkt_index; + buckets_mask &= ~pkt_mask; + + lookup_grinder(pkt_index, buckets, keys, pkts_mask_out, + entries, buckets_mask_next, f); + } + + buckets_mask = buckets_mask_next; + } + + *lookup_hit_mask = pkts_mask_out; + RTE_TABLE_HASH_KEY16_STATS_PKTS_LOOKUP_MISS(f, n_pkts_in - + __builtin_popcountll(pkts_mask_out)); + return 0; +} /* rte_table_hash_lookup_key16_ext_dosig() */ + +static int rte_table_hash_key16_stats_read(void *table, struct rte_table_stats *stats, int clear) { struct rte_table_hash *t = (struct rte_table_hash *) table; @@ -1145,6 +1480,15 @@ struct rte_table_ops rte_table_hash_key16_lru_ops = { .f_stats = rte_table_hash_key16_stats_read, }; +struct rte_table_ops rte_table_hash_key16_lru_dosig_ops = { + .f_create = rte_table_hash_create_key16_lru, + .f_free = rte_table_hash_free_key16_lru, + .f_add = rte_table_hash_entry_add_key16_lru, + .f_delete = rte_table_hash_entry_delete_key16_lru, + .f_lookup = rte_table_hash_lookup_key16_lru_dosig, + .f_stats = rte_table_hash_key16_stats_read, +}; + struct rte_table_ops rte_table_hash_key16_ext_ops = { .f_create = rte_table_hash_create_key16_ext, .f_free = rte_table_hash_free_key16_ext, @@ -1154,3 +1498,11 @@ struct rte_table_ops rte_table_hash_key16_ext_ops = { .f_stats = rte_table_hash_key16_stats_read, }; +struct rte_table_ops rte_table_hash_key16_ext_dosig_ops = { + .f_create = rte_table_hash_create_key16_ext, + .f_free = rte_table_hash_free_key16_ext, + .f_add = rte_table_hash_entry_add_key16_ext, + .f_delete = rte_table_hash_entry_delete_key16_ext, + .f_lookup = rte_table_hash_lookup_key16_ext_dosig, + .f_stats = rte_table_hash_key16_stats_read, +}; -- 2.1.0