On 1/7/26 7:14 AM, Leon Hwang wrote:
When updating an existing element in lru_[percpu_,]hash maps, the current
implementation always calls prealloc_lru_pop() to get a new node before
checking if the key already exists. If the map is full, this triggers
LRU eviction and removes an existing element, even though the update
operation only needs to modify the value of an existing key in-place.
This is problematic because:
1. Users may unexpectedly lose entries when doing simple value updates
2. The eviction overhead is unnecessary for existing key updates
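To make the described behaviour concrete, a rough syscall-side sketch (the map
size, keys, and flags below are made up for illustration and are not taken from
the patch or its selftest; exact eviction behaviour also depends on the LRU
internals and the number of CPUs):

#include <bpf/bpf.h>
#include <stdio.h>

int main(void)
{
	__u32 key;
	__u64 val = 1;
	int fd;

	/* Tiny LRU hash map, deliberately small enough to fill up. */
	fd = bpf_map_create(BPF_MAP_TYPE_LRU_HASH, "lru_demo",
			    sizeof(key), sizeof(val), 8, NULL);
	if (fd < 0)
		return 1;

	/* Fill the map to capacity. */
	for (key = 0; key < 8; key++)
		bpf_map_update_elem(fd, &key, &val, BPF_NOEXIST);

	/* Only update the value of an already-existing key. */
	key = 0;
	val = 2;
	bpf_map_update_elem(fd, &key, &val, BPF_EXIST);

	/* Without the fix, the update above may still have evicted one of
	 * the other keys to make room for the preallocated node.
	 */
	for (key = 1; key < 8; key++) {
		if (bpf_map_lookup_elem(fd, &key, &val))
			printf("key %u was evicted\n", key);
	}
	return 0;
}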
This is not the common LRU map use case. A bpf prog usually does a
lookup first, finds the entry, and then updates the value in place
directly in the bpf prog itself. Only if the lookup fails does it insert
a _new_ element.
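For reference, a minimal sketch of that lookup-then-update pattern on the prog
side (the map definition, section name, and counter logic here are invented for
illustration):

#include "vmlinux.h"
#include <bpf/bpf_helpers.h>

struct {
	__uint(type, BPF_MAP_TYPE_LRU_HASH);
	__uint(max_entries, 1024);
	__type(key, u32);
	__type(value, u64);
} counters SEC(".maps");

SEC("tracepoint/syscalls/sys_enter_write")
int count_writes(void *ctx)
{
	u32 key = bpf_get_current_pid_tgid() >> 32;
	u64 *val, init = 1;

	val = bpf_map_lookup_elem(&counters, &key);
	if (val) {
		/* Hit: update the value in place; no map update call and
		 * no eviction involved.
		 */
		__sync_fetch_and_add(val, 1);
		return 0;
	}
	/* Miss: insert a _new_ element; this is the only place where
	 * eviction may kick in when the map is full.
	 */
	bpf_map_update_elem(&counters, &key, &init, BPF_NOEXIST);
	return 0;
}

char LICENSE[] SEC("license") = "GPL";

With this pattern, bpf_map_update_elem() is only reached on a miss, so eviction
only happens when a genuinely new element has to be inserted.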
When the map is full, eviction should actually be triggered regardless.
An LRU map that is too small to fit the working set is asking for
trouble.
For updates from the syscall side, if the use case is always updating an
existing element, a regular hashmap should be used instead.
Fix this by first checking if the key exists before allocating a new
node. If the key is found, update the value using the extra lru node
without triggering any eviction.
This will instead add overhead for the common use case described above.
The patch is mostly for getting a selftest case to work in a small LRU
map. I don't think it is worth the added complexity either.
Patches 2 and 3 look ok, but they also only make marginal improvements
to the existing code.
pw-bot: cr
+static int htab_lru_map_update_elem_in_place(struct bpf_htab *htab, void *key, void *value,
+					     u64 map_flags, struct bucket *b,
+					     struct hlist_nulls_head *head, u32 hash,
+					     bool percpu, bool onallcpus)
+{
+	struct htab_elem *l_new, *l_old, *l_free;
+	struct bpf_map *map = &htab->map;
+	u32 key_size = map->key_size;
+	struct bpf_lru_node *node;
+	unsigned long flags;
+	void *l_val;
+	int ret;
+
+	node = bpf_lru_pop_extra(&htab->lru);
+	if (!node)
+		return -ENOENT;
+
+	l_new = container_of(node, struct htab_elem, lru_node);
+	l_new->hash = hash;
+	memcpy(l_new->key, key, key_size);
+	if (!percpu) {
+		l_val = htab_elem_value(l_new, key_size);
+		copy_map_value(map, l_val, value);
+		bpf_obj_free_fields(map->record, l_val);
+	}
+
+	ret = htab_lock_bucket(b, &flags);
+	if (ret)
+		goto err_lock_bucket;
+
+	l_old = lookup_elem_raw(head, hash, key, key_size);
+
+	ret = check_flags(htab, l_old, map_flags);
+	if (ret)
+		goto err;
+
+	if (l_old) {
+		bpf_lru_node_set_ref(&l_new->lru_node);
+		if (percpu) {
+			/* per-cpu hash map can update value in-place.
+			 * Keep the same logic in __htab_lru_percpu_map_update_elem().
+			 */
+			pcpu_copy_value(htab, htab_elem_get_ptr(l_old, key_size),
+					value, onallcpus, map_flags);
+			l_free = l_new;
+		} else {
+			hlist_nulls_add_head_rcu(&l_new->hash_node, head);
+			hlist_nulls_del_rcu(&l_old->hash_node);
+			l_free = l_old;
+		}
+	} else {
+		ret = -ENOENT;
+	}
+
+err:
+	htab_unlock_bucket(b, flags);
+
+err_lock_bucket:
+	if (ret) {
+		bpf_lru_push_free(&htab->lru, node);
+	} else {
+		if (l_old && !percpu)
+			bpf_obj_free_fields(map->record, htab_elem_value(l_old, key_size));
Does htab_lru_map_update_elem() have an existing bug that is missing the
bpf_obj_free_fields() on l_old?
+		bpf_lru_push_free(&htab->lru, &l_free->lru_node);
+	}
+
+	return ret;
+}
+
 static long htab_lru_map_update_elem(struct bpf_map *map, void *key, void *value,
 				     u64 map_flags)
 {
@@ -1215,6 +1286,11 @@ static long htab_lru_map_update_elem(struct bpf_map *map, void *key, void *value
 	b = __select_bucket(htab, hash);
 	head = &b->head;
+	ret = htab_lru_map_update_elem_in_place(htab, key, value, map_flags, b, head, hash, false,
+						false);
+	if (!ret)
+		return 0;
+
 	/* For LRU, we need to alloc before taking bucket's
 	 * spinlock because getting free nodes from LRU may need
 	 * to remove older elements from htab and this removal
@@ -1354,6 +1430,11 @@ static long __htab_lru_percpu_map_update_elem(struct bpf_map *map, void *key,
 	b = __select_bucket(htab, hash);
 	head = &b->head;
+	ret = htab_lru_map_update_elem_in_place(htab, key, value, map_flags, b, head, hash, true,
+						onallcpus);
+	if (!ret)
+		return 0;
+
 	/* For LRU, we need to alloc before taking bucket's
 	 * spinlock because LRU's elem alloc may need
 	 * to remove older elem from htab and this removal