Hello Samuel, Yes, thank you — that is indeed much better, cleaner, and more maintainable! Here is v2 with the feedback incorporated and an updated commit message.
Let me know if anything else is needed here. Kind regards, Milos On Mon, Mar 30, 2026 at 1:36 PM Samuel Thibault <[email protected]> wrote: > > Hello, > > Milos Nikic, le dim. 29 mars 2026 21:48:10 -0700, a ecrit: > > This patch replaces that potentially giant allocation with an O(1) > > intrusive doubly-linked list, completely eliminating the malloc > > overhead and improving sync latency under heavy inode loads. > > That should be helpful indeed. > > > @@ -60,6 +59,28 @@ static struct hurd_ihash nodecache = > > HURD_IHASH_INITIALIZER_GKI (offsetof (struct node, slot), NULL, NULL, > > hash, compare); > > static pthread_rwlock_t nodecache_lock = PTHREAD_RWLOCK_INITIALIZER; > > +static pthread_mutex_t active_nodes_lock = PTHREAD_MUTEX_INITIALIZER; > > Do we really want a separate lock? > > AIUI the node cache and your introduced list are supposed to contain the > same set. Better use the same lock to make sure of this. That will make > the picture much simpler for maintenance: we have a set of nodes, with > two structures to traverse them: a hashtable and a list. > > > +static struct node *active_nodes_head = NULL; > > +static struct node *active_nodes_tail = NULL; > > + > > +/* Unlinks a node from the active nodes doubly-linked list. > > + The caller MUST hold active_nodes_lock before calling this. */ > > +static void > > +unlink_active_node (struct node *np) > > +{ > > + if (np->cache_prev) > > + np->cache_prev->cache_next = np->cache_next; > > + if (np->cache_next) > > + np->cache_next->cache_prev = np->cache_prev; > > + > > + if (active_nodes_head == np) > > + active_nodes_head = np->cache_next; > > + if (active_nodes_tail == np) > > + active_nodes_tail = np->cache_prev; > > + > > + np->cache_prev = NULL; > > + np->cache_next = NULL; > > +} > > > > /* Fetch inode INUM, set *NPP to the node structure; > > gain one user reference and lock the node. 
*/ > > @@ -109,6 +130,15 @@ diskfs_cached_lookup_context (ino_t inum, struct node > > **npp, > > assert_perror_backtrace (err); > > diskfs_nref_light (np); > > pthread_rwlock_unlock (&nodecache_lock); > > + pthread_mutex_lock (&active_nodes_lock); > > + np->cache_next = active_nodes_head; > > + np->cache_prev = NULL; > > + if (active_nodes_head) > > + active_nodes_head->cache_prev = np; > > + else > > + active_nodes_tail = np; > > + active_nodes_head = np; > > Better make this a link_active_node function. > > Samuel
From 57596c1026d5792dfb8c4db0a7eb9e1890e18d58 Mon Sep 17 00:00:00 2001 From: Milos Nikic <[email protected]> Date: Fri, 27 Mar 2026 23:12:22 -0700 Subject: [PATCH v2] libdiskfs: Eliminate malloc from node-cache.c iteration. This patch changes how diskfs_node_iterate() iterates the cache content. Nodes contained in the cache now form a doubly linked list in addition to being part of the hashtable. At the cost of adding two pointers to the node structure we are able to add an additional memory-efficient way of iterating the cache and thus we can now completely avoid malloc and copy during iteration. The performance improvement becomes more visible the larger the cache is, and is clearly measurable at very large node-cache sizes. --- libdiskfs/diskfs.h | 4 ++ libdiskfs/node-cache.c | 111 ++++++++++++++++++++++++++--------------- 2 files changed, 75 insertions(+), 40 deletions(-) diff --git a/libdiskfs/diskfs.h b/libdiskfs/diskfs.h index 9a5d5d9d..d95e1b80 100644 --- a/libdiskfs/diskfs.h +++ b/libdiskfs/diskfs.h @@ -125,6 +125,10 @@ struct node loff_t allocsize; ino64_t cache_id; + + /* The Intrusive List Pointers */ + struct node *cache_next; + struct node *cache_prev; }; struct diskfs_control diff --git a/libdiskfs/node-cache.c b/libdiskfs/node-cache.c index 5f6c3e36..891a8893 100644 --- a/libdiskfs/node-cache.c +++ b/libdiskfs/node-cache.c @@ -19,7 +19,6 @@ #include <hurd/ihash.h> -#include "priv.h" #include "diskfs.h" /* The node cache is implemented using a hash table. Access to the @@ -60,6 +59,43 @@ static struct hurd_ihash nodecache = HURD_IHASH_INITIALIZER_GKI (offsetof (struct node, slot), NULL, NULL, hash, compare); static pthread_rwlock_t nodecache_lock = PTHREAD_RWLOCK_INITIALIZER; +static struct node *active_nodes_head = NULL; +static struct node *active_nodes_tail = NULL; + +/* Unlinks a node from the active nodes doubly-linked list. + The caller MUST hold nodecache_lock (for writing) before calling this. 
*/ +static void +unlink_active_node (struct node *np) +{ + if (np->cache_prev) + np->cache_prev->cache_next = np->cache_next; + if (np->cache_next) + np->cache_next->cache_prev = np->cache_prev; + + if (active_nodes_head == np) + active_nodes_head = np->cache_next; + if (active_nodes_tail == np) + active_nodes_tail = np->cache_prev; + + np->cache_prev = NULL; + np->cache_next = NULL; +} + +/* Adds a node to the head of the active nodes doubly-linked list. + The caller MUST hold nodecache_lock (for writing) before calling this. */ +static void +link_active_node (struct node *np) +{ + np->cache_next = active_nodes_head; + np->cache_prev = NULL; + + if (active_nodes_head) + active_nodes_head->cache_prev = np; + else + active_nodes_tail = np; + + active_nodes_head = np; +} /* Fetch inode INUM, set *NPP to the node structure; gain one user reference and lock the node. */ @@ -107,6 +143,7 @@ diskfs_cached_lookup_context (ino_t inum, struct node **npp, err = hurd_ihash_locp_add (&nodecache, slot, (hurd_ihash_key_t) &np->cache_id, np); assert_perror_backtrace (err); + link_active_node (np); diskfs_nref_light (np); pthread_rwlock_unlock (&nodecache_lock); @@ -116,6 +153,7 @@ diskfs_cached_lookup_context (ino_t inum, struct node **npp, { pthread_rwlock_wrlock (&nodecache_lock); hurd_ihash_remove (&nodecache, (hurd_ihash_key_t) &np->cache_id); + unlink_active_node (np); pthread_rwlock_unlock (&nodecache_lock); /* Don't delete from disk. 
*/ @@ -180,6 +218,7 @@ diskfs_try_dropping_softrefs (struct node *np) hurd_ihash_locp_remove (&nodecache, np->slot); np->slot = NULL; + unlink_active_node (np); /* Flush node if needed, before forgetting it */ diskfs_node_update (np, diskfs_synchronous); @@ -197,54 +236,46 @@ error_t __attribute__ ((weak)) diskfs_node_iterate (error_t (*fun)(struct node *)) { error_t err = 0; - size_t num_nodes; - struct node *node, **node_list, **p; + struct node *current, *next_node; pthread_rwlock_rdlock (&nodecache_lock); + current = active_nodes_tail; - /* We must copy everything from the hash table into another data structure - to avoid running into any problems with the hash-table being modified - during processing (normally we delegate access to hash-table with - nodecache_lock, but we can't hold this while locking the - individual node locks). */ - /* XXX: Can we? */ - num_nodes = nodecache.nr_items; - - /* TODO This method doesn't scale beyond a few dozen nodes and should be - replaced. */ - node_list = malloc (num_nodes * sizeof (struct node *)); - if (node_list == NULL) - { - pthread_rwlock_unlock (&nodecache_lock); - return ENOMEM; - } + /* Bootstrap the loop by grabbing a ref to the very first node */ + if (current) + refcounts_ref (¤t->refcounts, NULL); - p = node_list; - HURD_IHASH_ITERATE (&nodecache, i) + while (current != NULL) { - *p++ = node = i; + /* Figure out who is next, and grab a ref so it doesn't + * disappear while we are processing 'current' */ + next_node = current->cache_prev; + if (next_node) + refcounts_ref (&next_node->refcounts, NULL); - /* We acquire a hard reference for node, but without using - diskfs_nref. We do this so that diskfs_new_hardrefs will not - get called. 
*/ - refcounts_ref (&node->refcounts, NULL); - } - pthread_rwlock_unlock (&nodecache_lock); + pthread_rwlock_unlock (&nodecache_lock); - p = node_list; - while (num_nodes-- > 0) - { - node = *p++; - if (!err) - { - pthread_mutex_lock (&node->lock); - err = (*fun)(node); - pthread_mutex_unlock (&node->lock); - } - diskfs_nrele (node); + pthread_mutex_lock (¤t->lock); + err = (*fun)(current); + pthread_mutex_unlock (¤t->lock); + + /* We are done with 'current', drop the ref we grabbed */ + diskfs_nrele (current); + if (err) + { + /* We don't need to traverse the rest of the list! + Just drop the next_node ref if we grabbed it, and return. */ + if (next_node) + diskfs_nrele (next_node); + return err; + } + + /* Re-acquire the global lock to loop around */ + pthread_rwlock_rdlock (&nodecache_lock); + current = next_node; } - free (node_list); + pthread_rwlock_unlock (&nodecache_lock); return err; } -- 2.53.0
