wingo pushed a commit to branch wip-whippet
in repository guile.
commit 0ba9f68621a8428a76f99f1d7be340a84d0e6325
Author: Andy Wingo <[email protected]>
AuthorDate: Mon Aug 4 21:33:25 2025 +0200
Collect fragmentation into freelists
The idea is to make use of fragmented space instead of wasting it.
Unclear whether it's a win, though; the microbenchmarks are
inconclusive.
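
For reference, here is a rough sketch of the size-to-bucket mapping
used by nofl-holeset.h below. The helper name is illustrative only;
the constants are the ones the header defines (50 packed sizes, 512
max granules, 13 as the sparse scale factor). Holes of 1 to 50
granules each get a bucket of their own; holes of 51 to 512 granules
share the remaining 14 buckets, rounding down on insert and up on
lookup so that a popped hole is always at least as large as the
request:

  #include <math.h>    /* floor, ceil, fmin */
  #include <stddef.h>  /* size_t */

  /* Illustrative sketch; mirrors nofl_holeset_sparse_bucket() and
     nofl_holeset_bucket_for_lookup(). */
  static size_t
  example_bucket(size_t granules, int for_insert) {
    if (granules <= 50)              /* packed: one bucket per size */
      return granules - 1;
    double frac = (double) ((granules - 50) * 13) / (512 - 50);
    return 50 + (size_t) fmin(for_insert ? floor(frac) : ceil(frac), 13);
  }

For example, example_bucket(200, 1) is 54, so a 200-granule hole is
filed in bucket 54, while example_bucket(200, 0) is 55, so a
200-granule request searches buckets 55 and up; every hole in bucket
55 holds at least 228 granules, so any hit fits and the leftover tail
is pushed back as a smaller hole.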
---
src/nofl-holeset.h | 191 +++++++++++++++++++++++++++++++++++++++++++++++++++++
src/nofl-space.h | 63 +++++++++++++++---
2 files changed, 245 insertions(+), 9 deletions(-)
diff --git a/src/nofl-holeset.h b/src/nofl-holeset.h
new file mode 100644
index 000000000..6a3cfda9f
--- /dev/null
+++ b/src/nofl-holeset.h
@@ -0,0 +1,191 @@
+#ifndef NOFL_HOLESET_H
+#define NOFL_HOLESET_H
+
+#include <math.h>
+#include <pthread.h>
+
+#include "gc-attrs.h"
+#include "gc-align.h"
+#include "gc-ref.h"
+#include "gc-inline.h"
+#include "gc-lock.h"
+
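+/* A holeset collects the fragmented remainders of holes (the space
+   left over when an allocator abandons a partially-used hole) into
+   size-segregated freelists so that later allocations can reuse it.
+   A free hole is threaded through its own memory: `next' links holes
+   within a bucket, and `next_chain' links whole bucket lists when a
+   local holeset is released to the shared one.  Holes of up to
+   NOFL_HOLESET_PACKED_GRANULES granules go in a bucket indexed
+   exactly by their size; larger holes, up to
+   NOFL_HOLESET_MAX_GRANULES granules, map onto the remaining
+   buckets.  The `nonempty' bitmask has one bit per bucket, so
+   lookups can find the first sufficiently large bucket with a
+   count-trailing-zeros. */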
+struct nofl_hole {
+ struct nofl_hole *next;
+ struct nofl_hole *next_chain;
+};
+
+struct nofl_hole_with_size {
+ struct nofl_hole entry;
+ size_t granules;
+};
+
+struct nofl_holeset {
+ uint64_t nonempty;
+ struct nofl_hole *buckets[64];
+};
+
+#define NOFL_HOLESET_PACKED_GRANULES 50
+#define NOFL_HOLESET_SPARSE_COUNT (64 - NOFL_HOLESET_PACKED_GRANULES - 1)
+#define NOFL_HOLESET_MAX_GRANULES 512
+
+static inline void
+nofl_hole_push(struct nofl_hole **loc, struct nofl_hole *hole) {
+ GC_ASSERT_EQ(hole->next, NULL);
+ GC_ASSERT_EQ(hole->next_chain, NULL);
+ hole->next = *loc;
+ *loc = hole;
+}
+
+static inline void*
+nofl_hole_pop(struct nofl_hole **loc) {
+ GC_ASSERT(*loc);
+ struct nofl_hole *hole = *loc;
+ GC_ASSERT_EQ(hole->next_chain, NULL);
+ *loc = hole->next;
+ hole->next = NULL;
+ return hole;
+}
+
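+/* Map a hole size above NOFL_HOLESET_PACKED_GRANULES to a sparse
+   bucket.  Inserts round down and lookups round up, so a hole found
+   by lookup is always at least as large as the request. */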
+static inline size_t
+nofl_holeset_sparse_bucket(size_t granules, int for_insert) {
+ GC_ASSERT(granules >= NOFL_HOLESET_PACKED_GRANULES);
+ size_t idx = NOFL_HOLESET_PACKED_GRANULES;
+ size_t num = granules - NOFL_HOLESET_PACKED_GRANULES;
+ size_t denom = NOFL_HOLESET_MAX_GRANULES - NOFL_HOLESET_PACKED_GRANULES;
+ double frac = (double) (num * NOFL_HOLESET_SPARSE_COUNT) / denom;
+ idx += fmin(for_insert ? floor(frac) : ceil(frac),
+ NOFL_HOLESET_SPARSE_COUNT);
+ GC_ASSERT(idx < 64);
+ return idx;
+}
+
+static inline size_t
+nofl_holeset_bucket_for_lookup(size_t granules) {
+ return granules <= NOFL_HOLESET_PACKED_GRANULES
+ ? granules - 1
+ : nofl_holeset_sparse_bucket(granules, 0);
+}
+
+static inline void
+nofl_holeset_push_local(struct nofl_holeset *local, uintptr_t addr,
+ size_t granules) {
+ size_t idx;
+ struct nofl_hole *hole = (struct nofl_hole *) addr;
+ if (granules <= NOFL_HOLESET_PACKED_GRANULES) {
+ GC_ASSERT(granules > 0);
+ idx = granules - 1;
+ } else {
+ struct nofl_hole_with_size *hole_with_size =
+ (struct nofl_hole_with_size *) hole;
+ GC_ASSERT_EQ(hole_with_size->granules, 0);
+ hole_with_size->granules = granules;
+ idx = nofl_holeset_sparse_bucket(granules, 1);
+ }
+
+ nofl_hole_push(&local->buckets[idx], hole);
+ local->nonempty |= ((uint64_t) 1) << idx;
+}
+
+static inline uintptr_t
+nofl_hole_tail_address(void *hole, size_t granules)
+{
+ return ((uintptr_t) hole) + granules * NOFL_GRANULE_SIZE;
+}
+
+static inline struct gc_ref
+nofl_holeset_try_pop(struct nofl_holeset *holes, size_t granules) {
+ GC_ASSERT(granules <= NOFL_HOLESET_MAX_GRANULES);
+ size_t min_idx = nofl_holeset_bucket_for_lookup(granules);
+
+ uint64_t candidates = holes->nonempty >> min_idx;
+ if (!candidates)
+ return gc_ref_null();
+
+ size_t idx = min_idx + __builtin_ctzll(candidates);
+ void *ret = nofl_hole_pop(&holes->buckets[idx]);
+
+ GC_ASSERT(holes->nonempty & (((uint64_t)1) << idx));
+ if (!holes->buckets[idx])
+ holes->nonempty ^= ((uint64_t)1) << idx;
+
+ size_t hole_granules;
+ if (idx < NOFL_HOLESET_PACKED_GRANULES) {
+ hole_granules = idx + 1;
+ } else {
+ struct nofl_hole_with_size *hole_with_size = ret;
+ hole_granules = hole_with_size->granules;
+ hole_with_size->granules = 0;
+ }
+
+ GC_ASSERT(hole_granules >= granules);
+ size_t tail_granules = hole_granules - granules;
+ if (tail_granules)
+ nofl_holeset_push_local(holes, nofl_hole_tail_address(ret, granules),
+ tail_granules);
+
+ return gc_ref_from_heap_object(ret);
+}
+
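+/* Each allocator has a private holeset; release moves every nonempty
+   local bucket, as a whole list, onto the corresponding shared
+   bucket's chain under the lock, and acquire takes one such chain for
+   the smallest suitable bucket back into a local set. */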
+static inline void
+nofl_holeset_release(struct nofl_holeset *local,
+ struct nofl_holeset *remote,
+ pthread_mutex_t *lock) {
+ uint64_t nonempty = local->nonempty;
+ if (!nonempty)
+ return;
+
+ local->nonempty = 0;
+
+ pthread_mutex_lock(lock);
+ remote->nonempty |= nonempty;
+ do {
+ uint64_t idx = __builtin_ctzll(nonempty);
+ struct nofl_hole *hole = local->buckets[idx];
+ local->buckets[idx] = NULL;
+ hole->next_chain = remote->buckets[idx];
+ remote->buckets[idx] = hole;
+ nonempty ^= ((uint64_t)1) << idx;
+ } while (nonempty);
+ pthread_mutex_unlock(lock);
+}
+
+static inline int
+nofl_holeset_acquire(struct nofl_holeset *local,
+ struct nofl_holeset *remote,
+ pthread_mutex_t *lock,
+ size_t granules) {
+ size_t min_idx = nofl_holeset_bucket_for_lookup(granules);
+
+ uint64_t sloppy_nonempty =
+ atomic_load_explicit(&remote->nonempty, memory_order_relaxed);
+ if (!(sloppy_nonempty >> min_idx))
+ return 0;
+
+ pthread_mutex_lock(lock);
+ uint64_t nonempty = remote->nonempty >> min_idx;
+ if (!nonempty) {
+ pthread_mutex_unlock(lock);
+ return 0;
+ }
+ uint64_t idx = min_idx + __builtin_ctzll(nonempty);
+ GC_ASSERT(!local->buckets[idx]);
+ struct nofl_hole *chain = remote->buckets[idx];
+ remote->buckets[idx] = chain->next_chain;
+ if (!chain->next_chain)
+ remote->nonempty ^= ((uint64_t)1) << idx;
+ pthread_mutex_unlock(lock);
+
+ local->buckets[idx] = chain;
+ local->nonempty |= ((uint64_t)1) << idx;
+ chain->next_chain = NULL;
+ return 1;
+}
+
+static inline void
+nofl_holeset_clear(struct nofl_holeset *holes)
+{
+ memset(holes, 0, sizeof (*holes));
+}
+
+#endif // NOFL_HOLESET_H
diff --git a/src/nofl-space.h b/src/nofl-space.h
index 496d1c595..a623acd8e 100644
--- a/src/nofl-space.h
+++ b/src/nofl-space.h
@@ -38,6 +38,8 @@ STATIC_ASSERT_EQ(NOFL_GRANULE_SIZE, 1 << NOFL_GRANULE_SIZE_LOG_2);
STATIC_ASSERT_EQ(NOFL_MEDIUM_OBJECT_THRESHOLD,
NOFL_MEDIUM_OBJECT_GRANULE_THRESHOLD * NOFL_GRANULE_SIZE);
+#include "nofl-holeset.h"
+
#define NOFL_SLAB_SIZE (4 * 1024 * 1024)
#define NOFL_BLOCK_SIZE (64 * 1024)
#define NOFL_METADATA_BYTES_PER_BLOCK (NOFL_BLOCK_SIZE / NOFL_GRANULE_SIZE)
@@ -166,6 +168,7 @@ struct nofl_space {
struct nofl_block_list promoted;
struct nofl_block_list old;
struct nofl_block_list evacuation_targets;
+ struct nofl_holeset holes;
pthread_mutex_t lock;
double evacuation_minimum_reserve;
double evacuation_reserve;
@@ -183,6 +186,7 @@ struct nofl_allocator {
uintptr_t alloc;
uintptr_t sweep;
struct nofl_block_ref block;
+ struct nofl_holeset holes;
};
#if GC_CONSERVATIVE_TRACE && GC_CONCURRENT_TRACE
@@ -610,6 +614,7 @@ static void
nofl_allocator_reset(struct nofl_allocator *alloc) {
alloc->alloc = alloc->sweep = 0;
alloc->block = nofl_block_null();
+ nofl_holeset_clear(&alloc->holes);
}
static int
@@ -738,6 +743,7 @@ static void
nofl_allocator_finish_hole(struct nofl_allocator *alloc) {
size_t granules = (alloc->sweep - alloc->alloc) / NOFL_GRANULE_SIZE;
if (granules) {
+ nofl_holeset_push_local(&alloc->holes, alloc->alloc, granules);
alloc->block.summary->holes_with_fragmentation++;
alloc->block.summary->fragmentation_granules += granules;
alloc->alloc = alloc->sweep;
@@ -839,6 +845,7 @@ static void
nofl_allocator_finish(struct nofl_allocator *alloc, struct nofl_space *space) {
if (nofl_allocator_has_block(alloc))
nofl_allocator_release_block(alloc, space);
+ nofl_holeset_release(&alloc->holes, &space->holes, &space->lock);
}
static int
@@ -936,6 +943,49 @@ nofl_allocator_next_hole(struct nofl_allocator *alloc,
}
}
+static inline struct gc_ref
+nofl_allocate_bump_pointer(struct nofl_allocator *alloc, size_t size,
+ enum gc_allocation_kind kind) {
+ GC_ASSERT(size <= alloc->sweep - alloc->alloc);
+ struct gc_ref ret = gc_ref(alloc->alloc);
+ alloc->alloc += size;
+ gc_update_alloc_table(ret, size, kind);
+ return ret;
+}
+
+static struct gc_ref
+nofl_allocate_second_chance(struct nofl_allocator *alloc,
+ struct nofl_space *space, size_t granules) {
+ struct gc_ref local = nofl_holeset_try_pop(&alloc->holes, granules);
+ if (!gc_ref_is_null(local))
+ return local;
+ if (nofl_holeset_acquire(&alloc->holes, &space->holes, &space->lock,
+ granules))
+ return nofl_holeset_try_pop(&alloc->holes, granules);
+ return gc_ref_null();
+}
+
+static struct gc_ref
+nofl_allocate_slow(struct nofl_allocator *alloc, struct nofl_space *space,
+ size_t size, enum gc_allocation_kind kind) {
+ size_t granules = size >> NOFL_GRANULE_SIZE_LOG_2;
+
+ if (nofl_allocator_next_hole_in_block_of_size(alloc, space, granules))
+ return nofl_allocate_bump_pointer(alloc, size, kind);
+
+ struct gc_ref second_chance =
+ nofl_allocate_second_chance(alloc, space, granules);
+ if (!gc_ref_is_null(second_chance)) {
+ gc_update_alloc_table(second_chance, size, kind);
+ return second_chance;
+ }
+
+ if (nofl_allocator_next_hole(alloc, space, granules))
+ return nofl_allocate_bump_pointer(alloc, size, kind);
+
+ return gc_ref_null();
+}
+
static struct gc_ref
nofl_allocate(struct nofl_allocator *alloc, struct nofl_space *space,
size_t size, enum gc_allocation_kind kind) {
@@ -943,16 +993,10 @@ nofl_allocate(struct nofl_allocator *alloc, struct nofl_space *space,
GC_ASSERT(size <= gc_allocator_large_threshold());
size = align_up(size, NOFL_GRANULE_SIZE);
- if (alloc->alloc + size > alloc->sweep) {
- size_t granules = size >> NOFL_GRANULE_SIZE_LOG_2;
- if (!nofl_allocator_next_hole(alloc, space, granules))
- return gc_ref_null();
- }
+ if (alloc->alloc + size <= alloc->sweep)
+ return nofl_allocate_bump_pointer(alloc, size, kind);
- struct gc_ref ret = gc_ref(alloc->alloc);
- alloc->alloc += size;
- gc_update_alloc_table(ret, size, kind);
- return ret;
+ return nofl_allocate_slow(alloc, space, size, kind);
}
static struct gc_ref
@@ -1538,6 +1582,7 @@ nofl_space_verify_before_restart(struct nofl_space *space) {
static void
nofl_space_finish_gc(struct nofl_space *space,
enum gc_collection_kind gc_kind) {
+ nofl_holeset_clear(&space->holes);
space->last_collection_was_minor = (gc_kind == GC_COLLECTION_MINOR);
struct gc_lock lock = nofl_space_lock(space);
if (space->evacuating)