Author: Armin Rigo <ar...@tunes.org>
Branch:
Changeset: r1964:6d3eb55f21f9
Date: 2015-11-05 14:23 +0100
http://bitbucket.org/pypy/stmgc/changeset/6d3eb55f21f9/
Log:    Add stm_hashtable_iter*() functions. Contains a bit of delicate
        logic to avoid tracing tables repeatedly during minor collection,
        which I think should be correct but is probably not

diff --git a/c8/stm/gcpage.c b/c8/stm/gcpage.c
--- a/c8/stm/gcpage.c
+++ b/c8/stm/gcpage.c
@@ -224,6 +224,9 @@
    version and thus don't need tracing. */
 
 static struct list_s *marked_objects_to_trace;
 
+/* a list of hobj/hashtable pairs for all hashtables seen */
+static struct list_s *all_hashtables_seen = NULL;
+
 /* we use the sharing seg0's pages for the GCFLAG_VISITED flag */
 
 static inline struct object_s *mark_loc(object_t *obj)
@@ -301,8 +304,6 @@
 }
 
-#define TRACE_FOR_MAJOR_COLLECTION  (&mark_record_trace)
-
 static void mark_and_trace(
     object_t *obj,
     char *segment_base,       /* to trace obj in */
@@ -791,6 +792,7 @@
 
     /* marking */
     LIST_CREATE(marked_objects_to_trace);
+    LIST_CREATE(all_hashtables_seen);
     mark_visit_from_modified_objects();
     mark_visit_from_markers();
     mark_visit_from_roots();
@@ -815,6 +817,10 @@
     sweep_large_objects();
     sweep_small_objects();
 
+    /* hashtables */
+    stm_compact_hashtables();
+    LIST_FREE(all_hashtables_seen);
+
     dprintf((" | used after collection: %ld\n",
              (long)pages_ctl.total_allocated));
     dprintf((" `----------------------------------------------\n"));
diff --git a/c8/stm/hashtable.c b/c8/stm/hashtable.c
--- a/c8/stm/hashtable.c
+++ b/c8/stm/hashtable.c
@@ -49,8 +49,12 @@
 #define PERTURB_SHIFT            5
 #define RESIZING_LOCK            0
 
-typedef struct {
-    uintptr_t mask;
+#define TRACE_FLAG_OFF              0
+#define TRACE_FLAG_ONCE             1
+#define TRACE_FLAG_KEEPALIVE        2
+
+typedef struct stm_hashtable_table_s {
+    uintptr_t mask;      /* 'mask' is always immutable. */
 
     /* 'resize_counter' start at an odd value, and is decremented (by
        6) for every new item put in 'items'.  When it crosses 0, we
@@ -63,6 +67,8 @@
     */
     uintptr_t resize_counter;
 
+    uint8_t trace_flag;
+
     stm_hashtable_entry_t *items[INITIAL_HASHTABLE_SIZE];
 } stm_hashtable_table_t;
@@ -79,6 +85,7 @@
 {
     table->mask = itemcount - 1;
     table->resize_counter = itemcount * 4 + 1;
+    table->trace_flag = TRACE_FLAG_OFF;
     memset(table->items, 0, itemcount * sizeof(stm_hashtable_entry_t *));
 }
@@ -162,6 +169,7 @@
     assert(biggertable);   // XXX
 
     stm_hashtable_table_t *table = hashtable->table;
+    table->trace_flag = TRACE_FLAG_ONCE;
     table->resize_counter = (uintptr_t)biggertable;
     /* ^^^ this unlocks the table by writing a non-zero value to
        table->resize_counter, but the new value is a pointer to the
@@ -485,6 +493,41 @@
 static void _stm_compact_hashtable(struct object_s *hobj,
                                    stm_hashtable_t *hashtable)
 {
+    /* Walk the chained list that starts at 'hashtable->initial_table'
+       and follows the 'resize_counter' fields.  Remove all tables
+       except (1) the initial one, (2) the most recent one, and (3)
+       the ones on which stm_hashtable_iter_tracefn() was called.
+    */
+    stm_hashtable_table_t *most_recent_table = hashtable->table;
+    assert(!IS_EVEN(most_recent_table->resize_counter));
+    /* set the "don't free me" flag on the most recent table */
+    most_recent_table->trace_flag = TRACE_FLAG_KEEPALIVE;
+
+    stm_hashtable_table_t *known_alive = &hashtable->initial_table;
+    known_alive->trace_flag = TRACE_FLAG_OFF;
+    /* a KEEPALIVE flag is ignored on the initial table: it is never
+       individually freed anyway */
+
+    while (known_alive != most_recent_table) {
+        uintptr_t rc = known_alive->resize_counter;
+        assert(IS_EVEN(rc));
+        assert(rc != RESIZING_LOCK);
+
+        stm_hashtable_table_t *next_table = (stm_hashtable_table_t *)rc;
+        if (next_table->trace_flag != TRACE_FLAG_KEEPALIVE) {
+            /* free this next table and relink the chained list to skip it */
+            assert(IS_EVEN(next_table->resize_counter));
+            known_alive->resize_counter = next_table->resize_counter;
+            free(next_table);
+        }
+        else {
+            /* this next table is kept alive */
+            known_alive = next_table;
+            known_alive->trace_flag = TRACE_FLAG_OFF;
+        }
+    }
+    /* done the first part */
+
     stm_hashtable_table_t *table = hashtable->table;
     uintptr_t rc = table->resize_counter;
     assert(!IS_EVEN(rc));
@@ -515,35 +558,24 @@
         dprintf(("compact with %ld items:\n", num_entries_times_6 / 6));
         _stm_rehash_hashtable(hashtable, count, segnum);
     }
+}
 
-    table = hashtable->table;
-    assert(!IS_EVEN(table->resize_counter));
-
-    if (table != &hashtable->initial_table) {
-        uintptr_t rc = hashtable->initial_table.resize_counter;
-        while (1) {
-            assert(IS_EVEN(rc));
-            assert(rc != RESIZING_LOCK);
-
-            stm_hashtable_table_t *old_table = (stm_hashtable_table_t *)rc;
-            if (old_table == table)
-                break;
-            rc = old_table->resize_counter;
-            free(old_table);
-        }
-        hashtable->initial_table.resize_counter = (uintptr_t)table;
-        assert(IS_EVEN(hashtable->initial_table.resize_counter));
+static void stm_compact_hashtables(void)
+{
+    uintptr_t i = all_hashtables_seen->count;
+    while (i > 0) {
+        i -= 2;
+        _stm_compact_hashtable(
+            (struct object_s *)all_hashtables_seen->items[i],
+            (stm_hashtable_t *)all_hashtables_seen->items[i + 1]);
     }
 }
 
-void stm_hashtable_tracefn(struct object_s *hobj, stm_hashtable_t *hashtable,
-                           void trace(object_t **))
+static void _hashtable_tracefn(stm_hashtable_table_t *table,
+                               void trace(object_t **))
 {
-    if (trace == TRACE_FOR_MAJOR_COLLECTION)
-        _stm_compact_hashtable(hobj, hashtable);
-
-    stm_hashtable_table_t *table;
-    table = VOLATILE_HASHTABLE(hashtable)->table;
+    if (table->trace_flag == TRACE_FLAG_ONCE)
+        table->trace_flag = TRACE_FLAG_OFF;
 
     uintptr_t j, mask = table->mask;
     for (j = 0; j <= mask; j++) {
@@ -554,3 +586,105 @@
         }
     }
 }
+
+void stm_hashtable_tracefn(struct object_s *hobj, stm_hashtable_t *hashtable,
+                           void trace(object_t **))
+{
+    if (all_hashtables_seen != NULL)
+        all_hashtables_seen = list_append2(all_hashtables_seen,
+                                           (uintptr_t)hobj,
+                                           (uintptr_t)hashtable);
+
+    _hashtable_tracefn(VOLATILE_HASHTABLE(hashtable)->table, trace);
+}
+
+
+/* Hashtable iterators */
+
+/* TRACE_FLAG_ONCE: the table must be traced once if it supports an iterator
+   TRACE_FLAG_OFF: the table is the most recent table, or has already been
+       traced once
+   TRACE_FLAG_KEEPALIVE: during major collection only: mark tables that
+       must be kept alive because there are iterators
+*/
+
+struct stm_hashtable_table_s *stm_hashtable_iter(stm_hashtable_t *hashtable)
+{
+    /* Get the table.  No synchronization is needed: we may miss some
+       entries that are being added, but they would contain NULL in
+       this segment anyway.
+    */
+    return VOLATILE_HASHTABLE(hashtable)->table;
+}
+
+stm_hashtable_entry_t **
+stm_hashtable_iter_next(object_t *hobj, struct stm_hashtable_table_s *table,
+                        stm_hashtable_entry_t **previous)
+{
+    /* Set the read marker on hobj for every item, in case we have
+       transaction breaks in-between.
+    */
+    stm_read(hobj);
+
+    /* Get the bounds of the part of the 'stm_hashtable_entry_t *' array
+       that we have to check */
+    stm_hashtable_entry_t **pp, **last;
+    if (previous == NULL)
+        pp = table->items;
+    else
+        pp = previous + 1;
+    last = table->items + table->mask;
+
+    /* Find the first non-null entry */
+    stm_hashtable_entry_t *entry;
+
+    while (pp <= last) {
+        entry = *(stm_hashtable_entry_t *volatile *)pp;
+        if (entry != NULL) {
+            stm_read((object_t *)entry);
+            if (entry->object != NULL) {
+                //fprintf(stderr, "stm_hashtable_iter_next(%p, %p, %p) = %p\n",
+                //        hobj, table, previous, pp);
+                return pp;
+            }
+        }
+        ++pp;
+    }
+    //fprintf(stderr, "stm_hashtable_iter_next(%p, %p, %p) = %p\n",
+    //        hobj, table, previous, NULL);
+    return NULL;
+}
+
+void stm_hashtable_iter_tracefn(struct stm_hashtable_table_s *table,
+                                void trace(object_t **))
+{
+    if (all_hashtables_seen == NULL) {   /* for minor collections */
+
+        /* During minor collection, tracing the table is only required
+           the first time: if it contains young objects, they must be
+           kept alive and have their address updated.  We use
+           TRACE_FLAG_ONCE to know that.  We don't need to do it if
+           our 'table' is the latest version, because in that case it
+           will be done by stm_hashtable_tracefn().  That's why
+           TRACE_FLAG_ONCE is only set when a more recent table is
+           attached.
+
+           It is only needed once: non-latest-version tables are
+           immutable.  We mark once all the entries as old, and
+           then these now-old objects stay alive until the next
+           major collection.
+
+           Checking the flag can be done without synchronization: it
+           is never wrong to call _hashtable_tracefn() too much, and
+           the only case where it *has to* be called occurs if the
+           hashtable object is still young (and not seen by other
+           threads).
+        */
+        if (table->trace_flag == TRACE_FLAG_ONCE)
+            _hashtable_tracefn(table, trace);
+    }
+    else {   /* for major collections */
+
+        /* Set this flag for _stm_compact_hashtable() */
+        table->trace_flag = TRACE_FLAG_KEEPALIVE;
+    }
+}
diff --git a/c8/stm/hashtable.h b/c8/stm/hashtable.h
new file mode 100644
--- /dev/null
+++ b/c8/stm/hashtable.h
@@ -0,0 +1,2 @@
+
+static void stm_compact_hashtables(void);
diff --git a/c8/stmgc.c b/c8/stmgc.c
--- a/c8/stmgc.c
+++ b/c8/stmgc.c
@@ -19,6 +19,7 @@
 #include "stm/finalizer.h"
 #include "stm/locks.h"
 #include "stm/detach.h"
+#include "stm/hashtable.h"
 #include "stm/queue.h"
 
 #include "stm/misc.c"
diff --git a/c8/stmgc.h b/c8/stmgc.h
--- a/c8/stmgc.h
+++ b/c8/stmgc.h
@@ -767,6 +767,18 @@
     object_t *object;
 };
 
+/* Hashtable iterators.  You get a raw 'table' pointer when you make an
+   iterator, which you pass to stm_hashtable_iter_next().  When the GC
+   traces, you must keep the table pointer alive with
+   stm_hashtable_iter_tracefn().  This may or may not return items added
+   after stm_hashtable_iter() was called. */
+struct stm_hashtable_table_s *stm_hashtable_iter(stm_hashtable_t *);
+stm_hashtable_entry_t **
+stm_hashtable_iter_next(object_t *hobj, struct stm_hashtable_table_s *table,
+                        stm_hashtable_entry_t **previous);
+void stm_hashtable_iter_tracefn(struct stm_hashtable_table_s *table,
+                                void trace(object_t **));
+
 /* Queues.  The items you put() and get() back are in random order.
    Like hashtables, the type 'stm_queue_t' is not an object type at
diff --git a/c8/test/support.py b/c8/test/support.py
--- a/c8/test/support.py
+++ b/c8/test/support.py
@@ -222,6 +222,15 @@
 long _stm_hashtable_list(object_t *o, stm_hashtable_t *h,
                          object_t *entries);
 
+object_t *_hashtable_iter(stm_hashtable_t *);
+stm_hashtable_entry_t *_hashtable_iter_next(object_t *, object_t *);
+struct stm_hashtable_table_s *stm_hashtable_iter(stm_hashtable_t *);
+stm_hashtable_entry_t **
+stm_hashtable_iter_next(object_t *hobj, struct stm_hashtable_table_s *table,
+                        stm_hashtable_entry_t **previous);
+void stm_hashtable_iter_tracefn(struct stm_hashtable_table_s *table,
+                                void trace(object_t **));
+
 void _set_hashtable(object_t *obj, stm_hashtable_t *h);
 stm_hashtable_t *_get_hashtable(object_t *obj);
 uintptr_t _get_entry_index(stm_hashtable_entry_t *entry);
@@ -419,6 +428,37 @@
     return stm_hashtable_list(o, h, NULL);
 }
 
+struct myiter_s {
+    struct myobj_s common;
+    struct stm_hashtable_table_s *table;
+    stm_hashtable_entry_t **previous;
+};
+typedef TLPREFIX struct myiter_s myiter_t;
+
+object_t *_hashtable_iter(stm_hashtable_t *h)
+{
+    myiter_t *iter = (myiter_t *)stm_allocate(sizeof(myiter_t));
+    _set_type_id(&iter->common.hdr, 421416);
+    iter->table = stm_hashtable_iter(h);
+    iter->previous = NULL;
+    return (object_t *)iter;
+}
+
+stm_hashtable_entry_t *_hashtable_iter_next(object_t *hobj, object_t *iterobj)
+{
+    stm_write(iterobj);
+    myiter_t *iter = (myiter_t *)iterobj;
+    assert(iter->common.type_id == 421416);
+    stm_hashtable_entry_t **pentry;
+    pentry = stm_hashtable_iter_next(hobj, iter->table, iter->previous);
+    if (pentry == NULL)
+        return NULL;
+    iter->previous = pentry;
+    //fprintf(stderr, "\tcontaining %p: index=%ld, object=%p\n",
+    //        *pentry, (*pentry)->index, (*pentry)->object);
+    return *pentry;
+}
+
 void _set_queue(object_t *obj, stm_queue_t *q)
 {
@@ -478,6 +518,9 @@
     if (myobj->type_id == 421417) {   /* queue */
         return sizeof(struct myobj_s) + 1 * sizeof(void*);
     }
+    if (myobj->type_id == 421416) {   /* hashtable iterator */
+        return sizeof(struct myobj_s) + 2 * sizeof(void*);
+    }
     /* basic case: tid equals 42 plus the size of the object */
     assert(myobj->type_id >= 42 + sizeof(struct myobj_s));
     assert((myobj->type_id - 42) >= 16);
@@ -514,6 +557,13 @@
         stm_queue_tracefn(q, visit);
         return;
     }
+    if (myobj->type_id == 421416) {
+        /* hashtable iterator */
+        struct stm_hashtable_table_s *table;
+        table = *(struct stm_hashtable_table_s **)(myobj + 1);
+        stm_hashtable_iter_tracefn(table, visit);
+        return;
+    }
     if (myobj->type_id < 421420) {
         /* basic case: no references */
         return;
     }
@@ -538,6 +588,7 @@
     assert(myobj->type_id != 421419);
     assert(myobj->type_id != 421418);
     assert(myobj->type_id != 421417);
+    assert(myobj->type_id != 421416);
     if (myobj->type_id < 421420) {
         /* basic case: no references */
         return;
@@ -557,6 +608,7 @@
     assert(myobj->type_id != 421419);
     assert(myobj->type_id != 421418);
     assert(myobj->type_id != 421417);
+    assert(myobj->type_id != 421416);
     if (myobj->type_id < 421420) {
         offset_itemsize[0] = SIZEOF_MYOBJ;
         offset_itemsize[1] = 1;
@@ -699,6 +751,12 @@
     lib._set_hashtable(o, h)
     return o
 
+def hashtable_iter(hobj):
+    return lib._hashtable_iter(get_hashtable(hobj))
+
+def hashtable_iter_next(hobj, o):
+    return lib._hashtable_iter_next(hobj, o)
+
 def hashtable_lookup(hto, ht, idx):
     return ffi.cast("object_t*",
                     lib.stm_hashtable_lookup(hto, ht, idx))
diff --git a/c8/test/test_hashtable.py b/c8/test/test_hashtable.py
--- a/c8/test/test_hashtable.py
+++ b/c8/test/test_hashtable.py
@@ -455,8 +455,96 @@
         stm_major_collect()
         self.commit_transaction()
 
+    def test_iter_direct(self):
+        self.start_transaction()
+        h = self.allocate_hashtable()
+        tl0 = self.tls[self.current_thread]
+        expected = []
+        for i in range(29):
+            lp = stm_allocate(32)
+            htset(h, 19 ^ i, lp, tl0)
+            expected.append((19 ^ i, lp))
+        table = lib.stm_hashtable_iter(get_hashtable(h))
+        previous = ffi.NULL
+        seen = []
+        while True:
+            next_pentry = lib.stm_hashtable_iter_next(h, table, previous)
+            if next_pentry == ffi.NULL:
+                break
+            previous = next_pentry
+            entry = next_pentry[0]
+            seen.append((lib._get_entry_index(entry),
+                         lib._get_entry_object(entry)))
+        assert len(seen) == 29
+        assert sorted(seen) == sorted(expected)
+
+    def test_iter_object(self):
+        self.start_transaction()
+        h = self.allocate_hashtable()
+        tl0 = self.tls[self.current_thread]
+        expected = []
+        for i in range(29):
+            lp = stm_allocate(32)
+            htset(h, 19 ^ i, lp, tl0)
+            expected.append((19 ^ i, lp))
+        iterobj = hashtable_iter(h)
+        seen = []
+        while True:
+            entry = hashtable_iter_next(h, iterobj)
+            if entry == ffi.NULL:
+                break
+            seen.append((lib._get_entry_index(entry),
+                         lib._get_entry_object(entry)))
+        assert len(seen) == 29
+        assert sorted(seen) == sorted(expected)
+
+    def test_iter_collections(self):
+        def advance(iterobj, seen):
+            entry = hashtable_iter_next(h, iterobj)
+            if entry == ffi.NULL:
+                seen.add('DONE')
+            else:
+                item = (lib._get_entry_index(entry),
+                        lib._get_entry_object(entry))
+                assert htget(h, item[0]) == item[1]
+                assert item[0] not in seen
+                seen.add(item[0])
+
+        self.start_transaction()
+        h = self.allocate_hashtable()
+        tl0 = self.tls[self.current_thread]
+        iterators = []
+        for i in range(250):
+            lp = stm_allocate(32)
+            htset(h, 19 ^ i, lp, tl0)
+            iterators.append((hashtable_iter(h), i, set()))
+
+            iterobj, _, seen = random.choice(iterators)
+            if 'DONE' not in seen:
+                advance(iterobj, seen)
+
+            self.push_root(h)
+            for iterobj, _, _ in iterators:
+                self.push_root(iterobj)
+            if i % 49 == 48:
+                stm_major_collect()
+            elif i % 7 == 6:
+                stm_minor_collect()
+            for i, prev in reversed(list(enumerate(iterators))):
+                iterators[i] = (self.pop_root(),) + prev[1:]
+            h = self.pop_root()
+
+        for iterobj, _, seen in iterators:
+            while 'DONE' not in seen:
+                advance(iterobj, seen)
+
+        maximum = set([j ^ 19 for j in range(250)])
+        maximum.add('DONE')
+        for iterobj, i, seen in iterators:
+            assert seen.issubset(maximum)
+            for j in range(i):
+                assert j ^ 19 in seen
+            # we may also have seen more items added later
 
 
 class TestRandomHashtable(BaseTestHashtable):
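
A minimal usage sketch of the new iterator API (not part of the changeset),
mirroring the _hashtable_iter()/_hashtable_iter_next() helpers added to
c8/test/support.py above.  The function dump_hashtable() and the hobj/ht
arguments are placeholder names, and the code assumes it runs inside an
already-started transaction:

    #include <stdio.h>
    #include "stmgc.h"

    /* Sketch: 'hobj' is the application-level object that owns the
       stm hashtable 'ht'; both are assumed to exist already. */
    static void dump_hashtable(object_t *hobj, stm_hashtable_t *ht)
    {
        /* grab the current table; items added later may or may not be seen */
        struct stm_hashtable_table_s *table = stm_hashtable_iter(ht);

        /* pass NULL first, then the previously returned pointer */
        stm_hashtable_entry_t **pentry = NULL;
        while ((pentry = stm_hashtable_iter_next(hobj, table, pentry)) != NULL) {
            stm_hashtable_entry_t *entry = *pentry;
            fprintf(stderr, "index=%ld object=%p\n",
                    (long)entry->index, entry->object);
        }
    }

If a collection can happen while the iteration is suspended, the object that
stores 'table' must call stm_hashtable_iter_tracefn(table, trace) from its
tracing callback, as the type-421416 iterator object in test/support.py does.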