Author: Armin Rigo <ar...@tunes.org> Branch: stmgc-c8 Changeset: r77743:a3a33fcdc546 Date: 2015-06-01 15:58 +0100 http://bitbucket.org/pypy/pypy/changeset/a3a33fcdc546/
Log: import stmgc/f0d995d5609d, branch c8-locking diff --git a/rpython/translator/stm/src_stm/revision b/rpython/translator/stm/src_stm/revision --- a/rpython/translator/stm/src_stm/revision +++ b/rpython/translator/stm/src_stm/revision @@ -1,1 +1,1 @@ -e55658d12179 +f0d995d5609d diff --git a/rpython/translator/stm/src_stm/stm/core.c b/rpython/translator/stm/src_stm/stm/core.c --- a/rpython/translator/stm/src_stm/stm/core.c +++ b/rpython/translator/stm/src_stm/stm/core.c @@ -50,8 +50,8 @@ char *src_segment_base = (from_segnum >= 0 ? get_segment_base(from_segnum) : NULL); - assert(IMPLY(from_segnum >= 0, get_priv_segment(from_segnum)->modification_lock)); - assert(STM_PSEGMENT->modification_lock); + assert(IMPLY(from_segnum >= 0, modification_lock_check_rdlock(from_segnum))); + assert(modification_lock_check_wrlock(STM_SEGMENT->segment_num)); long my_segnum = STM_SEGMENT->segment_num; DEBUG_EXPECT_SEGFAULT(false); @@ -131,7 +131,7 @@ struct stm_commit_log_entry_s *from, struct stm_commit_log_entry_s *to) { - assert(STM_PSEGMENT->modification_lock); + assert(modification_lock_check_wrlock(STM_SEGMENT->segment_num)); assert(from->rev_num >= to->rev_num); /* walk BACKWARDS the commit log and update the page 'pagenum', initially at revision 'from', until we reach the revision 'to'. 
*/ @@ -199,8 +199,8 @@ /* before copying anything, acquire modification locks from our and the other segment */ - uint64_t to_lock = (1UL << copy_from_segnum)| (1UL << my_segnum); - acquire_modification_lock_set(to_lock); + uint64_t to_lock = (1UL << copy_from_segnum); + acquire_modification_lock_set(to_lock, my_segnum); pagecopy(get_virtual_page(my_segnum, pagenum), get_virtual_page(copy_from_segnum, pagenum)); @@ -223,7 +223,7 @@ if (src_version->rev_num > target_version->rev_num) go_to_the_past(pagenum, src_version, target_version); - release_modification_lock_set(to_lock); + release_modification_lock_set(to_lock, my_segnum); release_all_privatization_locks(); } @@ -308,7 +308,7 @@ static void reset_modified_from_backup_copies(int segment_num); /* forward */ -static bool _stm_validate() +static bool _stm_validate(void) { /* returns true if we reached a valid state, or false if we need to abort now */ @@ -357,7 +357,7 @@ } /* Find the set of segments we need to copy from and lock them: */ - uint64_t segments_to_lock = 1UL << my_segnum; + uint64_t segments_to_lock = 0; cl = first_cl; while ((next_cl = cl->next) != NULL) { if (next_cl == INEV_RUNNING) { @@ -375,8 +375,8 @@ /* HERE */ - acquire_privatization_lock(STM_SEGMENT->segment_num); - acquire_modification_lock_set(segments_to_lock); + acquire_privatization_lock(my_segnum); + acquire_modification_lock_set(segments_to_lock, my_segnum); /* import objects from first_cl to last_cl: */ @@ -466,8 +466,8 @@ } /* done with modifications */ - release_modification_lock_set(segments_to_lock); - release_privatization_lock(STM_SEGMENT->segment_num); + release_modification_lock_set(segments_to_lock, my_segnum); + release_privatization_lock(my_segnum); } return !needs_abort; @@ -545,7 +545,7 @@ time" as the attach to commit log. Otherwise, another thread may see the new CL entry, import it, look for backup copies in this segment and find the old backup copies! 
*/ - acquire_modification_lock(STM_SEGMENT->segment_num); + acquire_modification_lock_wr(STM_SEGMENT->segment_num); } /* try to attach to commit log: */ @@ -559,7 +559,7 @@ } if (is_commit) { - release_modification_lock(STM_SEGMENT->segment_num); + release_modification_lock_wr(STM_SEGMENT->segment_num); /* XXX: unfortunately, if we failed to attach our CL entry, we have to re-add the WB_EXECUTED flags before we try to validate again because of said condition (s.a) */ @@ -596,7 +596,7 @@ list_clear(STM_PSEGMENT->modified_old_objects); STM_PSEGMENT->last_commit_log_entry = new; - release_modification_lock(STM_SEGMENT->segment_num); + release_modification_lock_wr(STM_SEGMENT->segment_num); } } @@ -692,7 +692,7 @@ increment_total_allocated(slice_sz); memcpy(bk_slice, realobj + slice_off, slice_sz); - acquire_modification_lock(STM_SEGMENT->segment_num); + acquire_modification_lock_wr(STM_SEGMENT->segment_num); /* !! follows layout of "struct stm_undo_s" !! */ STM_PSEGMENT->modified_old_objects = list_append3( STM_PSEGMENT->modified_old_objects, @@ -700,7 +700,7 @@ (uintptr_t)bk_slice, /* bk_addr */ NEW_SLICE(slice_off, slice_sz)); dprintf(("> append slice %p, off=%lu, sz=%lu\n", bk_slice, slice_off, slice_sz)); - release_modification_lock(STM_SEGMENT->segment_num); + release_modification_lock_wr(STM_SEGMENT->segment_num); slice_off += slice_sz; } @@ -896,6 +896,8 @@ static void touch_all_pages_of_obj(object_t *obj, size_t obj_size) { + /* XXX should it be simpler, just really trying to read a dummy + byte in each page? 
*/ int my_segnum = STM_SEGMENT->segment_num; uintptr_t end_page, first_page = ((uintptr_t)obj) / 4096UL; @@ -1345,7 +1347,7 @@ #pragma push_macro("STM_SEGMENT") #undef STM_PSEGMENT #undef STM_SEGMENT - assert(get_priv_segment(segment_num)->modification_lock); + assert(modification_lock_check_wrlock(segment_num)); struct stm_priv_segment_info_s *pseg = get_priv_segment(segment_num); struct list_s *list = pseg->modified_old_objects; @@ -1407,9 +1409,9 @@ _reset_object_cards(pseg, item, CARD_CLEAR, false, false); }); - acquire_modification_lock(segment_num); + acquire_modification_lock_wr(segment_num); reset_modified_from_backup_copies(segment_num); - release_modification_lock(segment_num); + release_modification_lock_wr(segment_num); _verify_cards_cleared_in_all_lists(pseg); stm_thread_local_t *tl = pseg->pub.running_thread; diff --git a/rpython/translator/stm/src_stm/stm/core.h b/rpython/translator/stm/src_stm/stm/core.h --- a/rpython/translator/stm/src_stm/stm/core.h +++ b/rpython/translator/stm/src_stm/stm/core.h @@ -74,11 +74,6 @@ struct stm_priv_segment_info_s { struct stm_segment_info_s pub; - /* lock protecting from concurrent modification of - 'modified_old_objects', page-revision-changes, ... - Always acquired in global order of segments to avoid deadlocks. */ - uint8_t modification_lock; - /* All the old objects (older than the current transaction) that the current transaction attempts to modify. 
This is used to track the STM status: these are old objects that were written @@ -297,7 +292,7 @@ static void synchronize_objects_flush(void); static void _signal_handler(int sig, siginfo_t *siginfo, void *context); -static bool _stm_validate(); +static bool _stm_validate(void); static inline bool was_read_remote(char *base, object_t *obj) { @@ -329,7 +324,7 @@ spinlock_release(get_priv_segment(segnum)->privatization_lock); } -static inline bool all_privatization_locks_acquired() +static inline bool all_privatization_locks_acquired(void) { #ifndef NDEBUG long l; @@ -343,7 +338,7 @@ #endif } -static inline void acquire_all_privatization_locks() +static inline void acquire_all_privatization_locks(void) { /* XXX: don't do for the sharing seg0 */ long l; @@ -352,60 +347,10 @@ } } -static inline void release_all_privatization_locks() +static inline void release_all_privatization_locks(void) { long l; for (l = NB_SEGMENTS-1; l >= 0; l--) { release_privatization_lock(l); } } - - - -/* Modification locks are used to prevent copying from a segment - where either the revision of some pages is inconsistent with the - rest, or the modified_old_objects list is being modified (bk_copys). - - Lock ordering: acquire privatization lock around acquiring a set - of modification locks! 
-*/ - -static inline void acquire_modification_lock(int segnum) -{ - spinlock_acquire(get_priv_segment(segnum)->modification_lock); -} - -static inline void release_modification_lock(int segnum) -{ - spinlock_release(get_priv_segment(segnum)->modification_lock); -} - -static inline void acquire_modification_lock_set(uint64_t seg_set) -{ - assert(NB_SEGMENTS <= 64); - OPT_ASSERT(seg_set < (1 << NB_SEGMENTS)); - - /* acquire locks in global order */ - int i; - for (i = 0; i < NB_SEGMENTS; i++) { - if ((seg_set & (1 << i)) == 0) - continue; - - spinlock_acquire(get_priv_segment(i)->modification_lock); - } -} - -static inline void release_modification_lock_set(uint64_t seg_set) -{ - assert(NB_SEGMENTS <= 64); - OPT_ASSERT(seg_set < (1 << NB_SEGMENTS)); - - int i; - for (i = 0; i < NB_SEGMENTS; i++) { - if ((seg_set & (1 << i)) == 0) - continue; - - assert(get_priv_segment(i)->modification_lock); - spinlock_release(get_priv_segment(i)->modification_lock); - } -} diff --git a/rpython/translator/stm/src_stm/stm/forksupport.c b/rpython/translator/stm/src_stm/stm/forksupport.c --- a/rpython/translator/stm/src_stm/stm/forksupport.c +++ b/rpython/translator/stm/src_stm/stm/forksupport.c @@ -120,6 +120,9 @@ just release these locks early */ s_mutex_unlock(); + /* Re-init these locks; might be needed after a fork() */ + setup_modification_locks(); + /* Unregister all other stm_thread_local_t, mostly as a way to free the memory used by the shadowstacks diff --git a/rpython/translator/stm/src_stm/stm/gcpage.c b/rpython/translator/stm/src_stm/stm/gcpage.c --- a/rpython/translator/stm/src_stm/stm/gcpage.c +++ b/rpython/translator/stm/src_stm/stm/gcpage.c @@ -681,7 +681,7 @@ _stm_smallmalloc_sweep(); } -static void clean_up_commit_log_entries() +static void clean_up_commit_log_entries(void) { struct stm_commit_log_entry_s *cl, *next; diff --git a/rpython/translator/stm/src_stm/stm/locks.h b/rpython/translator/stm/src_stm/stm/locks.h new file mode 100644 --- /dev/null +++ 
b/rpython/translator/stm/src_stm/stm/locks.h @@ -0,0 +1,124 @@ +/* Imported by rpython/translator/stm/import_stmgc.py */ +/* Modification locks protect from concurrent modification of + 'modified_old_objects', page-revision-changes, ... + + Modification locks are used to prevent copying from a segment + where either the revision of some pages is inconsistent with the + rest, or the modified_old_objects list is being modified (bk_copys). + + Lock ordering: acquire privatization lock around acquiring a set + of modification locks! +*/ + +typedef struct { + pthread_rwlock_t lock; +#ifndef NDEBUG + volatile bool write_locked; +#endif +} modification_lock_t __attribute__((aligned(64))); + +static modification_lock_t _modlocks[NB_SEGMENTS - 1]; + + +static void setup_modification_locks(void) +{ + int i; + for (i = 1; i < NB_SEGMENTS; i++) { + if (pthread_rwlock_init(&_modlocks[i - 1].lock, NULL) != 0) + stm_fatalerror("pthread_rwlock_init: %m"); + } +} + +static void teardown_modification_locks(void) +{ + int i; + for (i = 1; i < NB_SEGMENTS; i++) + pthread_rwlock_destroy(&_modlocks[i - 1].lock); + memset(_modlocks, 0, sizeof(_modlocks)); +} + + +static inline void acquire_modification_lock_wr(int segnum) +{ + if (UNLIKELY(pthread_rwlock_wrlock(&_modlocks[segnum - 1].lock) != 0)) + stm_fatalerror("pthread_rwlock_wrlock: %m"); +#ifndef NDEBUG + assert(!_modlocks[segnum - 1].write_locked); + _modlocks[segnum - 1].write_locked = true; +#endif +} + +static inline void release_modification_lock_wr(int segnum) +{ +#ifndef NDEBUG + assert(_modlocks[segnum - 1].write_locked); + _modlocks[segnum - 1].write_locked = false; +#endif + if (UNLIKELY(pthread_rwlock_unlock(&_modlocks[segnum - 1].lock) != 0)) + stm_fatalerror("pthread_rwlock_unlock(wr): %m"); +} + +static void acquire_modification_lock_set(uint64_t readset, int write) +{ + /* acquire the modification lock in 'read' mode for all segments + in 'readset', plus the modification lock in 'write' mode for + the segment number 
'write'. + */ + assert(NB_SEGMENTS <= 64); + OPT_ASSERT(readset < (1 << NB_SEGMENTS)); + assert((readset & 1) == 0); /* segment numbers normally start at 1 */ + assert(0 <= write && write < NB_SEGMENTS); /* use 0 to mean "nobody" */ + + /* acquire locks in global order */ + readset |= (1UL << write); + int i; + for (i = 1; i < NB_SEGMENTS; i++) { + if ((readset & (1UL << i)) == 0) + continue; + if (i == write) { + acquire_modification_lock_wr(write); + } + else { + if (UNLIKELY(pthread_rwlock_rdlock(&_modlocks[i - 1].lock) != 0)) + stm_fatalerror("pthread_rwlock_rdlock: %m"); + } + } +} + +static void release_modification_lock_set(uint64_t readset, int write) +{ + assert(NB_SEGMENTS <= 64); + OPT_ASSERT(readset < (1 << NB_SEGMENTS)); + + /* release lock order does not matter; prefer early release of + the write lock */ + if (write > 0) { + release_modification_lock_wr(write); + readset &= ~(1UL << write); + } + int i; + for (i = 1; i < NB_SEGMENTS; i++) { + if ((readset & (1UL << i)) == 0) + continue; + if (UNLIKELY(pthread_rwlock_unlock(&_modlocks[i - 1].lock) != 0)) + stm_fatalerror("pthread_rwlock_unlock(rd): %m"); + } +} + +#ifndef NDEBUG +static bool modification_lock_check_rdlock(int segnum) +{ + assert(segnum > 0); + if (_modlocks[segnum - 1].write_locked) + return false; + if (pthread_rwlock_trywrlock(&_modlocks[segnum - 1].lock) == 0) { + pthread_rwlock_unlock(&_modlocks[segnum - 1].lock); + return false; + } + return true; +} +static bool modification_lock_check_wrlock(int segnum) +{ + return segnum == 0 || _modlocks[segnum - 1].write_locked; +} +#endif diff --git a/rpython/translator/stm/src_stm/stm/misc.c b/rpython/translator/stm/src_stm/stm/misc.c --- a/rpython/translator/stm/src_stm/stm/misc.c +++ b/rpython/translator/stm/src_stm/stm/misc.c @@ -44,7 +44,7 @@ return obj->stm_flags & _STM_GCFLAG_CARDS_SET; } -long _stm_count_cl_entries() +long _stm_count_cl_entries(void) { struct stm_commit_log_entry_s *cl = &commit_log_root; @@ -115,7 +115,7 @@ return 
cards[get_index_to_card_index(idx)].rm; } -uint8_t _stm_get_transaction_read_version() +uint8_t _stm_get_transaction_read_version(void) { return STM_SEGMENT->transaction_read_version; } @@ -124,7 +124,7 @@ static struct stm_commit_log_entry_s *_last_cl_entry; static long _last_cl_entry_index; -void _stm_start_enum_last_cl_entry() +void _stm_start_enum_last_cl_entry(void) { _last_cl_entry = &commit_log_root; struct stm_commit_log_entry_s *cl = &commit_log_root; @@ -135,7 +135,7 @@ _last_cl_entry_index = 0; } -object_t *_stm_next_last_cl_entry() +object_t *_stm_next_last_cl_entry(void) { if (_last_cl_entry == &commit_log_root) return NULL; @@ -150,7 +150,7 @@ } -void _stm_smallmalloc_sweep_test() +void _stm_smallmalloc_sweep_test(void) { acquire_all_privatization_locks(); _stm_smallmalloc_sweep(); diff --git a/rpython/translator/stm/src_stm/stm/setup.c b/rpython/translator/stm/src_stm/stm/setup.c --- a/rpython/translator/stm/src_stm/stm/setup.c +++ b/rpython/translator/stm/src_stm/stm/setup.c @@ -127,6 +127,7 @@ private range of addresses. 
*/ + setup_modification_locks(); setup_sync(); setup_nursery(); setup_gcpage(); @@ -174,6 +175,7 @@ teardown_gcpage(); teardown_smallmalloc(); teardown_pages(); + teardown_modification_locks(); } static void _shadowstack_trap_page(char *start, int prot) diff --git a/rpython/translator/stm/src_stm/stmgc.c b/rpython/translator/stm/src_stm/stmgc.c --- a/rpython/translator/stm/src_stm/stmgc.c +++ b/rpython/translator/stm/src_stm/stmgc.c @@ -18,6 +18,7 @@ #include "stm/marker.h" #include "stm/rewind_setjmp.h" #include "stm/finalizer.h" +#include "stm/locks.h" #include "stm/misc.c" #include "stm/list.c" #include "stm/smallmalloc.c" diff --git a/rpython/translator/stm/src_stm/stmgc.h b/rpython/translator/stm/src_stm/stmgc.h --- a/rpython/translator/stm/src_stm/stmgc.h +++ b/rpython/translator/stm/src_stm/stmgc.h @@ -57,13 +57,16 @@ typedef struct stm_thread_local_s { /* rewind_setjmp's interface */ rewind_jmp_thread rjthread; + /* every thread should handle the shadow stack itself */ struct stm_shadowentry_s *shadowstack, *shadowstack_base; - /* a generic optional thread-local object */ object_t *thread_local_obj; - + /* in case this thread runs a transaction that aborts, + the following raw region of memory is cleared. */ char *mem_clear_on_abort; size_t mem_bytes_to_clear_on_abort; + /* after an abort, some details about the abort are stored there. + (this field is not modified on a successful commit) */ long last_abort__bytes_in_nursery; /* the next fields are handled internally by the library */ int associated_segment_num; @@ -73,34 +76,22 @@ void *creating_pthread[2]; } stm_thread_local_t; -#ifndef _STM_NURSERY_ZEROED -#define _STM_NURSERY_ZEROED 0 -#endif -#define _STM_GCFLAG_WRITE_BARRIER 0x01 -#define _STM_FAST_ALLOC (66*1024) -#define _STM_NSE_SIGNAL_ABORT 1 -#define _STM_NSE_SIGNAL_MAX 2 - -#define _STM_CARD_MARKED 1 /* should always be 1... 
*/ -#define _STM_GCFLAG_CARDS_SET 0x8 -#define _STM_CARD_BITS 5 /* must be 5/6/7 for the pypy jit */ -#define _STM_CARD_SIZE (1 << _STM_CARD_BITS) -#define _STM_MIN_CARD_COUNT 17 -#define _STM_MIN_CARD_OBJ_SIZE (_STM_CARD_SIZE * _STM_MIN_CARD_COUNT) - +/* this should use llvm's coldcc calling convention, + but it's not exposed to C code so far */ void _stm_write_slowpath(object_t *); void _stm_write_slowpath_card(object_t *, uintptr_t); object_t *_stm_allocate_slowpath(ssize_t); object_t *_stm_allocate_external(ssize_t); void _stm_become_inevitable(const char*); -void _stm_collectable_safe_point(); +void _stm_collectable_safe_point(void); +/* for tests, but also used in duhton: */ object_t *_stm_allocate_old(ssize_t size_rounded_up); char *_stm_real_address(object_t *o); #ifdef STM_TESTS #include <stdbool.h> -uint8_t _stm_get_transaction_read_version(); +uint8_t _stm_get_transaction_read_version(void); uint8_t _stm_get_card_value(object_t *obj, long idx); bool _stm_was_read(object_t *obj); bool _stm_was_written(object_t *obj); @@ -137,14 +128,32 @@ long _stm_count_objects_pointing_to_nursery(void); object_t *_stm_enum_modified_old_objects(long index); object_t *_stm_enum_objects_pointing_to_nursery(long index); -object_t *_stm_next_last_cl_entry(); -void _stm_start_enum_last_cl_entry(); -long _stm_count_cl_entries(); +object_t *_stm_next_last_cl_entry(void); +void _stm_start_enum_last_cl_entry(void); +long _stm_count_cl_entries(void); long _stm_count_old_objects_with_cards_set(void); object_t *_stm_enum_old_objects_with_cards_set(long index); uint64_t _stm_total_allocated(void); #endif + +#ifndef _STM_NURSERY_ZEROED +#define _STM_NURSERY_ZEROED 0 +#endif + +#define _STM_GCFLAG_WRITE_BARRIER 0x01 +#define _STM_FAST_ALLOC (66*1024) +#define _STM_NSE_SIGNAL_ABORT 1 +#define _STM_NSE_SIGNAL_MAX 2 + +#define _STM_CARD_MARKED 1 /* should always be 1... 
*/ +#define _STM_GCFLAG_CARDS_SET 0x8 +#define _STM_CARD_BITS 5 /* must be 5/6/7 for the pypy jit */ +#define _STM_CARD_SIZE (1 << _STM_CARD_BITS) +#define _STM_MIN_CARD_COUNT 17 +#define _STM_MIN_CARD_OBJ_SIZE (_STM_CARD_SIZE * _STM_MIN_CARD_COUNT) + + /* ==================== HELPERS ==================== */ #ifdef NDEBUG #define OPT_ASSERT(cond) do { if (!(cond)) __builtin_unreachable(); } while (0) @@ -165,30 +174,32 @@ */ #define STM_NB_SEGMENTS 4 +/* Structure of objects + -------------------- + Objects manipulated by the user program, and managed by this library, + must start with a "struct object_s" field. Pointers to any user object + must use the "TLPREFIX struct foo *" type --- don't forget TLPREFIX. + The best is to use typedefs like above. + + The object_s part contains some fields reserved for the STM library. + Right now this is only four bytes. +*/ struct object_s { uint32_t stm_flags; /* reserved for the STM library */ }; -extern ssize_t stmcb_size_rounded_up(struct object_s *); -void stmcb_trace(struct object_s *obj, void visit(object_t **)); -/* a special trace-callback that is only called for the marked - ranges of indices (using stm_write_card(o, index)) */ -extern void stmcb_trace_cards(struct object_s *, void (object_t **), - uintptr_t start, uintptr_t stop); -/* this function will be called on objects that support cards. - It returns the base_offset (in bytes) inside the object from - where the indices start, and item_size (in bytes) for the size of - one item */ -extern void stmcb_get_card_base_itemsize(struct object_s *, - uintptr_t offset_itemsize[2]); -/* returns whether this object supports cards. we will only call - stmcb_get_card_base_itemsize on objs that do so. */ -extern long stmcb_obj_supports_cards(struct object_s *); - - - +/* The read barrier must be called whenever the object 'obj' is read. 
+ It is not required to call it before reading: it can be delayed for a + bit, but we must still be in the same "scope": no allocation, no + transaction commit, nothing that can potentially collect or do a safe + point (like stm_write() on a different object). Also, if we might + have finished the transaction and started the next one, then + stm_read() needs to be called again. It can be omitted if + stm_write() is called, or immediately after getting the object from + stm_allocate(), as long as the rules above are respected. +*/ __attribute__((always_inline)) static inline void stm_read(object_t *obj) { @@ -199,6 +210,11 @@ #define _STM_WRITE_CHECK_SLOWPATH(obj) \ UNLIKELY(((obj)->stm_flags & _STM_GCFLAG_WRITE_BARRIER) != 0) +/* The write barrier must be called *before* doing any change to the + object 'obj'. If we might have finished the transaction and started + the next one, then stm_write() needs to be called again. It is not + necessary to call it immediately after stm_allocate(). +*/ __attribute__((always_inline)) static inline void stm_write(object_t *obj) { @@ -206,7 +222,14 @@ _stm_write_slowpath(obj); } - +/* The following is a GC-optimized barrier that works on the granularity + of CARD_SIZE. It can be used on any array object, but it is only + useful with those that were internally marked with GCFLAG_HAS_CARDS. + It has the same purpose as stm_write() for TM and allows write-access + to a part of an object/array. + 'index' is the array-item-based position within the object, which + is measured in units returned by stmcb_get_card_base_itemsize(). +*/ __attribute__((always_inline)) static inline void stm_write_card(object_t *obj, uintptr_t index) { @@ -245,7 +268,34 @@ } } +/* Must be provided by the user of this library. + The "size rounded up" must be a multiple of 8 and at least 16. + "Tracing" an object means enumerating all GC references in it, + by invoking the callback passed as argument. 
+*/ +extern ssize_t stmcb_size_rounded_up(struct object_s *); +void stmcb_trace(struct object_s *obj, void visit(object_t **)); +/* a special trace-callback that is only called for the marked + ranges of indices (using stm_write_card(o, index)) */ +extern void stmcb_trace_cards(struct object_s *, void (object_t **), + uintptr_t start, uintptr_t stop); +/* this function will be called on objects that support cards. + It returns the base_offset (in bytes) inside the object from + where the indices start, and item_size (in bytes) for the size of + one item */ +extern void stmcb_get_card_base_itemsize(struct object_s *, + uintptr_t offset_itemsize[2]); +/* returns whether this object supports cards. we will only call + stmcb_get_card_base_itemsize on objs that do so. */ +extern long stmcb_obj_supports_cards(struct object_s *); + + + +/* Allocate an object of the given size, which must be a multiple + of 8 and at least 16. In the fast-path, this is inlined to just + a few assembler instructions. +*/ __attribute__((always_inline)) static inline object_t *stm_allocate(ssize_t size_rounded_up) { @@ -267,21 +317,48 @@ return (object_t *)p; } - +/* Allocate a weakref object. Weakref objects have a + reference to an object at the byte-offset + stmcb_size_rounded_up(obj) - sizeof(void*) + You must assign the reference before the next collection may happen. + After that, you must not mutate the reference anymore. However, + it can become NULL after any GC if the reference dies during that + collection. + NOTE: For performance, we assume stmcb_size_rounded_up(weakref)==16 +*/ object_t *stm_allocate_weakref(ssize_t size_rounded_up); +/* stm_setup() needs to be called once at the beginning of the program. + stm_teardown() can be called at the end, but that's not necessary + and rather meant for tests. + */ void stm_setup(void); void stm_teardown(void); +/* The size of each shadow stack, in number of entries. + Must be big enough to accommodate all STM_PUSH_ROOTs! 
*/ #define STM_SHADOW_STACK_DEPTH 163840 + +/* Push and pop roots from/to the shadow stack. Only allowed inside + transaction. */ #define STM_PUSH_ROOT(tl, p) ((tl).shadowstack++->ss = (object_t *)(p)) #define STM_POP_ROOT(tl, p) ((p) = (typeof(p))((--(tl).shadowstack)->ss)) #define STM_POP_ROOT_RET(tl) ((--(tl).shadowstack)->ss) +/* Every thread needs to have a corresponding stm_thread_local_t + structure. It may be a "__thread" global variable or something else. + Use the following functions at the start and at the end of a thread. + The user of this library needs to maintain the two shadowstack fields; + at any call to stm_allocate(), these fields should point to a range + of memory that can be walked in order to find the stack roots. +*/ void stm_register_thread_local(stm_thread_local_t *tl); void stm_unregister_thread_local(stm_thread_local_t *tl); +/* At some key places, like the entry point of the thread and in the + function with the interpreter's dispatch loop, you need to declare + a local variable of type 'rewind_jmp_buf' and call these macros. */ #define stm_rewind_jmp_enterprepframe(tl, rjbuf) \ rewind_jmp_enterprepframe(&(tl)->rjthread, rjbuf, (tl)->shadowstack) #define stm_rewind_jmp_enterframe(tl, rjbuf) \ @@ -303,37 +380,23 @@ rewind_jmp_enum_shadowstack(&(tl)->rjthread, callback) +/* Starting and ending transactions. stm_read(), stm_write() and + stm_allocate() should only be called from within a transaction. + The stm_start_transaction() call returns the number of times it + returned, starting at 0. If it is > 0, then the transaction was + aborted and restarted this number of times. */ long stm_start_transaction(stm_thread_local_t *tl); void stm_start_inevitable_transaction(stm_thread_local_t *tl); - void stm_commit_transaction(void); /* Temporary fix? Call this outside a transaction. If there is an inevitable transaction running somewhere else, wait until it finishes. 
*/ void stm_wait_for_current_inevitable_transaction(void); +/* Abort the currently running transaction. This function never + returns: it jumps back to the stm_start_transaction(). */ void stm_abort_transaction(void) __attribute__((noreturn)); -void stm_collect(long level); - -long stm_identityhash(object_t *obj); -long stm_id(object_t *obj); -void stm_set_prebuilt_identityhash(object_t *obj, long hash); - -long stm_can_move(object_t *obj); - -object_t *stm_setup_prebuilt(object_t *); -object_t *stm_setup_prebuilt_weakref(object_t *); - -long stm_call_on_abort(stm_thread_local_t *, void *key, void callback(void *)); -long stm_call_on_commit(stm_thread_local_t *, void *key, void callback(void *)); - -static inline void stm_safe_point(void) { - if (STM_SEGMENT->nursery_end <= _STM_NSE_SIGNAL_MAX) - _stm_collectable_safe_point(); -} - - #ifdef STM_NO_AUTOMATIC_SETJMP int stm_is_inevitable(void); #else @@ -341,6 +404,10 @@ return !rewind_jmp_armed(&STM_SEGMENT->running_thread->rjthread); } #endif + +/* Turn the current transaction inevitable. + stm_become_inevitable() itself may still abort the transaction instead + of returning. */ static inline void stm_become_inevitable(stm_thread_local_t *tl, const char* msg) { assert(STM_SEGMENT->running_thread == tl); @@ -348,7 +415,64 @@ _stm_become_inevitable(msg); } +/* Forces a safe-point if needed. Normally not needed: this is + automatic if you call stm_allocate(). */ +static inline void stm_safe_point(void) { + if (STM_SEGMENT->nursery_end <= _STM_NSE_SIGNAL_MAX) + _stm_collectable_safe_point(); +} + +/* Forces a collection. */ +void stm_collect(long level); + + +/* Prepare an immortal "prebuilt" object managed by the GC. Takes a + pointer to an 'object_t', which should not actually be a GC-managed + structure but a real static structure. Returns the equivalent + GC-managed pointer. 
Works by copying it into the GC pages, following + and fixing all pointers it contains, by doing stm_setup_prebuilt() on + each of them recursively. (Note that this will leave garbage in the + static structure, but it should never be used anyway.) */ +object_t *stm_setup_prebuilt(object_t *); +/* The same, if the prebuilt object is actually a weakref. */ +object_t *stm_setup_prebuilt_weakref(object_t *); + +/* Hash, id. The id is just the address of the object (or the address + where it *will* be after the next minor collection). The hash is the + same, mangled -- except on prebuilt objects, where it can be + controlled for each prebuilt object individually. (Useful for PyPy) */ +long stm_identityhash(object_t *obj); +long stm_id(object_t *obj); +void stm_set_prebuilt_identityhash(object_t *obj, long hash); + +/* Returns 1 if the object can still move (it's in the nursery), or 0 + otherwise. After a minor collection no object can move any more. */ +long stm_can_move(object_t *obj); + +/* If the current transaction aborts later, invoke 'callback(key)'. If + the current transaction commits, then the callback is forgotten. You + can only register one callback per key. You can call + 'stm_call_on_abort(tl, key, NULL)' to cancel an existing callback + (returns 0 if there was no existing callback to cancel). + Note: 'key' must be aligned to a multiple of 8 bytes. */ +long stm_call_on_abort(stm_thread_local_t *, void *key, void callback(void *)); +/* If the current transaction commits later, invoke 'callback(key)'. If + the current transaction aborts, then the callback is forgotten. Same + restrictions as stm_call_on_abort(). If the transaction is or becomes + inevitable, 'callback(key)' is called immediately. */ +long stm_call_on_commit(stm_thread_local_t *, void *key, void callback(void *)); + + +/* Similar to stm_become_inevitable(), but additionally suspend all + other threads. A very heavy-handed way to make sure that no other + transaction is running concurrently. 
Avoid as much as possible. + Other transactions will continue running only after this transaction + commits. (xxx deprecated and may be removed) */ void stm_become_globally_unique_transaction(stm_thread_local_t *tl, const char *msg); + +/* Moves the transaction forward in time by validating the read and + write set with all commits that happened since the last validation + (explicit or implicit). */ void stm_validate(void); /* Temporarily stop all the other threads, by waiting until they @@ -407,8 +531,8 @@ /* The markers pushed in the shadowstack are an odd number followed by a regular object pointer. */ typedef struct { - uintptr_t odd_number; - object_t *object; + uintptr_t odd_number; /* marker odd number, or 0 if marker is missing */ + object_t *object; /* marker object, or NULL if marker is missing */ } stm_loc_marker_t; extern void (*stmcb_timing_event)(stm_thread_local_t *tl, /* the local thread */ enum stm_event_e event, _______________________________________________ pypy-commit mailing list pypy-commit@python.org https://mail.python.org/mailman/listinfo/pypy-commit