Author: Armin Rigo <ar...@tunes.org>
Branch: stmgc-c7
Changeset: r71045:73611e136820
Date: 2014-04-28 18:45 +0200
http://bitbucket.org/pypy/pypy/changeset/73611e136820/
Log:	import stmgc/4bde66e3b621 (branch "marker")

diff --git a/rpython/translator/stm/src_stm/revision b/rpython/translator/stm/src_stm/revision
--- a/rpython/translator/stm/src_stm/revision
+++ b/rpython/translator/stm/src_stm/revision
@@ -1,1 +1,1 @@
-889897f2f5ef
+4bde66e3b621
diff --git a/rpython/translator/stm/src_stm/stm/contention.c b/rpython/translator/stm/src_stm/stm/contention.c
--- a/rpython/translator/stm/src_stm/stm/contention.c
+++ b/rpython/translator/stm/src_stm/stm/contention.c
@@ -100,7 +100,8 @@
 
 static void contention_management(uint8_t other_segment_num,
-                                  enum contention_kind_e kind)
+                                  enum contention_kind_e kind,
+                                  object_t *obj)
 {
     assert(_has_mutex());
     assert(other_segment_num != STM_SEGMENT->segment_num);
@@ -162,10 +163,12 @@
            itself already paused here. */
 
         contmgr.other_pseg->signal_when_done = true;
+        marker_contention(kind, false, other_segment_num, obj);
 
         change_timing_state(wait_category);
 
-        /* XXX should also tell other_pseg "please commit soon" */
+        /* tell the other to commit ASAP */
+        signal_other_to_commit_soon(contmgr.other_pseg);
 
         dprintf(("pausing...\n"));
         cond_signal(C_AT_SAFE_POINT);
@@ -177,12 +180,22 @@
         if (must_abort())
             abort_with_mutex();
 
-        change_timing_state(STM_TIME_RUN_CURRENT);
+        struct stm_priv_segment_info_s *pseg =
+            get_priv_segment(STM_SEGMENT->segment_num);
+        double elapsed =
+            change_timing_state_tl(pseg->pub.running_thread,
+                                   STM_TIME_RUN_CURRENT);
+        marker_copy(pseg->pub.running_thread, pseg,
+                    wait_category, elapsed);
     }
 
     else if (!contmgr.abort_other) {
+        /* tell the other to commit ASAP, since it causes aborts */
+        signal_other_to_commit_soon(contmgr.other_pseg);
+
         dprintf(("abort in contention\n"));
         STM_SEGMENT->nursery_end = abort_category;
+        marker_contention(kind, false, other_segment_num, obj);
         abort_with_mutex();
     }
 
@@ -190,6 +203,7 @@
         /* We have to signal the other thread to abort, and wait until
            it does. */
         contmgr.other_pseg->pub.nursery_end = abort_category;
+        marker_contention(kind, true, other_segment_num, obj);
 
         int sp = contmgr.other_pseg->safe_point;
         switch (sp) {
@@ -257,10 +271,18 @@
             abort_data_structures_from_segment_num(other_segment_num);
         }
         dprintf(("killed other thread\n"));
+
+        /* we should commit soon, we caused an abort */
+        //signal_other_to_commit_soon(get_priv_segment(STM_SEGMENT->segment_num));
+        if (!STM_PSEGMENT->signalled_to_commit_soon) {
+            STM_PSEGMENT->signalled_to_commit_soon = true;
+            stmcb_commit_soon();
+        }
     }
 }
 
-static void write_write_contention_management(uintptr_t lock_idx)
+static void write_write_contention_management(uintptr_t lock_idx,
+                                              object_t *obj)
 {
     s_mutex_lock();
 
@@ -271,7 +293,7 @@
         assert(get_priv_segment(other_segment_num)->write_lock_num ==
                prev_owner);
 
-        contention_management(other_segment_num, WRITE_WRITE_CONTENTION);
+        contention_management(other_segment_num, WRITE_WRITE_CONTENTION, obj);
 
         /* now we return into _stm_write_slowpath() and will try again
            to acquire the write lock on our object.
        */
@@ -280,12 +302,13 @@
 
     s_mutex_unlock();
 }
 
-static void write_read_contention_management(uint8_t other_segment_num)
+static void write_read_contention_management(uint8_t other_segment_num,
+                                             object_t *obj)
 {
-    contention_management(other_segment_num, WRITE_READ_CONTENTION);
+    contention_management(other_segment_num, WRITE_READ_CONTENTION, obj);
 }
 
 static void inevitable_contention_management(uint8_t other_segment_num)
 {
-    contention_management(other_segment_num, INEVITABLE_CONTENTION);
+    contention_management(other_segment_num, INEVITABLE_CONTENTION, NULL);
 }
diff --git a/rpython/translator/stm/src_stm/stm/contention.h b/rpython/translator/stm/src_stm/stm/contention.h
--- a/rpython/translator/stm/src_stm/stm/contention.h
+++ b/rpython/translator/stm/src_stm/stm/contention.h
@@ -1,11 +1,14 @@
 /* Imported by rpython/translator/stm/import_stmgc.py */
-static void write_write_contention_management(uintptr_t lock_idx);
-static void write_read_contention_management(uint8_t other_segment_num);
+static void write_write_contention_management(uintptr_t lock_idx,
+                                              object_t *obj);
+static void write_read_contention_management(uint8_t other_segment_num,
+                                             object_t *obj);
 static void inevitable_contention_management(uint8_t other_segment_num);
 
 static inline bool is_abort(uintptr_t nursery_end) {
-    return (nursery_end <= _STM_NSE_SIGNAL_MAX && nursery_end != NSE_SIGPAUSE);
+    return (nursery_end <= _STM_NSE_SIGNAL_MAX && nursery_end != NSE_SIGPAUSE
+            && nursery_end != NSE_SIGCOMMITSOON);
 }
 
 static inline bool is_aborting_now(uint8_t other_segment_num) {
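The is_abort() change above is easiest to see with concrete values: 'nursery_end' doubles as a per-segment signal slot, where anything up to _STM_NSE_SIGNAL_MAX is a signal rather than a real nursery limit, and the new NSE_SIGCOMMITSOON value must not be mistaken for an abort. A minimal self-contained sketch; the numeric values below are invented for illustration, only the shape of the predicate comes from the patch:

    #include <stdbool.h>
    #include <stdint.h>
    #include <stdio.h>

    /* invented example values; the real ones come from the STM_TIME_*
       enum and the nursery layout */
    enum { NSE_SIGPAUSE = 5, NSE_SIGCOMMITSOON = 6,
           _STM_NSE_SIGNAL_MAX = 7 };

    static bool is_abort(uintptr_t nursery_end) {
        return (nursery_end <= _STM_NSE_SIGNAL_MAX &&
                nursery_end != NSE_SIGPAUSE &&
                nursery_end != NSE_SIGCOMMITSOON);
    }

    int main(void) {
        printf("%d\n", is_abort(1));                 /* 1: a real abort code */
        printf("%d\n", is_abort(NSE_SIGPAUSE));      /* 0: just "pause" */
        printf("%d\n", is_abort(NSE_SIGCOMMITSOON)); /* 0: just "commit soon" */
        return 0;
    }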
diff --git a/rpython/translator/stm/src_stm/stm/core.c b/rpython/translator/stm/src_stm/stm/core.c
--- a/rpython/translator/stm/src_stm/stm/core.c
+++ b/rpython/translator/stm/src_stm/stm/core.c
@@ -15,13 +15,10 @@
 #define EVENTUALLY(condition)                                   \
     {                                                           \
         if (!(condition)) {                                     \
-            int _i;                                             \
-            for (_i = 1; _i <= NB_SEGMENTS; _i++)               \
-                spinlock_acquire(lock_pages_privatizing[_i]);   \
+            acquire_privatization_lock();                       \
             if (!(condition))                                   \
                 stm_fatalerror("fails: " #condition);           \
-            for (_i = 1; _i <= NB_SEGMENTS; _i++)               \
-                spinlock_release(lock_pages_privatizing[_i]);   \
+            release_privatization_lock();                       \
         }                                                       \
     }
 #endif
@@ -77,9 +74,15 @@
     assert(lock_idx < sizeof(write_locks));
  retry:
     if (write_locks[lock_idx] == 0) {
+        /* A lock to prevent reading garbage from
+           lookup_other_thread_recorded_marker() */
+        acquire_marker_lock(STM_SEGMENT->segment_base);
+
         if (UNLIKELY(!__sync_bool_compare_and_swap(&write_locks[lock_idx],
-                                                   0, lock_num)))
+                                                   0, lock_num))) {
+            release_marker_lock(STM_SEGMENT->segment_base);
             goto retry;
+        }
 
         dprintf_test(("write_slowpath %p -> mod_old\n", obj));
 
@@ -87,6 +90,15 @@
            Add it to the list 'modified_old_objects'. */
         LIST_APPEND(STM_PSEGMENT->modified_old_objects, obj);
 
+        /* Add the current marker, recording where we wrote to this object */
+        uintptr_t marker[2];
+        marker_fetch(STM_SEGMENT->running_thread, marker);
+        STM_PSEGMENT->modified_old_objects_markers =
+            list_append2(STM_PSEGMENT->modified_old_objects_markers,
+                         marker[0], marker[1]);
+
+        release_marker_lock(STM_SEGMENT->segment_base);
+
         /* We need to privatize the pages containing the object, if they
            are still SHARED_PAGE.  The common case is that there is only
            one page in total. */
@@ -128,7 +140,7 @@
     else {
         /* call the contention manager, and then retry (unless we were
            aborted).
        */
-        write_write_contention_management(lock_idx);
+        write_write_contention_management(lock_idx, obj);
         goto retry;
     }
 
@@ -196,7 +208,13 @@
     assert(STM_PSEGMENT->transaction_state == TS_NONE);
     change_timing_state(STM_TIME_RUN_CURRENT);
     STM_PSEGMENT->start_time = tl->_timing_cur_start;
+    STM_PSEGMENT->signalled_to_commit_soon = false;
     STM_PSEGMENT->safe_point = SP_RUNNING;
+#ifndef NDEBUG
+    STM_PSEGMENT->marker_inev[1] = 99999999999999999L;
+#endif
+    if (jmpbuf == NULL)
+        marker_fetch_inev();
     STM_PSEGMENT->transaction_state = (jmpbuf != NULL ? TS_REGULAR
                                                       : TS_INEVITABLE);
     STM_SEGMENT->jmpbuf_ptr = jmpbuf;
@@ -224,12 +242,17 @@
     }
 
     assert(list_is_empty(STM_PSEGMENT->modified_old_objects));
+    assert(list_is_empty(STM_PSEGMENT->modified_old_objects_markers));
     assert(list_is_empty(STM_PSEGMENT->young_weakrefs));
     assert(tree_is_cleared(STM_PSEGMENT->young_outside_nursery));
     assert(tree_is_cleared(STM_PSEGMENT->nursery_objects_shadows));
     assert(tree_is_cleared(STM_PSEGMENT->callbacks_on_abort));
     assert(STM_PSEGMENT->objects_pointing_to_nursery == NULL);
     assert(STM_PSEGMENT->large_overflow_objects == NULL);
+#ifndef NDEBUG
+    /* this should not be used when objects_pointing_to_nursery == NULL */
+    STM_PSEGMENT->modified_old_objects_markers_num_old = 99999999999999999L;
+#endif
 
     check_nursery_at_transaction_start();
 }
@@ -264,7 +287,7 @@
             ({
                 if (was_read_remote(remote_base, item, remote_version)) {
                     /* A write-read conflict! */
-                    write_read_contention_management(i);
+                    write_read_contention_management(i, item);
 
                     /* If we reach this point, we didn't abort, but maybe we
                        had to wait for the other thread to commit.  If we
@@ -338,9 +361,12 @@
     /* Copy around the version of 'obj' that lives in our own segment.
        It is first copied into the shared pages, and then into other
        segments' own private pages.
+
+       Must be called with the privatization lock acquired.
     */
     assert(!_is_young(obj));
     assert(obj->stm_flags & GCFLAG_WRITE_BARRIER);
+    assert(STM_PSEGMENT->privatization_lock == 1);
 
     uintptr_t start = (uintptr_t)obj;
     uintptr_t first_page = start / 4096UL;
@@ -382,26 +408,9 @@
             memcpy(dst, src, copy_size);
         }
         else {
-            EVENTUALLY(memcmp(dst, src, copy_size) == 0);  /* same page */
+            assert(memcmp(dst, src, copy_size) == 0);  /* same page */
         }
 
-        /* Do a full memory barrier.  We must make sure that other
-           CPUs see the changes we did to the shared page ("S",
-           above) before we check the other segments below with
-           is_private_page().  Otherwise, we risk the following:
-           this CPU writes "S" but the writes are not visible yet;
-           then it checks is_private_page() and gets false, and does
-           nothing more; just afterwards another CPU sets its own
-           private_page bit and copies the page; but it risks doing
-           so before seeing the "S" writes.
-
-           XXX what is the cost of this?  If it's high, then we
-           should reorganize the code so that we buffer the second
-           parts and do them by bunch of N, after just one call to
-           __sync_synchronize()...
-        */
-        __sync_synchronize();
-
         for (i = 1; i <= NB_SEGMENTS; i++) {
             if (i == myself)
                 continue;
@@ -418,7 +427,7 @@
                 memcpy(dst, src, copy_size);
             }
             else {
-                EVENTUALLY(!memcmp(dst, src, copy_size));  /* same page */
+                assert(!memcmp(dst, src, copy_size));  /* same page */
             }
         }
 
@@ -432,12 +441,15 @@
     if (STM_PSEGMENT->large_overflow_objects == NULL)
         return;
 
+    acquire_privatization_lock();
     LIST_FOREACH_R(STM_PSEGMENT->large_overflow_objects, object_t *,
                    synchronize_object_now(item));
+    release_privatization_lock();
 }
 
 static void push_modified_to_other_segments(void)
 {
+    acquire_privatization_lock();
     LIST_FOREACH_R(
         STM_PSEGMENT->modified_old_objects,
         object_t * /*item*/,
@@ -457,8 +469,10 @@
                private pages as needed */
             synchronize_object_now(item);
         }));
+    release_privatization_lock();
 
     list_clear(STM_PSEGMENT->modified_old_objects);
+    list_clear(STM_PSEGMENT->modified_old_objects_markers);
 }
 
 static void _finish_transaction(int attribute_to)
@@ -597,6 +611,7 @@
         }));
 
     list_clear(pseg->modified_old_objects);
+    list_clear(pseg->modified_old_objects_markers);
 }
 
 static void abort_data_structures_from_segment_num(int segment_num)
@@ -621,8 +636,9 @@
                      (int)pseg->transaction_state);
     }
 
-    /* look up and preserve the marker information as a string */
-    marker_fetch_expand(pseg);
+    /* if we don't have marker information already, look up and preserve
+       the marker information from the shadowstack as a string */
+    marker_default_for_abort(pseg);
 
     /* throw away the content of the nursery */
     long bytes_in_nursery = throw_away_nursery(pseg);
@@ -706,6 +722,7 @@
 
     if (STM_PSEGMENT->transaction_state == TS_REGULAR) {
         dprintf(("become_inevitable: %s\n", msg));
+        marker_fetch_inev();
         wait_for_end_of_inevitable_transaction(NULL);
         STM_PSEGMENT->transaction_state = TS_INEVITABLE;
         STM_SEGMENT->jmpbuf_ptr = NULL;
diff --git a/rpython/translator/stm/src_stm/stm/core.h b/rpython/translator/stm/src_stm/stm/core.h
--- a/rpython/translator/stm/src_stm/stm/core.h
+++ b/rpython/translator/stm/src_stm/stm/core.h
@@ -79,9 +79,17 @@
     /* List of old objects (older than the current transaction) that the
        current transaction attempts to modify.  This is used to track the
        STM status: they are old objects that where written to and
-       that need to be copied to other segments upon commit. */
+       that need to be copied to other segments upon commit.  Note that
+       every object takes three list items: the object, and two words for
+       the location marker. */
     struct list_s *modified_old_objects;
 
+    /* For each entry in 'modified_old_objects', we have two entries
+       in the following list, which give the marker at the time we added
+       the entry to modified_old_objects. */
+    struct list_s *modified_old_objects_markers;
+    uintptr_t modified_old_objects_markers_num_old;
+
     /* List of out-of-nursery objects that may contain pointers to
        nursery objects.  This is used to track the GC status: they are
        all objects outside the nursery on which an stm_write() occurred
@@ -149,12 +157,31 @@
     /* For sleeping contention management */
     bool signal_when_done;
 
+    /* This lock is acquired when that segment calls synchronize_object_now.
+       On the rare event of a page_privatize(), the latter will acquire
+       all the locks in all segments.  Otherwise, for the common case,
+       it's cheap.  (The set of all 'privatization_lock' in all segments
+       works like one single read-write lock, with page_privatize() acquiring
+       the write lock; but this variant is more efficient for the case of
+       many reads / rare writes.)
+    */
+    uint8_t privatization_lock;
+
+    /* This lock is acquired when we mutate 'modified_old_objects' but
+       we don't have the global mutex.  It is also acquired during minor
+       collection.  It protects against a different thread that tries to
+       get this segment's marker corresponding to some object, or to
+       expand the marker into a full description. */
+    uint8_t marker_lock;
+
     /* In case of abort, we restore the 'shadowstack' field and the
        'thread_local_obj' field. */
     struct stm_shadowentry_s *shadowstack_at_start_of_transaction;
     object_t *threadlocal_at_start_of_transaction;
     struct stm_shadowentry_s *shadowstack_at_abort;
 
+    /* Already signalled to commit soon: */
+    bool signalled_to_commit_soon;
+
     /* For debugging */
 #ifndef NDEBUG
     pthread_t running_pthread;
@@ -162,6 +189,8 @@
 
     /* Temporarily stores the marker information */
     char marker_self[_STM_MARKER_LEN];
+    char marker_other[_STM_MARKER_LEN];
+    uintptr_t marker_inev[2];  /* marker where this thread became inevitable */
 };
 
 enum /* safe_point */ {
@@ -231,3 +260,31 @@
 
 static void copy_object_to_shared(object_t *obj, int source_segment_num);
 static void synchronize_object_now(object_t *obj);
+
+static inline void acquire_privatization_lock(void)
+{
+    uint8_t *lock = (uint8_t *)REAL_ADDRESS(STM_SEGMENT->segment_base,
+                                            &STM_PSEGMENT->privatization_lock);
+    spinlock_acquire(*lock);
+}
+
+static inline void release_privatization_lock(void)
+{
+    uint8_t *lock = (uint8_t *)REAL_ADDRESS(STM_SEGMENT->segment_base,
+                                            &STM_PSEGMENT->privatization_lock);
+    spinlock_release(*lock);
+}
+
+static inline void acquire_marker_lock(char *segment_base)
+{
+    uint8_t *lock = (uint8_t *)REAL_ADDRESS(segment_base,
+                                            &STM_PSEGMENT->marker_lock);
+    spinlock_acquire(*lock);
+}
+
+static inline void release_marker_lock(char *segment_base)
+{
+    uint8_t *lock = (uint8_t *)REAL_ADDRESS(segment_base,
+                                            &STM_PSEGMENT->marker_lock);
+    spinlock_release(*lock);
+}
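The comment on 'privatization_lock' above describes a pattern worth seeing in isolation: each segment owns one byte-sized spinlock, the frequent reader (synchronize_object_now) takes only its own, and the rare writer (page_privatize) takes all of them, which together behaves like a single read-write lock biased towards readers. A minimal standalone sketch, with invented names and GCC's __sync builtins standing in for stmgc's spinlock macros:

    #include <stdint.h>

    #define N_SEGMENTS 4
    static uint8_t seg_lock[N_SEGMENTS];   /* one byte per segment */

    static inline void spin_acquire(uint8_t *lock) {
        while (__sync_lock_test_and_set(lock, 1))
            ;                              /* busy-wait */
    }
    static inline void spin_release(uint8_t *lock) {
        __sync_lock_release(lock);         /* store 0 with release semantics */
    }

    /* common, cheap case: a segment only touches its own slot */
    static void reader_enter(int seg) { spin_acquire(&seg_lock[seg]); }
    static void reader_exit(int seg)  { spin_release(&seg_lock[seg]); }

    /* rare case: exclude every reader by taking all slots */
    static void writer_enter(void) {
        for (int i = 0; i < N_SEGMENTS; i++)
            spin_acquire(&seg_lock[i]);    /* fixed order avoids deadlock */
    }
    static void writer_exit(void) {
        for (int i = N_SEGMENTS - 1; i >= 0; i--)
            spin_release(&seg_lock[i]);
    }

Readers never contend with each other, so the common path costs one uncontended atomic; the writer pays O(N) acquisitions, which is acceptable because page privatization is rare.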
diff --git a/rpython/translator/stm/src_stm/stm/gcpage.c b/rpython/translator/stm/src_stm/stm/gcpage.c
--- a/rpython/translator/stm/src_stm/stm/gcpage.c
+++ b/rpython/translator/stm/src_stm/stm/gcpage.c
@@ -93,17 +93,20 @@
 
     /* uncommon case: need to initialize some more pages */
     spinlock_acquire(lock_growth_large);
 
-    if (addr + size > uninitialized_page_start) {
+    char *start = uninitialized_page_start;
+    if (addr + size > start) {
         uintptr_t npages;
-        npages = (addr + size - uninitialized_page_start) / 4096UL;
+        npages = (addr + size - start) / 4096UL;
         npages += GCPAGE_NUM_PAGES;
-        if (uninitialized_page_stop - uninitialized_page_start <
-                npages * 4096UL) {
+        if (uninitialized_page_stop - start < npages * 4096UL) {
             stm_fatalerror("out of memory!");   /* XXX */
         }
-        setup_N_pages(uninitialized_page_start, npages);
-        __sync_synchronize();
-        uninitialized_page_start += npages * 4096UL;
+        setup_N_pages(start, npages);
+        if (!__sync_bool_compare_and_swap(&uninitialized_page_start,
+                                          start,
+                                          start + npages * 4096UL)) {
+            stm_fatalerror("uninitialized_page_start changed?");
+        }
     }
     spinlock_release(lock_growth_large);
     return addr;
@@ -419,6 +422,23 @@
     }
 }
 
+static void mark_visit_from_markers(void)
+{
+    long j;
+    for (j = 1; j <= NB_SEGMENTS; j++) {
+        char *base = get_segment_base(j);
+        struct list_s *lst = get_priv_segment(j)->modified_old_objects_markers;
+        uintptr_t i;
+        for (i = list_count(lst); i > 0; i -= 2) {
+            mark_visit_object((object_t *)list_item(lst, i - 1), base);
+        }
+        if (get_priv_segment(j)->transaction_state == TS_INEVITABLE) {
+            uintptr_t marker_inev_obj = get_priv_segment(j)->marker_inev[1];
+            mark_visit_object((object_t *)marker_inev_obj, base);
+        }
+    }
+}
+
 static void clean_up_segment_lists(void)
 {
     long i;
@@ -521,6 +541,7 @@
     /* marking */
     LIST_CREATE(mark_objects_to_trace);
     mark_visit_from_modified_objects();
+    mark_visit_from_markers();
     mark_visit_from_roots();
     LIST_FREE(mark_objects_to_trace);
 
diff --git a/rpython/translator/stm/src_stm/stm/largemalloc.c b/rpython/translator/stm/src_stm/stm/largemalloc.c
--- a/rpython/translator/stm/src_stm/stm/largemalloc.c
+++ b/rpython/translator/stm/src_stm/stm/largemalloc.c
@@ -354,6 +354,9 @@
     mscan->size = request_size;
     mscan->prev_size = BOTH_CHUNKS_USED;
     increment_total_allocated(request_size + LARGE_MALLOC_OVERHEAD);
+#ifndef NDEBUG
+    memset((char *)&mscan->d, 0xda, request_size);
+#endif
 
     lm_unlock();
 
diff --git a/rpython/translator/stm/src_stm/stm/list.h b/rpython/translator/stm/src_stm/stm/list.h
--- a/rpython/translator/stm/src_stm/stm/list.h
+++ b/rpython/translator/stm/src_stm/stm/list.h
@@ -34,6 +34,18 @@
 
 #define LIST_APPEND(lst, e)   ((lst) = list_append((lst), (uintptr_t)(e)))
 
+static inline struct list_s *list_append2(struct list_s *lst,
+                                          uintptr_t item0, uintptr_t item1)
+{
+    uintptr_t index = lst->count;
+    lst->count += 2;
+    if (UNLIKELY(index >= lst->last_allocated))
+        lst = _list_grow(lst, index + 1);
+
+    lst->items[index + 0] = item0;
+    lst->items[index + 1] = item1;
+    return lst;
+}
+
 
 static inline void list_clear(struct list_s *lst)
 {
@@ -67,6 +79,11 @@
     lst->items[index] = newitem;
 }
 
+static inline uintptr_t *list_ptr_to_item(struct list_s *lst, uintptr_t index)
+{
+    return &lst->items[index];
+}
+
 #define LIST_FOREACH_R(lst, TYPE, CODE)         \
     do {                                        \
         struct list_s *_lst = (lst);            \
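list_append2() exists so that the write barrier can keep 'modified_old_objects_markers' exactly in lockstep with 'modified_old_objects': one object appended to the first list always corresponds to two marker words appended to the second. A simplified sketch of that invariant, using a fixed-capacity list instead of the real growable one; record_write() is an invented helper:

    #include <assert.h>
    #include <stdint.h>

    struct list_s { uintptr_t count; uintptr_t items[64]; };

    /* simplified list_append2: the real one grows the list on demand */
    static struct list_s *list_append2(struct list_s *lst,
                                       uintptr_t item0, uintptr_t item1)
    {
        uintptr_t index = lst->count;
        lst->count += 2;
        assert(lst->count <= 64);
        lst->items[index + 0] = item0;
        lst->items[index + 1] = item1;
        return lst;
    }

    /* invented helper: object entry i pairs with marker entries 2*i and
       2*i+1, the invariant that marker_fetch_obj_write() relies on below */
    static void record_write(struct list_s *objs, struct list_s *markers,
                             uintptr_t obj, uintptr_t odd_num, uintptr_t ref)
    {
        objs->items[objs->count++] = obj;
        list_append2(markers, odd_num, ref);
        assert(markers->count == 2 * objs->count);
    }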
diff --git a/rpython/translator/stm/src_stm/stm/marker.c b/rpython/translator/stm/src_stm/stm/marker.c
--- a/rpython/translator/stm/src_stm/stm/marker.c
+++ b/rpython/translator/stm/src_stm/stm/marker.c
@@ -12,38 +12,73 @@
                            const char *marker);
 
 
-static void marker_fetch_expand(struct stm_priv_segment_info_s *pseg)
+static void marker_fetch(stm_thread_local_t *tl, uintptr_t marker[2])
+{
+    /* fetch the current marker from the tl's shadow stack,
+       and return it in 'marker[2]'. */
+    struct stm_shadowentry_s *current = tl->shadowstack - 1;
+    struct stm_shadowentry_s *base = tl->shadowstack_base;
+
+    /* The shadowstack_base contains STM_STACK_MARKER_OLD, which is
+       a convenient stopper for the loop below but which shouldn't
+       be returned. */
+    assert(base->ss == (object_t *)STM_STACK_MARKER_OLD);
+
+    while (!(((uintptr_t)current->ss) & 1)) {
+        current--;
+        assert(current >= base);
+    }
+
+    if (current != base) {
+        /* found the odd marker */
+        marker[0] = (uintptr_t)current[0].ss;
+        marker[1] = (uintptr_t)current[1].ss;
+    }
+    else {
+        /* no marker found */
+        marker[0] = 0;
+        marker[1] = 0;
+    }
+}
+
+static void marker_expand(uintptr_t marker[2], char *segment_base,
+                          char *outmarker)
+{
+    /* Expand the marker given by 'marker[2]' into a full string.  This
+       works assuming that the marker was produced inside the segment
+       given by 'segment_base'.  If that's from a different thread, you
+       must first acquire the corresponding 'marker_lock'.
+    */
+    assert(_has_mutex());
+    outmarker[0] = 0;
+    if (marker[0] == 0)
+        return;   /* no marker entry found */
+    if (stmcb_expand_marker != NULL) {
+        stmcb_expand_marker(segment_base, marker[0], (object_t *)marker[1],
+                            outmarker, _STM_MARKER_LEN);
+    }
+}
+
+static void marker_default_for_abort(struct stm_priv_segment_info_s *pseg)
 {
     if (pseg->marker_self[0] != 0)
         return;   /* already collected an entry */
 
-    if (stmcb_expand_marker != NULL) {
-        stm_thread_local_t *tl = pseg->pub.running_thread;
-        struct stm_shadowentry_s *current = tl->shadowstack - 1;
-        struct stm_shadowentry_s *base = tl->shadowstack_base;
-        /* stop walking just before shadowstack_base, which contains
-           STM_STACK_MARKER_OLD which shouldn't be expanded */
-        while (--current > base) {
-            uintptr_t x = (uintptr_t)current->ss;
-            if (x & 1) {
-                /* the stack entry is an odd number */
-                stmcb_expand_marker(pseg->pub.segment_base, x, current[1].ss,
-                                    pseg->marker_self, _STM_MARKER_LEN);
-
-                if (pseg->marker_self[0] != 0)
-                    break;
-            }
-        }
-    }
+    uintptr_t marker[2];
+    marker_fetch(pseg->pub.running_thread, marker);
+    marker_expand(marker, pseg->pub.segment_base, pseg->marker_self);
+    pseg->marker_other[0] = 0;
 }
 
 char *_stm_expand_marker(void)
 {
-    struct stm_priv_segment_info_s *pseg =
-        get_priv_segment(STM_SEGMENT->segment_num);
-    pseg->marker_self[0] = 0;
-    marker_fetch_expand(pseg);
-    return pseg->marker_self;
+    /* for tests only! */
+    static char _result[_STM_MARKER_LEN];
+    uintptr_t marker[2];
+    _result[0] = 0;
+    s_mutex_lock();
+    marker_fetch(STM_SEGMENT->running_thread, marker);
+    marker_expand(marker, STM_SEGMENT->segment_base, _result);
+    s_mutex_unlock();
+    return _result;
 }
 
 static void marker_copy(stm_thread_local_t *tl,
@@ -65,6 +100,105 @@
         tl->longest_marker_state = attribute_to;
         tl->longest_marker_time = time;
         memcpy(tl->longest_marker_self, pseg->marker_self, _STM_MARKER_LEN);
+        memcpy(tl->longest_marker_other, pseg->marker_other, _STM_MARKER_LEN);
     }
     pseg->marker_self[0] = 0;
+    pseg->marker_other[0] = 0;
 }
+
+static void marker_fetch_obj_write(uint8_t in_segment_num, object_t *obj,
+                                   uintptr_t marker[2])
+{
+    assert(_has_mutex());
+
+    /* here, we acquired the other thread's marker_lock, which means that:
+
+       (1) it has finished filling 'modified_old_objects' after it sets
+           up the write_locks[] value that we're conflicting with
+
+       (2) it is not mutating 'modified_old_objects' right now (we have
+           the global mutex_lock at this point too).
+    */
+
+    long i;
+    struct stm_priv_segment_info_s *pseg = get_priv_segment(in_segment_num);
+    struct list_s *mlst = pseg->modified_old_objects;
+    struct list_s *mlstm = pseg->modified_old_objects_markers;
+    for (i = list_count(mlst); --i >= 0; ) {
+        if (list_item(mlst, i) == (uintptr_t)obj) {
+            assert(list_count(mlstm) == 2 * list_count(mlst));
+            marker[0] = list_item(mlstm, i * 2 + 0);
+            marker[1] = list_item(mlstm, i * 2 + 1);
+            return;
+        }
+    }
+    marker[0] = 0;
+    marker[1] = 0;
+}
+
+static void marker_contention(int kind, bool abort_other,
+                              uint8_t other_segment_num, object_t *obj)
+{
+    uintptr_t self_marker[2];
+    uintptr_t other_marker[2];
+    struct stm_priv_segment_info_s *my_pseg, *other_pseg;
+
+    my_pseg = get_priv_segment(STM_SEGMENT->segment_num);
+    other_pseg = get_priv_segment(other_segment_num);
+
+    char *my_segment_base = STM_SEGMENT->segment_base;
+    char *other_segment_base = get_segment_base(other_segment_num);
+
+    acquire_marker_lock(other_segment_base);
+
+    /* Collect the location for myself.
+       It's usually the current
+       location, except in a write-read abort, in which case it's the
+       older location of the write. */
+    if (kind == WRITE_READ_CONTENTION)
+        marker_fetch_obj_write(my_pseg->pub.segment_num, obj, self_marker);
+    else
+        marker_fetch(my_pseg->pub.running_thread, self_marker);
+
+    /* Expand this location into either my_pseg->marker_self or
+       other_pseg->marker_other, depending on who aborts. */
+    marker_expand(self_marker, my_segment_base,
+                  abort_other ? other_pseg->marker_other
+                              : my_pseg->marker_self);
+
+    /* For some categories, we can also collect the relevant information
+       for the other segment. */
+    switch (kind) {
+    case WRITE_WRITE_CONTENTION:
+        marker_fetch_obj_write(other_segment_num, obj, other_marker);
+        break;
+    case INEVITABLE_CONTENTION:
+        assert(abort_other == false);
+        other_marker[0] = other_pseg->marker_inev[0];
+        other_marker[1] = other_pseg->marker_inev[1];
+        break;
+    default:
+        other_marker[0] = 0;
+        other_marker[1] = 0;
+        break;
+    }
+
+    marker_expand(other_marker, other_segment_base,
+                  abort_other ? other_pseg->marker_self
+                              : my_pseg->marker_other);
+
+    if (abort_other && other_pseg->marker_self[0] == 0) {
+        if (kind == WRITE_READ_CONTENTION)
+            strcpy(other_pseg->marker_self, "<read at unknown location>");
+        else
+            strcpy(other_pseg->marker_self, "<no location information>");
+    }
+
+    release_marker_lock(other_segment_base);
+}
+
+static void marker_fetch_inev(void)
+{
+    uintptr_t marker[2];
+    marker_fetch(STM_SEGMENT->running_thread, marker);
+    STM_PSEGMENT->marker_inev[0] = marker[0];
+    STM_PSEGMENT->marker_inev[1] = marker[1];
+}
diff --git a/rpython/translator/stm/src_stm/stm/marker.h b/rpython/translator/stm/src_stm/stm/marker.h
--- a/rpython/translator/stm/src_stm/stm/marker.h
+++ b/rpython/translator/stm/src_stm/stm/marker.h
@@ -1,6 +1,13 @@
 /* Imported by rpython/translator/stm/import_stmgc.py */
-static void marker_fetch_expand(struct stm_priv_segment_info_s *pseg);
+static void marker_fetch(stm_thread_local_t *tl, uintptr_t marker[2]);
+static void marker_fetch_inev(void);
+static void marker_expand(uintptr_t marker[2], char *segment_base,
+                          char *outmarker);
+static void marker_default_for_abort(struct stm_priv_segment_info_s *pseg);
 static void marker_copy(stm_thread_local_t *tl,
                         struct stm_priv_segment_info_s *pseg,
                         enum stm_time_e attribute_to, double time);
+
+static void marker_contention(int kind, bool abort_other,
+                              uint8_t other_segment_num, object_t *obj);
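marker_expand() delegates the actual formatting to the user-supplied stmcb_expand_marker hook, whose argument list can be read off the call site above: the segment base, the odd number from the shadow stack, the object that follows it, and an output buffer. A hypothetical minimal implementation; the body is invented, and in PyPy the odd number encodes a bytecode position inside the code object passed as 'following_object':

    #include <stdint.h>
    #include <stdio.h>

    typedef struct object_s object_t;   /* opaque here */

    /* sketch of a user-provided hook, as invoked by marker_expand() */
    static void expand_marker_example(char *segment_base,
                                      uintptr_t odd_number,
                                      object_t *following_object,
                                      char *outputbuf, size_t outputbufsize)
    {
        /* an odd number is typically a 2*n+1 encoding of a position n */
        snprintf(outputbuf, outputbufsize, "code object %p, position %lu",
                 (void *)following_object, (unsigned long)(odd_number >> 1));
        (void)segment_base;             /* unused in this toy version */
    }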
diff --git a/rpython/translator/stm/src_stm/stm/nursery.c b/rpython/translator/stm/src_stm/stm/nursery.c
--- a/rpython/translator/stm/src_stm/stm/nursery.c
+++ b/rpython/translator/stm/src_stm/stm/nursery.c
@@ -216,7 +216,9 @@
            content); or add the object to 'large_overflow_objects'.
         */
         if (STM_PSEGMENT->minor_collect_will_commit_now) {
+            acquire_privatization_lock();
             synchronize_object_now(obj);
+            release_privatization_lock();
         }
         else
             LIST_APPEND(STM_PSEGMENT->large_overflow_objects, obj);
 
@@ -233,6 +235,18 @@
                    _collect_now(item));
 }
 
+static void collect_roots_from_markers(uintptr_t num_old)
+{
+    /* visit the marker objects */
+    struct list_s *mlst = STM_PSEGMENT->modified_old_objects_markers;
+    STM_PSEGMENT->modified_old_objects_markers_num_old = list_count(mlst);
+    uintptr_t i, total = list_count(mlst);
+    assert((total & 1) == 0);
+    for (i = num_old + 1; i < total; i += 2) {
+        minor_trace_if_young((object_t **)list_ptr_to_item(mlst, i));
+    }
+}
+
 static size_t throw_away_nursery(struct stm_priv_segment_info_s *pseg)
 {
     /* reset the nursery by zeroing it */
@@ -282,6 +296,8 @@
 
     dprintf(("minor_collection commit=%d\n", (int)commit));
 
+    acquire_marker_lock(STM_SEGMENT->segment_base);
+
     STM_PSEGMENT->minor_collect_will_commit_now = commit;
     if (!commit) {
         /* 'STM_PSEGMENT->overflow_number' is used now by this collection,
@@ -297,6 +313,7 @@
     /* All the objects we move out of the nursery become "overflow"
        objects.  We use the list 'objects_pointing_to_nursery'
        to hold the ones we didn't trace so far. */
+    uintptr_t num_old;
     if (STM_PSEGMENT->objects_pointing_to_nursery == NULL) {
         STM_PSEGMENT->objects_pointing_to_nursery = list_create();
 
@@ -306,7 +323,12 @@
            into objects_pointing_to_nursery, but instead we use the
            following shortcut */
         collect_modified_old_objects();
+        num_old = 0;
     }
+    else
+        num_old = STM_PSEGMENT->modified_old_objects_markers_num_old;
+
+    collect_roots_from_markers(num_old);
 
     collect_roots_in_nursery();
 
@@ -319,6 +341,8 @@
 
     assert(MINOR_NOTHING_TO_DO(STM_PSEGMENT));
     assert(list_is_empty(STM_PSEGMENT->objects_pointing_to_nursery));
+
+    release_marker_lock(STM_SEGMENT->segment_base);
 }
 
 static void minor_collection(bool commit)
diff --git a/rpython/translator/stm/src_stm/stm/nursery.h b/rpython/translator/stm/src_stm/stm/nursery.h
--- a/rpython/translator/stm/src_stm/stm/nursery.h
+++ b/rpython/translator/stm/src_stm/stm/nursery.h
@@ -2,6 +2,7 @@
 
 /* '_stm_nursery_section_end' is either NURSERY_END or NSE_SIGxxx */
 #define NSE_SIGPAUSE       STM_TIME_WAIT_OTHER
+#define NSE_SIGCOMMITSOON  STM_TIME_SYNC_COMMIT_SOON
 
 static uint32_t highest_overflow_number;
 
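collect_roots_from_markers() above exploits the lockstep layout of the two lists: entries at even indices of the markers list are odd numbers (never pointers), entries at odd indices are the recorded objects, and everything below 'num_old' was already traced by a previous minor collection of the same transaction. A standalone sketch of just that loop, with minor_trace_if_young stubbed out:

    #include <stdint.h>

    static void minor_trace_if_young(uintptr_t *pobj) { (void)pobj; /* stub */ }

    /* markers[2*k]   : odd number, skipped (cannot point into the nursery)
       markers[2*k+1] : recorded object, the only word that needs tracing;
       pairs before 'num_old' were handled by an earlier minor collection */
    static void trace_new_marker_objects(uintptr_t *markers, uintptr_t total,
                                         uintptr_t num_old)
    {
        uintptr_t i;
        for (i = num_old + 1; i < total; i += 2)
            minor_trace_if_young(&markers[i]);
    }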
diff --git a/rpython/translator/stm/src_stm/stm/pages.c b/rpython/translator/stm/src_stm/stm/pages.c
--- a/rpython/translator/stm/src_stm/stm/pages.c
+++ b/rpython/translator/stm/src_stm/stm/pages.c
@@ -109,18 +109,20 @@
 {
     /* check this thread's 'pages_privatized' bit */
     uint64_t bitmask = 1UL << (STM_SEGMENT->segment_num - 1);
-    struct page_shared_s *ps = &pages_privatized[pagenum - PAGE_FLAG_START];
+    volatile struct page_shared_s *ps = (volatile struct page_shared_s *)
+        &pages_privatized[pagenum - PAGE_FLAG_START];
     if (ps->by_segment & bitmask) {
         /* the page is already privatized; nothing to do */
         return;
     }
 
-#ifndef NDEBUG
-    spinlock_acquire(lock_pages_privatizing[STM_SEGMENT->segment_num]);
-#endif
+    long i;
+    for (i = 1; i <= NB_SEGMENTS; i++) {
+        spinlock_acquire(get_priv_segment(i)->privatization_lock);
+    }
 
     /* add this thread's 'pages_privatized' bit */
-    __sync_fetch_and_add(&ps->by_segment, bitmask);
+    ps->by_segment |= bitmask;
 
     /* "unmaps" the page to make the address space location correspond
        again to its underlying file offset (XXX later we should again
@@ -134,9 +136,9 @@
     /* copy the content from the shared (segment 0) source */
     pagecopy(new_page, stm_object_pages + pagenum * 4096UL);
 
-#ifndef NDEBUG
-    spinlock_release(lock_pages_privatizing[STM_SEGMENT->segment_num]);
-#endif
+    for (i = NB_SEGMENTS; i >= 1; i--) {
+        spinlock_release(get_priv_segment(i)->privatization_lock);
+    }
 }
 
 static void _page_do_reshare(long segnum, uintptr_t pagenum)
diff --git a/rpython/translator/stm/src_stm/stm/pages.h b/rpython/translator/stm/src_stm/stm/pages.h
--- a/rpython/translator/stm/src_stm/stm/pages.h
+++ b/rpython/translator/stm/src_stm/stm/pages.h
@@ -35,20 +35,6 @@
 };
 
 static struct page_shared_s pages_privatized[PAGE_FLAG_END - PAGE_FLAG_START];
 
-/* Rules for concurrent access to this array, possibly with is_private_page():
-
-   - we clear bits only during major collection, when all threads are
-     synchronized anyway
-
-   - we set only the bit corresponding to our segment number, using
-     an atomic addition; and we do it _before_ we actually make the
-     page private.
-
-   - concurrently, other threads checking the bits might (rarely)
-     get the answer 'true' to is_private_page() even though it is not
-     actually private yet.  This inconsistency is in the direction
-     that we want for synchronize_object_now().
-*/
 
 static void pages_initialize_shared(uintptr_t pagenum, uintptr_t count);
 static void page_privatize(uintptr_t pagenum);
@@ -73,7 +59,3 @@
     if (pages_privatized[pagenum - PAGE_FLAG_START].by_segment != 0)
         page_reshare(pagenum);
 }
-
-#ifndef NDEBUG
-static char lock_pages_privatizing[NB_SEGMENTS + 1] = { 0 };
-#endif
diff --git a/rpython/translator/stm/src_stm/stm/setup.c b/rpython/translator/stm/src_stm/stm/setup.c
--- a/rpython/translator/stm/src_stm/stm/setup.c
+++ b/rpython/translator/stm/src_stm/stm/setup.c
@@ -79,6 +79,7 @@
         pr->objects_pointing_to_nursery = NULL;
         pr->large_overflow_objects = NULL;
         pr->modified_old_objects = list_create();
+        pr->modified_old_objects_markers = list_create();
         pr->young_weakrefs = list_create();
         pr->old_weakrefs = list_create();
         pr->young_outside_nursery = tree_create();
@@ -116,6 +117,7 @@
         assert(pr->objects_pointing_to_nursery == NULL);
         assert(pr->large_overflow_objects == NULL);
         list_free(pr->modified_old_objects);
+        list_free(pr->modified_old_objects_markers);
         list_free(pr->young_weakrefs);
         list_free(pr->old_weakrefs);
         tree_free(pr->young_outside_nursery);
diff --git a/rpython/translator/stm/src_stm/stm/sync.c b/rpython/translator/stm/src_stm/stm/sync.c
--- a/rpython/translator/stm/src_stm/stm/sync.c
+++ b/rpython/translator/stm/src_stm/stm/sync.c
@@ -3,6 +3,10 @@
 #include <sys/prctl.h>
 #include <asm/prctl.h>
 
+#ifndef _STM_CORE_H_
+# error "must be compiled via stmgc.c"
+#endif
+
 /* Each segment can be in one of three possible states, described by
    the segment variable 'safe_point':
 
@@ -261,6 +265,18 @@
 static bool _safe_points_requested = false;
 #endif
 
+static void signal_other_to_commit_soon(struct stm_priv_segment_info_s *other_pseg)
+{
+    assert(_has_mutex());
+    /* never overwrite abort signals or safepoint requests
+       (too messy to deal with) */
+    if (!other_pseg->signalled_to_commit_soon
+        && !is_abort(other_pseg->pub.nursery_end)
+        && !pause_signalled) {
+        other_pseg->pub.nursery_end = NSE_SIGCOMMITSOON;
+    }
+}
+
 static void signal_everybody_to_pause_running(void)
 {
     assert(_safe_points_requested == false);
@@ -324,7 +340,21 @@
         if (STM_SEGMENT->nursery_end == NURSERY_END)
             break;    /* no safe point requested */
 
+        if (STM_SEGMENT->nursery_end == NSE_SIGCOMMITSOON) {
+            if (previous_state == -1) {
+                previous_state = change_timing_state(STM_TIME_SYNC_COMMIT_SOON);
+            }
+
+            STM_PSEGMENT->signalled_to_commit_soon = true;
+            stmcb_commit_soon();
+            if (!pause_signalled) {
+                STM_SEGMENT->nursery_end = NURSERY_END;
+                break;
+            }
+            STM_SEGMENT->nursery_end = NSE_SIGPAUSE;
+        }
         assert(STM_SEGMENT->nursery_end == NSE_SIGPAUSE);
+        assert(pause_signalled);
 
         /* If we are requested to enter a safe-point, we cannot proceed now.
            Wait until the safe-point request is removed for us. */
diff --git a/rpython/translator/stm/src_stm/stm/timing.c b/rpython/translator/stm/src_stm/stm/timing.c
--- a/rpython/translator/stm/src_stm/stm/timing.c
+++ b/rpython/translator/stm/src_stm/stm/timing.c
@@ -26,10 +26,11 @@
     return oldstate;
 }
 
-static void change_timing_state_tl(stm_thread_local_t *tl,
-                                   enum stm_time_e newstate)
+static double change_timing_state_tl(stm_thread_local_t *tl,
+                                     enum stm_time_e newstate)
 {
     TIMING_CHANGE(tl, newstate);
+    return elasped;
 }
 
 static void timing_end_transaction(enum stm_time_e attribute_to)
@@ -59,6 +60,7 @@
     "wait write read",
     "wait inevitable",
     "wait other",
+    "sync commit soon",
     "bookkeeping",
     "minor gc",
     "major gc",
diff --git a/rpython/translator/stm/src_stm/stm/timing.h b/rpython/translator/stm/src_stm/stm/timing.h
--- a/rpython/translator/stm/src_stm/stm/timing.h
+++ b/rpython/translator/stm/src_stm/stm/timing.h
@@ -9,7 +9,7 @@
 }
 
 static enum stm_time_e change_timing_state(enum stm_time_e newstate);
-static void change_timing_state_tl(stm_thread_local_t *tl,
-                                   enum stm_time_e newstate);
+static double change_timing_state_tl(stm_thread_local_t *tl,
+                                     enum stm_time_e newstate);
 
 static void timing_end_transaction(enum stm_time_e attribute_to);
 
diff --git a/rpython/translator/stm/src_stm/stmgc.h b/rpython/translator/stm/src_stm/stmgc.h
--- a/rpython/translator/stm/src_stm/stmgc.h
+++ b/rpython/translator/stm/src_stm/stmgc.h
@@ -67,6 +67,7 @@
     STM_TIME_WAIT_WRITE_READ,
     STM_TIME_WAIT_INEVITABLE,
     STM_TIME_WAIT_OTHER,
+    STM_TIME_SYNC_COMMIT_SOON,
     STM_TIME_BOOKKEEPING,
     STM_TIME_MINOR_GC,
     STM_TIME_MAJOR_GC,
@@ -217,9 +218,13 @@
    The "size rounded up" must be a multiple of 8 and at least 16.
    "Tracing" an object means enumerating all GC references in it,
    by invoking the callback passed as argument.
+   stmcb_commit_soon() is called when it is advised to commit
+   the transaction as soon as possible in order to avoid conflicts
+   or improve performance in general.
 */
 extern ssize_t stmcb_size_rounded_up(struct object_s *);
 extern void stmcb_trace(struct object_s *, void (object_t **));
+extern void stmcb_commit_soon(void);
 
 /* Allocate an object of the given size, which must be a multiple
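The new stmcb_commit_soon() declaration above is a callback that the embedding program (PyPy's RPython runtime, in practice) must provide; the library calls it from the contention code and from the safe-point loop in sync.c when another segment asks this one to commit quickly. A hypothetical minimal user-side implementation; the flag name is invented, and a real interpreter would check it at its next bytecode boundary and commit there:

    #include <stdbool.h>

    static volatile bool commit_soon_requested = false;   /* invented flag */

    /* called by stmgc when another thread asked us to commit as soon as
       possible, to reduce conflicts or let an inevitable transaction run */
    void stmcb_commit_soon(void)
    {
        commit_soon_requested = true;
    }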