Author: Remi Meier <remi.me...@gmail.com> Branch: evaluation Changeset: r2158:ce8f8880e0e2 Date: 2018-03-26 09:05 +0200 http://bitbucket.org/pypy/stmgc/changeset/ce8f8880e0e2/
Log: merge c8-efficient-serial-execution-master diff --git a/c8/demo/Makefile b/c8/demo/Makefile --- a/c8/demo/Makefile +++ b/c8/demo/Makefile @@ -17,7 +17,7 @@ H_FILES = ../stmgc.h ../stm/*.h C_FILES = ../stmgc.c ../stm/*.c -COMMON = -I.. -pthread -lrt -g -Wall -Werror -DSTM_LARGEMALLOC_TEST +COMMON = -I.. -pthread -lrt -lm -g -Wall -Werror -DSTM_LARGEMALLOC_TEST CC = gcc-seg-gs diff --git a/c8/stm/core.c b/c8/stm/core.c --- a/c8/stm/core.c +++ b/c8/stm/core.c @@ -381,6 +381,14 @@ static void readd_wb_executed_flags(void); static void check_all_write_barrier_flags(char *segbase, struct list_s *list); +static void signal_commit_to_inevitable_transaction(void) { + struct stm_priv_segment_info_s* inevitable_segment = get_inevitable_thread_segment(); + if (inevitable_segment != 0) { + // the inevitable thread is still running: set its "please commit" flag (is ignored by the inevitable thread if it is atomic) + inevitable_segment->commit_if_not_atomic = true; + } +} + static void wait_for_inevitable(void) { intptr_t detached = 0; @@ -397,6 +405,8 @@ try to detach an inevitable transaction regularly */ detached = fetch_detached_transaction(); if (detached == 0) { + // the inevitable trx was not detached or it was detached but is atomic + signal_commit_to_inevitable_transaction(); EMIT_WAIT(STM_WAIT_OTHER_INEVITABLE); if (!cond_wait_timeout(C_SEGMENT_FREE_OR_SAFE_POINT_REQ, 0.00001)) goto wait_some_more; @@ -1138,11 +1148,10 @@ } _do_start_transaction(tl); - if (repeat_count == 0) { /* else, 'nursery_mark' was already set - in abort_data_structures_from_segment_num() */ - STM_SEGMENT->nursery_mark = ((stm_char *)_stm_nursery_start + - stm_fill_mark_nursery_bytes); - } + STM_PSEGMENT->commit_if_not_atomic = false; + STM_SEGMENT->nursery_mark = ((stm_char *)_stm_nursery_start + + stm_get_transaction_length(tl)); + return repeat_count; } @@ -1271,6 +1280,8 @@ bool was_inev = STM_PSEGMENT->transaction_state == TS_INEVITABLE; _validate_and_add_to_commit_log(); + + 
stm_thread_local_t *tl_for_trx_len = STM_SEGMENT->running_thread; if (external) { /* from this point on, unlink the original 'stm_thread_local_t *' from its segment. Better do it as soon as possible, because @@ -1318,6 +1329,8 @@ s_mutex_unlock(); + stm_transaction_length_handle_validation(tl_for_trx_len, false); + /* between transactions, call finalizers. this will execute a transaction itself */ if (tl != NULL) @@ -1484,22 +1497,6 @@ if (pseg->active_queues) queues_deactivate_all(pseg, /*at_commit=*/false); - - /* Set the next nursery_mark: first compute the value that - nursery_mark must have had at the start of the aborted transaction */ - stm_char *old_mark =pseg->pub.nursery_mark + pseg->total_throw_away_nursery; - - /* This means that the limit, in term of bytes, was: */ - uintptr_t old_limit = old_mark - (stm_char *)_stm_nursery_start; - - /* If 'total_throw_away_nursery' is smaller than old_limit, use that */ - if (pseg->total_throw_away_nursery < old_limit) - old_limit = pseg->total_throw_away_nursery; - - /* Now set the new limit to 90% of the old limit */ - pseg->pub.nursery_mark = ((stm_char *)_stm_nursery_start + - (uintptr_t)(old_limit * 0.9)); - #ifdef STM_NO_AUTOMATIC_SETJMP did_abort = 1; #endif @@ -1534,6 +1531,8 @@ tl->self_or_0_if_atomic = (intptr_t)tl; /* clear the 'atomic' flag */ STM_PSEGMENT->atomic_nesting_levels = 0; + stm_transaction_length_handle_validation(tl, true); + if (tl->mem_clear_on_abort) memset(tl->mem_clear_on_abort, 0, tl->mem_bytes_to_clear_on_abort); if (tl->mem_reset_on_abort) { @@ -1588,7 +1587,7 @@ void _stm_become_inevitable(const char *msg) { - int num_waits = 0; + int num_waits = 1; timing_become_inevitable(); @@ -1599,50 +1598,48 @@ _stm_collectable_safe_point(); dprintf(("become_inevitable: %s\n", msg)); - if (any_soon_finished_or_inevitable_thread_segment() && - num_waits <= NB_SEGMENTS) { + if (any_soon_finished_or_inevitable_thread_segment()) { #if STM_TESTS /* for tests: another transaction */ 
stm_abort_transaction(); /* is already inevitable, abort */ #endif - bool timed_out = false; + signal_commit_to_inevitable_transaction(); s_mutex_lock(); if (any_soon_finished_or_inevitable_thread_segment() && - !safe_point_requested()) { + !safe_point_requested() && + num_waits <= NB_SEGMENTS) { /* wait until C_SEGMENT_FREE_OR_SAFE_POINT_REQ is signalled */ EMIT_WAIT(STM_WAIT_OTHER_INEVITABLE); - if (!cond_wait_timeout(C_SEGMENT_FREE_OR_SAFE_POINT_REQ, - 0.000054321)) - timed_out = true; + if (cond_wait_timeout(C_SEGMENT_FREE_OR_SAFE_POINT_REQ, 0.00001)) { + num_waits++; + } } s_mutex_unlock(); - - if (timed_out) { - /* try to detach another inevitable transaction, but - only after waiting a bit. This is necessary to avoid - deadlocks in some situations, which are hopefully - not too common. We don't want two threads constantly - detaching each other. */ - intptr_t detached = fetch_detached_transaction(); - if (detached != 0) { - EMIT_WAIT_DONE(); - commit_fetched_detached_transaction(detached); - } - } - else { - num_waits++; + /* XXX try to detach another inevitable transaction, but + only after waiting a bit. This is necessary to avoid + deadlocks in some situations, which are hopefully + not too common. We don't want two threads constantly + detaching each other. */ + intptr_t detached = fetch_detached_transaction(); + if (detached != 0) { + EMIT_WAIT_DONE(); + commit_fetched_detached_transaction(detached); + EMIT_WAIT(STM_WAIT_OTHER_INEVITABLE); } goto retry_from_start; } - EMIT_WAIT_DONE(); - if (!_validate_and_turn_inevitable()) - goto retry_from_start; + else { + EMIT_WAIT_DONE(); + if (!_validate_and_turn_inevitable()) { + EMIT_WAIT(STM_WAIT_OTHER_INEVITABLE); + goto retry_from_start; + } + } } - else { - if (!_validate_and_turn_inevitable()) - return; + else if (!_validate_and_turn_inevitable()) { + return; } /* There may be a concurrent commit of a detached Tx going on. 
@@ -1654,6 +1651,7 @@ stm_spin_loop(); assert(_stm_detached_inevitable_from_thread == 0); + STM_PSEGMENT->commit_if_not_atomic = false; soon_finished_or_inevitable_thread_segment(); STM_PSEGMENT->transaction_state = TS_INEVITABLE; diff --git a/c8/stm/core.h b/c8/stm/core.h --- a/c8/stm/core.h +++ b/c8/stm/core.h @@ -168,6 +168,9 @@ /* For stm_enable_atomic() */ uintptr_t atomic_nesting_levels; + + // TODO signal flag that is checked in throw_away_nursery() for making immediate commit + bool commit_if_not_atomic; }; enum /* safe_point */ { diff --git a/c8/stm/detach.c b/c8/stm/detach.c --- a/c8/stm/detach.c +++ b/c8/stm/detach.c @@ -215,6 +215,7 @@ } } +// TODO write tests, verify is working, verify no overflows with adaptive mode uintptr_t stm_is_atomic(stm_thread_local_t *tl) { assert(STM_SEGMENT->running_thread == tl); @@ -228,14 +229,18 @@ return STM_PSEGMENT->atomic_nesting_levels; } +// max intptr_t value is 7FFFFFFFFFFFFFFF on 64-bit => larger than 2 * huge value #define HUGE_INTPTR_VALUE 0x3000000000000000L void stm_enable_atomic(stm_thread_local_t *tl) { if (!stm_is_atomic(tl)) { + // do for outermost atomic block only tl->self_or_0_if_atomic = 0; /* increment 'nursery_mark' by HUGE_INTPTR_VALUE, so that - stm_should_break_transaction() returns always false */ + stm_should_break_transaction() returns always false. 
+ preserves the previous nursery_mark, unless it is < 0 + or >= huge value */ intptr_t mark = (intptr_t)STM_SEGMENT->nursery_mark; if (mark < 0) mark = 0; @@ -255,6 +260,7 @@ STM_PSEGMENT->atomic_nesting_levels--; if (STM_PSEGMENT->atomic_nesting_levels == 0) { + // revert changes by stm_enable_atomic only if we left the outermost atomic block tl->self_or_0_if_atomic = (intptr_t)tl; /* decrement 'nursery_mark' by HUGE_INTPTR_VALUE, to cancel what was done in stm_enable_atomic() */ diff --git a/c8/stm/nursery.c b/c8/stm/nursery.c --- a/c8/stm/nursery.c +++ b/c8/stm/nursery.c @@ -4,6 +4,8 @@ #endif #include "finalizer.h" +#include <math.h> +#include <inttypes.h> /************************************************************/ @@ -13,14 +15,77 @@ static uintptr_t _stm_nursery_start; +#define DEFAULT_FILL_MARK_NURSERY_BYTES (NURSERY_SIZE / 4) -#define DEFAULT_FILL_MARK_NURSERY_BYTES (NURSERY_SIZE / 4) +// corresponds to ~4 GB +#define LARGE_FILL_MARK_NURSERY_BYTES 0x100000000L -uintptr_t stm_fill_mark_nursery_bytes = DEFAULT_FILL_MARK_NURSERY_BYTES; +// corresponds to ~4 MB nursery fill +#define STM_DEFAULT_RELATIVE_TRANSACTION_LENGTH (0.001) +// corresponds to ~400 KB nursery fill +#define STM_MIN_RELATIVE_TRANSACTION_LENGTH (0.0001) + +#define BACKOFF_COUNT (20) +#define BACKOFF_MULTIPLIER (BACKOFF_COUNT / -log10(STM_MIN_RELATIVE_TRANSACTION_LENGTH)) + +static inline void set_backoff(stm_thread_local_t *tl, double rel_trx_len) { + /* the shorter the trx, the more backoff: + think a*x + b = backoff, x := -log(rel-trx-len), + backoff is <BACKOFF_COUNT> + b at default trx length, + linear decrease to b at max trx length */ + const int b = 5; + int new_backoff = (int)((BACKOFF_MULTIPLIER * -log10(rel_trx_len)) + b); + tl->transaction_length_backoff = new_backoff; + // printf("thread %d, backoff %d\n", tl->thread_local_counter, tl->transaction_length_backoff); + tl->linear_transaction_length_increment = rel_trx_len / new_backoff; +} + +static inline double 
get_new_transaction_length(stm_thread_local_t *tl, bool aborts) { + const int multiplier = 2; + double previous = tl->relative_transaction_length; + double new; + if (aborts) { + new = previous / multiplier; + if (new < STM_MIN_RELATIVE_TRANSACTION_LENGTH) { + new = STM_MIN_RELATIVE_TRANSACTION_LENGTH; + } + set_backoff(tl, new); + } else if (tl->transaction_length_backoff == 0) { + // backoff counter is zero, exponential increase up to 1 + new = previous * multiplier; + if (new > 1) { + new = 1; + } + if (tl->linear_transaction_length_increment != 0) { + // thread had to abort before: slow start + set_backoff(tl, new); + } + } else { // not abort and backoff != 0 + // in backoff, linear increase up to 1 + new = previous + tl->linear_transaction_length_increment; + if (new > 1) { + new = 1; + } + tl->transaction_length_backoff -= 1; + } + return new; +} + +static inline void stm_transaction_length_handle_validation(stm_thread_local_t *tl, bool aborts) { + tl->relative_transaction_length = get_new_transaction_length(tl, aborts); +} + +static inline uintptr_t stm_get_transaction_length(stm_thread_local_t *tl) { + double relative_additional_length = tl->relative_transaction_length; + uintptr_t result = + (uintptr_t)(LARGE_FILL_MARK_NURSERY_BYTES * relative_additional_length); + // printf("%020" PRIxPTR "\n", result); + return result; +} + /************************************************************/ - static void setup_nursery(void) { assert(_STM_FAST_ALLOC <= NURSERY_SIZE); @@ -500,6 +565,14 @@ pseg->pub.nursery_current = (stm_char *)_stm_nursery_start; pseg->pub.nursery_mark -= nursery_used; + assert((pseg->transaction_state == TS_INEVITABLE) || !pseg->commit_if_not_atomic); + if (pseg->commit_if_not_atomic + && pseg->transaction_state == TS_INEVITABLE + && pseg->pub.running_thread->self_or_0_if_atomic != 0) { + // transaction is inevitable, not atomic, and commit has been signalled by waiting thread: commit immediately + pseg->pub.nursery_mark = 0; + } + /* free 
any object left from 'young_outside_nursery' */ if (!tree_is_cleared(pseg->young_outside_nursery)) { wlog_t *item; diff --git a/c8/stm/nursery.h b/c8/stm/nursery.h --- a/c8/stm/nursery.h +++ b/c8/stm/nursery.h @@ -56,4 +56,7 @@ static inline struct object_s *mark_loc(object_t *obj); static inline bool _is_from_same_transaction(object_t *obj); +static inline void stm_transaction_length_handle_validation(stm_thread_local_t *tl, bool aborts); +static inline uintptr_t stm_get_transaction_length(stm_thread_local_t *tl); + #endif diff --git a/c8/stm/setup.c b/c8/stm/setup.c --- a/c8/stm/setup.c +++ b/c8/stm/setup.c @@ -277,6 +277,12 @@ numbers automatically. */ tl->last_associated_segment_num = num + 1; tl->thread_local_counter = ++thread_local_counters; + + /* init adaptive transaction length mode */ + tl->relative_transaction_length = STM_DEFAULT_RELATIVE_TRANSACTION_LENGTH; + tl->transaction_length_backoff = 0; + tl->linear_transaction_length_increment = 0; + *_get_cpth(tl) = pthread_self(); _init_shadow_stack(tl); set_gs_register(get_segment_base(num + 1)); diff --git a/c8/stm/sync.c b/c8/stm/sync.c --- a/c8/stm/sync.c +++ b/c8/stm/sync.c @@ -176,6 +176,18 @@ /************************************************************/ +#if 0 +static uint8_t number_of_segments_in_use(void) { + uint8_t result = 0; + int num; + for (num = 1; num < NB_SEGMENTS; num++) { + if (sync_ctl.in_use1[num] > 0) { + result++; + } + } + return result; +} +#endif #if 0 void stm_wait_for_current_inevitable_transaction(void) @@ -202,7 +214,6 @@ } #endif - static void acquire_thread_segment(stm_thread_local_t *tl) { /* This function acquires a segment for the currently running thread, @@ -293,6 +304,19 @@ return false; } +static struct stm_priv_segment_info_s* get_inevitable_thread_segment(void) +{ + struct stm_priv_segment_info_s* segment; + int num; + for (num = 1; num < NB_SEGMENTS; num++) { + segment = get_priv_segment(num); + if (segment->transaction_state == TS_INEVITABLE) { + return segment; 
+ } + } + return 0; +} + __attribute__((unused)) static bool _seems_to_be_running_transaction(void) { diff --git a/c8/stm/sync.h b/c8/stm/sync.h --- a/c8/stm/sync.h +++ b/c8/stm/sync.h @@ -29,6 +29,7 @@ static void release_thread_segment(stm_thread_local_t *tl); static void soon_finished_or_inevitable_thread_segment(void); static bool any_soon_finished_or_inevitable_thread_segment(void); +static struct stm_priv_segment_info_s* get_inevitable_thread_segment(void); enum sync_type_e { STOP_OTHERS_UNTIL_MUTEX_UNLOCK, diff --git a/c8/stmgc.h b/c8/stmgc.h --- a/c8/stmgc.h +++ b/c8/stmgc.h @@ -88,6 +88,13 @@ struct stm_thread_local_s *prev, *next; intptr_t self_or_0_if_atomic; void *creating_pthread[2]; + /* == adaptive single thread mode == */ + /* factor that is multiplied with max transaction length before the start of the next transaction on this thread */ + double relative_transaction_length; + /* when zero, transaction length may increase exponentially, otherwise transaction length may only increase linearly. is (re-)set to some value upon abort and counted down until zero upon successful validation. */ + int transaction_length_backoff; + /* during the backoff, transaction length may increase linearly by this increment on every successful validation */ + double linear_transaction_length_increment; } stm_thread_local_t; @@ -202,7 +209,7 @@ /* ==================== PUBLIC API ==================== */ /* Number of segments (i.e. how many transactions can be executed in - parallel, in maximum). If you try to start transactions in more + parallel, at maximum). If you try to start transactions in more threads than the number of segments, it will block, waiting for the next segment to become free. 
*/ @@ -464,14 +471,6 @@ return ((intptr_t)STM_SEGMENT->nursery_current >= (intptr_t)STM_SEGMENT->nursery_mark); } -extern uintptr_t stm_fill_mark_nursery_bytes; -/* ^^^ at the start of a transaction, 'nursery_mark' is initialized to - 'stm_fill_mark_nursery_bytes' inside the nursery. This value can - be larger than the nursery; every minor collection shifts the - current 'nursery_mark' down by one nursery-size. After an abort - and restart, 'nursery_mark' is set to ~90% of the value it reached - in the last attempt. -*/ /* "atomic" transaction: a transaction where stm_should_break_transaction() always returns false, and where stm_leave_transactional_zone() never @@ -575,21 +574,49 @@ STM_GC_MAJOR_START, STM_GC_MAJOR_DONE, + /* execution duration profiling events */ + STM_WARMUP_COMPLETE, + + STM_DURATION_START_TRX, + STM_DURATION_WRITE_GC_ONLY, + STM_DURATION_WRITE_SLOWPATH, + STM_DURATION_VALIDATION, + STM_DURATION_CREATE_CLE, + STM_DURATION_COMMIT_EXCEPT_GC, + STM_DURATION_MINOR_GC, + STM_DURATION_MAJOR_GC_LOG_ONLY, + STM_DURATION_MAJOR_GC_FULL, + + STM_SINGLE_THREAD_MODE_ON, + STM_SINGLE_THREAD_MODE_OFF, + STM_SINGLE_THREAD_MODE_ADAPTIVE, + _STM_EVENT_N }; -#define STM_EVENT_NAMES \ - "transaction start", \ - "transaction commit", \ - "transaction abort", \ - "contention write read", \ - "wait free segment", \ - "wait other inevitable", \ - "wait done", \ - "gc minor start", \ - "gc minor done", \ - "gc major start", \ - "gc major done" +#define STM_EVENT_NAMES \ + "transaction start", \ + "transaction commit", \ + "transaction abort", \ + "contention write read", \ + "wait free segment", \ + "wait other inevitable", \ + "wait done", \ + "gc minor start", \ + "gc minor done", \ + "gc major start", \ + "gc major done", \ + /* names of duration events */ \ + "marks completion of benchmark warm up phase", \ + "duration of transaction start", \ + "duration of gc due to write", \ + "duration of write slowpath", \ + "duration of validation", \ + "duration of commit log 
entry creation", \ + "duration of commit except gc", \ + "duration of minor gc", \ + "duration of major gc doing log clean up only", \ + "duration of full major gc" /* The markers pushed in the shadowstack are an odd number followed by a regular object pointer. */ diff --git a/c8/test/support.py b/c8/test/support.py --- a/c8/test/support.py +++ b/c8/test/support.py @@ -45,7 +45,6 @@ } stm_thread_local_t; char *stm_object_pages; -uintptr_t stm_fill_mark_nursery_bytes; void stm_read(object_t *obj); /*void stm_write(object_t *obj); use _checked_stm_write() instead */ @@ -671,7 +670,7 @@ undef_macros=['NDEBUG'], include_dirs=[parent_dir], extra_compile_args=['-g', '-O0', '-Werror', '-Wall'], #, '-ferror-limit=5'], - extra_link_args=['-g', '-lrt'], + extra_link_args=['-g', '-lrt', '-lm'], force_generic_engine=True) diff --git a/c8/test/test_basic.py b/c8/test/test_basic.py --- a/c8/test/test_basic.py +++ b/c8/test/test_basic.py @@ -736,6 +736,7 @@ self.check_char_everywhere(lp1, 'X') def test_stm_should_break_transaction_1(self): + py.test.skip("replaced by tcp logic") lib.stm_fill_mark_nursery_bytes = 100 # self.start_transaction() @@ -772,6 +773,7 @@ self.commit_transaction() def test_stm_should_break_transaction_2(self): + py.test.skip("replaced by tcp logic") lib.stm_fill_mark_nursery_bytes = 10000000 # n = 10000000 diff --git a/gcc-seg-gs/README.txt b/gcc-seg-gs/README.txt --- a/gcc-seg-gs/README.txt +++ b/gcc-seg-gs/README.txt @@ -8,9 +8,8 @@ compile the standard gcc. Of course, it is likely that gcc 6.1 will soon be available from your Linux distribution directly. -Note that with gcc 6.1, you no longer need gcc-5.1.0-patch.diff, and you -should not need the "-fno-*" options either (but we didn't check that -yet). +Note that with gcc 6.1, you no longer need gcc-5.1.0-patch.diff, but you +still need the "-fno-*" options. 
_______________________________________________ pypy-commit mailing list pypy-commit@python.org https://mail.python.org/mailman/listinfo/pypy-commit