Author: Tobias Weber <tobias_webe...@gmx.de> Branch: c8-efficient-serial-execution-master Changeset: r2152:d56fd821ed46 Date: 2017-08-21 12:10 +0200 http://bitbucket.org/pypy/stmgc/changeset/d56fd821ed46/
Log: Merge TCP style optimization diff --git a/c8/stm/core.c b/c8/stm/core.c --- a/c8/stm/core.c +++ b/c8/stm/core.c @@ -1117,11 +1117,9 @@ _do_start_transaction(tl); STM_PSEGMENT->commit_if_not_atomic = false; - if (repeat_count == 0) { /* else, 'nursery_mark' was already set - in abort_data_structures_from_segment_num() */ - STM_SEGMENT->nursery_mark = ((stm_char *)_stm_nursery_start + - stm_fill_mark_nursery_bytes); - } + STM_SEGMENT->nursery_mark = ((stm_char *)_stm_nursery_start + + stm_get_transaction_length(tl)); + return repeat_count; } @@ -1304,6 +1302,8 @@ s_mutex_unlock(); + stm_transaction_length_handle_validation(thread_local_for_logging, false); + /* between transactions, call finalizers. this will execute a transaction itself */ if (tl != NULL) @@ -1468,22 +1468,6 @@ if (pseg->active_queues) queues_deactivate_all(pseg, /*at_commit=*/false); - - /* Set the next nursery_mark: first compute the value that - nursery_mark must have had at the start of the aborted transaction */ - stm_char *old_mark =pseg->pub.nursery_mark + pseg->total_throw_away_nursery; - - /* This means that the limit, in term of bytes, was: */ - uintptr_t old_limit = old_mark - (stm_char *)_stm_nursery_start; - - /* If 'total_throw_away_nursery' is smaller than old_limit, use that */ - if (pseg->total_throw_away_nursery < old_limit) - old_limit = pseg->total_throw_away_nursery; - - /* Now set the new limit to 90% of the old limit */ - pseg->pub.nursery_mark = ((stm_char *)_stm_nursery_start + - (uintptr_t)(old_limit * 0.9)); - #ifdef STM_NO_AUTOMATIC_SETJMP did_abort = 1; #endif @@ -1518,6 +1502,8 @@ tl->self_or_0_if_atomic = (intptr_t)tl; /* clear the 'atomic' flag */ STM_PSEGMENT->atomic_nesting_levels = 0; + stm_transaction_length_handle_validation(tl, true); + if (tl->mem_clear_on_abort) memset(tl->mem_clear_on_abort, 0, tl->mem_bytes_to_clear_on_abort); if (tl->mem_reset_on_abort) { diff --git a/c8/stm/nursery.c b/c8/stm/nursery.c --- a/c8/stm/nursery.c +++ b/c8/stm/nursery.c @@ -4,6 +4,8 @@ #endif #include "finalizer.h" +#include <math.h> +#include <inttypes.h> /************************************************************/ @@ -13,14 +15,79 @@ static uintptr_t _stm_nursery_start; +#define DEFAULT_FILL_MARK_NURSERY_BYTES (NURSERY_SIZE / 4) -#define DEFAULT_FILL_MARK_NURSERY_BYTES (NURSERY_SIZE / 4) +// corresponds to ~4 GB +#define LARGE_FILL_MARK_NURSERY_BYTES 0x100000000L -uintptr_t stm_fill_mark_nursery_bytes = DEFAULT_FILL_MARK_NURSERY_BYTES; +// corresponds to ~4 MB nursery fill +#define STM_DEFAULT_RELATIVE_TRANSACTION_LENGTH (0.001) +// corresponds to ~400 KB nursery fill +#define STM_MIN_RELATIVE_TRANSACTION_LENGTH (0.0001) + +#define BACKOFF_COUNT (20) +#define BACKOFF_MULTIPLIER (BACKOFF_COUNT / -log10(STM_MIN_RELATIVE_TRANSACTION_LENGTH)) + +static inline void set_backoff(stm_thread_local_t *tl, double rel_trx_len) { + /* the shorter the trx, the more backoff: + think a*x + b = backoff, x := -log(rel-trx-len), + backoff is <BACKOFF_COUNT> + b at default trx length, + linear decrease to b at max trx length */ + const int b = 5; + int new_backoff = (int)((BACKOFF_MULTIPLIER * -log10(rel_trx_len)) + b); + tl->transaction_length_backoff = new_backoff; + // printf("thread %d, backoff %d\n", tl->thread_local_counter, tl->transaction_length_backoff); + tl->linear_transaction_length_increment = rel_trx_len / new_backoff; +} + +static inline double get_new_transaction_length(stm_thread_local_t *tl, bool aborts) { + const int multiplier = 2; + double previous = tl->relative_transaction_length; + double new; + if (aborts) { + new = previous / multiplier; + if (new < STM_MIN_RELATIVE_TRANSACTION_LENGTH) { + new = STM_MIN_RELATIVE_TRANSACTION_LENGTH; + } + set_backoff(tl, new); + } else if (tl->transaction_length_backoff == 0) { + // backoff counter is zero, exponential increase up to 1 + new = previous * multiplier; + if (new > 1) { + new = 1; + } + if (tl->linear_transaction_length_increment != 0) { + // thread had to abort before: slow start + set_backoff(tl, new); + } + } else { // not abort and backoff != 0 + // in backoff, linear increase up to 1 + new = previous + tl->linear_transaction_length_increment; + if (new > 1) { + new = 1; + } + tl->transaction_length_backoff -= 1; + } + return new; +} + +static inline void stm_transaction_length_handle_validation(stm_thread_local_t *tl, bool aborts) { + tl->relative_transaction_length = get_new_transaction_length(tl, aborts); +} + +static inline uintptr_t stm_get_transaction_length(stm_thread_local_t *tl) { + double relative_additional_length = tl->relative_transaction_length; + publish_custom_value_event( + relative_additional_length, STM_SINGLE_THREAD_MODE_ADAPTIVE); + uintptr_t result = + (uintptr_t)(LARGE_FILL_MARK_NURSERY_BYTES * relative_additional_length); + // printf("%020" PRIxPTR "\n", result); + return result; +} + /************************************************************/ - static void setup_nursery(void) { assert(_STM_FAST_ALLOC <= NURSERY_SIZE); diff --git a/c8/stm/nursery.h b/c8/stm/nursery.h --- a/c8/stm/nursery.h +++ b/c8/stm/nursery.h @@ -56,4 +56,7 @@ static inline struct object_s *mark_loc(object_t *obj); static inline bool _is_from_same_transaction(object_t *obj); +static inline void stm_transaction_length_handle_validation(stm_thread_local_t *tl, bool aborts); +static inline uintptr_t stm_get_transaction_length(stm_thread_local_t *tl); + #endif diff --git a/c8/stm/setup.c b/c8/stm/setup.c --- a/c8/stm/setup.c +++ b/c8/stm/setup.c @@ -245,6 +245,12 @@ numbers automatically. */ tl->last_associated_segment_num = num + 1; tl->thread_local_counter = ++thread_local_counters; + + /* init adaptive transaction length mode */ + tl->relative_transaction_length = STM_DEFAULT_RELATIVE_TRANSACTION_LENGTH; + tl->transaction_length_backoff = 0; + tl->linear_transaction_length_increment = 0; + *_get_cpth(tl) = pthread_self(); _init_shadow_stack(tl); set_gs_register(get_segment_base(num + 1)); diff --git a/c8/stm/sync.c b/c8/stm/sync.c --- a/c8/stm/sync.c +++ b/c8/stm/sync.c @@ -176,6 +176,16 @@ /************************************************************/ +static uint8_t number_of_segments_in_use(void) { + uint8_t result = 0; + int num; + for (num = 1; num < NB_SEGMENTS; num++) { + if (sync_ctl.in_use1[num] > 0) { + result++; + } + } + return result; +} #if 0 void stm_wait_for_current_inevitable_transaction(void) @@ -202,7 +212,6 @@ } #endif - static void acquire_thread_segment(stm_thread_local_t *tl) { /* This function acquires a segment for the currently running thread, diff --git a/c8/stm/sync.h b/c8/stm/sync.h --- a/c8/stm/sync.h +++ b/c8/stm/sync.h @@ -22,6 +22,7 @@ static void set_gs_register(char *value); static void ensure_gs_register(long segnum); +static uint8_t number_of_segments_in_use(void); /* acquire and release one of the segments for running the given thread (must have the mutex acquired!) */ diff --git a/c8/stmgc.h b/c8/stmgc.h --- a/c8/stmgc.h +++ b/c8/stmgc.h @@ -88,6 +88,13 @@ struct stm_thread_local_s *prev, *next; intptr_t self_or_0_if_atomic; void *creating_pthread[2]; + /* == adaptive single thread mode == */ + /* factor that is multiplied with max transaction length before the start of the next transaction on this thread */ + double relative_transaction_length; + /* when zero, transaction length may increase exponentially, otherwise transaction length may only increase linearly. is (re-)set to some value upon abort and counted down until zero upon successful validation. */ + int transaction_length_backoff; + /* during the backoff, transaction length may increase linearly by this increment on every successful validation */ + double linear_transaction_length_increment; } stm_thread_local_t; _______________________________________________ pypy-commit mailing list pypy-commit@python.org https://mail.python.org/mailman/listinfo/pypy-commit