Hi, The garbage collector currently has a couple of less-than-optimal behaviors:
1. If there's constantly only a small amount of heap left after a major collection, then there will be constant major collections. 2. If the used heap space yo-yos, then there will be constant heap resizes. These patches address those two cases. Benchmarks for the 1st patch: http://paste.call-cc.org/paste?id=02242cbbd2c79456920d8dfdb327b791231eefd2 http://paste.call-cc.org/paste?id=02242cbbd2c79456920d8dfdb327b791231eefd2#a1 - note the up to 2x speedup in some cases Benchmarks with both patches applied: http://paste.call-cc.org/paste?id=02242cbbd2c79456920d8dfdb327b791231eefd2#a2 - see especially kernwyk-string Some picks: Vanilla master: Total run time (CPU time): 5m39.5449999999999s Total time spent in major GCs: 54.041s Total number of major GCs: 39245 With -:hf4M: Total run time (CPU time): 4m57.9510000000001s Total time spent in major GCs: 15.862s Total number of major GCs: 4766
>From d57b4cb1f8c849585ae049c85dbd225ce9804667 Mon Sep 17 00:00:00 2001 From: megane <megan...@gmail.com> Date: Sun, 5 Jan 2020 09:03:14 +0200 Subject: [PATCH 1/2] Prevent excessive major gcs by having decent amount of unused heap --- manual/Using the compiler | 2 ++ runtime.c | 57 +++++++++++++++++++++++++++------------ 2 files changed, 42 insertions(+), 17 deletions(-) diff --git a/manual/Using the compiler b/manual/Using the compiler index 9b096153..a75c51a7 100644 --- a/manual/Using the compiler +++ b/manual/Using the compiler @@ -223,6 +223,8 @@ by the startup code and will not be contained in the result of ; {{-:hmNUMBER}} : Specifies a maximal heap size (including both semispaces). The default is (2GB - 15). +; {{-:hfNUMBER}} : If the free heap space is less than this number (8M by default), then the heap is grown. This is approximately the amount of generated heap garbage in bytes after which a major garbage collection will happen. + ; {{-:hsPERCENTAGE}} : Sets the shrink rate of the heap in percent. The heap is shrunk to {{PERCENTAGE}} when the watermark is reached. The default is 50. Note: If you want to make sure that the heap never shrinks, specify a value of {{0}}. (this can be useful in situations where an optimal heap-size is known in advance). ; {{-:huPERCENTAGE}} : Sets the memory usage watermark below which heap shrinking is triggered. The default is 25. 
diff --git a/runtime.c b/runtime.c index 8e049623..accc3930 100644 --- a/runtime.c +++ b/runtime.c @@ -163,6 +163,7 @@ static C_TLS int timezone; #define DEFAULT_HEAP_GROWTH 200 #define DEFAULT_HEAP_SHRINKAGE 50 #define DEFAULT_HEAP_SHRINKAGE_USED 25 +#define DEFAULT_HEAP_MIN_FREE (4 * 1024 * 1024) #define DEFAULT_FORWARDING_TABLE_SIZE 32 #define DEFAULT_LOCATIVE_TABLE_SIZE 32 #define DEFAULT_COLLECTIBLES_SIZE 1024 @@ -360,6 +361,7 @@ C_TLS C_uword C_heap_growth = DEFAULT_HEAP_GROWTH, C_heap_shrinkage = DEFAULT_HEAP_SHRINKAGE, C_heap_shrinkage_used = DEFAULT_HEAP_SHRINKAGE_USED, + C_heap_half_min_free = DEFAULT_HEAP_MIN_FREE, C_maximal_heap_size = DEFAULT_MAXIMAL_HEAP_SIZE; C_TLS time_t C_startup_time_seconds, @@ -1362,6 +1364,7 @@ void CHICKEN_parse_command_line(int argc, char *argv[], C_word *heap, C_word *st " -:o disable stack overflow checks\n" " -:hiSIZE set initial heap size\n" " -:hmSIZE set maximal heap size\n" + " -:hfSIZE set minimum unused heap size\n" " -:hgPERCENTAGE set heap growth percentage\n" " -:hsPERCENTAGE set heap shrink percentage\n" " -:huPERCENTAGE set percentage of memory used at which heap will be shrunk\n" @@ -1390,6 +1393,9 @@ void CHICKEN_parse_command_line(int argc, char *argv[], C_word *heap, C_word *st *heap = arg_val(ptr + 1); heap_size_changed = 1; goto next; + case 'f': + C_heap_half_min_free = arg_val(ptr + 1); + goto next; case 'g': C_heap_growth = arg_val(ptr + 1); goto next; @@ -3600,23 +3606,40 @@ C_regparm void C_fcall C_reclaim(void *trampoline, C_word c) } update_locative_table(gc_mode); - count = (C_uword)tospace_top - (C_uword)tospace_start; - - /*** isn't gc_mode always GC_MAJOR here? 
*/ - /* NOTE: count is actual usage, heap_size is both halves */ - if(gc_mode == GC_MAJOR && - count < percentage(heap_size/2, C_heap_shrinkage_used) && - C_heap_shrinkage > 0 && - heap_size > MINIMAL_HEAP_SIZE && !C_heap_size_is_fixed) - C_rereclaim2(percentage(heap_size, C_heap_shrinkage), 0); - else { - C_fromspace_top = tospace_top; - tmp = fromspace_start; - fromspace_start = tospace_start; - tospace_start = tospace_top = tmp; - tmp = C_fromspace_limit; - C_fromspace_limit = tospace_limit; - tospace_limit = tmp; + count = (C_uword)tospace_top - (C_uword)tospace_start; // Actual used, < heap_size/2 + + { + C_uword min_half = count + C_heap_half_min_free; + C_uword low_half = percentage(heap_size/2, C_heap_shrinkage_used); + C_uword grown = percentage(heap_size, C_heap_growth); + C_uword shrunk = percentage(heap_size, C_heap_shrinkage); + + /*** isn't gc_mode always GC_MAJOR here? */ + if(gc_mode == GC_MAJOR && !C_heap_size_is_fixed && + C_heap_shrinkage > 0 && + count < low_half && + (min_half * 2) <= shrunk && // Min. size trumps shrinkage + heap_size > MINIMAL_HEAP_SIZE) { + if(gc_report_flag) { + C_dbg(C_text("GC"), C_text("Heap low water mark hit (%d%%), shrinking...\n"), + C_heap_shrinkage_used); + } + C_rereclaim2(shrunk, 0); + } else if (gc_mode == GC_MAJOR && !C_heap_size_is_fixed && + (heap_size / 2) < min_half) { + if(gc_report_flag) { + C_dbg(C_text("GC"), C_text("Heap high water mark hit, growing...\n")); + } + C_rereclaim2(grown, 0); + } else { + C_fromspace_top = tospace_top; + tmp = fromspace_start; + fromspace_start = tospace_start; + tospace_start = tospace_top = tmp; + tmp = C_fromspace_limit; + C_fromspace_limit = tospace_limit; + tospace_limit = tmp; + } } i_like_spaghetti: -- 2.17.1
>From 91a7df7f526956b487cb0443b6d34739e89d6c82 Mon Sep 17 00:00:00 2001 From: megane <megan...@gmail.com> Date: Tue, 7 Jan 2020 15:44:52 +0200 Subject: [PATCH 2/2] Prevent heap "shrink, grow, shrink, grow..." spam Without this kernwyk-string causes lots of back-and-forth heap resizes. --- runtime.c | 15 +++++++++++++-- 1 file changed, 13 insertions(+), 2 deletions(-) diff --git a/runtime.c b/runtime.c index accc3930..5ff6c2a2 100644 --- a/runtime.c +++ b/runtime.c @@ -164,6 +164,7 @@ static C_TLS int timezone; #define DEFAULT_HEAP_SHRINKAGE 50 #define DEFAULT_HEAP_SHRINKAGE_USED 25 #define DEFAULT_HEAP_MIN_FREE (4 * 1024 * 1024) +#define HEAP_SHRINK_COUNTS 10 #define DEFAULT_FORWARDING_TABLE_SIZE 32 #define DEFAULT_LOCATIVE_TABLE_SIZE 32 #define DEFAULT_COLLECTIBLES_SIZE 1024 @@ -362,7 +363,8 @@ C_TLS C_uword C_heap_shrinkage = DEFAULT_HEAP_SHRINKAGE, C_heap_shrinkage_used = DEFAULT_HEAP_SHRINKAGE_USED, C_heap_half_min_free = DEFAULT_HEAP_MIN_FREE, - C_maximal_heap_size = DEFAULT_MAXIMAL_HEAP_SIZE; + C_maximal_heap_size = DEFAULT_MAXIMAL_HEAP_SIZE, + heap_shrink_counter = 0; C_TLS time_t C_startup_time_seconds, profile_frequency = 10000; @@ -3614,22 +3616,31 @@ C_regparm void C_fcall C_reclaim(void *trampoline, C_word c) C_uword grown = percentage(heap_size, C_heap_growth); C_uword shrunk = percentage(heap_size, C_heap_shrinkage); + if (count < low_half) { + heap_shrink_counter++; + } else { + heap_shrink_counter = 0; + } + /*** isn't gc_mode always GC_MAJOR here? */ if(gc_mode == GC_MAJOR && !C_heap_size_is_fixed && C_heap_shrinkage > 0 && - count < low_half && + // This prevents grow, shrink, grow, shrink... spam + HEAP_SHRINK_COUNTS < heap_shrink_counter && (min_half * 2) <= shrunk && // Min. 
size trumps shrinkage heap_size > MINIMAL_HEAP_SIZE) { if(gc_report_flag) { C_dbg(C_text("GC"), C_text("Heap low water mark hit (%d%%), shrinking...\n"), C_heap_shrinkage_used); } + heap_shrink_counter = 0; C_rereclaim2(shrunk, 0); } else if (gc_mode == GC_MAJOR && !C_heap_size_is_fixed && (heap_size / 2) < min_half) { if(gc_report_flag) { C_dbg(C_text("GC"), C_text("Heap high water mark hit, growing...\n")); } + heap_shrink_counter = 0; C_rereclaim2(grown, 0); } else { C_fromspace_top = tospace_top; -- 2.17.1